├── storage_vector
    ├── graph_store.json
    └── index_store.json
├── chatbot_storage_vector
    ├── graph_store.json
    └── index_store.json
├── storage_graph
    ├── vector_store.json
    └── docstore.json
├── chatbot_storage_graph
    ├── vector_store.json
    └── index_store.json
├── requirements.txt
├── .devcontainer
    └── devcontainer.json
├── .gitignore
├── LICENSE
├── graph_rag_chatbot.py
├── kg_build_and_query_CN.py
├── kg_build_and_query.py
├── kg_retrieval_arguments_generation.py
└── notebooks
    └── KG_Building.ipynb


/storage_vector/graph_store.json:
--------------------------------------------------------------------------------
1 | {"graph_dict": {}}


--------------------------------------------------------------------------------
/chatbot_storage_vector/graph_store.json:
--------------------------------------------------------------------------------
1 | {"graph_dict": {}}


--------------------------------------------------------------------------------
/storage_graph/vector_store.json:
--------------------------------------------------------------------------------
1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}}


--------------------------------------------------------------------------------
/chatbot_storage_graph/vector_store.json:
--------------------------------------------------------------------------------
1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}}


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | llama-index==0.8.9
 2 | nebula3-python==3.4.0
 3 | networkx==3.0
 4 | nltk==3.8.1
 5 | openai==0.28.0
 6 | pyvis==0.3.2
 7 | PyYAML==6.0
 8 | tenacity==8.2.2
 9 | langchain==0.0.335
10 | 


--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "Python 3",
 3 |   // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
 4 |   "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
 5 |   "customizations": {
 6 |     "codespaces": {
 7 |       "openFiles": [
 8 |         "README.md",
 9 |         "graph_rag_chatbot.py"
10 |       ]
11 |     },
12 |     "vscode": {
13 |       "settings": {},
14 |       "extensions": [
15 |         "ms-python.python",
16 |         "ms-python.vscode-pylance"
17 |       ]
18 |     }
19 |   },
20 |   "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
21 |   "postAttachCommand": {
22 |     "server": "streamlit run graph_rag_chatbot.py --server.enableCORS false --server.enableXsrfProtection false"
23 |   },
24 |   "portsAttributes": {
25 |     "8501": {
26 |       "label": "Application",
27 |       "onAutoForward": "openPreview"
28 |     }
29 |   },
30 |   "forwardPorts": [
31 |     8501
32 |   ]
33 | }


--------------------------------------------------------------------------------
/storage_vector/index_store.json:
--------------------------------------------------------------------------------
1 | {"index_store/data": {"166b2ee2-b0d4-45f8-b311-a79820ee5cf1": {"__type__": "vector_store", "__data__": "{\"index_id\": \"166b2ee2-b0d4-45f8-b311-a79820ee5cf1\", \"summary\": null, \"nodes_dict\": {\"8ba94c42-6b74-4007-baac-8ccaee59ee67\": \"8ba94c42-6b74-4007-baac-8ccaee59ee67\", \"3d3256df-0fdb-44c1-b32c-ab4ef31d4ae6\": \"3d3256df-0fdb-44c1-b32c-ab4ef31d4ae6\", \"5a0378b1-0ca4-41c7-b122-dff7c8fc21c5\": \"5a0378b1-0ca4-41c7-b122-dff7c8fc21c5\", \"1cd198ab-b486-4755-9555-888b4b149114\": \"1cd198ab-b486-4755-9555-888b4b149114\", \"8da9bdfa-d3d2-496b-8931-4233ecad1d48\": \"8da9bdfa-d3d2-496b-8931-4233ecad1d48\", \"e2285c7e-2c45-4254-910d-da090dc09cf6\": \"e2285c7e-2c45-4254-910d-da090dc09cf6\", \"4e45f3f0-2630-488e-b034-9b8c756031a1\": \"4e45f3f0-2630-488e-b034-9b8c756031a1\", \"dee3350f-d259-43d1-9aee-a09a0e0288ed\": \"dee3350f-d259-43d1-9aee-a09a0e0288ed\", \"d0c55d0b-9b51-4259-ae33-e5e93b2b93c9\": \"d0c55d0b-9b51-4259-ae33-e5e93b2b93c9\", \"46e4cd84-a728-4ce9-8429-e4896660a989\": \"46e4cd84-a728-4ce9-8429-e4896660a989\", \"01b0d056-dc40-4c49-8652-14c1f40ed589\": \"01b0d056-dc40-4c49-8652-14c1f40ed589\", \"0885fe52-a626-4181-86dd-dd361fbd3103\": \"0885fe52-a626-4181-86dd-dd361fbd3103\", \"a8c5cfd9-babe-4819-b606-2fcdb38abebc\": \"a8c5cfd9-babe-4819-b606-2fcdb38abebc\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}


--------------------------------------------------------------------------------
/chatbot_storage_vector/index_store.json:
--------------------------------------------------------------------------------
1 | {"index_store/data": {"6e2e0948-8155-47b5-938c-f0d853f9c3eb": {"__type__": "vector_store", "__data__": "{\"index_id\": \"6e2e0948-8155-47b5-938c-f0d853f9c3eb\", \"summary\": null, \"nodes_dict\": {\"92c5163f-4c14-427c-93a4-4bbb937e1587\": \"92c5163f-4c14-427c-93a4-4bbb937e1587\", \"e880a846-2805-4666-8b4c-193497ae6382\": \"e880a846-2805-4666-8b4c-193497ae6382\", \"58762fdc-b514-41b3-941c-4fb72811b24b\": \"58762fdc-b514-41b3-941c-4fb72811b24b\", \"57439a61-a1fb-4cfa-9a29-ab68b1b2fc06\": \"57439a61-a1fb-4cfa-9a29-ab68b1b2fc06\", \"9a7d3f26-2bf6-4aa8-a09f-8eb11ca7804b\": \"9a7d3f26-2bf6-4aa8-a09f-8eb11ca7804b\", \"57a8264b-3dae-41bb-9e43-486ac86ae2a6\": \"57a8264b-3dae-41bb-9e43-486ac86ae2a6\", \"07d69618-e595-4429-8fd7-8e187eeab0b2\": \"07d69618-e595-4429-8fd7-8e187eeab0b2\", \"4a879920-3779-4d2c-8285-0ebc54f8c15f\": \"4a879920-3779-4d2c-8285-0ebc54f8c15f\", \"80e07855-ef77-4f90-9b35-6ac5f720e114\": \"80e07855-ef77-4f90-9b35-6ac5f720e114\", \"39e13326-91a9-4ad6-94c0-6cb81bf03554\": \"39e13326-91a9-4ad6-94c0-6cb81bf03554\", \"5cd4d907-c9bd-4600-854a-bee5075ce1e3\": \"5cd4d907-c9bd-4600-854a-bee5075ce1e3\", \"43540d40-edf1-4d77-8116-5e82fb8f7072\": \"43540d40-edf1-4d77-8116-5e82fb8f7072\", \"d0044489-ea56-45a7-88da-3e9223671a98\": \"d0044489-ea56-45a7-88da-3e9223671a98\", \"47736cc9-d856-4433-ba97-8abff89282e1\": \"47736cc9-d856-4433-ba97-8abff89282e1\", \"afc8503b-ffc9-4ad8-9c73-b7a7e52fcf3a\": \"afc8503b-ffc9-4ad8-9c73-b7a7e52fcf3a\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | .DS_Store
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | share/python-wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | MANIFEST
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .nox/
 45 | .coverage
 46 | .coverage.*
 47 | .cache
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | *.py,cover
 52 | .hypothesis/
 53 | .pytest_cache/
 54 | cover/
 55 | 
 56 | # Translations
 57 | *.mo
 58 | *.pot
 59 | 
 60 | # Django stuff:
 61 | *.log
 62 | local_settings.py
 63 | db.sqlite3
 64 | db.sqlite3-journal
 65 | 
 66 | # Flask stuff:
 67 | instance/
 68 | .webassets-cache
 69 | 
 70 | # Scrapy stuff:
 71 | .scrapy
 72 | 
 73 | # Sphinx documentation
 74 | docs/_build/
 75 | 
 76 | # PyBuilder
 77 | .pybuilder/
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # IPython
 84 | profile_default/
 85 | ipython_config.py
 86 | 
 87 | # pyenv
 88 | #   For a library or package, you might want to ignore these files since the code is
 89 | #   intended to run in multiple environments; otherwise, check them in:
 90 | # .python-version
 91 | 
 92 | # pipenv
 93 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 94 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 95 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 96 | #   install all needed dependencies.
 97 | #Pipfile.lock
 98 | 
 99 | # poetry
100 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
102 | #   commonly ignored for libraries.
103 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104 | #poetry.lock
105 | 
106 | # pdm
107 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108 | #pdm.lock
109 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110 | #   in version control.
111 | #   https://pdm.fming.dev/#use-with-ide
112 | .pdm.toml
113 | 
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 | 
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 | 
121 | # SageMath parsed files
122 | *.sage.py
123 | 
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 | 
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 | 
137 | # Rope project settings
138 | .ropeproject
139 | 
140 | # mkdocs documentation
141 | /site
142 | 
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 | 
148 | # Pyre type checker
149 | .pyre/
150 | 
151 | # pytype static type analyzer
152 | .pytype/
153 | 
154 | # Cython debug symbols
155 | cython_debug/
156 | 
157 | # PyCharm
158 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
161 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
162 | #.idea/
163 | 


--------------------------------------------------------------------------------
/chatbot_storage_graph/index_store.json:
--------------------------------------------------------------------------------
1 | {"index_store/data": {"ce5d8b78-0582-486c-a1f0-e16c2e7d25ff": {"__type__": "kg", "__data__": "{\"index_id\": \"ce5d8b78-0582-486c-a1f0-e16c2e7d25ff\", \"summary\": null, \"table\": {\"Guardians of the Galaxy Vol. 3\": [\"cdc5b306-bf83-4024-973e-d3e961fdb8f3\", \"12d075bb-3b2f-4a4d-8ed8-8d74f6fd919c\", \"12a0c919-414f-4d10-919e-1d8e68bed371\"], \"superhero film\": [\"cdc5b306-bf83-4024-973e-d3e961fdb8f3\"], \"Marvel Comics superhero team Guardians of the Galaxy\": [\"cdc5b306-bf83-4024-973e-d3e961fdb8f3\"], \"Marvel Studios\": [\"cdc5b306-bf83-4024-973e-d3e961fdb8f3\", \"4221c586-1635-4197-b181-10041043654c\", \"e62d6316-1abf-492b-a369-8838cf992225\", \"087f8c0c-f267-434f-81c3-007c98590d34\"], \"Walt Disney Studios Motion Pictures\": [\"cdc5b306-bf83-4024-973e-d3e961fdb8f3\"], \"Guardians of the Galaxy\": [\"cdc5b306-bf83-4024-973e-d3e961fdb8f3\", \"12d075bb-3b2f-4a4d-8ed8-8d74f6fd919c\"], \"Guardians of the Galaxy Vol. 2\": [\"cdc5b306-bf83-4024-973e-d3e961fdb8f3\", \"12d075bb-3b2f-4a4d-8ed8-8d74f6fd919c\"], \"Marvel Cinematic Universe\": [\"cdc5b306-bf83-4024-973e-d3e961fdb8f3\"], \"James Gunn\": [\"cdc5b306-bf83-4024-973e-d3e961fdb8f3\", \"12d075bb-3b2f-4a4d-8ed8-8d74f6fd919c\", \"549babc5-dddb-4645-8970-7a0c2479f473\"], \"ensemble cast\": [\"cdc5b306-bf83-4024-973e-d3e961fdb8f3\"], \"Disneyland Paris\": [\"cdc5b306-bf83-4024-973e-d3e961fdb8f3\"], \"Quill's group\": [\"8198a33d-3e69-4007-96d3-d587b0876cdf\"], \"code\": [\"8198a33d-3e69-4007-96d3-d587b0876cdf\"], \"Rocket\": [\"88fe1f3b-0801-4044-823d-02e6b7d993a6\", \"8198a33d-3e69-4007-96d3-d587b0876cdf\"], \"near-death experience\": [\"8198a33d-3e69-4007-96d3-d587b0876cdf\"], \"Lylla\": [\"8198a33d-3e69-4007-96d3-d587b0876cdf\"], \"Teefs\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\", \"8198a33d-3e69-4007-96d3-d587b0876cdf\"], \"Floor\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\", \"8198a33d-3e69-4007-96d3-d587b0876cdf\"], \"him\": [\"8198a33d-3e69-4007-96d3-d587b0876cdf\"], \"Quill\": 
[\"8198a33d-3e69-4007-96d3-d587b0876cdf\"], \"kill switch\": [\"8198a33d-3e69-4007-96d3-d587b0876cdf\"], \"Rocket's heart\": [\"8198a33d-3e69-4007-96d3-d587b0876cdf\"], \"Vin Diesel\": [\"88fe1f3b-0801-4044-823d-02e6b7d993a6\"], \"Groot\": [\"88fe1f3b-0801-4044-823d-02e6b7d993a6\"], \"member of the Guardians\": [\"88fe1f3b-0801-4044-823d-02e6b7d993a6\"], \"tree-like humanoid\": [\"88fe1f3b-0801-4044-823d-02e6b7d993a6\"], \"accomplice of Rocket\": [\"88fe1f3b-0801-4044-823d-02e6b7d993a6\"], \"Bradley Cooper\": [\"88fe1f3b-0801-4044-823d-02e6b7d993a6\"], \"former Avenger\": [\"88fe1f3b-0801-4044-823d-02e6b7d993a6\"], \"genetically engineered raccoon-based bounty hunter\": [\"88fe1f3b-0801-4044-823d-02e6b7d993a6\"], \"master of weapons and military tactics\": [\"88fe1f3b-0801-4044-823d-02e6b7d993a6\"], \"Gunn\": [\"4221c586-1635-4197-b181-10041043654c\", \"e62d6316-1abf-492b-a369-8838cf992225\", \"81defee1-183d-49d3-80e7-1c0289104f9c\", \"88fe1f3b-0801-4044-823d-02e6b7d993a6\", \"d54dc454-e2dd-4d4d-a64a-ce4f89771e24\", \"087f8c0c-f267-434f-81c3-007c98590d34\", \"cc8c970f-5c70-465c-8e38-11269513f906\"], \"film tells Rocket's story\": [\"88fe1f3b-0801-4044-823d-02e6b7d993a6\"], \"Tara Strong\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"Miss Minutes\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"Mainframe\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"Miley Cyrus\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"Jared Gore\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"Krugarr\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"sorcery powers\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"his sorcery\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"Asim Chaudhry\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"Mikaela Hoover\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"Daniela Melchior\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"Ura\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"Miriam Shor\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"Recorder 
Vim\": [\"ac9ae5c5-b311-4081-894d-2df26760bf01\"], \"November 2014\": [\"12d075bb-3b2f-4a4d-8ed8-8d74f6fd919c\"], \"Avengers: Infinity War\": [\"12d075bb-3b2f-4a4d-8ed8-8d74f6fd919c\"], \"Marvel characters\": [\"12d075bb-3b2f-4a4d-8ed8-8d74f6fd919c\"], \"the first two films\": [\"12d075bb-3b2f-4a4d-8ed8-8d74f6fd919c\"], \"Marvel Cosmic Universe\": [\"12d075bb-3b2f-4a4d-8ed8-8d74f6fd919c\"], \"Disney and Marvel\": [\"12d075bb-3b2f-4a4d-8ed8-8d74f6fd919c\"], \"rape and pedophilia\": [\"12d075bb-3b2f-4a4d-8ed8-8d74f6fd919c\"], \"He\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"\\\"\\\"\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"work\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"myself\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"tweets\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"be provocative\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"business decisions\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"responsibility\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"Guardians cast members\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"support\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"fans\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"online petition\": [\"cc8c970f-5c70-465c-8e38-11269513f906\"], \"exit settlement\": [\"549babc5-dddb-4645-8970-7a0c2479f473\"], \"The Suicide Squad\": [\"087f8c0c-f267-434f-81c3-007c98590d34\", \"549babc5-dddb-4645-8970-7a0c2479f473\"], \"Warner Bros / DC Films\": [\"549babc5-dddb-4645-8970-7a0c2479f473\"], \"Horn\": [\"087f8c0c-f267-434f-81c3-007c98590d34\"], \"after being impressed by Gunn's response\": [\"087f8c0c-f267-434f-81c3-007c98590d34\"], \"Feige\": [\"087f8c0c-f267-434f-81c3-007c98590d34\"], \"Vol. 
3\": [\"087f8c0c-f267-434f-81c3-007c98590d34\"], \"the film\": [\"087f8c0c-f267-434f-81c3-007c98590d34\"], \"for the film\": [\"087f8c0c-f267-434f-81c3-007c98590d34\"], \"five main stars\": [\"087f8c0c-f267-434f-81c3-007c98590d34\"], \"the sequel\": [\"087f8c0c-f267-434f-81c3-007c98590d34\"], \"the character of Rocket\": [\"087f8c0c-f267-434f-81c3-007c98590d34\"], \"Pre-production work\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\"], \"April 2021\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\"], \"Vol. 3 would take place after the events of Thor: Love and Thunder\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\"], \"Thor\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\"], \"Guardians\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\"], \"film by June\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\"], \"Bautista\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\", \"e62d6316-1abf-492b-a369-8838cf992225\"], \"he had not read a script for Vol. 3\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\"], \"Gillan\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\"], \"she and Klementieff had read the script together\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\"], \"script had \\\"basically stayed the same\\\" from three years prior\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\"], \"character who would become the film's main villain\": [\"81defee1-183d-49d3-80e7-1c0289104f9c\"], \"Principal photography\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"Trilith Studios\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"Henry Braham\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"cinematographer\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"for Vol. 
2\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"for The Suicide Squad\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"for The Guardians of the Galaxy Holiday Special\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"Filming\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"in January or February 2019\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"in February 2021\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"Sylvester Stallone\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"he would return as Stakar Ogord\": [\"bd59bb8b-5014-4d75-988c-17edf7c99717\"], \"music\": [\"d54dc454-e2dd-4d4d-a64a-ce4f89771e24\"], \"soundtracks\": [\"d54dc454-e2dd-4d4d-a64a-ce4f89771e24\"], \"panicking\": [\"d54dc454-e2dd-4d4d-a64a-ce4f89771e24\"], \"choices\": [\"d54dc454-e2dd-4d4d-a64a-ce4f89771e24\"], \"list could grow\": [\"d54dc454-e2dd-4d4d-a64a-ce4f89771e24\"], \"songs\": [\"d54dc454-e2dd-4d4d-a64a-ce4f89771e24\"], \"soundtrack was not limited\": [\"d54dc454-e2dd-4d4d-a64a-ce4f89771e24\"], \"song\": [\"d54dc454-e2dd-4d4d-a64a-ce4f89771e24\"], \"version of \\\"Creep\\\"\": [\"d54dc454-e2dd-4d4d-a64a-ce4f89771e24\"], \"panel\": [\"4221c586-1635-4197-b181-10041043654c\"], \"first footage\": [\"4221c586-1635-4197-b181-10041043654c\"], \"Iwuji\": [\"4221c586-1635-4197-b181-10041043654c\"], \"High Evolutionary\": [\"4221c586-1635-4197-b181-10041043654c\"], \"footage was not released publicly\": [\"4221c586-1635-4197-b181-10041043654c\"], \"trailer\": [\"4221c586-1635-4197-b181-10041043654c\"], \"\\\"In the Meantime\\\" by Spacehog\": [\"4221c586-1635-4197-b181-10041043654c\"], \"Drew Taylor\": [\"4221c586-1635-4197-b181-10041043654c\"], \"trailer a doozy\": [\"4221c586-1635-4197-b181-10041043654c\"], \"Carson Burton\": [\"4221c586-1635-4197-b181-10041043654c\"], \"film would be \\\"shaping up to be an emotional ending\\\"\": [\"4221c586-1635-4197-b181-10041043654c\"], \"Jay Peters\": [\"4221c586-1635-4197-b181-10041043654c\"], \"trailer intrigued him as a \\\"wild ride across the 
stars\\\"\": [\"4221c586-1635-4197-b181-10041043654c\"], \"$359 million\": [\"12a0c919-414f-4d10-919e-1d8e68bed371\"], \"$486.4 million\": [\"12a0c919-414f-4d10-919e-1d8e68bed371\"], \"$845.4 million\": [\"12a0c919-414f-4d10-919e-1d8e68bed371\"], \"$110 million\": [\"12a0c919-414f-4d10-919e-1d8e68bed371\"], \"$48.2 million\": [\"12a0c919-414f-4d10-919e-1d8e68bed371\"], \"$118.4 million\": [\"12a0c919-414f-4d10-919e-1d8e68bed371\"], \"box office\": [\"12a0c919-414f-4d10-919e-1d8e68bed371\"], \"$62 million\": [\"12a0c919-414f-4d10-919e-1d8e68bed371\"], \"$32 million\": [\"12a0c919-414f-4d10-919e-1d8e68bed371\"], \"$168.1 million\": [\"12a0c919-414f-4d10-919e-1d8e68bed371\"], \"Rotten Tomatoes\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"approval rating\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"391 reviews\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"7.2/10\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"Metacritic\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"weighted average score\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"63 critics\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"generally favorable reviews\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"CinemaScore\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"film an average grade\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"A+ to F scale\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"PostTrak\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"moviegoers gave\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"91%\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"Marvel Studios: Assembled\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"February 2021\": [\"68967c85-73af-4e82-b70f-5935a2baf525\"], \"April 2017\": [\"e62d6316-1abf-492b-a369-8838cf992225\"], \"story\": [\"e62d6316-1abf-492b-a369-8838cf992225\"], \"unlikely to return\": [\"e62d6316-1abf-492b-a369-8838cf992225\"], \"film centered on Drax and Mantis\": [\"e62d6316-1abf-492b-a369-8838cf992225\"], \"brilliant\": 
[\"e62d6316-1abf-492b-a369-8838cf992225\"], \"not heard any further updates\": [\"e62d6316-1abf-492b-a369-8838cf992225\"], \"Salda\\u00f1a\": [\"e62d6316-1abf-492b-a369-8838cf992225\"], \"Vol. 3 would be the last time\": [\"e62d6316-1abf-492b-a369-8838cf992225\"]}, \"rel_map\": {}, \"embedding_dict\": {}}"}}}


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/graph_rag_chatbot.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import logging
  4 | import random
  5 | 
  6 | sys.stdout.reconfigure(encoding="utf-8")
  7 | sys.stdin.reconfigure(encoding="utf-8")
  8 | 
  9 | import streamlit.components.v1 as components
 10 | 
 11 | import streamlit as st
 12 | 
 13 | import openai
 14 | from llama_index.llms import AzureOpenAI
 15 | from langchain.embeddings import OpenAIEmbeddings
 16 | from llama_index import LangchainEmbedding
 17 | from llama_index import (
 18 |     load_index_from_storage,
 19 |     LLMPredictor,
 20 |     ServiceContext,
 21 |     set_global_service_context,
 22 | )
 23 | 
 24 | from llama_index.tools.types import ToolMetadata
 25 | from llama_index.tools.query_engine import QueryEngineTool
 26 | 
 27 | 
 28 | from llama_index.query_engine import RetrieverQueryEngine
 29 | from llama_index.retrievers import KnowledgeGraphRAGRetriever
 30 | 
 31 | from llama_index.storage.storage_context import StorageContext
 32 | from llama_index.graph_stores import NebulaGraphStore
 33 | 
 34 | 
 35 | # logging.basicConfig(
 36 | #     stream=sys.stdout, level=logging.INFO, force=True,
 37 | # )
 38 | 
# Point the OpenAI SDK at Azure OpenAI. The endpoint and the API key are read
# from the Streamlit secrets store instead of being hard-coded here.
openai.api_type = "azure"
openai.api_base = st.secrets["OPENAI_API_BASE"]
openai.api_version = "2023-03-15-preview"  # azure gpt-3.5 turbo
openai.api_key = st.secrets["OPENAI_API_KEY"]

# Chat-completion LLM: the Azure deployment name comes from secrets, the model
# is gpt-35-turbo and the temperature is pinned to 0.
llm = AzureOpenAI(
    engine=st.secrets["DEPLOYMENT_NAME"],
    temperature=0,
    model="gpt-35-turbo",
)
llm_predictor = LLMPredictor(llm=llm)

# Embedding model: a LangChain OpenAIEmbeddings client wrapped for LlamaIndex.
# It reuses the openai.* settings assigned above, so it must be built after them.
# NOTE(review): embed_batch_size=1 presumably sends one text per embedding
# request — confirm this matches the limits of the Azure embedding deployment.
embedding_llm = LangchainEmbedding(
    OpenAIEmbeddings(
        model="text-embedding-ada-002",
        deployment=st.secrets["EMBEDDING_DEPLOYMENT_NAME"],
        openai_api_key=openai.api_key,
        openai_api_base=openai.api_base,
        openai_api_type=openai.api_type,
        openai_api_version=openai.api_version,
    ),
    embed_batch_size=1,
)

# Bundle the LLM predictor and the embedding model into one ServiceContext ...
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
)

# ... and register it as the global service context for LlamaIndex.
set_global_service_context(service_context)
 69 | 
# Graph Store
#
# Publish the NebulaGraph credentials and address as NEBULA_* environment
# variables, taken from Streamlit secrets.
# NOTE(review): presumably NebulaGraphStore reads these variables when it
# connects — confirm against the llama-index version in use.

os.environ["NEBULA_USER"] = st.secrets["graphd_user"]
os.environ["NEBULA_PASSWORD"] = st.secrets["graphd_password"]
os.environ[
    "NEBULA_ADDRESS"
] = f"{st.secrets['graphd_host']}:{st.secrets['graphd_port']}"

# Graph space queried by this app, plus the edge type / property / tag names
# handed to the graph store and to the index loader below.
space_name = "rag_workshop"
edge_types, rel_prop_names = ["relationship"], [
    "relationship"
]  # default; could be omitted when creating from an empty KG
tags = ["entity"]  # default; could be omitted when creating from an empty KG

graph_store = NebulaGraphStore(
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)

# Storage Context: persisted index data from ./chatbot_storage_graph combined
# with the live NebulaGraph store created above.

storage_context = StorageContext.from_defaults(
    persist_dir="./chatbot_storage_graph", graph_store=graph_store
)

# KG Index: loaded from the storage context (nothing is rebuilt here).

kg_index = load_index_from_storage(
    storage_context=storage_context,
    service_context=service_context,
    max_triplets_per_chunk=10,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    verbose=True,
)

# Query engine used by the sidebar "Inspect" button further down the script.
kg_index_query_engine = kg_index.as_query_engine(
    include_text=False,
    retriever_mode="keyword",
    response_mode="tree_summarize",
)

# Vector Index: loaded from its own persist directory.

storage_context_vector = StorageContext.from_defaults(
    persist_dir="./chatbot_storage_vector"
)
vector_index = load_index_from_storage(
    service_context=service_context, storage_context=storage_context_vector
)

# Only referenced by the commented-out query tools later in this file.
vector_rag_query_engine = vector_index.as_query_engine()

# Graph RAG Retriever
# Only referenced by the commented-out RetrieverQueryEngine later in this file.

graph_rag_retriever = KnowledgeGraphRAGRetriever(
    storage_context=storage_context,
    service_context=service_context,
    with_nl2graphquery=True,
    llm=llm,
    verbose=True,
)
136 | 
137 | # Graph RAG Query Engine
138 | 
139 | # graph_rag_query_engine = RetrieverQueryEngine.from_args(
140 | #     graph_rag_retriever,
141 | #     service_context=service_context,
142 | #     verbose=True,
143 | # )
144 | 
145 | # # Query tools
146 | 
147 | # query_engine_tools = [
148 | #     QueryEngineTool(
149 | #         query_engine=graph_rag_query_engine,
150 | #         metadata=ToolMetadata(
151 | #             name="Guardians of the Galaxy Vol-3",
152 | #             description="Provides info about the movie guardians of the galaxy vol 3, extracted from wikipedia.",
153 | #         ),
154 | #     ),
155 | #     QueryEngineTool(
156 | #         query_engine=vector_rag_query_engine,
157 | #         metadata=ToolMetadata(
158 | #             name="Data Chunks based on Semantic Search",
159 | #             description="Provides info about the movie guardians of the galaxy vol 3, in the form of Data Chunks. "
160 | #             "Will search large piece of text and extract the most relevant information.",
161 | #         ),
162 | #     ),
163 | # ]
164 | 
165 | # Chatbot
166 | 
167 | # from llama_index.agent import ReActAgent
168 | from llama_index.memory import ChatMemoryBuffer
169 | 
# Conversation memory for the chat engine, created with token_limit=1500.
memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
# Alternative kept for reference: a ReAct agent over the (commented-out)
# query engine tools.
# chat_engine = ReActAgent.from_tools(
#     query_engine_tools, llm=llm, memory=memory, verbose=True
# )

# Active chat engine: built directly from the KG index in "react" chat mode,
# sharing the memory buffer above.
chat_engine = kg_index.as_chat_engine(
    chat_mode="react",
    memory=memory,
    verbose=True,
)
180 | 
181 | # utils
182 | 
183 | 
def cypher_to_all_paths(query):
    """Rewrite a Cypher query so that it returns whole paths.

    Every ``MATCH`` clause gets a path variable (``path_0``, ``path_1``, ...)
    and everything from the ``RETURN`` keyword to the end of the query is
    replaced by ``RETURN path_0, path_1, ...;``. Keywords are recognised
    case-insensitively and only as whole words.

    Args:
        query: Cypher/nGQL query text.

    Returns:
        The rewritten query, or ``query`` unchanged when it contains no
        ``RETURN`` keyword or no ``MATCH`` clause before it (there is nothing
        to rewrite in that case).
    """
    # Imported locally because this module has no top-level ``import re``.
    import re

    flags = re.I | re.S

    return_keyword = re.search(r"\bRETURN\b", query, flags)
    if return_keyword is None:
        return query

    # Only the text in front of RETURN is kept; the old RETURN clause, however
    # many lines it spans, is dropped in full.
    head = query[: return_keyword.start()]
    match_parts = re.findall(r"\bMATCH\b.+?(?=\bMATCH\b|\Z)", head, flags)
    if not match_parts:
        return query

    path_ids = []
    modified_matches = []
    for i, part in enumerate(match_parts):
        path_id = f"path_{i}"
        path_ids.append(path_id)
        # Name the pattern of this clause: "MATCH (a)..." -> "MATCH path_i = (a)..."
        modified_matches.append(
            re.sub(r"\bMATCH\b\s*", f"MATCH {path_id} = ", part, count=1, flags=re.I)
        )

    matches_string = " ".join(modified_matches)
    return_string = f"RETURN {', '.join(path_ids)};"
    return f"{matches_string}\n{return_string}"
214 | 
215 | 
def result_to_df(result):
    """Convert a query result set into a pandas DataFrame.

    One DataFrame column is built per result column, keyed by the column
    name; each value is unwrapped with its ``cast()`` method.
    """
    import pandas as pd

    names = result.keys()
    table = {
        names[index]: [value.cast() for value in result.column_values(names[index])]
        for index in range(result.col_size())
    }
    return pd.DataFrame(table)
228 | 
229 | 
def render_pd_item(g, item):
    """Add one query-result value to the pyvis network ``g``.

    A ``Node`` becomes a vertex whose tooltip is its merged tag properties.
    A ``Relationship`` becomes a labelled edge, with either endpoint added
    first when it is not in the network yet. A ``PathWrapper`` or a list is
    walked element by element. Any other value is ignored.
    """
    from nebula3.data.DataObject import Node, PathWrapper, Relationship

    if isinstance(item, Node):
        vertex_id = item.get_id().cast()
        tag_names = item.tags()  # list of strings
        merged_props = {}
        for tag_name in tag_names:
            merged_props.update(item.properties(tag_name))
        g.add_node(vertex_id, label=vertex_id, title=str(merged_props))
        return

    if isinstance(item, Relationship):
        source = item.start_vertex_id().cast()
        target = item.end_vertex_id().cast()
        label = item.edge_name()
        edge_props = item.properties()
        # Both endpoints must be in the network before the edge is added.
        for endpoint in (source, target):
            if endpoint not in g.node_ids:
                g.add_node(endpoint)
        g.add_edge(source, target, label=label, title=str(edge_props))
        return

    if isinstance(item, PathWrapper):
        for vertex in item.nodes():
            render_pd_item(g, vertex)
        for relationship in item.relationships():
            render_pd_item(g, relationship)
        return

    if isinstance(item, list):
        for element in item:
            render_pd_item(g, element)
259 | 
260 | 
def create_pyvis_graph(result_df):
    """Build a directed pyvis ``Network`` from every cell of ``result_df``.

    Each cell is passed to ``render_pd_item``; the repulsion layout is
    applied once all cells have been rendered.
    """
    from pyvis.network import Network

    network_options = dict(
        notebook=True,
        directed=True,
        cdn_resources="in_line",
        height="500px",
        width="100%",
    )
    graph = Network(**network_options)

    for _, record in result_df.iterrows():
        for cell in record:
            render_pd_item(graph, cell)

    layout_options = dict(
        node_distance=100,
        central_gravity=0.2,
        spring_length=200,
        spring_strength=0.05,
        damping=0.09,
    )
    graph.repulsion(**layout_options)
    return graph
282 | 
283 | 
def query_nebulagraph(
    query,
    space_name=space_name,
    address=st.secrets["graphd_host"],
    port=9669,
    user=st.secrets["graphd_user"],
    password=st.secrets["graphd_password"],
):
    """Run ``query`` against NebulaGraph and return the result set.

    A session pool is opened for this single call and is always closed again
    before returning, so repeated calls do not accumulate open sessions.

    Args:
        query: Query text to execute.
        space_name: Graph space the session pool is bound to.
        address: Host of the graphd service.
        port: Port of the graphd service.
        user: NebulaGraph user name.
        password: NebulaGraph password.

    Returns:
        Whatever ``SessionPool.execute`` returns for ``query``.
    """
    # NOTE(review): the default port is the literal 9669, while NEBULA_ADDRESS
    # in this file is built from st.secrets["graphd_port"] — confirm the two
    # agree in every deployment.
    from nebula3.Config import SessionPoolConfig
    from nebula3.gclient.net.SessionPool import SessionPool

    config = SessionPoolConfig()
    session_pool = SessionPool(user, password, space_name, [(address, port)])
    session_pool.init(config)
    try:
        return session_pool.execute(query)
    finally:
        # Release the sessions even when execute() raises.
        session_pool.close()
299 | 
300 | 
301 | #### page
302 | 
st.set_page_config(
    page_title="Graph RAG Chat Bot",
    page_icon="🌌",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=None,
)
st.title("Demo: Graph RAG Chat Bot")


st.info(
    "See more about: [Graph RAG](https://www.siwei.io/graph-rag/) on how it works, KG built from [this demo](https://kg-llm-build.streamlit.app/).",
    icon="📃",
)

if "messages" not in st.session_state.keys():  # Initialize the chat messages history
    st.session_state.messages = [
        {
            "role": "assistant",
            "content": "Ask me question from the knowledge in **Guardians of the Galaxy Vol. 3.**",
        }
    ]

if prompt := st.chat_input(
    "Your question"
):  # Prompt for user input and save to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

for message in st.session_state.messages:  # Display the prior chat messages
    with st.chat_message(message["role"]):
        st.write(message["content"])

# Thanks to https://github.com/carolinedlu/llamaindex-chat-with-streamlit-docs/blob/main/streamlit_app.py?ref=blog.streamlit.io
# Answer the stored user message rather than ``prompt``: if an earlier run
# failed after the user message was saved, ``prompt`` is None on the next
# rerun while the unanswered question is still the last history entry.
pending_message = st.session_state.messages[-1]
if pending_message["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking with NebulaGraph..."):
            response = chat_engine.chat(pending_message["content"])
            st.write(response.response)
            message = {"role": "assistant", "content": response.response}
            st.session_state.messages.append(message)

# how it works
with st.sidebar:
    st.markdown(
        """
## How it works
"""
    )
    # A non-empty label hidden with label_visibility keeps the same look
    # without Streamlit's empty-label warning; a separate name avoids
    # overwriting the chat ``prompt`` above.
    inspect_prompt = st.text_input(
        label="Question to inspect",
        value="Who is Rocket?",
        label_visibility="collapsed",
    )

    if st.button("Inspect 🔎"):
        response = kg_index_query_engine.query(inspect_prompt)

        answer_GraphRAG = str(response)

        # The relation map sits in the first metadata entry; tolerate a
        # response that carries no metadata at all.
        metadata = response.metadata or {}
        first_source = next(iter(metadata.values()), None) or {}
        related_entities = list((first_source.get("kg_rel_map") or {}).keys())

        if not related_entities:
            st.warning(
                "No related entities were found in the knowledge graph for this question."
            )
        else:
            render_query = (
                f"MATCH p=(n)-[*1..2]-() \n  WHERE id(n) IN {related_entities} \nRETURN p"
            )

            st.markdown(
                f"""
> Query to NebulaGraph:

```cypher
{render_query}
```
"""
            )
            st.markdown("> The SubGraph Retrieved")
            result = query_nebulagraph(render_query)

            if not result.is_succeeded():
                # Show the server error instead of drawing an empty graph.
                st.error(f"NebulaGraph query failed: {result.error_msg()}")
            else:
                result_df = result_to_df(result)

                # create pyvis graph
                g = create_pyvis_graph(result_df)

                # render with random file name
                graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html")

                components.html(graph_html, height=500, scrolling=True)

                # st.write(f"*Answer*: {answer_GraphRAG}")
387 | 


--------------------------------------------------------------------------------
/kg_build_and_query_CN.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | 
  3 | sys.stdout.reconfigure(encoding="utf-8")
  4 | sys.stdin.reconfigure(encoding="utf-8")
  5 | 
  6 | import streamlit as st
  7 | import streamlit.components.v1 as components
  8 | 
  9 | import re
 10 | 
 11 | import random
 12 | 
 13 | CODE_BUILD_KG = """
 14 | 
 15 | # 准备 GraphStore
 16 | 
 17 | os.environ['NEBULA_USER'] = "root"
 18 | os.environ['NEBULA_PASSWORD'] = "nebula" # default password
 19 | os.environ['NEBULA_ADDRESS'] = "127.0.0.1:9669" # assumed we have NebulaGraph installed locally
 20 | 
 21 | space_name = "guardians"
 22 | edge_types, rel_prop_names = ["relationship"], ["relationship"] # default, could be omit if create from an empty kg
 23 | tags = ["entity"] # default, could be omit if create from an empty kg
 24 | 
 25 | graph_store = NebulaGraphStore(space_name=space_name, edge_types=edge_types, rel_prop_names=rel_prop_names, tags=tags)
 26 | storage_context = StorageContext.from_defaults(graph_store=graph_store)
 27 | 
 28 | # 从维基百科下载、预处理数据
 29 | 
 30 | from llama_index import download_loader
 31 | 
 32 | WikipediaReader = download_loader("WikipediaReader")
 33 | 
 34 | loader = WikipediaReader()
 35 | 
 36 | documents = loader.load_data(pages=['Guardians of the Galaxy Vol. 3'], auto_suggest=False)
 37 | 
 38 | # 利用 LLM 从文档中抽取知识三元组，并存储到 GraphStore（NebulaGraph）
 39 | 
 40 | kg_index = KnowledgeGraphIndex.from_documents(
 41 |     documents,
 42 |     storage_context=storage_context,
 43 |     max_triplets_per_chunk=10,
 44 |     service_context=service_context,
 45 |     space_name=space_name,
 46 |     edge_types=edge_types,
 47 |     rel_prop_names=rel_prop_names,
 48 |     tags=tags,
 49 |     include_embeddings=True,
 50 | )
 51 | 
 52 | """
 53 | 
 54 | CODE_NL2CYPHER_LANGCHAIN = """
 55 | ## Langchain
 56 | # Doc: https://python.langchain.com/docs/modules/chains/additional/graph_nebula_qa
 57 | 
 58 | from langchain.chat_models import ChatOpenAI
 59 | from langchain.chains import NebulaGraphQAChain
 60 | from langchain.graphs import NebulaGraph
 61 | 
 62 | graph = NebulaGraph(
 63 |     space=space_name,
 64 |     username="root",
 65 |     password="nebula",
 66 |     address="127.0.0.1",
 67 |     port=9669,
 68 |     session_pool_size=30,
 69 | )
 70 | 
 71 | chain = NebulaGraphQAChain.from_llm(
 72 |     llm, graph=graph, verbose=True
 73 | )
 74 | 
 75 | chain.run(
 76 |     "Tell me about Peter Quill?",
 77 | )
 78 | """
 79 | 
 80 | CODE_NL2CYPHER_LLAMAINDEX = """
 81 | 
 82 | ## Llama Index
 83 | # Doc: https://gpt-index.readthedocs.io/en/latest/examples/query_engine/knowledge_graph_query_engine.html
 84 | 
 85 | from llama_index.query_engine import KnowledgeGraphQueryEngine
 86 | 
 87 | from llama_index.storage.storage_context import StorageContext
 88 | from llama_index.graph_stores import NebulaGraphStore
 89 | 
 90 | nl2kg_query_engine = KnowledgeGraphQueryEngine(
 91 |     storage_context=storage_context,
 92 |     service_context=service_context,
 93 |     llm=llm,
 94 |     verbose=True,
 95 | )
 96 | 
 97 | response = nl2kg_query_engine.query(
 98 |     "Tell me about Peter Quill?",
 99 | )
100 | """
101 | 
102 | 
103 | import os
104 | import json
105 | import openai
106 | from llama_index.llms import AzureOpenAI
107 | from langchain.embeddings import OpenAIEmbeddings
108 | from llama_index import LangchainEmbedding
109 | from llama_index import (
110 |     VectorStoreIndex,
111 |     SimpleDirectoryReader,
112 |     KnowledgeGraphIndex,
113 |     LLMPredictor,
114 |     ServiceContext,
115 | )
116 | 
117 | from llama_index.storage.storage_context import StorageContext
118 | from llama_index.graph_stores import NebulaGraphStore
119 | 
120 | import logging
121 | import sys
122 | 
# Send INFO-level log records to stdout.
logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Point the OpenAI SDK at Azure OpenAI; the endpoint and key come from the
# Streamlit secrets store.
openai.api_type = "azure"
openai.api_base = st.secrets["OPENAI_API_BASE"]
# openai.api_version = "2022-12-01" azure gpt-3
openai.api_version = "2023-05-15"  # azure gpt-3.5 turbo
openai.api_key = st.secrets["OPENAI_API_KEY"]

# Chat-completion LLM: deployment name from secrets, model gpt-35-turbo,
# temperature pinned to 0.
llm = AzureOpenAI(
    engine=st.secrets["DEPLOYMENT_NAME"],
    temperature=0,
    model="gpt-35-turbo",
)
llm_predictor = LLMPredictor(llm=llm)

# You need to deploy your own embedding model as well as your own chat completion model
# The embedding client reuses the openai.* settings assigned above.
embedding_llm = LangchainEmbedding(
    OpenAIEmbeddings(
        model="text-embedding-ada-002",
        deployment=st.secrets["EMBEDDING_DEPLOYMENT_NAME"],
        openai_api_key=openai.api_key,
        openai_api_base=openai.api_base,
        openai_api_type=openai.api_type,
        openai_api_version=openai.api_version,
    ),
    embed_batch_size=1,
)

# Bundle the LLM predictor and the embedding model for LlamaIndex.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
)
# Publish the NebulaGraph credentials and address as NEBULA_* environment
# variables, taken from Streamlit secrets.
# NOTE(review): presumably NebulaGraphStore reads these variables when it
# connects — confirm against the llama-index version in use.
os.environ["NEBULA_USER"] = st.secrets["graphd_user"]
os.environ["NEBULA_PASSWORD"] = st.secrets["graphd_password"]
os.environ[
    "NEBULA_ADDRESS"
] = f"{st.secrets['graphd_host']}:{st.secrets['graphd_port']}"

# Graph space queried by this app, plus the edge type / property / tag names
# handed to the graph store.
space_name = "guardians"
edge_types, rel_prop_names = ["relationship"], [
    "relationship"
]  # default; could be omitted when creating from an empty KG
tags = ["entity"]  # default; could be omitted when creating from an empty KG

graph_store = NebulaGraphStore(
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)
storage_context = StorageContext.from_defaults(graph_store=graph_store)

from llama_index.query_engine import KnowledgeGraphQueryEngine

# NOTE(review): the two imports below repeat imports made earlier in this file.
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore

# Query engine used by the natural-language query tab further down the script.
nl2kg_query_engine = KnowledgeGraphQueryEngine(
    storage_context=storage_context,
    service_context=service_context,
    llm=llm,
    verbose=True,
)
189 | 
190 | 
def cypher_to_all_paths(query):
    """Rewrite a Cypher query so that it returns whole paths.

    Every ``MATCH`` clause gets a path variable (``path_0``, ``path_1``, ...)
    and everything from the ``RETURN`` keyword to the end of the query is
    replaced by ``RETURN path_0, path_1, ...;``. Keywords are recognised
    case-insensitively and only as whole words.

    Args:
        query: Cypher/nGQL query text.

    Returns:
        The rewritten query, or ``query`` unchanged when it contains no
        ``RETURN`` keyword or no ``MATCH`` clause before it (there is nothing
        to rewrite in that case).
    """
    flags = re.I | re.S

    return_keyword = re.search(r"\bRETURN\b", query, flags)
    if return_keyword is None:
        return query

    # Only the text in front of RETURN is kept; the old RETURN clause, however
    # many lines it spans, is dropped in full.
    head = query[: return_keyword.start()]
    match_parts = re.findall(r"\bMATCH\b.+?(?=\bMATCH\b|\Z)", head, flags)
    if not match_parts:
        return query

    path_ids = []
    modified_matches = []
    for i, part in enumerate(match_parts):
        path_id = f"path_{i}"
        path_ids.append(path_id)
        # Name the pattern of this clause: "MATCH (a)..." -> "MATCH path_i = (a)..."
        modified_matches.append(
            re.sub(r"\bMATCH\b\s*", f"MATCH {path_id} = ", part, count=1, flags=re.I)
        )

    matches_string = " ".join(modified_matches)
    return_string = f"RETURN {', '.join(path_ids)};"
    return f"{matches_string}\n{return_string}"
221 | 
222 | 
def result_to_df(result):
    """Convert a query result set into a pandas DataFrame.

    One DataFrame column is built per result column, keyed by the column
    name; each value is unwrapped with its ``cast()`` method.
    """
    import pandas as pd

    names = result.keys()
    table = {
        names[index]: [value.cast() for value in result.column_values(names[index])]
        for index in range(result.col_size())
    }
    return pd.DataFrame(table)
236 | 
237 | 
def render_pd_item(g, item):
    """Add one query-result value to the pyvis network ``g``.

    A ``Node`` becomes a vertex whose tooltip is its merged tag properties.
    A ``Relationship`` becomes a labelled edge, with either endpoint added
    first when it is not in the network yet. A ``PathWrapper`` or a list is
    walked element by element. Any other value is ignored.
    """
    from nebula3.data.DataObject import Node, PathWrapper, Relationship

    if isinstance(item, Node):
        vertex_id = item.get_id().cast()
        tag_names = item.tags()  # list of strings
        merged_props = {}
        for tag_name in tag_names:
            merged_props.update(item.properties(tag_name))
        g.add_node(vertex_id, label=vertex_id, title=str(merged_props))
        return

    if isinstance(item, Relationship):
        source = item.start_vertex_id().cast()
        target = item.end_vertex_id().cast()
        label = item.edge_name()
        edge_props = item.properties()
        # Both endpoints must be in the network before the edge is added.
        for endpoint in (source, target):
            if endpoint not in g.node_ids:
                g.add_node(endpoint)
        g.add_edge(source, target, label=label, title=str(edge_props))
        return

    if isinstance(item, PathWrapper):
        for vertex in item.nodes():
            render_pd_item(g, vertex)
        for relationship in item.relationships():
            render_pd_item(g, relationship)
        return

    if isinstance(item, list):
        for element in item:
            render_pd_item(g, element)
267 | 
268 | 
def create_pyvis_graph(result_df):
    """Build a directed pyvis ``Network`` from every cell of ``result_df``.

    Each cell is passed to ``render_pd_item``; the repulsion layout is
    applied once all cells have been rendered.
    """
    from pyvis.network import Network

    network_options = dict(
        notebook=True,
        directed=True,
        cdn_resources="in_line",
        height="500px",
        width="100%",
    )
    graph = Network(**network_options)

    for _, record in result_df.iterrows():
        for cell in record:
            render_pd_item(graph, cell)

    layout_options = dict(
        node_distance=100,
        central_gravity=0.2,
        spring_length=200,
        spring_strength=0.05,
        damping=0.09,
    )
    graph.repulsion(**layout_options)
    return graph
290 | 
291 | 
def query_nebulagraph(
    query,
    space_name=space_name,
    address=st.secrets["graphd_host"],
    port=9669,
    user=st.secrets["graphd_user"],
    password=st.secrets["graphd_password"],
):
    """Run ``query`` against NebulaGraph and return the result set.

    A session pool is opened for this single call and is always closed again
    before returning, so repeated calls do not accumulate open sessions.

    Args:
        query: Query text to execute.
        space_name: Graph space the session pool is bound to.
        address: Host of the graphd service.
        port: Port of the graphd service.
        user: NebulaGraph user name.
        password: NebulaGraph password.

    Returns:
        Whatever ``SessionPool.execute`` returns for ``query``.
    """
    # NOTE(review): the default port is the literal 9669, while NEBULA_ADDRESS
    # in this file is built from st.secrets["graphd_port"] — confirm the two
    # agree in every deployment.
    from nebula3.Config import SessionPoolConfig
    from nebula3.gclient.net.SessionPool import SessionPool

    config = SessionPoolConfig()
    session_pool = SessionPool(user, password, space_name, [(address, port)])
    session_pool.init(config)
    try:
        return session_pool.execute(query)
    finally:
        # Release the sessions even when execute() raises.
        session_pool.close()
307 | 
308 | 
309 | st.title("利用 LLM 构建、查询知识图谱")
310 | 
311 | (
312 |     tab_code_kg,
313 |     tab_notebook,
314 |     tab_graph_view,
315 |     tab_cypher,
316 |     tab_nl2cypher,
317 |     tab_code_nl2cypher,
318 | ) = st.tabs(
319 |     [
320 |         "代码:构建知识图谱",
321 |         "完整 Notebook",
322 |         "图谱可视化",
323 |         "Cypher 查询",
324 |         "自然语言查询",
325 |         "代码:NL2Cypher",
326 |     ]
327 | )
328 | 
329 | with tab_code_kg:
330 |     st.write("> 利用 LLM，几行代码构建知识图谱")
331 |     st.code(body=CODE_BUILD_KG, language="python")
332 | 
333 | with tab_notebook:
334 |     st.write("> 完整 Demo 过程 Notebook")
335 |     st.write(
336 |         """
337 | 
338 | 这个 Notebook 展示了如何利用 LLM 从不同类型的信息源（以维基百科为例）中抽取知识三元组，并存储到图数据库 NebulaGraph 中。
339 | 
340 | 本 Demo 中，我们先抽取了维基百科中关于《银河护卫队3》的信息，然后利用 LLM 生成的知识三元组，构建了一个图谱。
341 | 然后利用 Cypher 查询图谱，最后利用 LlamaIndex 和 Langchain 中的 NL2NebulaCypher，实现了自然语言查询图谱的功能。
342 | 
343 | 您可以点击其他标签亲自试玩图谱的可视化、Cypher 查询、自然语言查询（NL2NebulaCypher）等功能。
344 | 
345 |              """
346 |     )
347 |     # link to download notebook
348 |     st.markdown(
349 |         """
350 | 这里可以[下载](https://www.siwei.io/demo-dumps/kg-llm/KG_Building.ipynb) 完整的 Notebook。
351 | """
352 |     )
353 | 
354 |     components.iframe(
355 |         src="https://www.siwei.io/demo-dumps/kg-llm/KG_Building.html",
356 |         height=2000,
357 |         width=800,
358 |         scrolling=True,
359 |     )
360 | 
361 | with tab_graph_view:
362 |     st.write(
363 |         "> 图谱的可视化部分采样，知识来源[银河护卫队3](https://en.wikipedia.org/wiki/Guardians_of_the_Galaxy_Vol._3)"
364 |     )
365 | 
366 |     components.iframe(
367 |         src="https://www.siwei.io/demo-dumps/kg-llm/nebulagraph_draw_sample.html",
368 |         height=500,
369 |         scrolling=True,
370 |     )
371 | 
# Cypher tab: run a user-supplied query, then show it as a table and a graph.
with tab_cypher:
    st.write("> Cypher 查询图库")
    query_string = st.text_input(
        label="输入查询语句", value="MATCH ()-[e]->() RETURN e LIMIT 25"
    )
    if st.button("> 执行"):
        if not query_string.strip():
            # Nothing to send to the server.
            st.warning("请输入查询语句")
        else:
            # run query
            result = query_nebulagraph(query_string)

            if not result.is_succeeded():
                # Surface the server error instead of showing an empty table
                # and an empty graph.
                st.error(f"查询执行失败: {result.error_msg()}")
            else:
                # convert to pandas dataframe
                result_df = result_to_df(result)

                # display pd dataframe
                st.dataframe(result_df)

                # create pyvis graph
                g = create_pyvis_graph(result_df)

                # render with random file name (``random`` is imported at the
                # top of this file)
                graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html")

                components.html(graph_html, height=500, scrolling=True)
396 | 
# Natural-language tab: let the LLM generate a graph query, show the answer
# and the generated query, then visualise the matched paths.
with tab_nl2cypher:
    st.write("> 使用自然语言查询图库")
    nl_query_string = st.text_input(
        label="输入自然语言问题", value="Tell me about Peter Quill?"
    )
    if st.button("生成 Cypher 查询语句，并执行"):
        response = nl2kg_query_engine.query(nl_query_string)
        answer = str(response)
        st.write(f"*答案*: {answer}")

        # The generated query sits in the first metadata entry; tolerate a
        # response that carries no metadata or no generated query.
        metadata = response.metadata or {}
        first_source = next(iter(metadata.values()), None) or {}
        graph_query = first_source.get("graph_store_query")

        if not graph_query:
            st.warning("未能获取 LLM 生成的图查询语句")
        else:
            # Line-break the query before WHERE / RETURN for display.
            graph_query = graph_query.replace("WHERE", "\n  WHERE").replace(
                "RETURN", "\nRETURN"
            )
            st.markdown(
                f"""
## 利用 LLM 生成的图查询语句
```cypher
{graph_query}
```
"""
            )
            st.write("## 结果可视化")
            render_query = cypher_to_all_paths(graph_query)
            result = query_nebulagraph(render_query)

            if not result.is_succeeded():
                # The rewritten query can be rejected by the server; show why
                # instead of drawing an empty graph.
                st.error(f"查询执行失败: {result.error_msg()}")
            else:
                result_df = result_to_df(result)

                # create pyvis graph
                g = create_pyvis_graph(result_df)

                # render with random file name
                graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html")

                components.html(graph_html, height=500, scrolling=True)
430 | 
431 | 
432 | with tab_code_nl2cypher:
433 |     st.write("利用 Langchain 或者 Llama Index，我们可以只用几行代码就实现自然语言查询图谱（NL2NebulaCypher）")
434 | 
435 |     tab_langchain, tab_llamaindex = st.tabs(["Langchain", "Llama Index"])
436 |     with tab_langchain:
437 |         st.code(body=CODE_NL2CYPHER_LANGCHAIN, language="python")
438 |     with tab_llamaindex:
439 |         st.code(body=CODE_NL2CYPHER_LLAMAINDEX, language="python")
440 | 
441 |     st.markdown(
442 |         """
443 | 
444 | ## 参考文档
445 |                 
446 | - [Langchain: NebulaGraphQAChain](https://python.langchain.com/docs/modules/chains/additional/graph_nebula_qa)
447 | - [Llama Index: KnowledgeGraphQueryEngine](https://gpt-index.readthedocs.io/en/latest/examples/query_engine/knowledge_graph_query_engine.html)
448 | """
449 |     )
450 | 


--------------------------------------------------------------------------------
/kg_build_and_query.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | 
  3 | sys.stdout.reconfigure(encoding="utf-8")
  4 | sys.stdin.reconfigure(encoding="utf-8")
  5 | 
  6 | import streamlit as st
  7 | import streamlit.components.v1 as components
  8 | 
  9 | import re
 10 | 
 11 | import random
 12 | 
# Sample source displayed with st.code in the "Code: Build KG" tab.
# It is only shown to the user as text; this module never executes it.
CODE_BUILD_KG = """

# Prepare for GraphStore

os.environ['NEBULA_USER'] = "root"
os.environ['NEBULA_PASSWORD'] = "nebula" # default password
os.environ['NEBULA_ADDRESS'] = "127.0.0.1:9669" # assumed we have NebulaGraph installed locally

space_name = "guardians"
edge_types, rel_prop_names = ["relationship"], ["relationship"] # default, could be omit if create from an empty kg
tags = ["entity"] # default, could be omit if create from an empty kg

graph_store = NebulaGraphStore(space_name=space_name, edge_types=edge_types, rel_prop_names=rel_prop_names, tags=tags)
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# Download and Preprocess Data

from llama_index import download_loader

WikipediaReader = download_loader("WikipediaReader")

loader = WikipediaReader()

documents = loader.load_data(pages=['Guardians of the Galaxy Vol. 3'], auto_suggest=False)

# Build Knowledge Graph

kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=10,
    service_context=service_context,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    include_embeddings=True,
)

"""
 53 | 
# Langchain NL2Cypher sample displayed with st.code in the "Code: NL2Cypher" tab.
# Display text only; it is not executed by this module.
CODE_NL2CYPHER_LANGCHAIN = """
## Langchain
# Doc: https://python.langchain.com/docs/modules/chains/additional/graph_nebula_qa

from langchain.chat_models import ChatOpenAI
from langchain.chains import NebulaGraphQAChain
from langchain.graphs import NebulaGraph

graph = NebulaGraph(
    space=space_name,
    username="root",
    password="nebula",
    address="127.0.0.1",
    port=9669,
    session_pool_size=30,
)

chain = NebulaGraphQAChain.from_llm(
    llm, graph=graph, verbose=True
)

chain.run(
    "Tell me about Peter Quill?",
)
"""
 79 | 
# Llama Index NL2Cypher sample displayed with st.code in the "Code: NL2Cypher" tab.
# Display text only; the live engine is built separately further down the module.
CODE_NL2CYPHER_LLAMAINDEX = """

## Llama Index
# Doc: https://gpt-index.readthedocs.io/en/latest/examples/query_engine/knowledge_graph_query_engine.html

from llama_index.query_engine import KnowledgeGraphQueryEngine

from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore

nl2kg_query_engine = KnowledgeGraphQueryEngine(
    storage_context=storage_context,
    service_context=service_context,
    llm=llm,
    verbose=True,
)

response = nl2kg_query_engine.query(
    "Tell me about Peter Quill?",
)
"""
101 | 
102 | 
103 | import os
104 | import json
105 | import openai
106 | from llama_index.llms import AzureOpenAI
107 | from langchain.embeddings import OpenAIEmbeddings
108 | from llama_index import LangchainEmbedding
109 | from llama_index import (
110 |     VectorStoreIndex,
111 |     SimpleDirectoryReader,
112 |     KnowledgeGraphIndex,
113 |     LLMPredictor,
114 |     ServiceContext,
115 | )
116 | 
117 | from llama_index.storage.storage_context import StorageContext
118 | from llama_index.graph_stores import NebulaGraphStore
119 | 
120 | import logging
121 | import sys
122 | 
# Send log records at INFO level and above to stdout.
logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
127 | 
# Configure the openai module for Azure; endpoint and key come from Streamlit secrets.
openai.api_type = "azure"
openai.api_base = st.secrets["OPENAI_API_BASE"]
# openai.api_version = "2022-12-01" azure gpt-3
openai.api_version = "2023-05-15"  # azure gpt-3.5 turbo
openai.api_key = st.secrets["OPENAI_API_KEY"]

# Chat completion model, addressed by its Azure deployment name.
llm = AzureOpenAI(
    engine=st.secrets["DEPLOYMENT_NAME"],
    temperature=0,
    model="gpt-35-turbo",
)
llm_predictor = LLMPredictor(llm=llm)

# You need to deploy your own embedding model as well as your own chat completion model
# The embedding client reuses the openai.* settings assigned above.
embedding_llm = LangchainEmbedding(
    OpenAIEmbeddings(
        model="text-embedding-ada-002",
        deployment=st.secrets["EMBEDDING_DEPLOYMENT_NAME"],
        openai_api_key=openai.api_key,
        openai_api_base=openai.api_base,
        openai_api_type=openai.api_type,
        openai_api_version=openai.api_version,
    ),
    embed_batch_size=1,
)

# Bundle the LLM predictor and embedding model for the LlamaIndex components below.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
)
# Export the NebulaGraph credentials and address as environment variables
# (presumably read by NebulaGraphStore, which receives no credentials directly — TODO confirm).
os.environ["NEBULA_USER"] = st.secrets["graphd_user"]
os.environ["NEBULA_PASSWORD"] = st.secrets["graphd_password"]
os.environ[
    "NEBULA_ADDRESS"
] = f"{st.secrets['graphd_host']}:{st.secrets['graphd_port']}"

# Graph space and schema names handed to the graph store.
space_name = "guardians"
edge_types, rel_prop_names = ["relationship"], [
    "relationship"
]  # default, could be omit if create from an empty kg
tags = ["entity"]  # default, could be omit if create from an empty kg

graph_store = NebulaGraphStore(
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)
storage_context = StorageContext.from_defaults(graph_store=graph_store)

from llama_index.query_engine import KnowledgeGraphQueryEngine

# NOTE: the two imports below repeat names already imported earlier in this module.
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore

# Engine used by the "Natural Language to Cypher" tab to answer questions.
nl2kg_query_engine = KnowledgeGraphQueryEngine(
    storage_context=storage_context,
    service_context=service_context,
    llm=llm,
    verbose=True,
)
189 | 
190 | 
def cypher_to_all_paths(query):
    """Rewrite a Cypher/nGQL query so that it returns whole paths.

    Every ``MATCH`` clause gets a path variable (``path_0``, ``path_1``, ...),
    the first ``RETURN ...`` line is removed, and a new
    ``RETURN path_0, path_1, ...;`` line is appended.

    Keywords are recognised case-insensitively and the original spelling of
    each ``MATCH`` keyword is kept. A query without a ``RETURN`` clause is
    still rewritten. A query without any ``MATCH`` clause is returned
    unchanged, because there is no pattern to bind a path variable to.

    Args:
        query: The query text to rewrite.

    Returns:
        The rewritten query string, or ``query`` itself when it contains no
        ``MATCH`` clause.
    """
    # Each part runs from one MATCH keyword up to the next MATCH or the end.
    match_parts = re.findall(r"(MATCH .+?(?=MATCH|$))", query, re.I | re.S)
    if not match_parts:
        return query

    path_ids = [f"path_{i}" for i in range(len(match_parts))]

    # Every part starts with the keyword plus one space (in any letter case),
    # so the path variable is spliced in by position, not by exact text.
    keyword_len = len("MATCH ")
    modified_matches = [
        f"{part[:keyword_len]}{path_id} = {part[keyword_len:]}"
        for path_id, part in zip(path_ids, match_parts)
    ]
    matches_string = " ".join(modified_matches)

    # Drop the old RETURN line, if there is one ('.' stops at a newline).
    return_clause = re.search(r"RETURN .+", query, re.I)
    if return_clause is not None:
        matches_string = matches_string.replace(return_clause.group(), "")

    return_string = f"RETURN {', '.join(path_ids)};"
    return f"{matches_string}\n{return_string}"
221 | 
222 | 
# Convert a query result into a pandas DataFrame
def result_to_df(result):
    """Build a DataFrame with one column per column of ``result``.

    ``result`` has to provide ``keys()``, ``col_size()`` and
    ``column_values(name)``; each value is converted by calling its
    ``cast()`` method.
    """
    import pandas as pd

    names = result.keys()
    table = {
        names[idx]: [value.cast() for value in result.column_values(names[idx])]
        for idx in range(result.col_size())
    }
    return pd.DataFrame(table)
236 | 
237 | 
def render_pd_item(g, item):
    """Add one result value to the graph ``g``.

    Handles NebulaGraph ``Node``, ``Relationship`` and ``PathWrapper`` values
    as well as lists of them (recursively). Any other value is ignored.
    """
    from nebula3.data.DataObject import Node, PathWrapper, Relationship

    if isinstance(item, Node):
        vid = item.get_id().cast()
        # Merge the properties of every tag into a single dict for the tooltip.
        merged = {}
        for tag_name in item.tags():
            merged.update(item.properties(tag_name))
        g.add_node(vid, label=vid, title=str(merged))
        return

    if isinstance(item, Relationship):
        src = item.start_vertex_id().cast()
        dst = item.end_vertex_id().cast()
        name = item.edge_name()
        edge_props = item.properties()
        # Make sure both endpoints exist in the graph before adding the edge.
        for endpoint in (src, dst):
            if endpoint not in g.node_ids:
                g.add_node(endpoint)
        g.add_edge(src, dst, label=name, title=str(edge_props))
        return

    if isinstance(item, PathWrapper):
        # Nodes first, then the relationships that connect them.
        for element in item.nodes():
            render_pd_item(g, element)
        for element in item.relationships():
            render_pd_item(g, element)
        return

    if isinstance(item, list):
        for element in item:
            render_pd_item(g, element)
267 | 
268 | 
def create_pyvis_graph(result_df):
    """Create a pyvis ``Network`` and render every cell of ``result_df`` into it.

    Each cell of each row is passed to ``render_pd_item``. Repulsion layout
    settings are applied afterwards and the network is returned.
    """
    from pyvis.network import Network

    network_options = dict(
        notebook=True,
        directed=True,
        cdn_resources="in_line",
        height="500px",
        width="100%",
    )
    graph = Network(**network_options)

    for _, record in result_df.iterrows():
        for cell in record:
            render_pd_item(graph, cell)

    layout = dict(
        node_distance=100,
        central_gravity=0.2,
        spring_length=200,
        spring_strength=0.05,
        damping=0.09,
    )
    graph.repulsion(**layout)
    return graph
290 | 
291 | 
def query_nebulagraph(
    query,
    space_name=space_name,
    address=st.secrets["graphd_host"],
    port=9669,
    user=st.secrets["graphd_user"],
    password=st.secrets["graphd_password"],
):
    """Execute ``query`` against NebulaGraph and return the result.

    A session pool is opened for this one call and always closed again, so
    repeated calls do not accumulate open sessions.

    Args:
        query: Query text to execute.
        space_name: Graph space the sessions are bound to.
        address: Host of the graphd service.
        port: Port of the graphd service.
        user: User name for authentication.
        password: Password for authentication.

    Returns:
        Whatever ``SessionPool.execute`` returns for ``query``.
    """
    # NOTE(review): the default port is hard-coded to 9669, while NEBULA_ADDRESS
    # is built from st.secrets["graphd_port"]; confirm the two cannot diverge.
    from nebula3.Config import SessionPoolConfig
    from nebula3.gclient.net.SessionPool import SessionPool

    config = SessionPoolConfig()
    session_pool = SessionPool(user, password, space_name, [(address, port)])
    try:
        session_pool.init(config)
        return session_pool.execute(query)
    finally:
        # Release the sessions even when init() or execute() raises.
        session_pool.close()
307 | 
308 | 
# Page title, followed by the six top-level tabs of the demo.
st.title("Demo: Knowledge Graph Build and Query with LLM")

# The unpacked names follow the order of the labels passed to st.tabs.
(
    tab_code_kg,
    tab_notebook,
    tab_graph_view,
    tab_cypher,
    tab_nl2cypher,
    tab_code_nl2cypher,
) = st.tabs(
    [
        "Code: Build KG",
        "Full Notebook",
        "Graph View",
        "Query",
        "Natural Language to Cypher",
        "Code: NL2Cypher",
    ]
)
328 | 
# "Code: Build KG" tab: two intro lines and the KG-building code sample.
with tab_code_kg:
    st.write(
        "With a few lines of code, we can build a knowledge graph with LLM, LlamaIndex and NebulaGraph."
    )
    st.write(
        "See full notebook for more details and try Graph Visualizations, Query, and Natural Language to Cypher by clicking on the tabs on the right."
    )
    st.code(body=CODE_BUILD_KG, language="python")
337 | 
# "Full Notebook" tab: summary, download link, and the notebook embedded in an iframe.
with tab_notebook:
    st.write("> Full Notebook")
    st.markdown(
        """

This is the full notebook to demonstrate how to:

- Extract from data sources and build a knowledge graph with LLM and Llama Index, NebulaGraph in 3 lines of code
- Query the Knowledge Graph with nGQL and visualize the graph
- Query the knowledge graph with natural language in 1 line of code(both Langchain and Llama Index)
                """
    )
    # link to download notebook
    st.markdown(
        """
[Download](https://www.siwei.io/demo-dumps/kg-llm/KG_Building.ipynb) the notebook.
"""
    )

    # Embed the HTML version of the notebook hosted at the URL below.
    components.iframe(
        src="https://www.siwei.io/demo-dumps/kg-llm/KG_Building.html",
        height=2000,
        width=800,
        scrolling=True,
    )
363 | 
# "Graph View" tab: embeds an externally hosted sample page in an iframe.
with tab_graph_view:
    st.write(
        "> Sub-Graph View of the Knowledge Graph about [Guardians of the Galaxy Vol. 3](https://en.wikipedia.org/wiki/Guardians_of_the_Galaxy_Vol._3)"
    )
    components.iframe(
        src="https://www.siwei.io/demo-dumps/kg-llm/nebulagraph_draw_sample.html",
        height=500,
        scrolling=True,
    )
373 | 
# "Query" tab: runs the query typed by the user and shows the result as a
# table and as a rendered graph.
with tab_cypher:
    st.write("> Query Knowledge Graph in nGQL")
    query_string = st.text_input(
        label="Enter nGQL query string", value="MATCH ()-[e]->() RETURN e LIMIT 25"
    )
    # Nothing is executed until the button is pressed.
    if st.button("> execute"):
        # run query
        result = query_nebulagraph(query_string)

        # convert to pandas dataframe
        result_df = result_to_df(result)

        # display pd dataframe
        st.dataframe(result_df)

        # create pyvis graph
        g = create_pyvis_graph(result_df)

        # render with random file name
        # NOTE: random is already imported at module level; this import is redundant.
        import random

        graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html")

        # Embed the generated HTML string into the page.
        components.html(graph_html, height=500, scrolling=True)
398 | 
# "Natural Language to Cypher" tab: answers a question through the NL2Cypher
# engine, shows the generated query, and renders the paths it matches.
with tab_nl2cypher:
    st.write("> Natural Language to Cypher")
    nl_query_string = st.text_input(
        label="Enter natural language query string", value="Tell me about Peter Quill?"
    )
    if st.button("Ask KG"):
        response = nl2kg_query_engine.query(nl_query_string)
        # Read the generated query from the first entry of the response metadata.
        graph_query = list(response.metadata.values())[0]["graph_store_query"]
        # Put WHERE and RETURN on their own lines for display.
        graph_query = graph_query.replace("WHERE", "\n  WHERE").replace(
            "RETURN", "\nRETURN"
        )
        answer = str(response)
        st.write(f"*Answer*: {answer}")
        st.markdown(
            f"""
## Generated NebulaGraph Cypher Query
```cypher
{graph_query}
```
"""
        )
        st.write("## Rendered Graph")
        # Rewrite the query to return whole paths, then run it for rendering.
        render_query = cypher_to_all_paths(graph_query)
        result = query_nebulagraph(render_query)
        result_df = result_to_df(result)

        # create pyvis graph
        g = create_pyvis_graph(result_df)

        # render with random file name
        graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html")

        components.html(graph_html, height=500, scrolling=True)
432 | 
433 | 
# "Code: NL2Cypher" tab: shows the Langchain and Llama Index code samples
# side by side in sub-tabs, followed by reference links.
with tab_code_nl2cypher:
    st.write(
        "> Natural Language to NebulaGraph Cypher Code with Langchain and Llama Index"
    )
    tab_langchain, tab_llamaindex = st.tabs(["Langchain", "Llama Index"])
    with tab_langchain:
        st.code(body=CODE_NL2CYPHER_LANGCHAIN, language="python")
    with tab_llamaindex:
        st.code(body=CODE_NL2CYPHER_LLAMAINDEX, language="python")

    # Reference links rendered as a markdown list.
    st.markdown(
        """

## References
                
- [Langchain: NebulaGraphQAChain](https://python.langchain.com/docs/modules/chains/additional/graph_nebula_qa)
- [Llama Index: KnowledgeGraphQueryEngine](https://gpt-index.readthedocs.io/en/latest/examples/query_engine/knowledge_graph_query_engine.html)
"""
    )
453 | 


--------------------------------------------------------------------------------
/kg_retrieval_arguments_generation.py:
--------------------------------------------------------------------------------
import sys

# Switch the standard output and input streams to UTF-8 before anything else runs.
sys.stdout.reconfigure(encoding="utf-8")
sys.stdin.reconfigure(encoding="utf-8")
  5 | 
  6 | import streamlit as st
  7 | import streamlit.components.v1 as components
  8 | 
  9 | import re
 10 | 
 11 | import random
 12 | 
# Sample source displayed with st.code in the "Code: Graph RAG" tab.
# Display text only; the live index and engine are created further down the module.
CODE_KG_RAG = """

# Build Knowledge Graph with KnowledgeGraphIndex 

kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=10,
    service_context=service_context,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    include_embeddings=True,
)

# Create a Graph RAG Query Engine

kg_rag_query_engine = kg_index.as_query_engine(
    include_text=False,
    retriever_mode="keyword",
    response_mode="tree_summarize",
)

"""
 38 | 
 39 | 
 40 | import os
 41 | import json
 42 | import openai
 43 | from llama_index.llms import AzureOpenAI
 44 | from langchain.embeddings import OpenAIEmbeddings
 45 | from llama_index import LangchainEmbedding
 46 | from llama_index import (
 47 |     VectorStoreIndex,
 48 |     SimpleDirectoryReader,
 49 |     KnowledgeGraphIndex,
 50 |     LLMPredictor,
 51 |     ServiceContext,
 52 | )
 53 | 
 54 | from llama_index.storage.storage_context import StorageContext
 55 | from llama_index.graph_stores import NebulaGraphStore
 56 | 
 57 | import logging
 58 | import sys
 59 | 
# Send log records at INFO level and above to stdout.
logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
 64 | 
# Configure the openai module for Azure; endpoint and key come from Streamlit secrets.
openai.api_type = "azure"
openai.api_base = st.secrets["OPENAI_API_BASE"]
# openai.api_version = "2022-12-01" azure gpt-3
openai.api_version = "2023-05-15"  # azure gpt-3.5 turbo
openai.api_key = st.secrets["OPENAI_API_KEY"]

# Chat completion model, addressed by its Azure deployment name.
llm = AzureOpenAI(
    engine=st.secrets["DEPLOYMENT_NAME"],
    temperature=0,
    model="gpt-35-turbo",
)
llm_predictor = LLMPredictor(llm=llm)

# You need to deploy your own embedding model as well as your own chat completion model
# NOTE(review): the embedding client pins API version "2022-12-01", which differs
# from openai.api_version set above — confirm this is intentional.
embedding_llm = LangchainEmbedding(
    OpenAIEmbeddings(
        model="text-embedding-ada-002",
        deployment=st.secrets["EMBEDDING_DEPLOYMENT_NAME"],
        openai_api_key=openai.api_key,
        openai_api_base=openai.api_base,
        openai_api_type=openai.api_type,
        openai_api_version="2022-12-01",
    ),
    embed_batch_size=1,
)

# Bundle the LLM predictor and embedding model for the LlamaIndex components below.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
)
# Export the NebulaGraph credentials and address as environment variables
# (presumably read by NebulaGraphStore, which receives no credentials directly — TODO confirm).
os.environ["NEBULA_USER"] = st.secrets["graphd_user"]
os.environ["NEBULA_PASSWORD"] = st.secrets["graphd_password"]
os.environ[
    "NEBULA_ADDRESS"
] = f"{st.secrets['graphd_host']}:{st.secrets['graphd_port']}"

# Graph space and schema names handed to the graph store.
space_name = "guardians"
edge_types, rel_prop_names = ["relationship"], [
    "relationship"
]  # default, could be omit if create from an empty kg
tags = ["entity"]  # default, could be omit if create from an empty kg

graph_store = NebulaGraphStore(
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)

from llama_index import load_index_from_storage

# Load the knowledge-graph index persisted under ./storage_graph, backed by the
# NebulaGraph store created above.
storage_context = StorageContext.from_defaults(
    persist_dir="./storage_graph", graph_store=graph_store
)
kg_index = load_index_from_storage(
    storage_context=storage_context,
    service_context=service_context,
    max_triplets_per_chunk=10,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    include_embeddings=True,
)

# Load the vector index persisted under ./storage_vector.
storage_context_vector = StorageContext.from_defaults(persist_dir="./storage_vector")
vector_index = load_index_from_storage(
    service_context=service_context, storage_context=storage_context_vector
)
134 | 
from llama_index.query_engine import KnowledgeGraphQueryEngine

# NOTE: the two imports below repeat names already imported earlier in this module.
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore

# NL2Cypher engine: used by the "NL2Cypher vs Graph RAG" demo tab.
nl2kg_query_engine = KnowledgeGraphQueryEngine(
    storage_context=storage_context,
    service_context=service_context,
    llm=llm,
    verbose=True,
)

# Graph RAG engine built from the knowledge-graph index.
kg_rag_query_engine = kg_index.as_query_engine(
    include_text=False,
    retriever_mode="keyword",
    response_mode="tree_summarize",
)

# Plain vector RAG engine with default settings.
vector_rag_query_engine = vector_index.as_query_engine()
154 | 
155 | # graph + vector rag
156 | # import QueryBundle
157 | from llama_index import QueryBundle
158 | 
159 | # import NodeWithScore
160 | from llama_index.schema import NodeWithScore
161 | 
162 | # Retrievers
163 | from llama_index.retrievers import BaseRetriever, VectorIndexRetriever, KGTableRetriever
164 | 
165 | from typing import List
166 | 
167 | 
class CustomRetriever(BaseRetriever):
    """Custom retriever that performs both Vector search and Knowledge Graph search"""

    def __init__(
        self,
        vector_retriever: VectorIndexRetriever,
        kg_retriever: KGTableRetriever,
        mode: str = "OR",
    ) -> None:
        """Store the two retrievers and the combination mode.

        Args:
            vector_retriever: Retriever queried for vector-search hits.
            kg_retriever: Retriever queried for knowledge-graph hits.
            mode: ``"OR"`` keeps nodes found by either retriever, ``"AND"``
                keeps only nodes found by both.

        Raises:
            ValueError: If ``mode`` is neither ``"AND"`` nor ``"OR"``.
        """

        self._vector_retriever = vector_retriever
        self._kg_retriever = kg_retriever
        if mode not in ("AND", "OR"):
            raise ValueError("Invalid mode.")
        self._mode = mode

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve nodes given query.

        Both retrievers are queried and their results are combined by node id
        according to the configured mode. When a node id is returned by both
        retrievers, the knowledge-graph entry is the one kept.

        The result order is deterministic: vector hits in retrieval order,
        followed by knowledge-graph hits not already seen.
        """

        vector_nodes = self._vector_retriever.retrieve(query_bundle)
        kg_nodes = self._kg_retriever.retrieve(query_bundle)

        vector_ids = {n.node.node_id for n in vector_nodes}
        kg_ids = {n.node.node_id for n in kg_nodes}

        combined_dict = {n.node.node_id: n for n in vector_nodes}
        combined_dict.update({n.node.node_id: n for n in kg_nodes})

        if self._mode == "AND":
            retrieve_ids = vector_ids.intersection(kg_ids)
        else:
            retrieve_ids = vector_ids.union(kg_ids)

        # Walk the insertion-ordered dict instead of the id set: iterating a
        # set of strings gives an order that can change from run to run.
        return [
            node for node_id, node in combined_dict.items() if node_id in retrieve_ids
        ]
204 | 
205 | 
from llama_index import get_response_synthesizer
from llama_index.query_engine import RetrieverQueryEngine

# create custom retriever
# It combines a vector retriever with a keyword-mode knowledge-graph retriever;
# CustomRetriever's default mode ("OR") is used.
vector_retriever = VectorIndexRetriever(index=vector_index)
kg_retriever = KGTableRetriever(
    index=kg_index, retriever_mode="keyword", include_text=False
)
custom_retriever = CustomRetriever(vector_retriever, kg_retriever)

# create response synthesizer
response_synthesizer = get_response_synthesizer(
    service_context=service_context,
    response_mode="tree_summarize",
)

# Graph + Vector RAG engine used by the "Vector vs Graph + Vector" demo tab.
graph_vector_rag_query_engine = RetrieverQueryEngine(
    retriever=custom_retriever,
    response_synthesizer=response_synthesizer,
)
226 | 
227 | 
def cypher_to_all_paths(query):
    """Rewrite a Cypher/nGQL query so that it returns whole paths.

    Every ``MATCH`` clause gets a path variable (``path_0``, ``path_1``, ...),
    the first ``RETURN ...`` line is removed, and a new
    ``RETURN path_0, path_1, ...;`` line is appended.

    Keywords are recognised case-insensitively and the original spelling of
    each ``MATCH`` keyword is kept. A query without a ``RETURN`` clause is
    still rewritten. A query without any ``MATCH`` clause is returned
    unchanged, because there is no pattern to bind a path variable to.

    Args:
        query: The query text to rewrite.

    Returns:
        The rewritten query string, or ``query`` itself when it contains no
        ``MATCH`` clause.
    """
    # Each part runs from one MATCH keyword up to the next MATCH or the end.
    match_parts = re.findall(r"(MATCH .+?(?=MATCH|$))", query, re.I | re.S)
    if not match_parts:
        return query

    path_ids = [f"path_{i}" for i in range(len(match_parts))]

    # Every part starts with the keyword plus one space (in any letter case),
    # so the path variable is spliced in by position, not by exact text.
    keyword_len = len("MATCH ")
    modified_matches = [
        f"{part[:keyword_len]}{path_id} = {part[keyword_len:]}"
        for path_id, part in zip(path_ids, match_parts)
    ]
    matches_string = " ".join(modified_matches)

    # Drop the old RETURN line, if there is one ('.' stops at a newline).
    return_clause = re.search(r"RETURN .+", query, re.I)
    if return_clause is not None:
        matches_string = matches_string.replace(return_clause.group(), "")

    return_string = f"RETURN {', '.join(path_ids)};"
    return f"{matches_string}\n{return_string}"
258 | 
259 | 
# Convert a query result into a pandas DataFrame
def result_to_df(result):
    """Build a DataFrame with one column per column of ``result``.

    ``result`` has to provide ``keys()``, ``col_size()`` and
    ``column_values(name)``; each value is converted by calling its
    ``cast()`` method.
    """
    import pandas as pd

    names = result.keys()
    table = {
        names[idx]: [value.cast() for value in result.column_values(names[idx])]
        for idx in range(result.col_size())
    }
    return pd.DataFrame(table)
273 | 
274 | 
def render_pd_item(g, item):
    """Add one result value to the graph ``g``.

    Handles NebulaGraph ``Node``, ``Relationship`` and ``PathWrapper`` values
    as well as lists of them (recursively). Any other value is ignored.
    """
    from nebula3.data.DataObject import Node, PathWrapper, Relationship

    if isinstance(item, Node):
        vid = item.get_id().cast()
        # Merge the properties of every tag into a single dict for the tooltip.
        merged = {}
        for tag_name in item.tags():
            merged.update(item.properties(tag_name))
        g.add_node(vid, label=vid, title=str(merged))
        return

    if isinstance(item, Relationship):
        src = item.start_vertex_id().cast()
        dst = item.end_vertex_id().cast()
        name = item.edge_name()
        edge_props = item.properties()
        # Make sure both endpoints exist in the graph before adding the edge.
        for endpoint in (src, dst):
            if endpoint not in g.node_ids:
                g.add_node(endpoint)
        g.add_edge(src, dst, label=name, title=str(edge_props))
        return

    if isinstance(item, PathWrapper):
        # Nodes first, then the relationships that connect them.
        for element in item.nodes():
            render_pd_item(g, element)
        for element in item.relationships():
            render_pd_item(g, element)
        return

    if isinstance(item, list):
        for element in item:
            render_pd_item(g, element)
304 | 
305 | 
def create_pyvis_graph(result_df):
    """Create a pyvis ``Network`` and render every cell of ``result_df`` into it.

    Each cell of each row is passed to ``render_pd_item``. Repulsion layout
    settings are applied afterwards and the network is returned.
    """
    from pyvis.network import Network

    network_options = dict(
        notebook=True,
        directed=True,
        cdn_resources="in_line",
        height="500px",
        width="100%",
    )
    graph = Network(**network_options)

    for _, record in result_df.iterrows():
        for cell in record:
            render_pd_item(graph, cell)

    layout = dict(
        node_distance=100,
        central_gravity=0.2,
        spring_length=200,
        spring_strength=0.05,
        damping=0.09,
    )
    graph.repulsion(**layout)
    return graph
327 | 
328 | 
def query_nebulagraph(
    query,
    space_name=space_name,
    address=st.secrets["graphd_host"],
    port=9669,
    user=st.secrets["graphd_user"],
    password=st.secrets["graphd_password"],
):
    """Execute ``query`` against NebulaGraph and return the result.

    A session pool is opened for this one call and always closed again, so
    repeated calls do not accumulate open sessions.

    Args:
        query: Query text to execute.
        space_name: Graph space the sessions are bound to.
        address: Host of the graphd service.
        port: Port of the graphd service.
        user: User name for authentication.
        password: Password for authentication.

    Returns:
        Whatever ``SessionPool.execute`` returns for ``query``.
    """
    # NOTE(review): the default port is hard-coded to 9669, while NEBULA_ADDRESS
    # is built from st.secrets["graphd_port"]; confirm the two cannot diverge.
    from nebula3.Config import SessionPoolConfig
    from nebula3.gclient.net.SessionPool import SessionPool

    config = SessionPoolConfig()
    session_pool = SessionPool(user, password, space_name, [(address, port)])
    try:
        session_pool.init(config)
        return session_pool.execute(query)
    finally:
        # Release the sessions even when init() or execute() raises.
        session_pool.close()
344 | 
345 | 
# Page title, followed by the four top-level tabs of the demo.
st.title("Graph RAG vs RAG vs NL2Cypher")

# The unpacked names follow the order of the labels passed to st.tabs.
(
    tab_code_rag,
    tab_notebook,
    tab_NL2Cypher_vs_GraphRAG,
    tab_Vector_vs_Graph_Vector,
) = st.tabs(
    [
        "Code: Graph RAG",
        "Full Notebook",
        "Demo: NL2Cypher vs Graph RAG",
        "Demo: Vector vs Graph + Vector",
    ]
)
361 | 
362 | 
# "Code: Graph RAG" tab: two intro lines and the Graph RAG code sample.
with tab_code_rag:
    st.write(
        "To Create LLM Apps, we could leverage Knowledge Graph in different approaches: **NL2Cypher**, **Graph RAG** and **Graph + Vector RAG**, this Notebook demonstrates the know-how and the comparison between the different approaches."
    )
    st.write(
        "See full notebook for more details and try different approaches online demo on corresponding tabs."
    )
    st.code(body=CODE_KG_RAG, language="python")
371 | 
# "Full Notebook" tab: summary, download link, and the notebook embedded in an iframe.
with tab_notebook:
    st.write("> Full Notebook")
    st.markdown(
        """

This is the full notebook to demonstrate how to:

- Extract from data sources and build a knowledge graph with LLM and Llama Index, NebulaGraph in 3 lines of code
- QA with NL2Cypher, 3 lines of code
- QA with Graph RAG, 3 lines of code
- QA with Graph + Vector RAG
- Compare the performance of different approaches
        """
    )
    # link to download notebook
    st.markdown(
        """
[Download](https://www.siwei.io/demo-dumps/graph-rag/GraphRAG.ipynb) the notebook.
"""
    )

    # Embed the HTML version of the notebook hosted at the URL below.
    components.iframe(
        src="https://www.siwei.io/demo-dumps/graph-rag/GraphRAG.html",
        height=2000,
        width=1000,
        scrolling=True,
    )
399 | 
400 | 
# "Demo: NL2Cypher vs Graph RAG" tab: answers one question with both engines,
# shows each answer with its graph in its own column, then asks the LLM to
# compare the two answers.
with tab_NL2Cypher_vs_GraphRAG:
    st.write("> NL2Cypher vs Graph RAG")

    query_string = st.text_input(
        label="Enter natural language query string", value="Tell me about Peter Quill?"
    )
    col_NL2Cypher, col_GraphRAG = st.columns(2)
    if st.button("Generate Answer with NL2Cypher and Graph RAG"):
        # Both engines are queried before anything is rendered.
        response_NL2Cypher = nl2kg_query_engine.query(query_string)
        response_GraphRAG = kg_rag_query_engine.query(query_string)
        with col_NL2Cypher:
            response = response_NL2Cypher
            # Read the generated query from the first entry of the response metadata.
            graph_query = list(response.metadata.values())[0]["graph_store_query"]
            # Put WHERE and RETURN on their own lines for display.
            graph_query = graph_query.replace("WHERE", "\n  WHERE").replace(
                "RETURN", "\nRETURN"
            )
            answer_NL2Cypher = str(response)
            st.markdown(
                f"""
> Query used

```cypher
{graph_query}
```
"""
            )
            st.write("#### Rendered Graph")
            # Rewrite the query to return whole paths, then run it for rendering.
            render_query = cypher_to_all_paths(graph_query)
            result = query_nebulagraph(render_query)
            result_df = result_to_df(result)

            # create pyvis graph
            g = create_pyvis_graph(result_df)

            # render with random file name
            graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html")

            components.html(graph_html, height=500, scrolling=True)

            st.write(f"*Answer*: {answer_NL2Cypher}")

        with col_GraphRAG:
            response = response_GraphRAG
            answer_GraphRAG = str(response)

            # Keys of "kg_rel_map" in the first metadata entry are used as the
            # vertex ids of the sub-graph query below.
            related_entities = list(
                list(response.metadata.values())[0]["kg_rel_map"].keys()
            )
            # NOTE(review): the Python list repr is interpolated into the query
            # text as-is; confirm entity names cannot contain characters that
            # break the query.
            render_query = f"MATCH p=(n)-[*1..2]-() \n  WHERE id(n) IN {related_entities} \nRETURN p"

            st.markdown(
                f"""
> RAG Subgraph Query(depth=2)

```cypher
{render_query}
```
                """
            )
            st.write("#### Rendered Graph")
            result = query_nebulagraph(render_query)
            result_df = result_to_df(result)

            # create pyvis graph
            g = create_pyvis_graph(result_df)

            # render with random file name
            graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html")

            components.html(graph_html, height=500, scrolling=True)

            st.write(f"*Answer*: {answer_GraphRAG}")
        # Ask the LLM for a markdown table comparing the two answers.
        st.write("## Compare the two QA result")
        result = llm.complete(
            f"""
Compare the two QA result on "{query_string}", list the differences between them, to help evalute them. Output in markdown table.

Result from NL2Cypher: {str(response_NL2Cypher)}
---
Result from Graph RAG: {str(response_GraphRAG)}
"""
        )
        st.markdown(result.text)
484 | 
485 | with tab_Vector_vs_Graph_Vector:
486 |     st.write("> Vector RAG vs Graph + Vector RAG")
487 |     query_string = st.text_input(
488 |         label="Type the question to answer", value="Tell me about Rocket?"
489 |     )
490 |     col_VectorRAG, col_GraphVectorRAG = st.columns(2)
491 |     if st.button("Generate Answer with Vector and Graph + Vector"):
492 |         response_VectorRAG = vector_rag_query_engine.query(query_string)
493 |         response_GraphVectorRAG = graph_vector_rag_query_engine.query(query_string)
494 |         with col_VectorRAG:
495 |             response = response_VectorRAG
496 |             answer_VectorRAG = str(response)
497 |             st.write(f"*Answer*: {answer_VectorRAG}")
498 | 
499 |         with col_GraphVectorRAG:
500 |             response = response_GraphVectorRAG
501 |             answer_GraphVectorRAG = str(response)
502 |             st.write(f"*Answer*: {answer_GraphVectorRAG}")
503 | 
504 |         st.write("## Compare the two QA result")
505 |         st.markdown(
506 |             llm.complete(
507 |                 f"""
508 | Compare the two QA result on "{query_string}", list the differences between them, to help evalute them. Output in markdown table.
509 | 
510 | Result from Vector RAG: {str(response_VectorRAG)}
511 | ---
512 | Result from Graph+Vector RAG: {str(response_GraphVectorRAG)}
513 | """
514 |             ).text
515 |         )
516 | 


--------------------------------------------------------------------------------
/notebooks/KG_Building.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "id": "2816dd42",
   6 |    "metadata": {},
   7 |    "source": [
   8 |     "## Knowledge Graph Building with LLM\n",
   9 |     "\n",
  10 |     "```\n",
  11 |     "                                   ┌─────────────────────────┐\n",
  12 |     "                                   │                         │\n",
  13 |     "                                   │      Knowledge Graph    │\n",
  14 |     "                                   │      on NebulaGraph     │\n",
  15 |     "                                   │                         │\n",
  16 |     "                                   │                .───.    │\n",
  17 |     "                                   │           ┌──▶(     )   │\n",
  18 |     "                                   │           │    `───'    │\n",
  19 |     "┌────────────────────┐             │  .───.    │             │\n",
  20 |     "│ Data Sources       │             │ (     )───┘             │\n",
  21 |     "│                    │   Extract   │  `───'                  │\n",
  22 |     "│ Database, Wikipedia│━━With LLM━━━▶    │         .───.      │\n",
  23 |     "│ CSV, JSON Files    │             │    └───────▶(     )     │\n",
  24 |     "│ Web APIs...        │             │              `───'      │\n",
  25 |     "└────────────────────┘             │                ▲        │\n",
  26 |     "                                   │                │        │\n",
  27 |     "                                   │                │  .───. │\n",
  28 |     "                                   │                └─(     )│\n",
  29 |     "                                   │                   `───' │\n",
  30 |     "                                   │                         │\n",
  31 |     "                                   │                         │\n",
  32 |     "                                   └─────────────────────────┘\n",
  33 |     "```"
  34 |    ]
  35 |   },
  36 |   {
  37 |    "cell_type": "markdown",
  38 |    "id": "4e900489",
  39 |    "metadata": {},
  40 |    "source": [
  41 |     "# 1. Preparation\n",
  42 |     "\n",
  43 |     "## 1.1 Prepare for LLM"
  44 |    ]
  45 |   },
  46 |   {
  47 |    "cell_type": "code",
  48 |    "execution_count": null,
  49 |    "id": "895f797a",
  50 |    "metadata": {},
  51 |    "outputs": [],
  52 |    "source": [
  53 |     "# Only For OpenAI\n",
  54 |     "\n",
  55 |     "import os\n",
  56 |     "\n",
  57 |     "os.environ[\"OPENAI_API_KEY\"] = \"INSERT OPENAI KEY\"\n",
  58 |     "\n",
  59 |     "import logging\n",
  60 |     "import sys\n",
  61 |     "\n",
  62 |     "logging.basicConfig(\n",
  63 |     "    stream=sys.stdout, level=logging.INFO\n",
  64 |     ")  # logging.DEBUG for more verbose output\n",
  65 |     "\n",
  66 |     "from llama_index import (\n",
  67 |     "    KnowledgeGraphIndex,\n",
  68 |     "    LLMPredictor,\n",
  69 |     "    ServiceContext,\n",
  70 |     "    SimpleDirectoryReader,\n",
  71 |     ")\n",
  72 |     "from llama_index.storage.storage_context import StorageContext\n",
  73 |     "from llama_index.graph_stores import NebulaGraphStore\n",
  74 |     "\n",
  75 |     "\n",
  76 |     "from langchain import OpenAI\n",
  77 |     "from IPython.display import Markdown, display\n",
  78 |     "\n",
  79 |     "\n",
  80 |     "# define LLM\n",
  81 |     "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-002\"))\n",
  82 |     "service_context = ServiceContext.from_defaults(\n",
  83 |     "    llm_predictor=llm_predictor, chunk_size_limit=512\n",
  84 |     ")"
  85 |    ]
  86 |   },
  87 |   {
  88 |    "cell_type": "code",
  89 |    "execution_count": null,
  90 |    "id": "f9b21fcc",
  91 |    "metadata": {},
  92 |    "outputs": [],
  93 |    "source": [
  94 |     "# Only For Azure OpenAI\n",
  95 |     "\n",
  96 |     "import os\n",
  97 |     "import json\n",
  98 |     "import openai\n",
  99 |     "from langchain.llms import AzureOpenAI\n",
 100 |     "from langchain.embeddings import OpenAIEmbeddings\n",
 101 |     "from llama_index import LangchainEmbedding\n",
 102 |     "from llama_index import (\n",
 103 |     "    VectorStoreIndex,\n",
 104 |     "    SimpleDirectoryReader,\n",
 105 |     "    KnowledgeGraphIndex,\n",
 106 |     "    LLMPredictor,\n",
 107 |     "    ServiceContext,\n",
 108 |     ")\n",
 109 |     "\n",
 110 |     "from llama_index.storage.storage_context import StorageContext\n",
 111 |     "from llama_index.graph_stores import NebulaGraphStore\n",
 112 |     "\n",
 113 |     "import logging\n",
 114 |     "import sys\n",
 115 |     "\n",
 116 |     "from IPython.display import Markdown, display\n",
 117 |     "\n",
 118 |     "logging.basicConfig(\n",
 119 |     "    stream=sys.stdout, level=logging.INFO\n",
 120 |     ")  # logging.DEBUG for more verbose output\n",
 121 |     "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
 122 |     "\n",
 123 |     "openai.api_type = \"azure\"\n",
 124 |     "openai.api_base = \"INSERT AZURE API BASE\"\n",
 125 |     "openai.api_version = \"2022-12-01\"\n",
 126 |     "os.environ[\"OPENAI_API_KEY\"] = \"INSERT OPENAI KEY\"\n",
 127 |     "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
 128 |     "\n",
 129 |     "# define LLM\n",
 130 |     "llm = AzureOpenAI(\n",
 131 |     "    deployment_name=\"INSERT DEPLOYMENT NAME\",\n",
 132 |     "    temperature=0,\n",
 133 |     "    openai_api_version=openai.api_version,\n",
 134 |     "    model_kwargs={\n",
 135 |     "        \"api_key\": openai.api_key,\n",
 136 |     "        \"api_base\": openai.api_base,\n",
 137 |     "        \"api_type\": openai.api_type,\n",
 138 |     "        \"api_version\": openai.api_version,\n",
 139 |     "    },\n",
 140 |     ")\n",
 141 |     "llm_predictor = LLMPredictor(llm=llm)\n",
 142 |     "\n",
 143 |     "# You need to deploy your own embedding model as well as your own chat completion model\n",
 144 |     "embedding_llm = LangchainEmbedding(\n",
 145 |     "    OpenAIEmbeddings(\n",
 146 |     "        model=\"text-embedding-ada-002\",\n",
 147 |     "        deployment=\"INSERT DEPLOYMENT NAME\",\n",
 148 |     "        openai_api_key=openai.api_key,\n",
 149 |     "        openai_api_base=openai.api_base,\n",
 150 |     "        openai_api_type=openai.api_type,\n",
 151 |     "        openai_api_version=openai.api_version,\n",
 152 |     "    ),\n",
 153 |     "    embed_batch_size=1,\n",
 154 |     ")\n",
 155 |     "\n",
 156 |     "service_context = ServiceContext.from_defaults(\n",
 157 |     "    llm_predictor=llm_predictor,\n",
 158 |     "    embed_model=embedding_llm,\n",
 159 |     ")"
 160 |    ]
 161 |   },
 162 |   {
 163 |    "cell_type": "markdown",
 164 |    "id": "210dc3d4",
 165 |    "metadata": {},
 166 |    "source": [
 167 |     "## 1.2. Prepare for NebulaGraph as Graph Store"
 168 |    ]
 169 |   },
 170 |   {
 171 |    "cell_type": "markdown",
 172 |    "id": "ddb6beff",
 173 |    "metadata": {},
 174 |    "source": [
 175 |     "❗Access NebulaGraph Console to **create space** and **graph schema**\n",
 176 |     "\n",
 177 |     "```sql\n",
 178 |     "CREATE SPACE guardians(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);\n",
 179 |     ":sleep 10;\n",
 180 |     "USE guardians;\n",
 181 |     "CREATE TAG entity(name string);\n",
 182 |     "CREATE EDGE relationship(relationship string);\n",
 183 |     ":sleep 10;\n",
 184 |     "CREATE TAG INDEX entity_index ON entity(name(256));\n",
 185 |     "```"
 186 |    ]
 187 |   },
 188 |   {
 189 |    "cell_type": "code",
 190 |    "execution_count": null,
 191 |    "id": "7e9037c5",
 192 |    "metadata": {},
 193 |    "outputs": [],
 194 |    "source": [
 195 |     "os.environ[\"NEBULA_USER\"] = \"root\"\n",
 196 |     "os.environ[\"NEBULA_PASSWORD\"] = \"nebula\"  # default password\n",
 197 |     "os.environ[\n",
 198 |     "    \"NEBULA_ADDRESS\"\n",
 199 |     "] = \"127.0.0.1:9669\"  # assumed we have NebulaGraph installed locally\n",
 200 |     "\n",
 201 |     "space_name = \"guardians\"\n",
 202 |     "edge_types, rel_prop_names = [\"relationship\"], [\n",
 203 |     "    \"relationship\"\n",
 204 |     "]  # default, could be omit if create from an empty kg\n",
 205 |     "tags = [\"entity\"]  # default, could be omit if create from an empty kg\n",
 206 |     "\n",
 207 |     "graph_store = NebulaGraphStore(\n",
 208 |     "    space_name=space_name,\n",
 209 |     "    edge_types=edge_types,\n",
 210 |     "    rel_prop_names=rel_prop_names,\n",
 211 |     "    tags=tags,\n",
 212 |     ")\n",
 213 |     "storage_context = StorageContext.from_defaults(graph_store=graph_store)"
 214 |    ]
 215 |   },
 216 |   {
 217 |    "cell_type": "markdown",
 218 |    "id": "5f38240b",
 219 |    "metadata": {},
 220 |    "source": [
 221 |     "## 2. Build the Knowledge Graph\n",
 222 |     "\n"
 223 |    ]
 224 |   },
 225 |   {
 226 |    "cell_type": "markdown",
 227 |    "id": "7af875b5",
 228 |    "metadata": {},
 229 |    "source": [
 230 |     "### 2.1 Preprocess Data\n",
 231 |     "\n",
 232 |     "We will download and preprocess data from:\n",
 233 |     "    https://en.wikipedia.org/wiki/Guardians_of_the_Galaxy_Vol._3"
 234 |    ]
 235 |   },
 236 |   {
 237 |    "cell_type": "code",
 238 |    "execution_count": null,
 239 |    "id": "a13b7b67",
 240 |    "metadata": {},
 241 |    "outputs": [],
 242 |    "source": [
 243 |     "from llama_index import download_loader\n",
 244 |     "\n",
 245 |     "WikipediaReader = download_loader(\"WikipediaReader\")\n",
 246 |     "\n",
 247 |     "loader = WikipediaReader()\n",
 248 |     "\n",
 249 |     "documents = loader.load_data(\n",
 250 |     "    pages=[\"Guardians of the Galaxy Vol. 3\"], auto_suggest=False\n",
 251 |     ")"
 252 |    ]
 253 |   },
 254 |   {
 255 |    "cell_type": "markdown",
 256 |    "id": "1bc16445",
 257 |    "metadata": {},
 258 |    "source": [
 259 |     "### 2.2 Extract Triplets and Save to NebulaGraph"
 260 |    ]
 261 |   },
 262 |   {
 263 |    "cell_type": "markdown",
 264 |    "id": "e45cf6f9",
 265 |    "metadata": {},
 266 |    "source": [
 267 |     "We will persist it to disk and to NebulaGraph, so we don't need to extract the triplets again when we use it later."
 268 |    ]
 269 |   },
 270 |   {
 271 |    "cell_type": "code",
 272 |    "execution_count": null,
 273 |    "id": "ac09be97",
 274 |    "metadata": {},
 275 |    "outputs": [],
 276 |    "source": [
 277 |     "kg_index = KnowledgeGraphIndex.from_documents(\n",
 278 |     "    documents,\n",
 279 |     "    storage_context=storage_context,\n",
 280 |     "    max_triplets_per_chunk=10,\n",
 281 |     "    service_context=service_context,\n",
 282 |     "    space_name=space_name,\n",
 283 |     "    edge_types=edge_types,\n",
 284 |     "    rel_prop_names=rel_prop_names,\n",
 285 |     "    tags=tags,\n",
 286 |     "    include_embeddings=True,\n",
 287 |     ")"
 288 |    ]
 289 |   },
 290 |   {
 291 |    "cell_type": "markdown",
 292 |    "id": "7d245e9b",
 293 |    "metadata": {},
 294 |    "source": [
 295 |     "Let's persist the context from memory to disk"
 296 |    ]
 297 |   },
 298 |   {
 299 |    "cell_type": "code",
 300 |    "execution_count": null,
 301 |    "id": "36374bbe",
 302 |    "metadata": {},
 303 |    "outputs": [],
 304 |    "source": [
 305 |     "kg_index.storage_context.persist(persist_dir=\"./storage_graph\")"
 306 |    ]
 307 |   },
 308 |   {
 309 |    "cell_type": "markdown",
 310 |    "id": "fbf8fcb2",
 311 |    "metadata": {},
 312 |    "source": [
 313 |     "The files are generated:"
 314 |    ]
 315 |   },
 316 |   {
 317 |    "cell_type": "code",
 318 |    "execution_count": null,
 319 |    "id": "ba90f2cc",
 320 |    "metadata": {},
 321 |    "outputs": [
 322 |     {
 323 |      "name": "stdout",
 324 |      "output_type": "stream",
 325 |      "text": [
 326 |       "total 9120\r\n",
 327 |       "-rw-r--r--@ 1 weyl  staff    66922 Jul 12 20:26 docstore.json\r\n",
 328 |       "-rw-r--r--@ 1 weyl  staff  4594860 Jul 12 20:26 index_store.json\r\n",
 329 |       "-rw-r--r--@ 1 weyl  staff       51 Jul 12 20:26 vector_store.json\r\n"
 330 |      ]
 331 |     }
 332 |    ],
 333 |    "source": [
 334 |     "!ls -l storage_graph"
 335 |    ]
 336 |   },
 337 |   {
 338 |    "cell_type": "markdown",
 339 |    "id": "28b6e3c9",
 340 |    "metadata": {},
 341 |    "source": [
 342 |     "### 2.3 Inspect the Graph we built\n",
 343 |     "\n",
 344 |     "We will use the NebulaGraph Jupyter extension; remember to install it before the next step:\n",
 345 |     "\n",
 346 |     "```bash\n",
 347 |     "$ pip install ipython-ngql\n",
 348 |     "```"
 349 |    ]
 350 |   },
 351 |   {
 352 |    "cell_type": "code",
 353 |    "execution_count": null,
 354 |    "id": "cc19ed17",
 355 |    "metadata": {},
 356 |    "outputs": [],
 357 |    "source": [
 358 |     "%load_ext ngql\n",
 359 |     "%ngql --address 127.0.0.1 --port 9669 --user root --password nebula\n",
 360 |     "%ngql USE guardians"
 361 |    ]
 362 |   },
 363 |   {
 364 |    "cell_type": "markdown",
 365 |    "id": "f00485c2",
 366 |    "metadata": {},
 367 |    "source": [
 368 |     "We could query 30 random edges:"
 369 |    ]
 370 |   },
 371 |   {
 372 |    "cell_type": "code",
 373 |    "execution_count": null,
 374 |    "id": "f7538a65",
 375 |    "metadata": {},
 376 |    "outputs": [
 377 |     {
 378 |      "name": "stdout",
 379 |      "output_type": "stream",
 380 |      "text": [
 381 |       "INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)\n",
 382 |       "Get connection to ('127.0.0.1', 9669)\n"
 383 |      ]
 384 |     },
 385 |     {
 386 |      "data": {
 387 |       "text/html": [
 388 |        "<div>\n",
 389 |        "<style scoped>\n",
 390 |        "    .dataframe tbody tr th:only-of-type {\n",
 391 |        "        vertical-align: middle;\n",
 392 |        "    }\n",
 393 |        "\n",
 394 |        "    .dataframe tbody tr th {\n",
 395 |        "        vertical-align: top;\n",
 396 |        "    }\n",
 397 |        "\n",
 398 |        "    .dataframe thead th {\n",
 399 |        "        text-align: right;\n",
 400 |        "    }\n",
 401 |        "</style>\n",
 402 |        "<table border=\"1\" class=\"dataframe\">\n",
 403 |        "  <thead>\n",
 404 |        "    <tr style=\"text-align: right;\">\n",
 405 |        "      <th></th>\n",
 406 |        "      <th>e</th>\n",
 407 |        "    </tr>\n",
 408 |        "  </thead>\n",
 409 |        "  <tbody>\n",
 410 |        "    <tr>\n",
 411 |        "      <th>0</th>\n",
 412 |        "      <td>(\"Adam Warlock\")-[:relationship@98688268702526...</td>\n",
 413 |        "    </tr>\n",
 414 |        "    <tr>\n",
 415 |        "      <th>1</th>\n",
 416 |        "      <td>(\"Alan F. Horn\")-[:relationship@-3866030880391...</td>\n",
 417 |        "    </tr>\n",
 418 |        "    <tr>\n",
 419 |        "      <th>2</th>\n",
 420 |        "      <td>(\"Alan F. Horn\")-[:relationship@-3866030880391...</td>\n",
 421 |        "    </tr>\n",
 422 |        "    <tr>\n",
 423 |        "      <th>3</th>\n",
 424 |        "      <td>(\"Bakalova\")-[:relationship@-78310709996010382...</td>\n",
 425 |        "    </tr>\n",
 426 |        "    <tr>\n",
 427 |        "      <th>4</th>\n",
 428 |        "      <td>(\"Bakalova\")-[:relationship@-18287293525973127...</td>\n",
 429 |        "    </tr>\n",
 430 |        "    <tr>\n",
 431 |        "      <th>5</th>\n",
 432 |        "      <td>(\"Bautista\")-[:relationship@262829015229588616...</td>\n",
 433 |        "    </tr>\n",
 434 |        "    <tr>\n",
 435 |        "      <th>6</th>\n",
 436 |        "      <td>(\"Bautista\")-[:relationship@264209192087427643...</td>\n",
 437 |        "    </tr>\n",
 438 |        "    <tr>\n",
 439 |        "      <th>7</th>\n",
 440 |        "      <td>(\"Chris Pratt\")-[:relationship@-53886203992796...</td>\n",
 441 |        "    </tr>\n",
 442 |        "    <tr>\n",
 443 |        "      <th>8</th>\n",
 444 |        "      <td>(\"Christopher Fairbank\")-[:relationship@704429...</td>\n",
 445 |        "    </tr>\n",
 446 |        "    <tr>\n",
 447 |        "      <th>9</th>\n",
 448 |        "      <td>(\"Cooper\")-[:relationship@2642091920874276436{...</td>\n",
 449 |        "    </tr>\n",
 450 |        "    <tr>\n",
 451 |        "      <th>10</th>\n",
 452 |        "      <td>(\"Daniela Melchior\")-[:relationship@5794733688...</td>\n",
 453 |        "    </tr>\n",
 454 |        "    <tr>\n",
 455 |        "      <th>11</th>\n",
 456 |        "      <td>(\"Dave Bautista\")-[:relationship@-538862039927...</td>\n",
 457 |        "    </tr>\n",
 458 |        "    <tr>\n",
 459 |        "      <th>12</th>\n",
 460 |        "      <td>(\"Debicki\")-[:relationship@2682825685616935037...</td>\n",
 461 |        "    </tr>\n",
 462 |        "    <tr>\n",
 463 |        "      <th>13</th>\n",
 464 |        "      <td>(\"Diesel\")-[:relationship@2642091920874276436{...</td>\n",
 465 |        "    </tr>\n",
 466 |        "    <tr>\n",
 467 |        "      <th>14</th>\n",
 468 |        "      <td>(\"Disney\")-[:relationship@-7269035608107002438...</td>\n",
 469 |        "    </tr>\n",
 470 |        "    <tr>\n",
 471 |        "      <th>15</th>\n",
 472 |        "      <td>(\"Disney\")-[:relationship@4594936970614874383{...</td>\n",
 473 |        "    </tr>\n",
 474 |        "    <tr>\n",
 475 |        "      <th>16</th>\n",
 476 |        "      <td>(\"Drax\")-[:relationship@1274897091364343563{re...</td>\n",
 477 |        "    </tr>\n",
 478 |        "    <tr>\n",
 479 |        "      <th>17</th>\n",
 480 |        "      <td>(\"Elizabeth Debicki\")-[:relationship@704429536...</td>\n",
 481 |        "    </tr>\n",
 482 |        "    <tr>\n",
 483 |        "      <th>18</th>\n",
 484 |        "      <td>(\"Gamora\")-[:relationship@2108090488737331578{...</td>\n",
 485 |        "    </tr>\n",
 486 |        "    <tr>\n",
 487 |        "      <th>19</th>\n",
 488 |        "      <td>(\"Gamora\")-[:relationship@4452575226635738814{...</td>\n",
 489 |        "    </tr>\n",
 490 |        "    <tr>\n",
 491 |        "      <th>20</th>\n",
 492 |        "      <td>(\"Gamora\")-[:relationship@7254563908946132317{...</td>\n",
 493 |        "    </tr>\n",
 494 |        "    <tr>\n",
 495 |        "      <th>21</th>\n",
 496 |        "      <td>(\"George MacKay\")-[:relationship@2027380399406...</td>\n",
 497 |        "    </tr>\n",
 498 |        "    <tr>\n",
 499 |        "      <th>22</th>\n",
 500 |        "      <td>(\"Gillan\")-[:relationship@-1827525784919523442...</td>\n",
 501 |        "    </tr>\n",
 502 |        "    <tr>\n",
 503 |        "      <th>23</th>\n",
 504 |        "      <td>(\"Gillan\")-[:relationship@1278621438198917644{...</td>\n",
 505 |        "    </tr>\n",
 506 |        "    <tr>\n",
 507 |        "      <th>24</th>\n",
 508 |        "      <td>(\"Gillan\")-[:relationship@2642091920874276436{...</td>\n",
 509 |        "    </tr>\n",
 510 |        "    <tr>\n",
 511 |        "      <th>25</th>\n",
 512 |        "      <td>(\"Gillan\")-[:relationship@7823655194542812825{...</td>\n",
 513 |        "    </tr>\n",
 514 |        "    <tr>\n",
 515 |        "      <th>26</th>\n",
 516 |        "      <td>(\"Gregg Henry\")-[:relationship@704429536949728...</td>\n",
 517 |        "    </tr>\n",
 518 |        "    <tr>\n",
 519 |        "      <th>27</th>\n",
 520 |        "      <td>(\"Guardians cast\")-[:relationship@-64051353433...</td>\n",
 521 |        "    </tr>\n",
 522 |        "    <tr>\n",
 523 |        "      <th>28</th>\n",
 524 |        "      <td>(\"Guardians of the Galaxy\")-[:relationship@790...</td>\n",
 525 |        "    </tr>\n",
 526 |        "    <tr>\n",
 527 |        "      <th>29</th>\n",
 528 |        "      <td>(\"Guardians of the Galaxy Vol. 3\")-[:relations...</td>\n",
 529 |        "    </tr>\n",
 530 |        "  </tbody>\n",
 531 |        "</table>\n",
 532 |        "</div>"
 533 |       ],
 534 |       "text/plain": [
 535 |        "                                                    e\n",
 536 |        "0   (\"Adam Warlock\")-[:relationship@98688268702526...\n",
 537 |        "1   (\"Alan F. Horn\")-[:relationship@-3866030880391...\n",
 538 |        "2   (\"Alan F. Horn\")-[:relationship@-3866030880391...\n",
 539 |        "3   (\"Bakalova\")-[:relationship@-78310709996010382...\n",
 540 |        "4   (\"Bakalova\")-[:relationship@-18287293525973127...\n",
 541 |        "5   (\"Bautista\")-[:relationship@262829015229588616...\n",
 542 |        "6   (\"Bautista\")-[:relationship@264209192087427643...\n",
 543 |        "7   (\"Chris Pratt\")-[:relationship@-53886203992796...\n",
 544 |        "8   (\"Christopher Fairbank\")-[:relationship@704429...\n",
 545 |        "9   (\"Cooper\")-[:relationship@2642091920874276436{...\n",
 546 |        "10  (\"Daniela Melchior\")-[:relationship@5794733688...\n",
 547 |        "11  (\"Dave Bautista\")-[:relationship@-538862039927...\n",
 548 |        "12  (\"Debicki\")-[:relationship@2682825685616935037...\n",
 549 |        "13  (\"Diesel\")-[:relationship@2642091920874276436{...\n",
 550 |        "14  (\"Disney\")-[:relationship@-7269035608107002438...\n",
 551 |        "15  (\"Disney\")-[:relationship@4594936970614874383{...\n",
 552 |        "16  (\"Drax\")-[:relationship@1274897091364343563{re...\n",
 553 |        "17  (\"Elizabeth Debicki\")-[:relationship@704429536...\n",
 554 |        "18  (\"Gamora\")-[:relationship@2108090488737331578{...\n",
 555 |        "19  (\"Gamora\")-[:relationship@4452575226635738814{...\n",
 556 |        "20  (\"Gamora\")-[:relationship@7254563908946132317{...\n",
 557 |        "21  (\"George MacKay\")-[:relationship@2027380399406...\n",
 558 |        "22  (\"Gillan\")-[:relationship@-1827525784919523442...\n",
 559 |        "23  (\"Gillan\")-[:relationship@1278621438198917644{...\n",
 560 |        "24  (\"Gillan\")-[:relationship@2642091920874276436{...\n",
 561 |        "25  (\"Gillan\")-[:relationship@7823655194542812825{...\n",
 562 |        "26  (\"Gregg Henry\")-[:relationship@704429536949728...\n",
 563 |        "27  (\"Guardians cast\")-[:relationship@-64051353433...\n",
 564 |        "28  (\"Guardians of the Galaxy\")-[:relationship@790...\n",
 565 |        "29  (\"Guardians of the Galaxy Vol. 3\")-[:relations..."
 566 |       ]
 567 |      },
 568 |      "execution_count": null,
 569 |      "metadata": {},
 570 |      "output_type": "execute_result"
 571 |     }
 572 |    ],
 573 |    "source": [
 574 |     "%ngql MATCH ()-[e]->() RETURN e LIMIT 30"
 575 |    ]
 576 |   },
 577 |   {
 578 |    "cell_type": "markdown",
 579 |    "id": "28977dd6",
 580 |    "metadata": {},
 581 |    "source": [
 582 |     "And **draw** it:"
 583 |    ]
 584 |   },
 585 |   {
 586 |    "cell_type": "code",
 587 |    "execution_count": null,
 588 |    "id": "97553264",
 589 |    "metadata": {},
 590 |    "outputs": [
 591 |     {
 592 |      "name": "stdout",
 593 |      "output_type": "stream",
 594 |      "text": [
 595 |       "nebulagraph_draw.html\n"
 596 |      ]
 597 |     },
 598 |     {
 599 |      "data": {
 600 |       "text/html": [
 601 |        "\n",
 602 |        "        <iframe\n",
 603 |        "            width=\"100%\"\n",
 604 |        "            height=\"500px\"\n",
 605 |        "            src=\"nebulagraph_draw.html\"\n",
 606 |        "            frameborder=\"0\"\n",
 607 |        "            allowfullscreen\n",
 608 |        "            \n",
 609 |        "        ></iframe>\n",
 610 |        "        "
 611 |       ],
 612 |       "text/plain": [
 613 |        "<IPython.lib.display.IFrame at 0x15040c450>"
 614 |       ]
 615 |      },
 616 |      "execution_count": null,
 617 |      "metadata": {},
 618 |      "output_type": "execute_result"
 619 |     }
 620 |    ],
 621 |    "source": [
 622 |     "%ng_draw"
 623 |    ]
 624 |   },
 625 |   {
 626 |    "cell_type": "markdown",
 627 |    "id": "bae60f9c",
 628 |    "metadata": {},
 629 |    "source": [
 630 |     "## NL2Cypher\n",
 631 |     "\n",
 632 |     "Now we have a Knowledge Graph built on top of Wikipedia. With NebulaGraph LLM tooling, we can query the KG in natural language (NL2Cypher).\n",
 633 |     "\n",
 634 |     "First, let's use LlamaIndex:"
 635 |    ]
 636 |   },
 637 |   {
 638 |    "cell_type": "code",
 639 |    "execution_count": null,
 640 |    "id": "a8f6f8a1",
 641 |    "metadata": {},
 642 |    "outputs": [],
 643 |    "source": [
 644 |     "from llama_index.query_engine import KnowledgeGraphQueryEngine\n",
 645 |     "\n",
 646 |     "from llama_index.storage.storage_context import StorageContext\n",
 647 |     "from llama_index.graph_stores import NebulaGraphStore\n",
 648 |     "\n",
 649 |     "nl2kg_query_engine = KnowledgeGraphQueryEngine(\n",
 650 |     "    storage_context=storage_context,\n",
 651 |     "    service_context=service_context,\n",
 652 |     "    llm=llm,\n",
 653 |     "    verbose=True,\n",
 654 |     ")"
 655 |    ]
 656 |   },
 657 |   {
 658 |    "cell_type": "markdown",
 659 |    "id": "627f4b4d",
 660 |    "metadata": {},
 661 |    "source": [
 662 |     "We can see that `KnowledgeGraphQueryEngine` can **generate the graph query**, run it for us, and finally have the LLM synthesize the answer, all in one go!"
 663 |    ]
 664 |   },
 665 |   {
 666 |    "cell_type": "code",
 667 |    "execution_count": null,
 668 |    "id": "97e14a81",
 669 |    "metadata": {},
 670 |    "outputs": [
 671 |     {
 672 |      "name": "stdout",
 673 |      "output_type": "stream",
 674 |      "text": [
 675 |       "\u001b[33;1m\u001b[1;3mGraph Store Query: MATCH (p:`entity`)-[:relationship]->(e:`entity`) WHERE p.`entity`.`name` == 'Peter Quill' RETURN e.`entity`.`name`;\n",
 676 |       "\u001b[0mINFO:llama_index.query_engine.knowledge_graph_query_engine:Graph Store Query: MATCH (p:`entity`)-[:relationship]->(e:`entity`) WHERE p.`entity`.`name` == 'Peter Quill' RETURN e.`entity`.`name`;\n",
 677 |       "Graph Store Query: MATCH (p:`entity`)-[:relationship]->(e:`entity`) WHERE p.`entity`.`name` == 'Peter Quill' RETURN e.`entity`.`name`;\n",
 678 |       "\u001b[33;1m\u001b[1;3mGraph Store Response: {'e.entity.name': ['Guardians of the Galaxy']}\n",
 679 |       "\u001b[0mINFO:llama_index.query_engine.knowledge_graph_query_engine:Graph Store Response: {'e.entity.name': ['Guardians of the Galaxy']}\n",
 680 |       "Graph Store Response: {'e.entity.name': ['Guardians of the Galaxy']}\n",
 681 |       "\u001b[32;1m\u001b[1;3mFinal Response: \n",
 682 |       "Peter Quill is a character from the Marvel Comics series Guardians of the Galaxy.\n",
 683 |       "\u001b[0m"
 684 |      ]
 685 |     },
 686 |     {
 687 |      "data": {
 688 |       "text/markdown": [
 689 |        "<b>\n",
 690 |        "Peter Quill is a character from the Marvel Comics series Guardians of the Galaxy.</b>"
 691 |       ],
 692 |       "text/plain": [
 693 |        "<IPython.core.display.Markdown object>"
 694 |       ]
 695 |      },
 696 |      "metadata": {},
 697 |      "output_type": "display_data"
 698 |     }
 699 |    ],
 700 |    "source": [
 701 |     "response = nl2kg_query_engine.query(\n",
 702 |     "    \"Tell me about Peter Quill?\",\n",
 703 |     ")\n",
 704 |     "display(Markdown(f\"<b>{response}</b>\"))"
 705 |    ]
 706 |   },
 707 |   {
 708 |    "cell_type": "markdown",
 709 |    "id": "ce948418",
 710 |    "metadata": {},
 711 |    "source": [
 712 |     "💡 Apart from the e2e KGQA, we could ask for only NL2Cypher like this with `generate_query`."
 713 |    ]
 714 |   },
 715 |   {
 716 |    "cell_type": "code",
 717 |    "execution_count": null,
 718 |    "id": "bb51418c",
 719 |    "metadata": {},
 720 |    "outputs": [
 721 |     {
 722 |      "data": {
 723 |       "text/markdown": [
 724 |        "\n",
 725 |        "```cypher\n",
 726 |        "MATCH (p:`entity`)-[:relationship]->(e:`entity`) \n",
 727 |        "  WHERE p.`entity`.`name` == 'Peter Quill' \n",
 728 |        "RETURN e.`entity`.`name`;\n",
 729 |        "```\n"
 730 |       ],
 731 |       "text/plain": [
 732 |        "<IPython.core.display.Markdown object>"
 733 |       ]
 734 |      },
 735 |      "metadata": {},
 736 |      "output_type": "display_data"
 737 |     }
 738 |    ],
 739 |    "source": [
 740 |     "graph_query = nl2kg_query_engine.generate_query(\n",
 741 |     "    \"Tell me about Peter Quill?\",\n",
 742 |     ")\n",
 743 |     "graph_query = graph_query.replace(\"WHERE\", \"\\n  WHERE\").replace(\"RETURN\", \"\\nRETURN\")\n",
 744 |     "\n",
 745 |     "display(\n",
 746 |     "    Markdown(\n",
 747 |     "        f\"\"\"\n",
 748 |     "```cypher\n",
 749 |     "{graph_query}\n",
 750 |     "```\n",
 751 |     "\"\"\"\n",
 752 |     "    )\n",
 753 |     ")"
 754 |    ]
 755 |   },
 756 |   {
 757 |    "cell_type": "markdown",
 758 |    "id": "ef9565ca",
 759 |    "metadata": {},
 760 |    "source": [
 761 |     "Then, of course, we can run the generated query ourselves!"
 762 |    ]
 763 |   },
 764 |   {
 765 |    "cell_type": "code",
 766 |    "execution_count": null,
 767 |    "id": "265fbfb6",
 768 |    "metadata": {},
 769 |    "outputs": [
 770 |     {
 771 |      "name": "stdout",
 772 |      "output_type": "stream",
 773 |      "text": [
 774 |       "INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)\n",
 775 |       "Get connection to ('127.0.0.1', 9669)\n"
 776 |      ]
 777 |     },
 778 |     {
 779 |      "data": {
 780 |       "text/html": [
 781 |        "<div>\n",
 782 |        "<style scoped>\n",
 783 |        "    .dataframe tbody tr th:only-of-type {\n",
 784 |        "        vertical-align: middle;\n",
 785 |        "    }\n",
 786 |        "\n",
 787 |        "    .dataframe tbody tr th {\n",
 788 |        "        vertical-align: top;\n",
 789 |        "    }\n",
 790 |        "\n",
 791 |        "    .dataframe thead th {\n",
 792 |        "        text-align: right;\n",
 793 |        "    }\n",
 794 |        "</style>\n",
 795 |        "<table border=\"1\" class=\"dataframe\">\n",
 796 |        "  <thead>\n",
 797 |        "    <tr style=\"text-align: right;\">\n",
 798 |        "      <th></th>\n",
 799 |        "      <th>e.entity.name</th>\n",
 800 |        "    </tr>\n",
 801 |        "  </thead>\n",
 802 |        "  <tbody>\n",
 803 |        "    <tr>\n",
 804 |        "      <th>0</th>\n",
 805 |        "      <td>Guardians of the Galaxy</td>\n",
 806 |        "    </tr>\n",
 807 |        "  </tbody>\n",
 808 |        "</table>\n",
 809 |        "</div>"
 810 |       ],
 811 |       "text/plain": [
 812 |        "             e.entity.name\n",
 813 |        "0  Guardians of the Galaxy"
 814 |       ]
 815 |      },
 816 |      "execution_count": null,
 817 |      "metadata": {},
 818 |      "output_type": "execute_result"
 819 |     }
 820 |    ],
 821 |    "source": [
 822 |     "%%ngql\n",
 823 |     "MATCH (p:`entity`)-[:relationship]->(e:`entity`)\n",
 824 |     "  WHERE p.`entity`.`name` == 'Peter Quill'\n",
 825 |     "RETURN e.`entity`.`name`;"
 826 |    ]
 827 |   },
 828 |   {
 829 |    "cell_type": "markdown",
 830 |    "id": "8edc77cb",
 831 |    "metadata": {},
 832 |    "source": [
 833 |     "Or we can change the `RETURN` clause to return the whole path, so that we can draw it!"
 834 |    ]
 835 |   },
 836 |   {
 837 |    "cell_type": "code",
 838 |    "execution_count": null,
 839 |    "id": "afe1fe35",
 840 |    "metadata": {
 841 |     "scrolled": true
 842 |    },
 843 |    "outputs": [
 844 |     {
 845 |      "name": "stdout",
 846 |      "output_type": "stream",
 847 |      "text": [
 848 |       "INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)\n",
 849 |       "Get connection to ('127.0.0.1', 9669)\n"
 850 |      ]
 851 |     },
 852 |     {
 853 |      "data": {
 854 |       "text/html": [
 855 |        "<div>\n",
 856 |        "<style scoped>\n",
 857 |        "    .dataframe tbody tr th:only-of-type {\n",
 858 |        "        vertical-align: middle;\n",
 859 |        "    }\n",
 860 |        "\n",
 861 |        "    .dataframe tbody tr th {\n",
 862 |        "        vertical-align: top;\n",
 863 |        "    }\n",
 864 |        "\n",
 865 |        "    .dataframe thead th {\n",
 866 |        "        text-align: right;\n",
 867 |        "    }\n",
 868 |        "</style>\n",
 869 |        "<table border=\"1\" class=\"dataframe\">\n",
 870 |        "  <thead>\n",
 871 |        "    <tr style=\"text-align: right;\">\n",
 872 |        "      <th></th>\n",
 873 |        "      <th>path_0</th>\n",
 874 |        "    </tr>\n",
 875 |        "  </thead>\n",
 876 |        "  <tbody>\n",
 877 |        "    <tr>\n",
 878 |        "      <th>0</th>\n",
 879 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...</td>\n",
 880 |        "    </tr>\n",
 881 |        "  </tbody>\n",
 882 |        "</table>\n",
 883 |        "</div>"
 884 |       ],
 885 |       "text/plain": [
 886 |        "                                              path_0\n",
 887 |        "0  (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[..."
 888 |       ]
 889 |      },
 890 |      "execution_count": null,
 891 |      "metadata": {},
 892 |      "output_type": "execute_result"
 893 |     }
 894 |    ],
 895 |    "source": [
 896 |     "%%ngql\n",
 897 |     "MATCH path_0=(p:`entity`)-[:relationship]->(e:`entity`)\n",
 898 |     "  WHERE p.`entity`.`name` == 'Peter Quill'\n",
 899 |     "RETURN path_0;"
 900 |    ]
 901 |   },
 902 |   {
 903 |    "cell_type": "code",
 904 |    "execution_count": null,
 905 |    "id": "5de504de",
 906 |    "metadata": {
 907 |     "scrolled": false
 908 |    },
 909 |    "outputs": [
 910 |     {
 911 |      "name": "stdout",
 912 |      "output_type": "stream",
 913 |      "text": [
 914 |       "nebulagraph_draw.html\n"
 915 |      ]
 916 |     },
 917 |     {
 918 |      "data": {
 919 |       "text/html": [
 920 |        "\n",
 921 |        "        <iframe\n",
 922 |        "            width=\"100%\"\n",
 923 |        "            height=\"500px\"\n",
 924 |        "            src=\"nebulagraph_draw.html\"\n",
 925 |        "            frameborder=\"0\"\n",
 926 |        "            allowfullscreen\n",
 927 |        "            \n",
 928 |        "        ></iframe>\n",
 929 |        "        "
 930 |       ],
 931 |       "text/plain": [
 932 |        "<IPython.lib.display.IFrame at 0x1504e8f50>"
 933 |       ]
 934 |      },
 935 |      "execution_count": null,
 936 |      "metadata": {},
 937 |      "output_type": "execute_result"
 938 |     }
 939 |    ],
 940 |    "source": [
 941 |     "%ng_draw"
 942 |    ]
 943 |   },
 944 |   {
 945 |    "cell_type": "code",
 946 |    "execution_count": null,
 947 |    "id": "d6de8141",
 948 |    "metadata": {},
 949 |    "outputs": [],
 950 |    "source": [
 951 |     "!mv nebulagraph_draw.html nebulagraph_draw_nl2cypher.html"
 952 |    ]
 953 |   },
 954 |   {
 955 |    "cell_type": "markdown",
 956 |    "id": "cd7fe472",
 957 |    "metadata": {},
 958 |    "source": [
 959 |     "### NL2Cypher With Langchain\n",
 960 |     "\n",
 961 |     "Alternatively, we could do this via LangChain's **NebulaGraphQAChain**; see the [docs](https://python.langchain.com/docs/modules/chains/additional/graph_nebula_qa)."
 962 |    ]
 963 |   },
 964 |   {
 965 |    "cell_type": "code",
 966 |    "execution_count": null,
 967 |    "id": "d51df174",
 968 |    "metadata": {},
 969 |    "outputs": [],
 970 |    "source": [
 971 |     "from langchain.chat_models import ChatOpenAI\n",
 972 |     "from langchain.chains import NebulaGraphQAChain\n",
 973 |     "from langchain.graphs import NebulaGraph\n",
 974 |     "\n",
 975 |     "graph = NebulaGraph(\n",
 976 |     "    space=space_name,\n",
 977 |     "    username=\"root\",\n",
 978 |     "    password=\"nebula\",\n",
 979 |     "    address=\"127.0.0.1\",\n",
 980 |     "    port=9669,\n",
 981 |     "    session_pool_size=30,\n",
 982 |     ")\n",
 983 |     "\n",
 984 |     "chain = NebulaGraphQAChain.from_llm(llm, graph=graph, verbose=True)"
 985 |    ]
 986 |   },
 987 |   {
 988 |    "cell_type": "code",
 989 |    "execution_count": null,
 990 |    "id": "96afe26b",
 991 |    "metadata": {},
 992 |    "outputs": [
 993 |     {
 994 |      "name": "stdout",
 995 |      "output_type": "stream",
 996 |      "text": [
 997 |       "\n",
 998 |       "\n",
 999 |       "\u001b[1m> Entering new  chain...\u001b[0m\n",
1000 |       "Generated nGQL:\n",
1001 |       "\u001b[32;1m\u001b[1;3m\n",
1002 |       "\n",
1003 |       "MATCH (p:`entity`)-[e:relationship]->(m:`entity`) WHERE p.`entity`.`name` == 'Peter Quill' RETURN p.`entity`.`name`, e.relationship, m.`entity`.`name`;\u001b[0m\n",
1004 |       "Full Context:\n",
1005 |       "\u001b[32;1m\u001b[1;3m{'p.entity.name': ['Peter Quill'], 'e.relationship': ['is leader of'], 'm.entity.name': ['Guardians of the Galaxy']}\u001b[0m\n",
1006 |       "\n",
1007 |       "\u001b[1m> Finished chain.\u001b[0m\n"
1008 |      ]
1009 |     },
1010 |     {
1011 |      "data": {
1012 |       "text/plain": [
1013 |        "' Peter Quill is the leader of the Guardians of the Galaxy.'"
1014 |       ]
1015 |      },
1016 |      "execution_count": null,
1017 |      "metadata": {},
1018 |      "output_type": "execute_result"
1019 |     }
1020 |    ],
1021 |    "source": [
1022 |     "chain.run(\n",
1023 |     "    \"Tell me about Peter Quill?\",\n",
1024 |     ")"
1025 |    ]
1026 |   },
1027 |   {
1028 |    "cell_type": "markdown",
1029 |    "id": "c306e4be",
1030 |    "metadata": {},
1031 |    "source": [
1032 |     "## Graph RAG\n",
1033 |     "\n",
1034 |     "Apart from the NL2Cypher approach to exploiting a KG in QA, especially for complex tasks, we could also do it the **Retrieval-Augmented Generation** (RAG) way."
1035 |    ]
1036 |   },
1037 |   {
1038 |    "cell_type": "code",
1039 |    "execution_count": null,
1040 |    "id": "122a4442",
1041 |    "metadata": {},
1042 |    "outputs": [
1043 |     {
1044 |      "name": "stdout",
1045 |      "output_type": "stream",
1046 |      "text": [
1047 |       "INFO:llama_index.indices.loading:Loading all indices.\n",
1048 |       "Loading all indices.\n"
1049 |      ]
1050 |     }
1051 |    ],
1052 |    "source": [
1053 |     "from llama_index import load_index_from_storage\n",
1054 |     "\n",
1055 |     "storage_context_graph = StorageContext.from_defaults(\n",
1056 |     "    persist_dir=\"./storage_graph\", graph_store=graph_store\n",
1057 |     ")\n",
1058 |     "kg_index_new = load_index_from_storage(\n",
1059 |     "    storage_context=storage_context_graph,\n",
1060 |     "    service_context=service_context,\n",
1061 |     "    max_triplets_per_chunk=10,\n",
1062 |     "    space_name=space_name,\n",
1063 |     "    edge_types=edge_types,\n",
1064 |     "    rel_prop_names=rel_prop_names,\n",
1065 |     "    tags=tags,\n",
1066 |     "    include_embeddings=True,\n",
1067 |     ")"
1068 |    ]
1069 |   },
1070 |   {
1071 |    "cell_type": "code",
1072 |    "execution_count": null,
1073 |    "id": "0e1bb6fe",
1074 |    "metadata": {},
1075 |    "outputs": [],
1076 |    "source": [
1077 |     "kg_rag_query_engine = kg_index_new.as_query_engine(\n",
1078 |     "    include_text=False,\n",
1079 |     "    retriever_mode=\"keyword\",\n",
1080 |     "    response_mode=\"tree_summarize\",\n",
1081 |     ")"
1082 |    ]
1083 |   },
1084 |   {
1085 |    "cell_type": "code",
1086 |    "execution_count": null,
1087 |    "id": "100395cb",
1088 |    "metadata": {},
1089 |    "outputs": [
1090 |     {
1091 |      "name": "stdout",
1092 |      "output_type": "stream",
1093 |      "text": [
1094 |       "INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me about Peter Quill?\n",
1095 |       "> Starting query: Tell me about Peter Quill?\n",
1096 |       "INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['biography', 'Peter Quill', 'Peter', 'Quill', 'information']\n",
1097 |       "> Query keywords: ['biography', 'Peter Quill', 'Peter', 'Quill', 'information']\n",
1098 |       "INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`\n",
1099 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'released in', '2014']\n",
1100 |       "Peter Quill ['portrays', 'Peter Quill']\n",
1101 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'reprised role from', 'Guardians of the Galaxy']\n",
1102 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy']\n",
1103 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'directed', 'Guardians of the Galaxy']\n",
1104 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'wrote', 'Guardians of the Galaxy']\n",
1105 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'sequel to', 'Guardians of the Galaxy']\n",
1106 |       "Quill ['speaks', ' fuck ']\n",
1107 |       "> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`\n",
1108 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'released in', '2014']\n",
1109 |       "Peter Quill ['portrays', 'Peter Quill']\n",
1110 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'reprised role from', 'Guardians of the Galaxy']\n",
1111 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy']\n",
1112 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'directed', 'Guardians of the Galaxy']\n",
1113 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'wrote', 'Guardians of the Galaxy']\n",
1114 |       "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'sequel to', 'Guardians of the Galaxy']\n",
1115 |       "Quill ['speaks', ' fuck ']\n"
1116 |      ]
1117 |     },
1118 |     {
1119 |      "data": {
1120 |       "text/markdown": [
1121 |        "<b>\n",
1122 |        "Peter Quill is the leader of the Guardians of the Galaxy, a superhero team released in 2014. He portrays the character of Peter Quill and reprised his role from the Guardians of the Galaxy. He was also the director and writer of the Guardians of the Galaxy and its sequel. Quill is known to speak with profanity.</b>"
1123 |       ],
1124 |       "text/plain": [
1125 |        "<IPython.core.display.Markdown object>"
1126 |       ]
1127 |      },
1128 |      "metadata": {},
1129 |      "output_type": "display_data"
1130 |     }
1131 |    ],
1132 |    "source": [
1133 |     "response = kg_rag_query_engine.query(\"Tell me about Peter Quill?\")\n",
1134 |     "display(Markdown(f\"<b>{response}</b>\"))"
1135 |    ]
1136 |   },
1137 |   {
1138 |    "cell_type": "code",
1139 |    "execution_count": null,
1140 |    "id": "c453a760",
1141 |    "metadata": {},
1142 |    "outputs": [
1143 |     {
1144 |      "name": "stdout",
1145 |      "output_type": "stream",
1146 |      "text": [
1147 |       "INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)\n",
1148 |       "Get connection to ('127.0.0.1', 9669)\n"
1149 |      ]
1150 |     },
1151 |     {
1152 |      "data": {
1153 |       "text/html": [
1154 |        "<div>\n",
1155 |        "<style scoped>\n",
1156 |        "    .dataframe tbody tr th:only-of-type {\n",
1157 |        "        vertical-align: middle;\n",
1158 |        "    }\n",
1159 |        "\n",
1160 |        "    .dataframe tbody tr th {\n",
1161 |        "        vertical-align: top;\n",
1162 |        "    }\n",
1163 |        "\n",
1164 |        "    .dataframe thead th {\n",
1165 |        "        text-align: right;\n",
1166 |        "    }\n",
1167 |        "</style>\n",
1168 |        "<table border=\"1\" class=\"dataframe\">\n",
1169 |        "  <thead>\n",
1170 |        "    <tr style=\"text-align: right;\">\n",
1171 |        "      <th></th>\n",
1172 |        "      <th>path0</th>\n",
1173 |        "    </tr>\n",
1174 |        "  </thead>\n",
1175 |        "  <tbody>\n",
1176 |        "    <tr>\n",
1177 |        "      <th>0</th>\n",
1178 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...</td>\n",
1179 |        "    </tr>\n",
1180 |        "    <tr>\n",
1181 |        "      <th>1</th>\n",
1182 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})&lt;-...</td>\n",
1183 |        "    </tr>\n",
1184 |        "    <tr>\n",
1185 |        "      <th>2</th>\n",
1186 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...</td>\n",
1187 |        "    </tr>\n",
1188 |        "    <tr>\n",
1189 |        "      <th>3</th>\n",
1190 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...</td>\n",
1191 |        "    </tr>\n",
1192 |        "    <tr>\n",
1193 |        "      <th>4</th>\n",
1194 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...</td>\n",
1195 |        "    </tr>\n",
1196 |        "    <tr>\n",
1197 |        "      <th>5</th>\n",
1198 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...</td>\n",
1199 |        "    </tr>\n",
1200 |        "    <tr>\n",
1201 |        "      <th>6</th>\n",
1202 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...</td>\n",
1203 |        "    </tr>\n",
1204 |        "    <tr>\n",
1205 |        "      <th>7</th>\n",
1206 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...</td>\n",
1207 |        "    </tr>\n",
1208 |        "    <tr>\n",
1209 |        "      <th>8</th>\n",
1210 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...</td>\n",
1211 |        "    </tr>\n",
1212 |        "    <tr>\n",
1213 |        "      <th>9</th>\n",
1214 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...</td>\n",
1215 |        "    </tr>\n",
1216 |        "    <tr>\n",
1217 |        "      <th>10</th>\n",
1218 |        "      <td>(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...</td>\n",
1219 |        "    </tr>\n",
1220 |        "  </tbody>\n",
1221 |        "</table>\n",
1222 |        "</div>"
1223 |       ],
1224 |       "text/plain": [
1225 |        "                                                path0\n",
1226 |        "0   (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n",
1227 |        "1   (\"Peter Quill\" :entity{name: \"Peter Quill\"})<-...\n",
1228 |        "2   (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n",
1229 |        "3   (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n",
1230 |        "4   (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n",
1231 |        "5   (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n",
1232 |        "6   (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n",
1233 |        "7   (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n",
1234 |        "8   (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n",
1235 |        "9   (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n",
1236 |        "10  (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[..."
1237 |       ]
1238 |      },
1239 |      "execution_count": null,
1240 |      "metadata": {},
1241 |      "output_type": "execute_result"
1242 |     }
1243 |    ],
1244 |    "source": [
1245 |     "%%ngql\n",
1246 |     "MATCH path0=(p:`entity`)-[*1..2]-() WHERE p.`entity`.`name` == 'Peter Quill'\n",
1247 |     "RETURN path0;"
1248 |    ]
1249 |   },
1250 |   {
1251 |    "cell_type": "code",
1252 |    "execution_count": null,
1253 |    "id": "8014418d",
1254 |    "metadata": {
1255 |     "scrolled": false
1256 |    },
1257 |    "outputs": [
1258 |     {
1259 |      "name": "stdout",
1260 |      "output_type": "stream",
1261 |      "text": [
1262 |       "nebulagraph_draw.html\n"
1263 |      ]
1264 |     },
1265 |     {
1266 |      "data": {
1267 |       "text/html": [
1268 |        "\n",
1269 |        "        <iframe\n",
1270 |        "            width=\"100%\"\n",
1271 |        "            height=\"500px\"\n",
1272 |        "            src=\"nebulagraph_draw.html\"\n",
1273 |        "            frameborder=\"0\"\n",
1274 |        "            allowfullscreen\n",
1275 |        "            \n",
1276 |        "        ></iframe>\n",
1277 |        "        "
1278 |       ],
1279 |       "text/plain": [
1280 |        "<IPython.lib.display.IFrame at 0x15040df10>"
1281 |       ]
1282 |      },
1283 |      "execution_count": null,
1284 |      "metadata": {},
1285 |      "output_type": "execute_result"
1286 |     }
1287 |    ],
1288 |    "source": [
1289 |     "%ng_draw"
1290 |    ]
1291 |   },
1292 |   {
1293 |    "cell_type": "code",
1294 |    "execution_count": null,
1295 |    "id": "04ac7fd8",
1296 |    "metadata": {},
1297 |    "outputs": [],
1298 |    "source": [
1299 |     "!mv nebulagraph_draw.html nebulagraph_draw_rag.html"
1300 |    ]
1301 |   }
1302 |  ],
1303 |  "metadata": {
1304 |   "kernelspec": {
1305 |    "display_name": "Python 3 (ipykernel)",
1306 |    "language": "python",
1307 |    "name": "python3"
1308 |   },
1309 |   "language_info": {
1310 |    "codemirror_mode": {
1311 |     "name": "ipython",
1312 |     "version": 3
1313 |    },
1314 |    "file_extension": ".py",
1315 |    "mimetype": "text/x-python",
1316 |    "name": "python",
1317 |    "nbconvert_exporter": "python",
1318 |    "pygments_lexer": "ipython3",
1319 |    "version": "3.11.4"
1320 |   }
1321 |  },
1322 |  "nbformat": 4,
1323 |  "nbformat_minor": 5
1324 | }
1325 | 


--------------------------------------------------------------------------------
/storage_graph/docstore.json:
--------------------------------------------------------------------------------
1 | {"docstore/metadata": {"914e16a9-7a9f-4e06-ae6c-35e1e3296832": {"doc_hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "181df629-dabe-499f-b9e4-5517c1106e22": {"doc_hash": "1c0cfcf1846867db87e14694014a3b84dbacfd2298ec2e05fe9a82e79c7c881f", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69": {"doc_hash": "2de6f537170cfe777c202cb5b48c4e8ded65a0f307dfeaad4c63b5545d5068b1", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "6b4cd20b-0460-4cf6-838a-987a9ead426b": {"doc_hash": "dbed2b759f925de4023bcee94e0ba1cf8298e578a440c312a5788c45ed972083", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "23688f93-fc51-495e-ae70-c4b79a00d153": {"doc_hash": "f3037ba4ac0936be6f7692f84daacfb85cc65e6d80b93ddcaf2cb642aa3aea76", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "212ada4e-157a-4241-b453-30f8e5f8f3e1": {"doc_hash": "97a7ab4238b6213dabf3e04318e0478313f1f66b83155259b57c343f679c83e2", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "71898eef-f64f-4d47-a677-343a227d9524": {"doc_hash": "e48fadeb031fcf69647b29b0577ec12c02f472001d1dfc3853c40b0a29da708c", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "dc262176-f882-4b19-9a83-c9e81d77d3b8": {"doc_hash": "acb70087e4cc902de303a48e0672c1ea09ae704635bbc8a71f602857cf1c5054", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "758c1ea9-bb37-4813-b891-e2f90686393f": {"doc_hash": "eb15f34b0356f23e89ad5d135a76be4ee32646ceeb2c391167cd074d543244c1", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "7e403166-2350-4ff6-a516-3cbb25356f32": {"doc_hash": "405a764ce822658d4e2ceecc26b4b77c75fba0a9827abb24c1ed0a25b7fffcc8", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6": {"doc_hash": "2fa01cbd033355cb0cf08eca484e62114ebf2018ac45c855f3cb0bed6c4d5252", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "66f24e08-79cb-4559-8d91-44ab4270add3": {"doc_hash": 
"245d161980c9962b654c1051f50b83b9a21ca0fbdaab03df36408ac1e6dc4a5b", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "49d408fd-5bb2-4c41-b807-60daf4b1531a": {"doc_hash": "85975279279946ded84af9db8e61c024842ced9b9bfa5464ed571c6506e193b4", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "fc9299ee-ee60-4abc-8e2f-45295f64a7aa": {"doc_hash": "53ce5fb0c66d0b346ac988da8f3e954fac4af17c985b820c97514e6abf37b37f", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "bdf2433e-f430-4550-a566-bf5624374b70": {"doc_hash": "9b8169681ccd4608c0a72477730c40122550754dacb5932a9af709a15a523133", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "5fcd0003-fce2-49bc-a004-75e7ba06a7ad": {"doc_hash": "077ef8fce032bfceca02aa0332bce50087a91ee6f22b166dc4359c95a14f638c", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "08659ce9-39e2-48ca-816e-33f5af331d37": {"doc_hash": "0f85282e87b66185eddf746374f76e7de35e60c4b2f29050137e6a81cc7877e5", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "778fc2f5-0312-4503-85da-8cb294597451": {"doc_hash": "7fd46180ce995de60ba53f3363ce55baa003d9f1dfaff370c3ffc2edaffaeca6", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}}, "docstore/data": {"181df629-dabe-499f-b9e4-5517c1106e22": {"__data__": {"id_": "181df629-dabe-499f-b9e4-5517c1106e22", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "3": {"node_id": "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69", "node_type": null, "metadata": {}, "hash": "2de6f537170cfe777c202cb5b48c4e8ded65a0f307dfeaad4c63b5545d5068b1"}}, "hash": "1c0cfcf1846867db87e14694014a3b84dbacfd2298ec2e05fe9a82e79c7c881f", "text": "The following scientific events occurred or are scheduled to occur in 2023.\n\n\n== Events ==\n\n\n=== January ===\n\n\n=== February ===\n\n\n=== March 
===\n\n\n=== April ===\n3 April\nFive employees at the National Hurricane Center publish a tropical cyclone report (TCR) on Hurricane Ian, which officially upgrades the hurricane from a Category 4 to a Category 5 on the Saffir\u2013Simpson scale. The TCR also stated that Hurricane Ian caused, with 90% confidence, $112.9 billion worth of damage to the United States, which made Ian the third-costliest United States hurricane on record as well as the costliest hurricane to strike Florida on record.\nAn unexplained rise of emissions of five chlorofluorocarbons (CFCs), successfully banned by the Montreal Protocol of 1989, is reported. Their climate impact in 2020 is roughly equivalent to that of the CO2e from Denmark in 2018.\nA study affirms and explains why a moderate decrease in body temperature extends lifespan.\n5 April\nThe NOAA reports that greenhouse gases continued to increase rapidly in 2022 and that CO2 levels in the atmosphere are now the highest in 4.3 million years.\nAn umbrella review summarizes scientific results on the extensive health effects of added-sugar foods and makes recommendations such as limiting sugar-sweetened beverages which are \"the largest source of added sugars\" and developing of policy such as advertising restrictions.\n6 April \u2013 A study shows neurons take up glucose (from food) and metabolize it by glycolysis. There was only limited research on how neurons get their energy in the context of links between glucose metabolism and cognition (brain health and performance).\n10 April \u2013 A study expands upon the role of elites' unsustainable consumption in urban water crises. 
In Cape Town, for example, the wealthiest 14% of the population use half of the city's water, while the poorest 62% use just a quarter.\n11 April \u2013 A study reports that genomic surveillance (GS) shows that a clonal lineage of the wheat blast fungus has spread", "start_char_idx": 0, "end_char_idx": 2006, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69": {"__data__": {"id_": "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "181df629-dabe-499f-b9e4-5517c1106e22", "node_type": null, "metadata": {}, "hash": "1c0cfcf1846867db87e14694014a3b84dbacfd2298ec2e05fe9a82e79c7c881f"}, "3": {"node_id": "6b4cd20b-0460-4cf6-838a-987a9ead426b", "node_type": null, "metadata": {}, "hash": "dbed2b759f925de4023bcee94e0ba1cf8298e578a440c312a5788c45ed972083"}}, "hash": "2de6f537170cfe777c202cb5b48c4e8ded65a0f307dfeaad4c63b5545d5068b1", "text": "(GS) shows that a clonal lineage of the wheat blast fungus has spread worldwide and that there is a need for GS to track and mitigate the potential pandemic threat to the global food supply as it may become fungicide-insensitive.\n13 April\nThe direct imaging of HIP 99770 b, a new exoplanet found 133 light years away, is reported by astronomers.\nA global trend towards more rapid-onset \"flash droughts\" hindering forecasting is reported.\n14 April\nJupiter Icy Moons Explorer (JUICE) is launched by the European Space Agency (ESA) to search for life in the Jovian system, with an expected arrival date of 2031.\nA review reports that a gender-affirming therapy in adolescents \u2013 gender transition for \"rapidly growing numbers\" 
of gender-dysphoric youth \u2013 is not supported by the evidence, and asks the field to honor principles of evidence-based medicine.\n17 April\nA new technique for improving the resolution of post-mortem MRI brain scans \"by 64 million times\" is reported by researchers, who capture the sharpest ever images of an entire mouse brain.\nA study expands upon the international Earth heat inventory from 2020, which provides a measure of the Earth energy imbalance (EEI) and allows for quantifying how much and where heat has accumulated in the Earth system with comprehensive data. It suggests that the EEI is the \"most fundamental global climate indicator\" to gauge climate change mitigation efforts.\n18 April\nAstronomers conclude that \"... planets in the habitable zones of stars with low metallicity are the best targets to search for complex life on land.\"\nA university reports a study (29 Mar) affirming the high level of economic losses from biological invasions, showing they have risen to the level of economic damage costs from floods or earthquakes, which are also rising.\n19 April\nA bolide is observed over Ukraine and Belarus for about five seconds. 
It is first observed at an altitude of 98 km above Velyka Dymerka, then passes directly", "start_char_idx": 1950, "end_char_idx": 3911, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "6b4cd20b-0460-4cf6-838a-987a9ead426b": {"__data__": {"id_": "6b4cd20b-0460-4cf6-838a-987a9ead426b", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69", "node_type": null, "metadata": {}, "hash": "2de6f537170cfe777c202cb5b48c4e8ded65a0f307dfeaad4c63b5545d5068b1"}, "3": {"node_id": "23688f93-fc51-495e-ae70-c4b79a00d153", "node_type": null, "metadata": {}, "hash": "f3037ba4ac0936be6f7692f84daacfb85cc65e6d80b93ddcaf2cb642aa3aea76"}}, "hash": "dbed2b759f925de4023bcee94e0ba1cf8298e578a440c312a5788c45ed972083", "text": "an altitude of 98 km above Velyka Dymerka, then passes directly above Kyiv at an altitude of 80 km and continues to the southwest with a speed of 29 km/s. A bright flare occurs at an altitude of 38 km, when the bolide's absolute magnitude reaches approximately \u201318.\nThe likely cause of grey hair is shown to be pigment-making cells losing their ability to mature into melanocytes.\nResearchers show parrots can and enjoy to use a videocalling system.\nA study with mice shows that microplastics pass the blood\u2013brain barrier (BBB), entering and accumulating in the brain, and identifies a key determinant for whether or not they pass the BBB.\n20 April\nA new 29-year record of ice sheet mass in Greenland and Antarctica is published as part of the IMBIE collaboration. 
It finds that the combined ice loss in these regions has more than tripled since the early 1990s, with 2019 seeing the greatest losses of any year on record. These findings have implications for future sea level rise.\nPaleoneurologists publish the first neuroevolutionary timeline about correlations of changes in the shape of the cerebral cortex and functions, showing \"variability in surface geometry relates to species' ecology and behaviour\" and cognition. It characterizes many of the neuromorphological events in the origin of distinct human intelligence over the past 77 million years.\nA UNICEF report indicates \"public perception of the importance of vaccines for children declined during the COVID-19 pandemic in 52 out of 55 countries studied\" with causal factors including \"growing access to misleading information\". On 26 April, news outlets report that Twitter is warned by EU digital policy-makers after a report indicated its recent policies \"boost\" Russian disinformation-based propaganda. On 17 April, Twitter introduces labels for rationales when tweets are made less visible which previously were semi-censored without any explanation. 
On 5 April, the first review of interventions against false conspiracy beliefs, with interventions \"that fostered an analytical mindset or taught critical", "start_char_idx": 3916, "end_char_idx": 5990, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "23688f93-fc51-495e-ae70-c4b79a00d153": {"__data__": {"id_": "23688f93-fc51-495e-ae70-c4b79a00d153", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "6b4cd20b-0460-4cf6-838a-987a9ead426b", "node_type": null, "metadata": {}, "hash": "dbed2b759f925de4023bcee94e0ba1cf8298e578a440c312a5788c45ed972083"}, "3": {"node_id": "212ada4e-157a-4241-b453-30f8e5f8f3e1", "node_type": null, "metadata": {}, "hash": "97a7ab4238b6213dabf3e04318e0478313f1f66b83155259b57c343f679c83e2"}}, "hash": "f3037ba4ac0936be6f7692f84daacfb85cc65e6d80b93ddcaf2cb642aa3aea76", "text": "with interventions \"that fostered an analytical mindset or taught critical thinking skills\" being most effective and preventive action being important.\n21 April \u2013 Researchers report the development of neuromorphic AI hardware using nanowires (see also 2020-04-20) physically mimicking the brain's activity in identifying and remembering an image from memory. 
On 26 April, a university reports on a demonstration (11 Mar) of multisensory motion cue integration by a neuromorphic nerve for robots.24 April\nAstronomers release close-up global images, for the first time, of the Martian moon Deimos that were taken by the Mars Hope orbiter.\nThe first review of issues identified in meta-science of metascience is published, providing an overview of ten \"questionable\" practices (QMPs) in the field such as \"overplaying the role of replication in science\" and preregistration potential.\nA policy study identifies reduction of car travel activity as the most important transportation policy option in reducing GHG emissions to levels comparable to carbon budget levels, with a \"decrease car distance driven and car ownership by over 80% as compared to current levels\" by 2027 being effective in \"edging close to the designated carbon budget\" in their case-study of London and electrification being highly insufficient. On 20 April, an international study indicates that the contemporary domestic policy-proposal of a general speed limit on highways in Germany, the only large country in the world without such, for a quick GHG emissions reduction would also be economically beneficial. It points to a climate change mitigation law (KSG) that mandated emission reductions in this sector that was changed in 2023 so as to remove these obligations.\n25 April\nScientists, based on new evidence, conclude that Rosalind Franklin was a contributor and \"equal player\" in the discovery process of DNA, rather than otherwise, as may have been presented subsequently after the time of the discovery.\nThe first gene silencing approach to Alzheimer's disease is reported, with a drug called BIIB080 used on the microtubule-associated protein tau (MAPT) gene. 
Patients in a Phase 1 trial were", "start_char_idx": 5979, "end_char_idx": 8150, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "212ada4e-157a-4241-b453-30f8e5f8f3e1": {"__data__": {"id_": "212ada4e-157a-4241-b453-30f8e5f8f3e1", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "23688f93-fc51-495e-ae70-c4b79a00d153", "node_type": null, "metadata": {}, "hash": "f3037ba4ac0936be6f7692f84daacfb85cc65e6d80b93ddcaf2cb642aa3aea76"}, "3": {"node_id": "71898eef-f64f-4d47-a677-343a227d9524", "node_type": null, "metadata": {}, "hash": "e48fadeb031fcf69647b29b0577ec12c02f472001d1dfc3853c40b0a29da708c"}}, "hash": "97a7ab4238b6213dabf3e04318e0478313f1f66b83155259b57c343f679c83e2", "text": "protein tau (MAPT) gene. Patients in a Phase 1 trial were found to have a greater than 50% reduction in levels of harmful tau protein after taking the drug.\n26 April\nAstronomers present an image, for the first time viewed together, of the shadow of the black hole in the center of the Messier 87 galaxy, and its related high-energy jet.\nThe first-ever global assessment of glacier mass loss from satellite radar altimetry is published. 
It shows that glaciers lost 2,720 gigatonnes of ice, about 2% of their volume, between 2010 and 2020.\n28 April \u2013 Progress in AI software:\nChatGPT is shown to outperform human doctors in responding to online medical questions when measured on quality and empathy by \"a team of licensed health care professionals\", albeit the chatbot may have previously been trained with these reddit question and answers threads.\nFurther LLM developments during what has been called an \"AI boom\" include: local or open source versions of LLaMA which was leaked in March, news outlets report on GPT4-based Auto-GPT that given natural language commands uses the Internet and other tools in attempts to understand and achieve its tasks with unclear or so-far little practicality, a systematic evaluation of answers from four \"generative search engines\" suggests their outputs \"appear informative, but frequently contain unsupported statements and inaccurate citations\", a multi-modal open source tool for understanding and generating speech, a data scientist argues that \"researchers need to collaborate to develop open-source LLMs that are transparent\" and independent, Stability AI launches an open source LLM.\nOn 12 April, researchers demonstrate an 'AI scientist' that can create of models of natural phenomena from knowledge axioms and experimental data, showing the software can rediscover physical laws using logical reasoning and few data points.\nPromising results of therapeutic candidates are reported: a review suggests daily vitamin D3 may reduce cancer mortality by around 12% (31 Mar), review of experimental phototherapies against dementia cognitive", "start_char_idx": 8169, "end_char_idx": 10249, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "71898eef-f64f-4d47-a677-343a227d9524": {"__data__": {"id_": "71898eef-f64f-4d47-a677-343a227d9524", "embedding": null, "metadata": {}, 
"excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "212ada4e-157a-4241-b453-30f8e5f8f3e1", "node_type": null, "metadata": {}, "hash": "97a7ab4238b6213dabf3e04318e0478313f1f66b83155259b57c343f679c83e2"}, "3": {"node_id": "dc262176-f882-4b19-9a83-c9e81d77d3b8", "node_type": null, "metadata": {}, "hash": "acb70087e4cc902de303a48e0672c1ea09ae704635bbc8a71f602857cf1c5054"}}, "hash": "e48fadeb031fcf69647b29b0577ec12c02f472001d1dfc3853c40b0a29da708c", "text": "12% (31 Mar), review of experimental phototherapies against dementia cognitive decline (5 Apr), mice-tested L. reuteri-and-tryptophan-diet for checkpoint inhibitor potentiation (6 Apr), doxycycline post-exposure prophylaxis against STIs (6 Apr), an engineered probiotic against alcohol-induced damage (11 Apr), phase 2 trialed AXA1125 against long COVID fatigue (14 Apr), review finds cranberry products useful against UTIs in women (17 Apr), and macaques-tested low-intensity focus ultrasound delivery of AAV into brain regions against brain diseases (19 Apr). 
Progress in screening: an \u03b1-synuclein SAA (assay) against Parkinson's disease (12 Apr), and exogenously administered bioengineered sensors that amplify urinary cancer biomarkers for detection (24 Apr).\nPromising innovations relating to global challenges are reported: a laser-using drone-based methane plume localization method, approval of the first yeast-based cow-free dairy (Remilk), a Tor browser-equivalent Web browser for privacy-protected browsing when using a VPN (Mullvad browser), a concentrated solar-to-hydrogen device approaching viability, a method for fat tissue cultured meat, flexible organic solar cells on balloons in the 35 km stratosphere.\n\n\n=== May ===\n1 May\nA new brain-reading method for \"semantic decoding\" is demonstrated. The non-invasive system, based on 16 hours of fMRI data per participant and a transformer, is able to translate a person's neural activity into a continuous stream of text.\nNews outlets report the first study (6 Feb) modelling contemporary detectability of human civilization from afar which suggests overall radio-leakage from mobile towers would still be too weak to be detectable with humanity's next-generation radio telescopes from three of Earth's current closest nearby star-systems. 
Radar systems are not yet included in their model, while radar emissions during \u2013 and possibly since \u2013 the", "start_char_idx": 10228, "end_char_idx": 12137, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "dc262176-f882-4b19-9a83-c9e81d77d3b8": {"__data__": {"id_": "dc262176-f882-4b19-9a83-c9e81d77d3b8", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "71898eef-f64f-4d47-a677-343a227d9524", "node_type": null, "metadata": {}, "hash": "e48fadeb031fcf69647b29b0577ec12c02f472001d1dfc3853c40b0a29da708c"}, "3": {"node_id": "758c1ea9-bb37-4813-b891-e2f90686393f", "node_type": null, "metadata": {}, "hash": "eb15f34b0356f23e89ad5d135a76be4ee32646ceeb2c391167cd074d543244c1"}}, "hash": "acb70087e4cc902de303a48e0672c1ea09ae704635bbc8a71f602857cf1c5054", "text": "not yet included in their model, while radar emissions during \u2013 and possibly since \u2013 the Cold War are thought to be the first most detectable cue by which hypothetical extraterrestrials could detect humanity.\nThe second study, after one from early 2022 with similar results, about EEG data of dying humans finds a surge of gamma waves and increased functional connectivities in two of four patients. 
It provides data and analysis about the brain process of dying (terminal loss of sentience and life) and near-death experiences.\n2 May\nA new AI algorithm developed by Baidu is shown to boost the antibody response of COVID-19 mRNA vaccines by 128 times.\nA single-molecule valve is demonstrated, a breakthrough in nanoscale control of fluids.\nScientists report economic factors of neurology or mental health and cognition during child development: association of low income with brain structure and hippocampal volume, stronger associations in U.S. states with higher cost of living, and lower associations for stronger social safety nets for low-income-affected people.3 May\nDrug company Eli Lilly reports that donanemab can slow the pace of Alzheimer's disease by 35%, following a Phase 3 study in human patients.\nAstronomers using the Gemini South telescope report the first direct evidence of an exoplanet being swallowed by an ancient Sun-like star, a fate that likely awaits the Earth in five billion years.\n4 May \u2013 Westinghouse Electric's nuclear division announces the AP300, a miniature version of its signature AP1000 nuclear reactor.\n5 May \u2013 The World Health Organization announces that COVID-19 is no longer considered a global health emergency.\n8 May\nThe first infrared image of an asteroid belt outside our Solar System is captured by the James Webb Space Telescope. 
Three distinct rings of debris are shown to exist around Fomalhaut, a young star 25 light years away.\nAI successfully identifies people at the highest risk for pancreatic cancer up to three years before diagnosis, using solely the patients' medical records.\n10 May\nA rough draft of the", "start_char_idx": 12134, "end_char_idx": 14198, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "758c1ea9-bb37-4813-b891-e2f90686393f": {"__data__": {"id_": "758c1ea9-bb37-4813-b891-e2f90686393f", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "dc262176-f882-4b19-9a83-c9e81d77d3b8", "node_type": null, "metadata": {}, "hash": "acb70087e4cc902de303a48e0672c1ea09ae704635bbc8a71f602857cf1c5054"}, "3": {"node_id": "7e403166-2350-4ff6-a516-3cbb25356f32", "node_type": null, "metadata": {}, "hash": "405a764ce822658d4e2ceecc26b4b77c75fba0a9827abb24c1ed0a25b7fffcc8"}}, "hash": "eb15f34b0356f23e89ad5d135a76be4ee32646ceeb2c391167cd074d543244c1", "text": "using solely the patients' medical records.\n10 May\nA rough draft of the human \"pan-genome\" is presented, consisting of 47 genomes from a cohort of genetically diverse individuals. 
This aims to improve medical research by building on the earlier Human Genome Project.\nScientists demonstrate with experimental evolution how macroscopic multicellularity could have emerged on Earth.\n11 May \u2013 The discovery of 62 new moons of Saturn is reported, taking its total confirmed number to 145 and overtaking Jupiter.\n15 May\nThe National Institutes of Health begins a Phase 1 trial of an mRNA-based universal influenza vaccine, enrolling 50 volunteers.\nA study shows most extensively the neuro-molecular mechanics of how a fungal parasite affects behavior of insects.\nA study found that, of 70,000 monitored species, some 48% are experiencing population declines from human activity, whereas only 3% have increasing populations.\nBy publishing virome-related results, researchers close a major gap in the acceleratingly accumulating research into centenarians' microbiome characteristics for life extension.\n16 May \u2013 A software tool called Allegro is reported to accurately simulate 44 million atoms, running on the Perlmutter supercomputer.\n17 May\nScientists report, based on genetic studies, a more complicated pathway of human evolution than previously understood. According to the studies, humans evolved from different places and times in Africa, instead of from a single location and period of time.\nThe newly discovered exoplanet LP 791-18 d is theorised to be covered with volcanoes, due to the extreme gravitational pull of a super-Earth in the same system.\nA study proposes school curricula start including useful basic life support, noting that e.g. complemented video lessons could be effective.\n18 May\nAstronomers map the paths of potentially hazardous asteroids for the next 1,000 years. 
At least 28 asteroids of 1 km diameter or larger are found to have non-zero probabilities of a 'deep encounter' with", "start_char_idx": 14211, "end_char_idx": 16217, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "7e403166-2350-4ff6-a516-3cbb25356f32": {"__data__": {"id_": "7e403166-2350-4ff6-a516-3cbb25356f32", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "758c1ea9-bb37-4813-b891-e2f90686393f", "node_type": null, "metadata": {}, "hash": "eb15f34b0356f23e89ad5d135a76be4ee32646ceeb2c391167cd074d543244c1"}, "3": {"node_id": "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6", "node_type": null, "metadata": {}, "hash": "2fa01cbd033355cb0cf08eca484e62114ebf2018ac45c855f3cb0bed6c4d5252"}}, "hash": "405a764ce822658d4e2ceecc26b4b77c75fba0a9827abb24c1ed0a25b7fffcc8", "text": "are found to have non-zero probabilities of a 'deep encounter' with Earth.\nA study reports that more than 50% of freshwater lakes and reservoirs lost volume since 1992 due to human and climatic drivers.\n19 May\nA policies study review, based on a systematic examination of existing methane policies across sectors, concludes that both only \"about 13% of methane emissions are covered by methane mitigation policies\" and that the effectiveness of these policies \"is far from clear\".\nResearchers propose a methodological approach and quantifications for reparations from fossil fuel producers. 
Cross-disciplinary researchers propose academics make universities implement, leadingly, the plant-based dietary transition that an increasingly large and confirmed corpus of studies, to which these contributed to, concludes is vital (7 May).\nMetascience-related events\nNature reports China has \"overtaken the United States as the number one ranked country or territory for contributions to research articles published in the Nature Index group of high-quality natural-science journals\", remaining at second place overall. The Nature Index, since 2016, evaluates contribution by the number of articles published in a subgroup of their journals \u2013 other potential or less popular approaches and metrics for quantifications of success or impact can or could produce different rankings or annual tables and conclusions.\n34% of neuroscience papers and 23% of medical papers published in 2020 were probably fabricated or plagiarized, according to a preprint study, stemming from paper mills (9 May).\nA time-use research study (10 May) estimates the costs of manuscript (re)formatting to fit journal guidelines, ~$230 million or ~75 million hours of lost academics' time in 2021. 
As researchers, usually with little time, usually conduct these tasks themselves and manually and the, largely cosmetic, unstandardized changes are required before, not after, the paper is accepted for publication, the study proposes journals start allowing \"free-format submissions\".\nA study (25 May) highlights a list of problematic persuasive", "start_char_idx": 16222, "end_char_idx": 18331, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6": {"__data__": {"id_": "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "7e403166-2350-4ff6-a516-3cbb25356f32", "node_type": null, "metadata": {}, "hash": "405a764ce822658d4e2ceecc26b4b77c75fba0a9827abb24c1ed0a25b7fffcc8"}, "3": {"node_id": "66f24e08-79cb-4559-8d91-44ab4270add3", "node_type": null, "metadata": {}, "hash": "245d161980c9962b654c1051f50b83b9a21ca0fbdaab03df36408ac1e6dc4a5b"}}, "hash": "2fa01cbd033355cb0cf08eca484e62114ebf2018ac45c855f3cb0bed6c4d5252", "text": "submissions\".\nA study (25 May) highlights a list of problematic persuasive methods in academic articles, such as exaggerating the importance of the work or insufficient contextualization by \"Not citing previous work that decreases the perceived novelty of the current work\".21 May \u2013 IBM announces that it will begin development of a 100,000-qubit quantum computer, the world's largest and most powerful, to be completed by 2033.\n22 May \u2013 A study quantifies \"the human cost of global warming\", showing current policies \"leading to around 2.7\u2009\u00b0C global warming could by 2080\u20132100 leave one-third (22\u201339%)\" of 
people outside their climate niche (see also 4 May 2020) \u2013 humans' long-time range of mean annual temperatures to which their physiology may have largely adapted to. It indicates meeting the 1.5\u2009\u00b0C goal would decrease the population exposed to unprecedented heat\u2009~5-fold and ties itself to earlier research by initially noting that quantifying the social cost of carbon in monetary terms, as related or economics studies tend to do, may be inadequate.\n23 May\nUsing the Hubble Space Telescope and Gaia spacecraft, an analysis of proper motions of the closest known globular cluster, Messier 4, reveals an excess mass of roughly 800 solar masses in the center. This appears to not be extended, and could thus be the best kinematic evidence for an intermediate-mass black hole (even if an unusually compact cluster of compact objects, white dwarfs, neutron stars or stellar-mass black holes cannot be completely discounted).\nThe first survey study of academics' perception of the topic of \"unidentified aerial phenomena\" \u2013 also called UAP and one type of \"anomalous phenomena\" \u2013 suggests few academics are knowledgeable about the existing academic literature on UAP, such as studies by Knuth, Nolan and Loeb or of The Galileo Project. The majority was not or only slightly aware of the 2021 US Intelligence UFO Report. 
Results indicate", "start_char_idx": 18324, "end_char_idx": 20263, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "66f24e08-79cb-4559-8d91-44ab4270add3": {"__data__": {"id_": "66f24e08-79cb-4559-8d91-44ab4270add3", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6", "node_type": null, "metadata": {}, "hash": "2fa01cbd033355cb0cf08eca484e62114ebf2018ac45c855f3cb0bed6c4d5252"}, "3": {"node_id": "49d408fd-5bb2-4c41-b807-60daf4b1531a", "node_type": null, "metadata": {}, "hash": "85975279279946ded84af9db8e61c024842ced9b9bfa5464ed571c6506e193b4"}}, "hash": "245d161980c9962b654c1051f50b83b9a21ca0fbdaab03df36408ac1e6dc4a5b", "text": "not or only slightly aware of the 2021 US Intelligence UFO Report. Results indicate \"more academic research about UAP\" is rated by the respondent population (~4% of ~40,000 contacted) most commonly as \"Of Average Importance\", below and before \"Very Important\".\nA study identifies plastic chopping boards as a substantial source of ingested microplastics. Researchers show plastic recycling facilities are a major source of microplastic water pollution (1 May).\nComputational neuroscientists show that people with higher intelligence scores in HCP cognitive tests took more time to solve difficult problems and that their higher synchrony between brain areas allowed for better integration of evidence (or progress) from preceding working memory sub-problem processing. Reducing synchrony in \"avatar\" simulations, that were adjusted and tuned towards personalization, \"led decision-making circuits to quickly jump to conclusions\". 
Their codified results may be useful for an understanding of cognition to replicate or imitate in bio-inspired computing.\nResearchers report trends in reasons for HPV vaccine hesitancy during 2010\u201320.\n24 May\nScientists show how gene 'FAAH'-related disruption via genetic or epigenome editing can enable pain insensitivity (see also 10 March 2021). Their analyses, mainly about long non-coding RNA 'FAAH-OUT', following from decade-long study of a woman who can't feel pain or anxiety, could also enable novel therapeutic developments against other neurological problems.\nOne of the first empirical studies on what real users are shown during their typical use of popular Web search engines interprets its results to show that choices for unreliable news sources for their queries are driven primarily by users' own choices and less by the engine's algorithms. The Web scientists link their findings to the concept of filter bubbles which emphasizes the role of design- and personalization algorithms. On 2 May, a report accompanied by an open letter concludes that Alphabet Inc, against its voluntary promises, still runs climate misinformation ads. 
Statements by Elon Musk in 2022 suggest YouTube", "start_char_idx": 20259, "end_char_idx": 22386, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "49d408fd-5bb2-4c41-b807-60daf4b1531a": {"__data__": {"id_": "49d408fd-5bb2-4c41-b807-60daf4b1531a", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "66f24e08-79cb-4559-8d91-44ab4270add3", "node_type": null, "metadata": {}, "hash": "245d161980c9962b654c1051f50b83b9a21ca0fbdaab03df36408ac1e6dc4a5b"}, "3": {"node_id": "fc9299ee-ee60-4abc-8e2f-45295f64a7aa", "node_type": null, "metadata": {}, "hash": "53ce5fb0c66d0b346ac988da8f3e954fac4af17c985b820c97514e6abf37b37f"}}, "hash": "85975279279946ded84af9db8e61c024842ced9b9bfa5464ed571c6506e193b4", "text": "still runs climate misinformation ads. Statements by Elon Musk in 2022 suggest YouTube may also show ethically disputed advertising other than science-related misinformation such as extensively showing \"scam ads\".\n25 May\n5,000 marine species new to science are discovered in the Clipperton Fracture Zone, a proposed deep sea mining hotspot in the Pacific Ocean.\nAI is used to develop an experimental antibiotic called abaucin, which is shown to be effective against A. 
baumannii.\nEvidence for the existence of a second Kuiper Belt is presented by NASA scientists, which the New Horizons spacecraft could potentially visit during the late 2020s or early 2030s.\nA study reports observational evidence for problematic fast slowdown of the Antarctic bottom water current.\nNeuroengineers demonstrate induction of a torpor-like state in mice via ultrasound stimulation.\n29 May\nA new record high efficiency of 19.3% for organic solar cells is reported.\nMBR Explorer is announced by the United Arab Emirates Space Agency, an uncrewed mission to explore seven asteroids, which includes an attempted surface landing on 269 Justitia in 2034.\nScientists provide details of H5N1 bird flu's fast viral evolution of clade 2.3.4.4b including reassortment after \"explosive geographic expansion in 2021 among wild birds\", with relevance to measures such as existing candidate vaccines.\n31 May\nThe first X-ray of a single atom is reported.\nAn international study, using modelling and literature assessment, codifies, integrates into and quantifies \"safe and just Earth system boundaries\" (ESBs) with the context of Earth system stability and minimization of human harm. 
They expand upon earlier boundary frameworks by incorporating concepts such as intra- and intergenerational justice, propose that their framework may better enable a quantitative foundation for safeguarding the global commons, and report many of the ESBs are already exceeded.\nHealthcare systems related results are published: large increases in medication prices via sustained decrease in their use can cause poorer disease control (8 May),", "start_char_idx": 22383, "end_char_idx": 24475, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "fc9299ee-ee60-4abc-8e2f-45295f64a7aa": {"__data__": {"id_": "fc9299ee-ee60-4abc-8e2f-45295f64a7aa", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "49d408fd-5bb2-4c41-b807-60daf4b1531a", "node_type": null, "metadata": {}, "hash": "85975279279946ded84af9db8e61c024842ced9b9bfa5464ed571c6506e193b4"}, "3": {"node_id": "bdf2433e-f430-4550-a566-bf5624374b70", "node_type": null, "metadata": {}, "hash": "9b8169681ccd4608c0a72477730c40122550754dacb5932a9af709a15a523133"}}, "hash": "53ce5fb0c66d0b346ac988da8f3e954fac4af17c985b820c97514e6abf37b37f", "text": "decrease in their use can cause poorer disease control (8 May), widespread implementation of the particular Alzheimer's disease therapeutic solution lecanemab may increase annual U.S. Medicare spending by $2.0 to $5.1 billion (11 May), mailed HPV self-collection kits with scheduling assistance can lead to greater uptake of cervical cancer screening (11 May), cost-related medication nonadherence occurs in approximately 1 in 5 older adults in the U.S. in 2022 (18 May), and a QALY-based health economics study evaluates the cost-effectiveness of U.S. 
population-wide screening for CKD (23 May).\nResearch on potential current public risk sources is published: the common DBP and BPAF appear to have \"substantial impact on the integrity of the sperm nucleus and DNA structure\" in mice via oxidative stress (5 May), a preliminary study contextualizes \"time spent on social media\" as one of the \"least influential factors in adolescent mental health\" (8 May), ubiquitous environmental contaminant TCE appears to be a risk factor for Parkinson's disease (PD) (15 May), various pesticides are identified as potential risk factors for PD (16 May), researchers demonstrate a two zero-day vulnerabilities-based quick low-cost method \u2013 \"BrutePrint\" \u2013 for bypassing common smartphones' fingerprint authentication (18 May), and common sucralose impurity sucralose-6-acetate appears to be DNA damaging with sucralose-sweetened drinks potentially far exceeding the threshold of toxicological concern (29 May).\nPromising innovations relating to global challenges are demonstrated: an open source automated experimentation science platform (BacterAI) for predicting microbial metabolism with little data (4 May), a pesticide alternative against wheat seed loss (22 May), a low-cost smartphone-attachment (BPClip) for blood pressure measurement (29 May), an open source transfer learning-based system", "start_char_idx": 24497, "end_char_idx": 26382, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "bdf2433e-f430-4550-a566-bf5624374b70": {"__data__": {"id_": "bdf2433e-f430-4550-a566-bf5624374b70", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "fc9299ee-ee60-4abc-8e2f-45295f64a7aa", "node_type": 
null, "metadata": {}, "hash": "53ce5fb0c66d0b346ac988da8f3e954fac4af17c985b820c97514e6abf37b37f"}, "3": {"node_id": "5fcd0003-fce2-49bc-a004-75e7ba06a7ad", "node_type": null, "metadata": {}, "hash": "077ef8fce032bfceca02aa0332bce50087a91ee6f22b166dc4359c95a14f638c"}}, "hash": "9b8169681ccd4608c0a72477730c40122550754dacb5932a9af709a15a523133", "text": "for blood pressure measurement (29 May), an open source transfer learning-based system (Geneformer) for predicting how networks of interconnected human genes control or affect the function of cells (31 May).\nPromising results of therapeutic candidates are reported: phase\u2009I trialed ultrasound BBB-opening device against brain cancer (1 May), phase I trialed personalized mRNA vaccine against pancreatic cancer recurrence (10 May), a novel antibiotic (Streptothricin F) against ABR bacteria (9 May), an e-skin for prosthetic sensing (18 May), two-dose JYNNEOS vaccine against mpox appears ~86% (19 May) or ~66% (18 May) effective, and a xenografted mice-tested pan-KRAS-inhibitor against cancer (31 May).\n\n\n=== June ===\n1 June \u2013 Caltech reports the first successful beaming of solar energy from space down to a receiver on the ground, via the MAPLE instrument on its SSPD-1 spacecraft, launched into orbit in January.\n2 June \u2013 Physicist Lucas Lombriser reports an alternative way of interpreting the available scientific data which suggests that the notion of an expanding universe may be more a \"mirage\" than otherwise.\n5 June\nScientists report evidence that Homo naledi, an extinct species of archaic human discovered in 2013 in South Africa, and living as long as 500,000 years ago, buried their dead, created art in their caves and used fire.\nA 'chef' robot developed at the University of Cambridge is trained to watch and learn from cooking videos, and recreate dishes itself.\n6 June \u2013 A study in Nature finds that the first complete disappearance of Arctic sea ice could occur during the 2030s, a decade 
earlier than previously forecast.\n8 June\nUS scientists confirm that the next El Ni\u00f1o has begun, likely resulting in higher global temperatures in late 2023 and into 2024.\nTaurine given to a range of animal species is found to boost health and extend lifespan by up to 12%.\n14 June\nResearchers at the University of Cambridge and", "start_char_idx": 26361, "end_char_idx": 28297, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "5fcd0003-fce2-49bc-a004-75e7ba06a7ad": {"__data__": {"id_": "5fcd0003-fce2-49bc-a004-75e7ba06a7ad", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "bdf2433e-f430-4550-a566-bf5624374b70", "node_type": null, "metadata": {}, "hash": "9b8169681ccd4608c0a72477730c40122550754dacb5932a9af709a15a523133"}, "3": {"node_id": "08659ce9-39e2-48ca-816e-33f5af331d37", "node_type": null, "metadata": {}, "hash": "0f85282e87b66185eddf746374f76e7de35e60c4b2f29050137e6a81cc7877e5"}}, "hash": "077ef8fce032bfceca02aa0332bce50087a91ee6f22b166dc4359c95a14f638c", "text": "lifespan by up to 12%.\n14 June\nResearchers at the University of Cambridge and Caltech report the creation of the first synthetic human embryo from stem cells, without the need for sperm or egg cells.\nScientists report evidence that the planet Earth may have formed in just three million years, much faster than the 100 million years thought earlier.\nAstronomers report that the presence of phosphates on Enceladus, moon of the planet Saturn, has been detected, completing the discovery of all the basic chemical ingredients for life on the moon.\nIBM computer scientists report that a quantum computer produced better results for a physics problem than a 
conventional supercomputer.\nA machine learning model developed at the University of Edinburgh is trained to recognise the key features of chemicals with senolytic activity. It finds three chemicals \u2013 ginkgetin, periplocin and oleandrin \u2013 able to remove senescent cells without damaging healthy cells.\n17 June \u2013 Researchers report that a single gas-stove burner can raise the indoor concentrations of benzene, related to cancer risk, to more than that found in secondhand tobacco smoke.\n21 June \u2013 The first successful transplant of a functional cryopreserved mammalian kidney is reported by the University of Minnesota.\n22 June \u2013 A study in The Lancet predicts that by 2050, the worldwide number of adults with diabetes will more than double, from 529 million to over 1.3 billion. No country is expected to see a decline.\n26 June\nRetatrutide, an experimental drug for obesity, is shown to achieve a more than 24% mean weight reduction in human adults during a Phase 2 trial.\nAstronomers detect, for the first time, methenium, CH3+ (and/or carbon cation, C+), basic ingredients of life as we know it, in interstellar space.\n28 June \u2013 Astronomers report the possible detection of a gravitational wave background (GWB) in the Universe.\n29 June \u2013 Astronomers report using a new technique to detect, for the first time, the release of neutrinos from  the galactic plane of the Milky Way galaxy.\n\n\n=== July ===\n1", "start_char_idx": 28307, "end_char_idx": 30366, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "08659ce9-39e2-48ca-816e-33f5af331d37": {"__data__": {"id_": "08659ce9-39e2-48ca-816e-33f5af331d37", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": 
"42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "5fcd0003-fce2-49bc-a004-75e7ba06a7ad", "node_type": null, "metadata": {}, "hash": "077ef8fce032bfceca02aa0332bce50087a91ee6f22b166dc4359c95a14f638c"}, "3": {"node_id": "778fc2f5-0312-4503-85da-8cb294597451", "node_type": null, "metadata": {}, "hash": "7fd46180ce995de60ba53f3363ce55baa003d9f1dfaff370c3ffc2edaffaeca6"}}, "hash": "0f85282e87b66185eddf746374f76e7de35e60c4b2f29050137e6a81cc7877e5", "text": " the galactic plane of the Milky Way galaxy.\n\n\n=== July ===\n1 July \u2013 The ESA space telescope Euclid is launched, beginning a six-year mission to study dark energy and dark matter.\n5 July \u2013 Harvard astronomer Avi Loeb reports the possibility of finding interstellar material.\n10 July\nDynamic shell formation is demonstrated experimentally for the first time at the University of Rochester's Laboratory for Laser Energetics. Researchers claim their technique is a feasible target for mass production of fusion energy.\nThe highest albedo ever measured for an exoplanet is confirmed using data from the CHEOPS space telescope. The ultra-hot Neptune LTT 9779 b is shown to reflect 80% of incoming light from its star (compared to 75% for Venus), due to the high metal content of its clouds.\n11 July \u2013 Berkeley Earth reports that June 2023 was the warmest June since records began in 1850, and broke the previous record by 0.18 \u00b0C. 
Its temperature dataset suggests that 2023 is now 81% likely to become a new record year for global warming.\n12 July \u2013 Astronomers report considerable success of the James Webb Space Telescope (JWST) after its first year of operations.\n\n\n== Predicted and scheduled events ==\n\nSearch for extraterrestrial intelligence (SETI) and ufology\nFirst major observational campaign of the SETI project COSMIC.\nExpected public release date of the first study by NASA on UAP in mid-2023.\nExpected public first release of results from the international UFO investigation project The Galileo Project led by astronomer Avi Loeb.\nRocket Lab's Venus probe is scheduled to be launched and to arrive on Venus in October, partly to search for signs of life on Venus.\nExpected start of the Vera Rubin Observatory, the Qitai Radio Telescope, the European Spallation Source and the Jiangmen Underground Neutrino Observatory.\nNature has listed 11 clinical trials to watch in 2023. Results of the Participatory Evaluation (of) Aging", "start_char_idx": 30382, "end_char_idx": 32314, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "778fc2f5-0312-4503-85da-8cb294597451": {"__data__": {"id_": "778fc2f5-0312-4503-85da-8cb294597451", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "08659ce9-39e2-48ca-816e-33f5af331d37", "node_type": null, "metadata": {}, "hash": "0f85282e87b66185eddf746374f76e7de35e60c4b2f29050137e6a81cc7877e5"}}, "hash": "7fd46180ce995de60ba53f3363ce55baa003d9f1dfaff370c3ffc2edaffaeca6", "text": "to watch in 2023. 
Results of the Participatory Evaluation (of) Aging (With) Rapamycin (for) Longevity Study (PEARL) clinical trial investigating a life extension intervention are expected to be released.\nScience-related budgets\n US: several fields, research topics and agencies are provided with increased budgets, including the new Advanced Research Projects Agency for Health (ARPA-H). Various changes to the budgets of US institutions like NASA, FDA, EPA and NIH have been described.\n UK: \n\n\n=== Astronomical events ===\n\n\n== Awards ==\n\n\n== Deaths ==\n\n\n== See also ==\n\nCategory:Science events\nCategory:Science timelines\nList of emerging technologies\nList of years in science\n\n\n== References ==\n\n\n== External links ==\n Media related to 2023 in science at Wikimedia Commons", "start_char_idx": 32297, "end_char_idx": 33070, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}}, "docstore/ref_doc_info": {"914e16a9-7a9f-4e06-ae6c-35e1e3296832": {"node_ids": ["181df629-dabe-499f-b9e4-5517c1106e22", "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69", "6b4cd20b-0460-4cf6-838a-987a9ead426b", "23688f93-fc51-495e-ae70-c4b79a00d153", "212ada4e-157a-4241-b453-30f8e5f8f3e1", "71898eef-f64f-4d47-a677-343a227d9524", "dc262176-f882-4b19-9a83-c9e81d77d3b8", "758c1ea9-bb37-4813-b891-e2f90686393f", "7e403166-2350-4ff6-a516-3cbb25356f32", "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6", "66f24e08-79cb-4559-8d91-44ab4270add3", "49d408fd-5bb2-4c41-b807-60daf4b1531a", "fc9299ee-ee60-4abc-8e2f-45295f64a7aa", "bdf2433e-f430-4550-a566-bf5624374b70", "5fcd0003-fce2-49bc-a004-75e7ba06a7ad", "08659ce9-39e2-48ca-816e-33f5af331d37", "778fc2f5-0312-4503-85da-8cb294597451"], "metadata": {}}}}


--------------------------------------------------------------------------------