├── .gitignore ├── LICENSE ├── Neo4j_and_LangChain_for_Enhanced_Question_Answering.ipynb ├── RAG_with_Graph_Database.ipynb └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into 
this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Mohd Kaif 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Neo4j_and_LangChain_for_Enhanced_Question_Answering.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "## Integrating Unstructured and Graph Knowledge with Neo4j and LangChain for Enhanced Question Answering" 21 | ], 22 | "metadata": { 23 | "id": "LXzvg_0MUzhA" 24 | } 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "source": [ 29 | "\n", 30 | "\n", 31 | "#### Installing Dependencies" 32 | ], 33 | "metadata": { 34 | "id": "b0AVqZ9XVC9Z" 35 | } 36 | }, 37 | { 38 | "cell_type": "code", 39 | "source": [ 40 | "! pip install -qU \\\n", 41 | " transformers \\\n", 42 | " datasets \\\n", 43 | " langchain \\\n", 44 | " openai \\\n", 45 | " wikipedia \\\n", 46 | " tiktoken \\\n", 47 | " neo4j \\\n", 48 | " python-dotenv" 49 | ], 50 | "metadata": { 51 | "colab": { 52 | "base_uri": "https://localhost:8080/" 53 | }, 54 | "id": "F52G_upjVIGt", 55 | "outputId": "9e26d2c5-6294-4c3f-de4f-5a51e8961003" 56 | }, 57 | "execution_count": 1, 58 | "outputs": [ 59 | { 60 | "output_type": "stream", 61 | "name": "stdout", 62 | "text": [ 63 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m49.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 64 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m493.7/493.7 kB\u001b[0m \u001b[31m27.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 65 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.9/1.9 MB\u001b[0m \u001b[31m86.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 66 
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.0/77.0 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 67 | "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 68 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m90.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 69 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m192.4/192.4 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 70 | "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", 71 | " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", 72 | " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", 73 | " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", 74 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m29.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 75 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m96.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 76 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m77.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 77 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 78 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 79 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.3/43.3 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 80 | "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 81 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 82 | "\u001b[?25h Building wheel for wikipedia (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 83 | " Building wheel for neo4j (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", 84 | "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", 85 | "llmx 0.0.15a0 requires cohere, which is not installed.\u001b[0m\u001b[31m\n", 86 | "\u001b[0m" 87 | ] 88 | } 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "source": [ 94 | "#### Importing Packanges" 95 | ], 96 | "metadata": { 97 | "id": "B_5pjB2WX_DZ" 98 | } 99 | }, 100 | { 101 | "cell_type": "code", 102 | "source": [ 103 | "import os\n", 104 | "import re\n", 105 | "from langchain.vectorstores.neo4j_vector import Neo4jVector\n", 106 | "from langchain.document_loaders import WikipediaLoader\n", 107 | "from langchain.embeddings.openai import OpenAIEmbeddings\n", 108 | "from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter\n", 109 | "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n", 110 | "from dotenv import load_dotenv" 111 | ], 112 | "metadata": { 113 | "id": "R-h-iIDmYFGh" 114 | }, 115 | "execution_count": 26, 116 | "outputs": [] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "source": [ 121 | "#### Setting API's in Environment Variable[link text](https://)" 122 | ], 123 | "metadata": { 124 | "id": "_JY_gy3BqptG" 125 | } 126 | }, 127 | { 128 | "cell_type": "code", 129 | "source": [ 130 | "load_dotenv()\n", 131 | "os.environ[\"OPENAI_API_KEY\"] = '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'\n", 
132 | "os.environ[\"NEO4J_URI\"] = 'neo4j+s://817ac93a.databases.neo4j.io'\n", 133 | "os.environ[\"NEO4J_USERNAME\"] = 'neo4j'\n", 134 | "os.environ[\"NEO4J_PASSWORD\"] = 'CN1zhoj9bQwUc4JpfRk6hufP9Muojw_bTmaYMcxJXg4'" 135 | ], 136 | "metadata": { 137 | "id": "BIHTHxNtYGRN" 138 | }, 139 | "execution_count": 27, 140 | "outputs": [] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "source": [ 145 | "print(os.getenv('OPENAI_API_KEY'))\n", 146 | "print(os.getenv(\"NEO4J_URI\"))\n", 147 | "print(os.getenv(\"NEO4J_USERNAME\"))\n", 148 | "print(os.getenv('NEO4J_PASSWORD'))" 149 | ], 150 | "metadata": { 151 | "colab": { 152 | "base_uri": "https://localhost:8080/" 153 | }, 154 | "id": "lhhy1qO5orHa", 155 | "outputId": "a99bdcfc-56e3-43c1-8926-562af2f19e44" 156 | }, 157 | "execution_count": 28, 158 | "outputs": [ 159 | { 160 | "output_type": "stream", 161 | "name": "stdout", 162 | "text": [ 163 | "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\n", 164 | "neo4j+s://817ac93a.databases.neo4j.io\n", 165 | "neo4j\n", 166 | "CN1zhoj9bQwUc4JpfRk6hufP9Muojw_bTmaYMcxJXg4\n" 167 | ] 168 | } 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "source": [ 174 | "#### Data Preprocessing" 175 | ], 176 | "metadata": { 177 | "id": "QiTvOmA8rgeZ" 178 | } 179 | }, 180 | { 181 | "cell_type": "code", 182 | "source": [ 183 | "from transformers import AutoTokenizer\n", 184 | "\n", 185 | "# Define the tokenizer using \"bert-base-uncased\"\n", 186 | "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", 187 | "\n", 188 | "# Function to calculate the number of tokens in a text\n", 189 | "def bert_len(text):\n", 190 | " tokens = tokenizer.encode(text)\n", 191 | " return len(tokens)\n", 192 | "\n", 193 | "# Example usage\n", 194 | "input_text = \"This is a sample sentence for tokenization.\"\n", 195 | "num_tokens = bert_len(input_text)\n", 196 | "print(f\"Number of tokens: {num_tokens}\")" 197 | ], 198 | "metadata": { 199 | "id": "lUF9fXmvYO1h", 200 | "colab": { 201 | 
"base_uri": "https://localhost:8080/" 202 | }, 203 | "outputId": "a5b6c25c-5672-455d-e785-4c20148aa013" 204 | }, 205 | "execution_count": 29, 206 | "outputs": [ 207 | { 208 | "output_type": "stream", 209 | "name": "stdout", 210 | "text": [ 211 | "Number of tokens: 11\n" 212 | ] 213 | } 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "source": [ 219 | "# Load Wikipedia articles related to \"Leonhard Euler\"\n", 220 | "raw_documents = WikipediaLoader(query=\"Leonhard Euler\").load()\n", 221 | "\n", 222 | "# Define a text splitter with specific parameters\n", 223 | "text_splitter = RecursiveCharacterTextSplitter(\n", 224 | " chunk_size=200, chunk_overlap=20, length_function=bert_len, separators=['\\n\\n', '\\n', ' ', '']\n", 225 | ")\n", 226 | "\n", 227 | "# Split the content of the first Wikipedia article into smaller documents\n", 228 | "documents = text_splitter.create_documents([raw_documents[0].page_content])" 229 | ], 230 | "metadata": { 231 | "id": "YQqG0o7KbmqZ", 232 | "colab": { 233 | "base_uri": "https://localhost:8080/" 234 | }, 235 | "outputId": "7b8e48d5-df80-4d1a-d192-11ce439fc62a" 236 | }, 237 | "execution_count": 30, 238 | "outputs": [ 239 | { 240 | "output_type": "stream", 241 | "name": "stderr", 242 | "text": [ 243 | "Token indices sequence length is longer than the specified maximum sequence length for this model (736 > 512). 
Running this sequence through the model will result in indexing errors\n" 244 | ] 245 | } 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "source": [ 251 | "print(len(documents))" 252 | ], 253 | "metadata": { 254 | "id": "c68Duv2Nbqqk", 255 | "colab": { 256 | "base_uri": "https://localhost:8080/" 257 | }, 258 | "outputId": "4510be6f-af0c-4c08-e814-da06bf75f7f2" 259 | }, 260 | "execution_count": 31, 261 | "outputs": [ 262 | { 263 | "output_type": "stream", 264 | "name": "stdout", 265 | "text": [ 266 | "18\n" 267 | ] 268 | } 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "source": [ 274 | "#### Initializing Graph Database Neo4j" 275 | ], 276 | "metadata": { 277 | "id": "X043ugczr0X5" 278 | } 279 | }, 280 | { 281 | "cell_type": "code", 282 | "source": [ 283 | "# Instantiate Neo4j vector from documents\n", 284 | "neo4j_vector = Neo4jVector.from_documents(\n", 285 | " documents,\n", 286 | " OpenAIEmbeddings(),\n", 287 | " url=os.environ[\"NEO4J_URI\"],\n", 288 | " username=os.environ[\"NEO4J_USERNAME\"],\n", 289 | " password=os.environ[\"NEO4J_PASSWORD\"]\n", 290 | ")" 291 | ], 292 | "metadata": { 293 | "id": "RSHWwlbJcCi2" 294 | }, 295 | "execution_count": 32, 296 | "outputs": [] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "source": [ 301 | "#### Performing Similarity Search on Ingested Documents" 302 | ], 303 | "metadata": { 304 | "id": "34Fm9UgHwWdG" 305 | } 306 | }, 307 | { 308 | "cell_type": "code", 309 | "source": [ 310 | "# Define the query.\n", 311 | "query = \"Who were the siblings of Leonhard Euler?\"\n", 312 | "\n", 313 | "# Execute the query, get top 2 results.\n", 314 | "vector_results = neo4j_vector.similarity_search(query, k=2)\n", 315 | "\n", 316 | "# Print search results with separation.\n", 317 | "for i, res in enumerate(vector_results):\n", 318 | " print(res.page_content)\n", 319 | " if i != len(vector_results) - 1:\n", 320 | " print()\n", 321 | "\n", 322 | "# Store the content of the most 
similar result.\n", 323 | "vector_result = vector_results[0].page_content" 324 | ], 325 | "metadata": { 326 | "colab": { 327 | "base_uri": "https://localhost:8080/" 328 | }, 329 | "id": "qp8bl2hjruzg", 330 | "outputId": "b5922d9e-7f16-4250-f917-9af03d361fa4" 331 | }, 332 | "execution_count": 33, 333 | "outputs": [ 334 | { 335 | "output_type": "stream", 336 | "name": "stdout", 337 | "text": [ 338 | "== Early life ==\n", 339 | "Leonhard Euler was born on 15 April 1707, in Basel to Paul III Euler, a pastor of the Reformed Church, and Marguerite (née Brucker), whose ancestors include a number of well-known scholars in the classics. He was the oldest of four children, having two younger sisters, An\n", 340 | "\n", 341 | "== Early life ==\n", 342 | "Leonhard Euler was born on 15 April 1707, in Basel to Paul III Euler, a pastor of the Reformed Church, and Marguerite (née Brucker), whose ancestors include a number of well-known scholars in the classics. He was the oldest of four children, having two younger sisters, An\n" 343 | ] 344 | } 345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "source": [ 350 | "#### Building Knowledge Graph" 351 | ], 352 | "metadata": { 353 | "id": "yLCySJqcxV3W" 354 | } 355 | }, 356 | { 357 | "cell_type": "code", 358 | "source": [ 359 | "# Necessary Libraries to setup the Neo4j DB QuestionAnswering Chain\n", 360 | "from langchain.chat_models import ChatOpenAI\n", 361 | "from langchain.chains import GraphCypherQAChain\n", 362 | "from langchain.graphs import Neo4jGraph" 363 | ], 364 | "metadata": { 365 | "id": "EOHIAZrLxh8N" 366 | }, 367 | "execution_count": 34, 368 | "outputs": [] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "source": [ 373 | "# Create a Neo4jGraph object by connecting to a Neo4j database.\n", 374 | "graph = Neo4jGraph(\n", 375 | " url=os.environ[\"NEO4J_URI\"],\n", 376 | " username=os.environ[\"NEO4J_USERNAME\"],\n", 377 | " password=os.environ[\"NEO4J_PASSWORD\"]\n", 378 | ")" 379 | ], 380 | "metadata": { 381 
| "id": "N_sqke-SzfEQ" 382 | }, 383 | "execution_count": 35, 384 | "outputs": [] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "source": [ 389 | "# Print the schema of the Neo4j graph.\n", 390 | "print(graph.schema)" 391 | ], 392 | "metadata": { 393 | "colab": { 394 | "base_uri": "https://localhost:8080/" 395 | }, 396 | "id": "oHFGTHCEz_UD", 397 | "outputId": "eaf7ba8f-7188-4b5a-f4b3-f786f4a457ae" 398 | }, 399 | "execution_count": 36, 400 | "outputs": [ 401 | { 402 | "output_type": "stream", 403 | "name": "stdout", 404 | "text": [ 405 | "\n", 406 | " Node properties are the following:\n", 407 | " [{'labels': 'Chunk', 'properties': [{'property': 'id', 'type': 'STRING'}, {'property': 'embedding', 'type': 'LIST'}, {'property': 'text', 'type': 'STRING'}]}]\n", 408 | " Relationship properties are the following:\n", 409 | " []\n", 410 | " The relationships are the following:\n", 411 | " []\n", 412 | " \n" 413 | ] 414 | } 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "source": [ 420 | "# Create a question-answering chain using GPT-3 and a Neo4j graph, with verbose mode enabled.\n", 421 | "chain = GraphCypherQAChain.from_llm(\n", 422 | " ChatOpenAI(temperature=0), graph=graph, verbose=True\n", 423 | ")" 424 | ], 425 | "metadata": { 426 | "id": "wN_9M9fi0OV8" 427 | }, 428 | "execution_count": 37, 429 | "outputs": [] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "source": [ 434 | "# Use the question-answering chain to query the Neo4j graph.\n", 435 | "graph_result = chain.run(\"Who were the siblings of Leonhard Euler?\")" 436 | ], 437 | "metadata": { 438 | "colab": { 439 | "base_uri": "https://localhost:8080/" 440 | }, 441 | "id": "Iw0foDNr0c-v", 442 | "outputId": "8a776932-7768-4191-d2a4-37c2163b9d2e" 443 | }, 444 | "execution_count": 38, 445 | "outputs": [ 446 | { 447 | "output_type": "stream", 448 | "name": "stdout", 449 | "text": [ 450 | "\n", 451 | "\n", 452 | "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n", 453 | "Generated 
Cypher:\n", 454 | "\u001b[32;1m\u001b[1;3mMATCH (euler:Chunk {text: 'Leonhard Euler'})-[:SIBLING]->(sibling:Chunk)\n", 455 | "RETURN sibling.text\u001b[0m\n", 456 | "Full Context:\n", 457 | "\u001b[32;1m\u001b[1;3m[]\u001b[0m\n", 458 | "\n", 459 | "\u001b[1m> Finished chain.\u001b[0m\n" 460 | ] 461 | } 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "source": [ 467 | "graph_result" 468 | ], 469 | "metadata": { 470 | "colab": { 471 | "base_uri": "https://localhost:8080/", 472 | "height": 37 473 | }, 474 | "id": "ljHsyK3z0pAf", 475 | "outputId": "7d95b141-ce27-4404-9e09-1f12c07f3ab8" 476 | }, 477 | "execution_count": 39, 478 | "outputs": [ 479 | { 480 | "output_type": "execute_result", 481 | "data": { 482 | "text/plain": [ 483 | "\"I'm sorry, but I don't have any information about the siblings of Leonhard Euler.\"" 484 | ], 485 | "application/vnd.google.colaboratory.intrinsic+json": { 486 | "type": "string" 487 | } 488 | }, 489 | "metadata": {}, 490 | "execution_count": 39 491 | } 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "source": [], 497 | "metadata": { 498 | "id": "Ii1H4spv00ba" 499 | }, 500 | "execution_count": null, 501 | "outputs": [] 502 | } 503 | ] 504 | } 505 | -------------------------------------------------------------------------------- /RAG_with_Graph_Database.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "gpuType": "T4", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | }, 17 | "widgets": { 18 | "application/vnd.jupyter.widget-state+json": { 19 | "a5f9427673584f24b42922ebcf8714f4": { 20 | "model_module": "@jupyter-widgets/controls", 21 | "model_name": "VBoxModel", 22 | "model_module_version": "1.5.0", 23 | "state": { 24 | "_dom_classes": [], 25 | 
"_model_module": "@jupyter-widgets/controls", 26 | "_model_module_version": "1.5.0", 27 | "_model_name": "VBoxModel", 28 | "_view_count": null, 29 | "_view_module": "@jupyter-widgets/controls", 30 | "_view_module_version": "1.5.0", 31 | "_view_name": "VBoxView", 32 | "box_style": "", 33 | "children": [ 34 | "IPY_MODEL_78051657194346bc99fb58409ed67870", 35 | "IPY_MODEL_f716d2a99aae4bf4b09349c5d7fc695a", 36 | "IPY_MODEL_2f2d4b6731c042cf8b1d64db40f6234a", 37 | "IPY_MODEL_1d686d4c60474afd907b722e20af1452", 38 | "IPY_MODEL_3ada03b7a08244a797a4178bbe935318" 39 | ], 40 | "layout": "IPY_MODEL_5b26c881dd42490eb144e255ae685fcd" 41 | } 42 | }, 43 | "78051657194346bc99fb58409ed67870": { 44 | "model_module": "@jupyter-widgets/controls", 45 | "model_name": "HTMLModel", 46 | "model_module_version": "1.5.0", 47 | "state": { 48 | "_dom_classes": [], 49 | "_model_module": "@jupyter-widgets/controls", 50 | "_model_module_version": "1.5.0", 51 | "_model_name": "HTMLModel", 52 | "_view_count": null, 53 | "_view_module": "@jupyter-widgets/controls", 54 | "_view_module_version": "1.5.0", 55 | "_view_name": "HTMLView", 56 | "description": "", 57 | "description_tooltip": null, 58 | "layout": "IPY_MODEL_8b78231bf8a94374b6cf468e9fa85929", 59 | "placeholder": "​", 60 | "style": "IPY_MODEL_aecb1c13c7b4493ba613d690bdd2707a", 61 | "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" 62 | } 63 | }, 64 | "f716d2a99aae4bf4b09349c5d7fc695a": { 65 | "model_module": "@jupyter-widgets/controls", 66 | "model_name": "PasswordModel", 67 | "model_module_version": "1.5.0", 68 | "state": { 69 | "_dom_classes": [], 70 | "_model_module": "@jupyter-widgets/controls", 71 | "_model_module_version": "1.5.0", 72 | "_model_name": "PasswordModel", 73 | "_view_count": null, 74 | "_view_module": "@jupyter-widgets/controls", 75 | "_view_module_version": "1.5.0", 76 | "_view_name": "PasswordView", 77 | "continuous_update": true, 78 | "description": "Token:", 79 | "description_tooltip": null, 80 | "disabled": false, 81 | "layout": "IPY_MODEL_50b3cb1532554487b840a0a8539be7dc", 82 | "placeholder": "​", 83 | "style": "IPY_MODEL_0516db4733d84ca1a04be0d90f3cbfa9", 84 | "value": "" 85 | } 86 | }, 87 | "2f2d4b6731c042cf8b1d64db40f6234a": { 88 | "model_module": "@jupyter-widgets/controls", 89 | "model_name": "CheckboxModel", 90 | "model_module_version": "1.5.0", 91 | "state": { 92 | "_dom_classes": [], 93 | "_model_module": "@jupyter-widgets/controls", 94 | "_model_module_version": "1.5.0", 95 | "_model_name": "CheckboxModel", 96 | "_view_count": null, 97 | "_view_module": "@jupyter-widgets/controls", 98 | "_view_module_version": "1.5.0", 99 | "_view_name": "CheckboxView", 100 | "description": "Add token as git credential?", 101 | "description_tooltip": null, 102 | "disabled": false, 103 | "indent": true, 104 | "layout": "IPY_MODEL_ef0ef461e11a4c9f9c7106709388c9ea", 105 | "style": "IPY_MODEL_bc0abd1fc56e4b418d551936ccbf724d", 106 | "value": true 107 | } 108 | }, 109 | "1d686d4c60474afd907b722e20af1452": { 110 | "model_module": "@jupyter-widgets/controls", 111 | "model_name": "ButtonModel", 112 | "model_module_version": "1.5.0", 113 | "state": { 114 | "_dom_classes": [], 115 | "_model_module": "@jupyter-widgets/controls", 116 | "_model_module_version": "1.5.0", 117 | "_model_name": "ButtonModel", 118 | "_view_count": null, 119 | "_view_module": "@jupyter-widgets/controls", 120 
| "_view_module_version": "1.5.0", 121 | "_view_name": "ButtonView", 122 | "button_style": "", 123 | "description": "Login", 124 | "disabled": false, 125 | "icon": "", 126 | "layout": "IPY_MODEL_febb82295cce487abe735f9ecb72796e", 127 | "style": "IPY_MODEL_2dd16270e324409aa8a5e5f8d9464665", 128 | "tooltip": "" 129 | } 130 | }, 131 | "3ada03b7a08244a797a4178bbe935318": { 132 | "model_module": "@jupyter-widgets/controls", 133 | "model_name": "HTMLModel", 134 | "model_module_version": "1.5.0", 135 | "state": { 136 | "_dom_classes": [], 137 | "_model_module": "@jupyter-widgets/controls", 138 | "_model_module_version": "1.5.0", 139 | "_model_name": "HTMLModel", 140 | "_view_count": null, 141 | "_view_module": "@jupyter-widgets/controls", 142 | "_view_module_version": "1.5.0", 143 | "_view_name": "HTMLView", 144 | "description": "", 145 | "description_tooltip": null, 146 | "layout": "IPY_MODEL_202446995dd6467fab421b26fdd4189d", 147 | "placeholder": "​", 148 | "style": "IPY_MODEL_e771c4cb03be41b2bf7c0ee82e255d5a", 149 | "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
" 150 | } 151 | }, 152 | "5b26c881dd42490eb144e255ae685fcd": { 153 | "model_module": "@jupyter-widgets/base", 154 | "model_name": "LayoutModel", 155 | "model_module_version": "1.2.0", 156 | "state": { 157 | "_model_module": "@jupyter-widgets/base", 158 | "_model_module_version": "1.2.0", 159 | "_model_name": "LayoutModel", 160 | "_view_count": null, 161 | "_view_module": "@jupyter-widgets/base", 162 | "_view_module_version": "1.2.0", 163 | "_view_name": "LayoutView", 164 | "align_content": null, 165 | "align_items": "center", 166 | "align_self": null, 167 | "border": null, 168 | "bottom": null, 169 | "display": "flex", 170 | "flex": null, 171 | "flex_flow": "column", 172 | "grid_area": null, 173 | "grid_auto_columns": null, 174 | "grid_auto_flow": null, 175 | "grid_auto_rows": null, 176 | "grid_column": null, 177 | "grid_gap": null, 178 | "grid_row": null, 179 | "grid_template_areas": null, 180 | "grid_template_columns": null, 181 | "grid_template_rows": null, 182 | "height": null, 183 | "justify_content": null, 184 | "justify_items": null, 185 | "left": null, 186 | "margin": null, 187 | "max_height": null, 188 | "max_width": null, 189 | "min_height": null, 190 | "min_width": null, 191 | "object_fit": null, 192 | "object_position": null, 193 | "order": null, 194 | "overflow": null, 195 | "overflow_x": null, 196 | "overflow_y": null, 197 | "padding": null, 198 | "right": null, 199 | "top": null, 200 | "visibility": null, 201 | "width": "50%" 202 | } 203 | }, 204 | "8b78231bf8a94374b6cf468e9fa85929": { 205 | "model_module": "@jupyter-widgets/base", 206 | "model_name": "LayoutModel", 207 | "model_module_version": "1.2.0", 208 | "state": { 209 | "_model_module": "@jupyter-widgets/base", 210 | "_model_module_version": "1.2.0", 211 | "_model_name": "LayoutModel", 212 | "_view_count": null, 213 | "_view_module": "@jupyter-widgets/base", 214 | "_view_module_version": "1.2.0", 215 | "_view_name": "LayoutView", 216 | "align_content": null, 217 | "align_items": null, 218 | 
"align_self": null, 219 | "border": null, 220 | "bottom": null, 221 | "display": null, 222 | "flex": null, 223 | "flex_flow": null, 224 | "grid_area": null, 225 | "grid_auto_columns": null, 226 | "grid_auto_flow": null, 227 | "grid_auto_rows": null, 228 | "grid_column": null, 229 | "grid_gap": null, 230 | "grid_row": null, 231 | "grid_template_areas": null, 232 | "grid_template_columns": null, 233 | "grid_template_rows": null, 234 | "height": null, 235 | "justify_content": null, 236 | "justify_items": null, 237 | "left": null, 238 | "margin": null, 239 | "max_height": null, 240 | "max_width": null, 241 | "min_height": null, 242 | "min_width": null, 243 | "object_fit": null, 244 | "object_position": null, 245 | "order": null, 246 | "overflow": null, 247 | "overflow_x": null, 248 | "overflow_y": null, 249 | "padding": null, 250 | "right": null, 251 | "top": null, 252 | "visibility": null, 253 | "width": null 254 | } 255 | }, 256 | "aecb1c13c7b4493ba613d690bdd2707a": { 257 | "model_module": "@jupyter-widgets/controls", 258 | "model_name": "DescriptionStyleModel", 259 | "model_module_version": "1.5.0", 260 | "state": { 261 | "_model_module": "@jupyter-widgets/controls", 262 | "_model_module_version": "1.5.0", 263 | "_model_name": "DescriptionStyleModel", 264 | "_view_count": null, 265 | "_view_module": "@jupyter-widgets/base", 266 | "_view_module_version": "1.2.0", 267 | "_view_name": "StyleView", 268 | "description_width": "" 269 | } 270 | }, 271 | "50b3cb1532554487b840a0a8539be7dc": { 272 | "model_module": "@jupyter-widgets/base", 273 | "model_name": "LayoutModel", 274 | "model_module_version": "1.2.0", 275 | "state": { 276 | "_model_module": "@jupyter-widgets/base", 277 | "_model_module_version": "1.2.0", 278 | "_model_name": "LayoutModel", 279 | "_view_count": null, 280 | "_view_module": "@jupyter-widgets/base", 281 | "_view_module_version": "1.2.0", 282 | "_view_name": "LayoutView", 283 | "align_content": null, 284 | "align_items": null, 285 | "align_self": null, 
286 | "border": null, 287 | "bottom": null, 288 | "display": null, 289 | "flex": null, 290 | "flex_flow": null, 291 | "grid_area": null, 292 | "grid_auto_columns": null, 293 | "grid_auto_flow": null, 294 | "grid_auto_rows": null, 295 | "grid_column": null, 296 | "grid_gap": null, 297 | "grid_row": null, 298 | "grid_template_areas": null, 299 | "grid_template_columns": null, 300 | "grid_template_rows": null, 301 | "height": null, 302 | "justify_content": null, 303 | "justify_items": null, 304 | "left": null, 305 | "margin": null, 306 | "max_height": null, 307 | "max_width": null, 308 | "min_height": null, 309 | "min_width": null, 310 | "object_fit": null, 311 | "object_position": null, 312 | "order": null, 313 | "overflow": null, 314 | "overflow_x": null, 315 | "overflow_y": null, 316 | "padding": null, 317 | "right": null, 318 | "top": null, 319 | "visibility": null, 320 | "width": null 321 | } 322 | }, 323 | "0516db4733d84ca1a04be0d90f3cbfa9": { 324 | "model_module": "@jupyter-widgets/controls", 325 | "model_name": "DescriptionStyleModel", 326 | "model_module_version": "1.5.0", 327 | "state": { 328 | "_model_module": "@jupyter-widgets/controls", 329 | "_model_module_version": "1.5.0", 330 | "_model_name": "DescriptionStyleModel", 331 | "_view_count": null, 332 | "_view_module": "@jupyter-widgets/base", 333 | "_view_module_version": "1.2.0", 334 | "_view_name": "StyleView", 335 | "description_width": "" 336 | } 337 | }, 338 | "ef0ef461e11a4c9f9c7106709388c9ea": { 339 | "model_module": "@jupyter-widgets/base", 340 | "model_name": "LayoutModel", 341 | "model_module_version": "1.2.0", 342 | "state": { 343 | "_model_module": "@jupyter-widgets/base", 344 | "_model_module_version": "1.2.0", 345 | "_model_name": "LayoutModel", 346 | "_view_count": null, 347 | "_view_module": "@jupyter-widgets/base", 348 | "_view_module_version": "1.2.0", 349 | "_view_name": "LayoutView", 350 | "align_content": null, 351 | "align_items": null, 352 | "align_self": null, 353 | "border": 
null, 354 | "bottom": null, 355 | "display": null, 356 | "flex": null, 357 | "flex_flow": null, 358 | "grid_area": null, 359 | "grid_auto_columns": null, 360 | "grid_auto_flow": null, 361 | "grid_auto_rows": null, 362 | "grid_column": null, 363 | "grid_gap": null, 364 | "grid_row": null, 365 | "grid_template_areas": null, 366 | "grid_template_columns": null, 367 | "grid_template_rows": null, 368 | "height": null, 369 | "justify_content": null, 370 | "justify_items": null, 371 | "left": null, 372 | "margin": null, 373 | "max_height": null, 374 | "max_width": null, 375 | "min_height": null, 376 | "min_width": null, 377 | "object_fit": null, 378 | "object_position": null, 379 | "order": null, 380 | "overflow": null, 381 | "overflow_x": null, 382 | "overflow_y": null, 383 | "padding": null, 384 | "right": null, 385 | "top": null, 386 | "visibility": null, 387 | "width": null 388 | } 389 | }, 390 | "bc0abd1fc56e4b418d551936ccbf724d": { 391 | "model_module": "@jupyter-widgets/controls", 392 | "model_name": "DescriptionStyleModel", 393 | "model_module_version": "1.5.0", 394 | "state": { 395 | "_model_module": "@jupyter-widgets/controls", 396 | "_model_module_version": "1.5.0", 397 | "_model_name": "DescriptionStyleModel", 398 | "_view_count": null, 399 | "_view_module": "@jupyter-widgets/base", 400 | "_view_module_version": "1.2.0", 401 | "_view_name": "StyleView", 402 | "description_width": "" 403 | } 404 | }, 405 | "febb82295cce487abe735f9ecb72796e": { 406 | "model_module": "@jupyter-widgets/base", 407 | "model_name": "LayoutModel", 408 | "model_module_version": "1.2.0", 409 | "state": { 410 | "_model_module": "@jupyter-widgets/base", 411 | "_model_module_version": "1.2.0", 412 | "_model_name": "LayoutModel", 413 | "_view_count": null, 414 | "_view_module": "@jupyter-widgets/base", 415 | "_view_module_version": "1.2.0", 416 | "_view_name": "LayoutView", 417 | "align_content": null, 418 | "align_items": null, 419 | "align_self": null, 420 | "border": null, 421 | 
"bottom": null, 422 | "display": null, 423 | "flex": null, 424 | "flex_flow": null, 425 | "grid_area": null, 426 | "grid_auto_columns": null, 427 | "grid_auto_flow": null, 428 | "grid_auto_rows": null, 429 | "grid_column": null, 430 | "grid_gap": null, 431 | "grid_row": null, 432 | "grid_template_areas": null, 433 | "grid_template_columns": null, 434 | "grid_template_rows": null, 435 | "height": null, 436 | "justify_content": null, 437 | "justify_items": null, 438 | "left": null, 439 | "margin": null, 440 | "max_height": null, 441 | "max_width": null, 442 | "min_height": null, 443 | "min_width": null, 444 | "object_fit": null, 445 | "object_position": null, 446 | "order": null, 447 | "overflow": null, 448 | "overflow_x": null, 449 | "overflow_y": null, 450 | "padding": null, 451 | "right": null, 452 | "top": null, 453 | "visibility": null, 454 | "width": null 455 | } 456 | }, 457 | "2dd16270e324409aa8a5e5f8d9464665": { 458 | "model_module": "@jupyter-widgets/controls", 459 | "model_name": "ButtonStyleModel", 460 | "model_module_version": "1.5.0", 461 | "state": { 462 | "_model_module": "@jupyter-widgets/controls", 463 | "_model_module_version": "1.5.0", 464 | "_model_name": "ButtonStyleModel", 465 | "_view_count": null, 466 | "_view_module": "@jupyter-widgets/base", 467 | "_view_module_version": "1.2.0", 468 | "_view_name": "StyleView", 469 | "button_color": null, 470 | "font_weight": "" 471 | } 472 | }, 473 | "202446995dd6467fab421b26fdd4189d": { 474 | "model_module": "@jupyter-widgets/base", 475 | "model_name": "LayoutModel", 476 | "model_module_version": "1.2.0", 477 | "state": { 478 | "_model_module": "@jupyter-widgets/base", 479 | "_model_module_version": "1.2.0", 480 | "_model_name": "LayoutModel", 481 | "_view_count": null, 482 | "_view_module": "@jupyter-widgets/base", 483 | "_view_module_version": "1.2.0", 484 | "_view_name": "LayoutView", 485 | "align_content": null, 486 | "align_items": null, 487 | "align_self": null, 488 | "border": null, 489 | 
"bottom": null, 490 | "display": null, 491 | "flex": null, 492 | "flex_flow": null, 493 | "grid_area": null, 494 | "grid_auto_columns": null, 495 | "grid_auto_flow": null, 496 | "grid_auto_rows": null, 497 | "grid_column": null, 498 | "grid_gap": null, 499 | "grid_row": null, 500 | "grid_template_areas": null, 501 | "grid_template_columns": null, 502 | "grid_template_rows": null, 503 | "height": null, 504 | "justify_content": null, 505 | "justify_items": null, 506 | "left": null, 507 | "margin": null, 508 | "max_height": null, 509 | "max_width": null, 510 | "min_height": null, 511 | "min_width": null, 512 | "object_fit": null, 513 | "object_position": null, 514 | "order": null, 515 | "overflow": null, 516 | "overflow_x": null, 517 | "overflow_y": null, 518 | "padding": null, 519 | "right": null, 520 | "top": null, 521 | "visibility": null, 522 | "width": null 523 | } 524 | }, 525 | "e771c4cb03be41b2bf7c0ee82e255d5a": { 526 | "model_module": "@jupyter-widgets/controls", 527 | "model_name": "DescriptionStyleModel", 528 | "model_module_version": "1.5.0", 529 | "state": { 530 | "_model_module": "@jupyter-widgets/controls", 531 | "_model_module_version": "1.5.0", 532 | "_model_name": "DescriptionStyleModel", 533 | "_view_count": null, 534 | "_view_module": "@jupyter-widgets/base", 535 | "_view_module_version": "1.2.0", 536 | "_view_name": "StyleView", 537 | "description_width": "" 538 | } 539 | }, 540 | "f73c306cce8d4be1aea3ad912e9ef5dc": { 541 | "model_module": "@jupyter-widgets/controls", 542 | "model_name": "HBoxModel", 543 | "model_module_version": "1.5.0", 544 | "state": { 545 | "_dom_classes": [], 546 | "_model_module": "@jupyter-widgets/controls", 547 | "_model_module_version": "1.5.0", 548 | "_model_name": "HBoxModel", 549 | "_view_count": null, 550 | "_view_module": "@jupyter-widgets/controls", 551 | "_view_module_version": "1.5.0", 552 | "_view_name": "HBoxView", 553 | "box_style": "", 554 | "children": [ 555 | "IPY_MODEL_6f7ec6e81fbc4babab80f5b352caedc8", 556 
| "IPY_MODEL_6d830be2c25a4e499ed8f57758638e23", 557 | "IPY_MODEL_50deede7561347e8af1e0b213d33c3de" 558 | ], 559 | "layout": "IPY_MODEL_55b60bfe36b14fc6837959b6b9f56fac" 560 | } 561 | }, 562 | "6f7ec6e81fbc4babab80f5b352caedc8": { 563 | "model_module": "@jupyter-widgets/controls", 564 | "model_name": "HTMLModel", 565 | "model_module_version": "1.5.0", 566 | "state": { 567 | "_dom_classes": [], 568 | "_model_module": "@jupyter-widgets/controls", 569 | "_model_module_version": "1.5.0", 570 | "_model_name": "HTMLModel", 571 | "_view_count": null, 572 | "_view_module": "@jupyter-widgets/controls", 573 | "_view_module_version": "1.5.0", 574 | "_view_name": "HTMLView", 575 | "description": "", 576 | "description_tooltip": null, 577 | "layout": "IPY_MODEL_24db6f406a024e6f8c78ef83752fe336", 578 | "placeholder": "​", 579 | "style": "IPY_MODEL_42e137910a8b45859ae3517d5bd0cb17", 580 | "value": "Loading checkpoint shards: 100%" 581 | } 582 | }, 583 | "6d830be2c25a4e499ed8f57758638e23": { 584 | "model_module": "@jupyter-widgets/controls", 585 | "model_name": "FloatProgressModel", 586 | "model_module_version": "1.5.0", 587 | "state": { 588 | "_dom_classes": [], 589 | "_model_module": "@jupyter-widgets/controls", 590 | "_model_module_version": "1.5.0", 591 | "_model_name": "FloatProgressModel", 592 | "_view_count": null, 593 | "_view_module": "@jupyter-widgets/controls", 594 | "_view_module_version": "1.5.0", 595 | "_view_name": "ProgressView", 596 | "bar_style": "success", 597 | "description": "", 598 | "description_tooltip": null, 599 | "layout": "IPY_MODEL_fc1fe6d6b77145828e33cc228c694082", 600 | "max": 4, 601 | "min": 0, 602 | "orientation": "horizontal", 603 | "style": "IPY_MODEL_93c80c4fcf01411e8f70d4c7daf23423", 604 | "value": 4 605 | } 606 | }, 607 | "50deede7561347e8af1e0b213d33c3de": { 608 | "model_module": "@jupyter-widgets/controls", 609 | "model_name": "HTMLModel", 610 | "model_module_version": "1.5.0", 611 | "state": { 612 | "_dom_classes": [], 613 | "_model_module": 
"@jupyter-widgets/controls", 614 | "_model_module_version": "1.5.0", 615 | "_model_name": "HTMLModel", 616 | "_view_count": null, 617 | "_view_module": "@jupyter-widgets/controls", 618 | "_view_module_version": "1.5.0", 619 | "_view_name": "HTMLView", 620 | "description": "", 621 | "description_tooltip": null, 622 | "layout": "IPY_MODEL_52d69bc57f3c4299b9aad7c536735f99", 623 | "placeholder": "​", 624 | "style": "IPY_MODEL_78de396d6b2d4ce384faf393b84b73a2", 625 | "value": " 4/4 [01:25<00:00, 18.80s/it]" 626 | } 627 | }, 628 | "55b60bfe36b14fc6837959b6b9f56fac": { 629 | "model_module": "@jupyter-widgets/base", 630 | "model_name": "LayoutModel", 631 | "model_module_version": "1.2.0", 632 | "state": { 633 | "_model_module": "@jupyter-widgets/base", 634 | "_model_module_version": "1.2.0", 635 | "_model_name": "LayoutModel", 636 | "_view_count": null, 637 | "_view_module": "@jupyter-widgets/base", 638 | "_view_module_version": "1.2.0", 639 | "_view_name": "LayoutView", 640 | "align_content": null, 641 | "align_items": null, 642 | "align_self": null, 643 | "border": null, 644 | "bottom": null, 645 | "display": null, 646 | "flex": null, 647 | "flex_flow": null, 648 | "grid_area": null, 649 | "grid_auto_columns": null, 650 | "grid_auto_flow": null, 651 | "grid_auto_rows": null, 652 | "grid_column": null, 653 | "grid_gap": null, 654 | "grid_row": null, 655 | "grid_template_areas": null, 656 | "grid_template_columns": null, 657 | "grid_template_rows": null, 658 | "height": null, 659 | "justify_content": null, 660 | "justify_items": null, 661 | "left": null, 662 | "margin": null, 663 | "max_height": null, 664 | "max_width": null, 665 | "min_height": null, 666 | "min_width": null, 667 | "object_fit": null, 668 | "object_position": null, 669 | "order": null, 670 | "overflow": null, 671 | "overflow_x": null, 672 | "overflow_y": null, 673 | "padding": null, 674 | "right": null, 675 | "top": null, 676 | "visibility": null, 677 | "width": null 678 | } 679 | }, 680 | 
"24db6f406a024e6f8c78ef83752fe336": { 681 | "model_module": "@jupyter-widgets/base", 682 | "model_name": "LayoutModel", 683 | "model_module_version": "1.2.0", 684 | "state": { 685 | "_model_module": "@jupyter-widgets/base", 686 | "_model_module_version": "1.2.0", 687 | "_model_name": "LayoutModel", 688 | "_view_count": null, 689 | "_view_module": "@jupyter-widgets/base", 690 | "_view_module_version": "1.2.0", 691 | "_view_name": "LayoutView", 692 | "align_content": null, 693 | "align_items": null, 694 | "align_self": null, 695 | "border": null, 696 | "bottom": null, 697 | "display": null, 698 | "flex": null, 699 | "flex_flow": null, 700 | "grid_area": null, 701 | "grid_auto_columns": null, 702 | "grid_auto_flow": null, 703 | "grid_auto_rows": null, 704 | "grid_column": null, 705 | "grid_gap": null, 706 | "grid_row": null, 707 | "grid_template_areas": null, 708 | "grid_template_columns": null, 709 | "grid_template_rows": null, 710 | "height": null, 711 | "justify_content": null, 712 | "justify_items": null, 713 | "left": null, 714 | "margin": null, 715 | "max_height": null, 716 | "max_width": null, 717 | "min_height": null, 718 | "min_width": null, 719 | "object_fit": null, 720 | "object_position": null, 721 | "order": null, 722 | "overflow": null, 723 | "overflow_x": null, 724 | "overflow_y": null, 725 | "padding": null, 726 | "right": null, 727 | "top": null, 728 | "visibility": null, 729 | "width": null 730 | } 731 | }, 732 | "42e137910a8b45859ae3517d5bd0cb17": { 733 | "model_module": "@jupyter-widgets/controls", 734 | "model_name": "DescriptionStyleModel", 735 | "model_module_version": "1.5.0", 736 | "state": { 737 | "_model_module": "@jupyter-widgets/controls", 738 | "_model_module_version": "1.5.0", 739 | "_model_name": "DescriptionStyleModel", 740 | "_view_count": null, 741 | "_view_module": "@jupyter-widgets/base", 742 | "_view_module_version": "1.2.0", 743 | "_view_name": "StyleView", 744 | "description_width": "" 745 | } 746 | }, 747 | 
"fc1fe6d6b77145828e33cc228c694082": { 748 | "model_module": "@jupyter-widgets/base", 749 | "model_name": "LayoutModel", 750 | "model_module_version": "1.2.0", 751 | "state": { 752 | "_model_module": "@jupyter-widgets/base", 753 | "_model_module_version": "1.2.0", 754 | "_model_name": "LayoutModel", 755 | "_view_count": null, 756 | "_view_module": "@jupyter-widgets/base", 757 | "_view_module_version": "1.2.0", 758 | "_view_name": "LayoutView", 759 | "align_content": null, 760 | "align_items": null, 761 | "align_self": null, 762 | "border": null, 763 | "bottom": null, 764 | "display": null, 765 | "flex": null, 766 | "flex_flow": null, 767 | "grid_area": null, 768 | "grid_auto_columns": null, 769 | "grid_auto_flow": null, 770 | "grid_auto_rows": null, 771 | "grid_column": null, 772 | "grid_gap": null, 773 | "grid_row": null, 774 | "grid_template_areas": null, 775 | "grid_template_columns": null, 776 | "grid_template_rows": null, 777 | "height": null, 778 | "justify_content": null, 779 | "justify_items": null, 780 | "left": null, 781 | "margin": null, 782 | "max_height": null, 783 | "max_width": null, 784 | "min_height": null, 785 | "min_width": null, 786 | "object_fit": null, 787 | "object_position": null, 788 | "order": null, 789 | "overflow": null, 790 | "overflow_x": null, 791 | "overflow_y": null, 792 | "padding": null, 793 | "right": null, 794 | "top": null, 795 | "visibility": null, 796 | "width": null 797 | } 798 | }, 799 | "93c80c4fcf01411e8f70d4c7daf23423": { 800 | "model_module": "@jupyter-widgets/controls", 801 | "model_name": "ProgressStyleModel", 802 | "model_module_version": "1.5.0", 803 | "state": { 804 | "_model_module": "@jupyter-widgets/controls", 805 | "_model_module_version": "1.5.0", 806 | "_model_name": "ProgressStyleModel", 807 | "_view_count": null, 808 | "_view_module": "@jupyter-widgets/base", 809 | "_view_module_version": "1.2.0", 810 | "_view_name": "StyleView", 811 | "bar_color": null, 812 | "description_width": "" 813 | } 814 | }, 815 | 
"52d69bc57f3c4299b9aad7c536735f99": { 816 | "model_module": "@jupyter-widgets/base", 817 | "model_name": "LayoutModel", 818 | "model_module_version": "1.2.0", 819 | "state": { 820 | "_model_module": "@jupyter-widgets/base", 821 | "_model_module_version": "1.2.0", 822 | "_model_name": "LayoutModel", 823 | "_view_count": null, 824 | "_view_module": "@jupyter-widgets/base", 825 | "_view_module_version": "1.2.0", 826 | "_view_name": "LayoutView", 827 | "align_content": null, 828 | "align_items": null, 829 | "align_self": null, 830 | "border": null, 831 | "bottom": null, 832 | "display": null, 833 | "flex": null, 834 | "flex_flow": null, 835 | "grid_area": null, 836 | "grid_auto_columns": null, 837 | "grid_auto_flow": null, 838 | "grid_auto_rows": null, 839 | "grid_column": null, 840 | "grid_gap": null, 841 | "grid_row": null, 842 | "grid_template_areas": null, 843 | "grid_template_columns": null, 844 | "grid_template_rows": null, 845 | "height": null, 846 | "justify_content": null, 847 | "justify_items": null, 848 | "left": null, 849 | "margin": null, 850 | "max_height": null, 851 | "max_width": null, 852 | "min_height": null, 853 | "min_width": null, 854 | "object_fit": null, 855 | "object_position": null, 856 | "order": null, 857 | "overflow": null, 858 | "overflow_x": null, 859 | "overflow_y": null, 860 | "padding": null, 861 | "right": null, 862 | "top": null, 863 | "visibility": null, 864 | "width": null 865 | } 866 | }, 867 | "78de396d6b2d4ce384faf393b84b73a2": { 868 | "model_module": "@jupyter-widgets/controls", 869 | "model_name": "DescriptionStyleModel", 870 | "model_module_version": "1.5.0", 871 | "state": { 872 | "_model_module": "@jupyter-widgets/controls", 873 | "_model_module_version": "1.5.0", 874 | "_model_name": "DescriptionStyleModel", 875 | "_view_count": null, 876 | "_view_module": "@jupyter-widgets/base", 877 | "_view_module_version": "1.2.0", 878 | "_view_name": "StyleView", 879 | "description_width": "" 880 | } 881 | }, 882 | 
"88f08961299f438ea00dd88d61ec99d2": { 883 | "model_module": "@jupyter-widgets/controls", 884 | "model_name": "HBoxModel", 885 | "model_module_version": "1.5.0", 886 | "state": { 887 | "_dom_classes": [], 888 | "_model_module": "@jupyter-widgets/controls", 889 | "_model_module_version": "1.5.0", 890 | "_model_name": "HBoxModel", 891 | "_view_count": null, 892 | "_view_module": "@jupyter-widgets/controls", 893 | "_view_module_version": "1.5.0", 894 | "_view_name": "HBoxView", 895 | "box_style": "", 896 | "children": [ 897 | "IPY_MODEL_4ab68fde0d5845e498b23536ee61e828", 898 | "IPY_MODEL_476045c378f942a6b3066e6ffd8e93b6", 899 | "IPY_MODEL_31faf6bbc65340f6ae0bae2b20f825f0" 900 | ], 901 | "layout": "IPY_MODEL_d7a88f0f41e64fbb9eb04c208276d5e9" 902 | } 903 | }, 904 | "4ab68fde0d5845e498b23536ee61e828": { 905 | "model_module": "@jupyter-widgets/controls", 906 | "model_name": "HTMLModel", 907 | "model_module_version": "1.5.0", 908 | "state": { 909 | "_dom_classes": [], 910 | "_model_module": "@jupyter-widgets/controls", 911 | "_model_module_version": "1.5.0", 912 | "_model_name": "HTMLModel", 913 | "_view_count": null, 914 | "_view_module": "@jupyter-widgets/controls", 915 | "_view_module_version": "1.5.0", 916 | "_view_name": "HTMLView", 917 | "description": "", 918 | "description_tooltip": null, 919 | "layout": "IPY_MODEL_26d4da72a16b4381aef9244f59c536b7", 920 | "placeholder": "​", 921 | "style": "IPY_MODEL_d97e2daa67f8430fbe62fd64cf753f73", 922 | "value": "generation_config.json: 100%" 923 | } 924 | }, 925 | "476045c378f942a6b3066e6ffd8e93b6": { 926 | "model_module": "@jupyter-widgets/controls", 927 | "model_name": "FloatProgressModel", 928 | "model_module_version": "1.5.0", 929 | "state": { 930 | "_dom_classes": [], 931 | "_model_module": "@jupyter-widgets/controls", 932 | "_model_module_version": "1.5.0", 933 | "_model_name": "FloatProgressModel", 934 | "_view_count": null, 935 | "_view_module": "@jupyter-widgets/controls", 936 | "_view_module_version": "1.5.0", 937 | 
"_view_name": "ProgressView", 938 | "bar_style": "success", 939 | "description": "", 940 | "description_tooltip": null, 941 | "layout": "IPY_MODEL_9ba5c9837ffa48a1a98375cf8f7ea58c", 942 | "max": 137, 943 | "min": 0, 944 | "orientation": "horizontal", 945 | "style": "IPY_MODEL_7b0b028a1df640d49ec7b6a3ea8ed5cd", 946 | "value": 137 947 | } 948 | }, 949 | "31faf6bbc65340f6ae0bae2b20f825f0": { 950 | "model_module": "@jupyter-widgets/controls", 951 | "model_name": "HTMLModel", 952 | "model_module_version": "1.5.0", 953 | "state": { 954 | "_dom_classes": [], 955 | "_model_module": "@jupyter-widgets/controls", 956 | "_model_module_version": "1.5.0", 957 | "_model_name": "HTMLModel", 958 | "_view_count": null, 959 | "_view_module": "@jupyter-widgets/controls", 960 | "_view_module_version": "1.5.0", 961 | "_view_name": "HTMLView", 962 | "description": "", 963 | "description_tooltip": null, 964 | "layout": "IPY_MODEL_694328c260294586bf00c5f8904b26a2", 965 | "placeholder": "​", 966 | "style": "IPY_MODEL_e444efde30f146efba56fa1d0bcee1fd", 967 | "value": " 137/137 [00:00<00:00, 9.27kB/s]" 968 | } 969 | }, 970 | "d7a88f0f41e64fbb9eb04c208276d5e9": { 971 | "model_module": "@jupyter-widgets/base", 972 | "model_name": "LayoutModel", 973 | "model_module_version": "1.2.0", 974 | "state": { 975 | "_model_module": "@jupyter-widgets/base", 976 | "_model_module_version": "1.2.0", 977 | "_model_name": "LayoutModel", 978 | "_view_count": null, 979 | "_view_module": "@jupyter-widgets/base", 980 | "_view_module_version": "1.2.0", 981 | "_view_name": "LayoutView", 982 | "align_content": null, 983 | "align_items": null, 984 | "align_self": null, 985 | "border": null, 986 | "bottom": null, 987 | "display": null, 988 | "flex": null, 989 | "flex_flow": null, 990 | "grid_area": null, 991 | "grid_auto_columns": null, 992 | "grid_auto_flow": null, 993 | "grid_auto_rows": null, 994 | "grid_column": null, 995 | "grid_gap": null, 996 | "grid_row": null, 997 | "grid_template_areas": null, 998 | 
"grid_template_columns": null, 999 | "grid_template_rows": null, 1000 | "height": null, 1001 | "justify_content": null, 1002 | "justify_items": null, 1003 | "left": null, 1004 | "margin": null, 1005 | "max_height": null, 1006 | "max_width": null, 1007 | "min_height": null, 1008 | "min_width": null, 1009 | "object_fit": null, 1010 | "object_position": null, 1011 | "order": null, 1012 | "overflow": null, 1013 | "overflow_x": null, 1014 | "overflow_y": null, 1015 | "padding": null, 1016 | "right": null, 1017 | "top": null, 1018 | "visibility": null, 1019 | "width": null 1020 | } 1021 | }, 1022 | "26d4da72a16b4381aef9244f59c536b7": { 1023 | "model_module": "@jupyter-widgets/base", 1024 | "model_name": "LayoutModel", 1025 | "model_module_version": "1.2.0", 1026 | "state": { 1027 | "_model_module": "@jupyter-widgets/base", 1028 | "_model_module_version": "1.2.0", 1029 | "_model_name": "LayoutModel", 1030 | "_view_count": null, 1031 | "_view_module": "@jupyter-widgets/base", 1032 | "_view_module_version": "1.2.0", 1033 | "_view_name": "LayoutView", 1034 | "align_content": null, 1035 | "align_items": null, 1036 | "align_self": null, 1037 | "border": null, 1038 | "bottom": null, 1039 | "display": null, 1040 | "flex": null, 1041 | "flex_flow": null, 1042 | "grid_area": null, 1043 | "grid_auto_columns": null, 1044 | "grid_auto_flow": null, 1045 | "grid_auto_rows": null, 1046 | "grid_column": null, 1047 | "grid_gap": null, 1048 | "grid_row": null, 1049 | "grid_template_areas": null, 1050 | "grid_template_columns": null, 1051 | "grid_template_rows": null, 1052 | "height": null, 1053 | "justify_content": null, 1054 | "justify_items": null, 1055 | "left": null, 1056 | "margin": null, 1057 | "max_height": null, 1058 | "max_width": null, 1059 | "min_height": null, 1060 | "min_width": null, 1061 | "object_fit": null, 1062 | "object_position": null, 1063 | "order": null, 1064 | "overflow": null, 1065 | "overflow_x": null, 1066 | "overflow_y": null, 1067 | "padding": null, 1068 | 
"right": null, 1069 | "top": null, 1070 | "visibility": null, 1071 | "width": null 1072 | } 1073 | }, 1074 | "d97e2daa67f8430fbe62fd64cf753f73": { 1075 | "model_module": "@jupyter-widgets/controls", 1076 | "model_name": "DescriptionStyleModel", 1077 | "model_module_version": "1.5.0", 1078 | "state": { 1079 | "_model_module": "@jupyter-widgets/controls", 1080 | "_model_module_version": "1.5.0", 1081 | "_model_name": "DescriptionStyleModel", 1082 | "_view_count": null, 1083 | "_view_module": "@jupyter-widgets/base", 1084 | "_view_module_version": "1.2.0", 1085 | "_view_name": "StyleView", 1086 | "description_width": "" 1087 | } 1088 | }, 1089 | "9ba5c9837ffa48a1a98375cf8f7ea58c": { 1090 | "model_module": "@jupyter-widgets/base", 1091 | "model_name": "LayoutModel", 1092 | "model_module_version": "1.2.0", 1093 | "state": { 1094 | "_model_module": "@jupyter-widgets/base", 1095 | "_model_module_version": "1.2.0", 1096 | "_model_name": "LayoutModel", 1097 | "_view_count": null, 1098 | "_view_module": "@jupyter-widgets/base", 1099 | "_view_module_version": "1.2.0", 1100 | "_view_name": "LayoutView", 1101 | "align_content": null, 1102 | "align_items": null, 1103 | "align_self": null, 1104 | "border": null, 1105 | "bottom": null, 1106 | "display": null, 1107 | "flex": null, 1108 | "flex_flow": null, 1109 | "grid_area": null, 1110 | "grid_auto_columns": null, 1111 | "grid_auto_flow": null, 1112 | "grid_auto_rows": null, 1113 | "grid_column": null, 1114 | "grid_gap": null, 1115 | "grid_row": null, 1116 | "grid_template_areas": null, 1117 | "grid_template_columns": null, 1118 | "grid_template_rows": null, 1119 | "height": null, 1120 | "justify_content": null, 1121 | "justify_items": null, 1122 | "left": null, 1123 | "margin": null, 1124 | "max_height": null, 1125 | "max_width": null, 1126 | "min_height": null, 1127 | "min_width": null, 1128 | "object_fit": null, 1129 | "object_position": null, 1130 | "order": null, 1131 | "overflow": null, 1132 | "overflow_x": null, 1133 | 
"overflow_y": null, 1134 | "padding": null, 1135 | "right": null, 1136 | "top": null, 1137 | "visibility": null, 1138 | "width": null 1139 | } 1140 | }, 1141 | "7b0b028a1df640d49ec7b6a3ea8ed5cd": { 1142 | "model_module": "@jupyter-widgets/controls", 1143 | "model_name": "ProgressStyleModel", 1144 | "model_module_version": "1.5.0", 1145 | "state": { 1146 | "_model_module": "@jupyter-widgets/controls", 1147 | "_model_module_version": "1.5.0", 1148 | "_model_name": "ProgressStyleModel", 1149 | "_view_count": null, 1150 | "_view_module": "@jupyter-widgets/base", 1151 | "_view_module_version": "1.2.0", 1152 | "_view_name": "StyleView", 1153 | "bar_color": null, 1154 | "description_width": "" 1155 | } 1156 | }, 1157 | "694328c260294586bf00c5f8904b26a2": { 1158 | "model_module": "@jupyter-widgets/base", 1159 | "model_name": "LayoutModel", 1160 | "model_module_version": "1.2.0", 1161 | "state": { 1162 | "_model_module": "@jupyter-widgets/base", 1163 | "_model_module_version": "1.2.0", 1164 | "_model_name": "LayoutModel", 1165 | "_view_count": null, 1166 | "_view_module": "@jupyter-widgets/base", 1167 | "_view_module_version": "1.2.0", 1168 | "_view_name": "LayoutView", 1169 | "align_content": null, 1170 | "align_items": null, 1171 | "align_self": null, 1172 | "border": null, 1173 | "bottom": null, 1174 | "display": null, 1175 | "flex": null, 1176 | "flex_flow": null, 1177 | "grid_area": null, 1178 | "grid_auto_columns": null, 1179 | "grid_auto_flow": null, 1180 | "grid_auto_rows": null, 1181 | "grid_column": null, 1182 | "grid_gap": null, 1183 | "grid_row": null, 1184 | "grid_template_areas": null, 1185 | "grid_template_columns": null, 1186 | "grid_template_rows": null, 1187 | "height": null, 1188 | "justify_content": null, 1189 | "justify_items": null, 1190 | "left": null, 1191 | "margin": null, 1192 | "max_height": null, 1193 | "max_width": null, 1194 | "min_height": null, 1195 | "min_width": null, 1196 | "object_fit": null, 1197 | "object_position": null, 1198 | 
"order": null, 1199 | "overflow": null, 1200 | "overflow_x": null, 1201 | "overflow_y": null, 1202 | "padding": null, 1203 | "right": null, 1204 | "top": null, 1205 | "visibility": null, 1206 | "width": null 1207 | } 1208 | }, 1209 | "e444efde30f146efba56fa1d0bcee1fd": { 1210 | "model_module": "@jupyter-widgets/controls", 1211 | "model_name": "DescriptionStyleModel", 1212 | "model_module_version": "1.5.0", 1213 | "state": { 1214 | "_model_module": "@jupyter-widgets/controls", 1215 | "_model_module_version": "1.5.0", 1216 | "_model_name": "DescriptionStyleModel", 1217 | "_view_count": null, 1218 | "_view_module": "@jupyter-widgets/base", 1219 | "_view_module_version": "1.2.0", 1220 | "_view_name": "StyleView", 1221 | "description_width": "" 1222 | } 1223 | } 1224 | } 1225 | }, 1226 | "accelerator": "GPU" 1227 | }, 1228 | "cells": [ 1229 | { 1230 | "cell_type": "markdown", 1231 | "metadata": { 1232 | "id": "view-in-github", 1233 | "colab_type": "text" 1234 | }, 1235 | "source": [ 1236 | "\"Open" 1237 | ] 1238 | }, 1239 | { 1240 | "cell_type": "markdown", 1241 | "source": [ 1242 | "**Installing Dependencies:**" 1243 | ], 1244 | "metadata": { 1245 | "id": "98YkxTMk8MkF" 1246 | } 1247 | }, 1248 | { 1249 | "cell_type": "code", 1250 | "execution_count": 13, 1251 | "metadata": { 1252 | "id": "Pw24LkQ27bML", 1253 | "outputId": "a1444f5f-0dd0-4783-b8a0-f6b96d8135d1", 1254 | "colab": { 1255 | "base_uri": "https://localhost:8080/" 1256 | } 1257 | }, 1258 | "outputs": [ 1259 | { 1260 | "output_type": "stream", 1261 | "name": "stdout", 1262 | "text": [ 1263 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m670.2/670.2 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 1264 | "\u001b[?25hRequirement already satisfied: flash-attn in /usr/local/lib/python3.10/dist-packages (2.5.5)\n", 1265 | "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from flash-attn) (2.1.0)\n", 1266 | "Requirement 
already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from flash-attn) (0.7.0)\n", 1267 | "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from flash-attn) (23.2)\n", 1268 | "Requirement already satisfied: ninja in /usr/local/lib/python3.10/dist-packages (from flash-attn) (1.11.1.1)\n", 1269 | "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.13.1)\n", 1270 | "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (4.9.0)\n", 1271 | "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (1.12)\n", 1272 | "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.2.1)\n", 1273 | "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.1.3)\n", 1274 | "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2023.6.0)\n", 1275 | "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n", 1276 | "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n", 1277 | "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n", 1278 | "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (8.9.2.26)\n", 1279 | "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.3.1)\n", 1280 | "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from 
torch->flash-attn) (11.0.2.54)\n", 1281 | "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (10.3.2.106)\n", 1282 | "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (11.4.5.107)\n", 1283 | "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.0.106)\n", 1284 | "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2.18.1)\n", 1285 | "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n", 1286 | "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2.1.0)\n", 1287 | "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->flash-attn) (12.3.101)\n", 1288 | "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->flash-attn) (2.1.5)\n", 1289 | "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->flash-attn) (1.3.0)\n" 1290 | ] 1291 | } 1292 | ], 1293 | "source": [ 1294 | "!pip install -qU transformers\n", 1295 | "!pip install -qU langchain\n", 1296 | "!pip install -qU huggingface_hub\n", 1297 | "!pip install -qU tiktoken\n", 1298 | "!pip install -qU neo4j\n", 1299 | "!pip install -qU python-dotenv\n", 1300 | "!pip install -qU accelerate\n", 1301 | "!pip install -qU sentence_transformers\n", 1302 | "!pip install -qU bitsandbytes\n", 1303 | "!pip install -qU optimum\n", 1304 | "!pip install -qU unstructured unstructured[pdf]\n", 1305 | "!pip install flash-attn --no-build-isolation" 1306 | ] 1307 | }, 1308 | { 1309 | 
"cell_type": "code", 1310 | "source": [ 1311 | "import os\n", 1312 | "import re\n", 1313 | "from langchain.vectorstores.neo4j_vector import Neo4jVector\n", 1314 | "from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter\n", 1315 | "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n", 1316 | "from dotenv import load_dotenv" 1317 | ], 1318 | "metadata": { 1319 | "id": "wu99tQMp8cYs" 1320 | }, 1321 | "execution_count": 14, 1322 | "outputs": [] 1323 | }, 1324 | { 1325 | "cell_type": "markdown", 1326 | "source": [ 1327 | "**Setting API in Environment Variable:**" 1328 | ], 1329 | "metadata": { 1330 | "id": "ec1_j3db9-On" 1331 | } 1332 | }, 1333 | { 1334 | "cell_type": "code", 1335 | "source": [ 1336 | "from google.colab import drive, userdata\n", 1337 | "from huggingface_hub import notebook_login\n", 1338 | "notebook_login()\n", 1339 | "load_dotenv()\n", 1340 | "os.environ[\"NEO4J_URI\"] = 'neo4j+s://d5dffe81.databases.neo4j.io'\n", 1341 | "os.environ[\"NEO4J_USERNAME\"] = 'neo4j'\n", 1342 | "os.environ[\"NEO4J_PASSWORD\"] = os.getenv(\"NEO4J_PASSWORD\") or userdata.get(\"NEO4J_PASSWORD\")  # secret comes from .env / the environment, else Colab Secrets; never hard-code it in the notebook\n", 1343 | "os.environ['NEO4J_URL'] = \"bolt://server_ip:7687\"\n", 1344 | "drive.mount('/content/drive')" 1345 | ], 1346 | "metadata": { 1347 | "id": "TV8qynpS8ndS", 1348 | "outputId": "ce43227b-a99f-4863-98a7-c4ac47624dd1", 1349 | "colab": { 1350 | "base_uri": "https://localhost:8080/", 1351 | "height": 348, 1352 | "referenced_widgets": [ 1353 | "a5f9427673584f24b42922ebcf8714f4", 1354 | "78051657194346bc99fb58409ed67870", 1355 | "f716d2a99aae4bf4b09349c5d7fc695a", 1356 | "2f2d4b6731c042cf8b1d64db40f6234a", 1357 | "1d686d4c60474afd907b722e20af1452", 1358 | "3ada03b7a08244a797a4178bbe935318", 1359 | "5b26c881dd42490eb144e255ae685fcd", 1360 | "8b78231bf8a94374b6cf468e9fa85929", 1361 | "aecb1c13c7b4493ba613d690bdd2707a", 1362 | "50b3cb1532554487b840a0a8539be7dc", 1363 | "0516db4733d84ca1a04be0d90f3cbfa9", 1364 | "ef0ef461e11a4c9f9c7106709388c9ea", 1365 | 
"bc0abd1fc56e4b418d551936ccbf724d", 1366 | "febb82295cce487abe735f9ecb72796e", 1367 | "2dd16270e324409aa8a5e5f8d9464665", 1368 | "202446995dd6467fab421b26fdd4189d", 1369 | "e771c4cb03be41b2bf7c0ee82e255d5a" 1370 | ] 1371 | } 1372 | }, 1373 | "execution_count": 15, 1374 | "outputs": [ 1375 | { 1376 | "output_type": "display_data", 1377 | "data": { 1378 | "text/plain": [ 1379 | "VBox(children=(HTML(value='
bool:\n", 1649 | " for stop_ids in stop_token_ids:\n", 1650 | " if torch.equal(input_ids[0][-len(stop_ids):], stop_ids):\n", 1651 | " return True\n", 1652 | " return False\n", 1653 | "\n", 1654 | "stopping_criteria = StoppingCriteriaList([StopOnTokens()])" 1655 | ], 1656 | "metadata": { 1657 | "id": "UeHmT-GzHerh" 1658 | }, 1659 | "execution_count": null, 1660 | "outputs": [] 1661 | }, 1662 | { 1663 | "cell_type": "markdown", 1664 | "source": [ 1665 | "**Testing Huggingface Pipeline:**" 1666 | ], 1667 | "metadata": { 1668 | "id": "4DRI1tSuOPpd" 1669 | } 1670 | }, 1671 | { 1672 | "cell_type": "code", 1673 | "source": [ 1674 | "# Set up text generation pipeline\n", 1675 | "generate_text = transformers.pipeline(\n", 1676 | " model=model,\n", 1677 | " tokenizer=tokenizer,\n", 1678 | " return_full_text=True,\n", 1679 | " task='text-generation',\n", 1680 | " stopping_criteria=stopping_criteria,\n", 1681 | " temperature=0.3,\n", 1682 | " max_new_tokens=512,\n", 1683 | " repetition_penalty=1.1\n", 1684 | ")" 1685 | ], 1686 | "metadata": { 1687 | "id": "g3A-YXDjHrHu" 1688 | }, 1689 | "execution_count": null, 1690 | "outputs": [] 1691 | }, 1692 | { 1693 | "cell_type": "code", 1694 | "source": [ 1695 | "result = generate_text(\"What are the primary mechanisms underlying antibiotic resistance, and how can we develop strategies to combat it?\")\n", 1696 | "print(result)" 1697 | ], 1698 | "metadata": { 1699 | "id": "uTRpa0_mInOA" 1700 | }, 1701 | "execution_count": null, 1702 | "outputs": [] 1703 | }, 1704 | { 1705 | "cell_type": "code", 1706 | "source": [ 1707 | "from langchain.llms import HuggingFacePipeline\n", 1708 | "\n", 1709 | "llm = HuggingFacePipeline(pipeline=generate_text)\n", 1710 | "llm(prompt=\"How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\")" 1711 | ], 1712 | "metadata": { 1713 | "id": "Uh3unyirTtTa" 1714 | }, 1715 | "execution_count": 
null, 1716 | "outputs": [] 1717 | }, 1718 | { 1719 | "cell_type": "markdown", 1720 | "source": [ 1721 | "**Loading Document Data:**" 1722 | ], 1723 | "metadata": { 1724 | "id": "kBNYzCfyOJRz" 1725 | } 1726 | }, 1727 | { 1728 | "cell_type": "code", 1729 | "source": [ 1730 | "from langchain_community.document_loaders import DirectoryLoader\n", 1731 | "loader = DirectoryLoader('/content/drive/MyDrive/BioMedical-Dataset', glob=\"**/*.pdf\")\n", 1732 | "documents = loader.load()" 1733 | ], 1734 | "metadata": { 1735 | "id": "knhECj2bOUkh" 1736 | }, 1737 | "execution_count": null, 1738 | "outputs": [] 1739 | }, 1740 | { 1741 | "cell_type": "code", 1742 | "source": [ 1743 | "print(len(documents))" 1744 | ], 1745 | "metadata": { 1746 | "id": "T0Yp2a8Goeqh" 1747 | }, 1748 | "execution_count": null, 1749 | "outputs": [] 1750 | }, 1751 | { 1752 | "cell_type": "code", 1753 | "source": [ 1754 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", 1755 | "\n", 1756 | "MARKDOWN_SEPARATORS = [\n", 1757 | " \"\\n#{1,6} \",\n", 1758 | " \"```\\n\",\n", 1759 | " \"\\n\\\\*\\\\*\\\\*+\\n\",\n", 1760 | " \"\\n---+\\n\",\n", 1761 | " \"\\n___+\\n\",\n", 1762 | " \"\\n\\n\",\n", 1763 | " \"\\n\",\n", 1764 | " \" \",\n", 1765 | " \"\",\n", 1766 | "]\n", 1767 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,\n", 1768 | " chunk_overlap=30,\n", 1769 | " add_start_index=True,\n", 1770 | " separators=MARKDOWN_SEPARATORS)\n", 1771 | "\n", 1772 | "processed_text_splits = text_splitter.split_documents(documents)" 1773 | ], 1774 | "metadata": { 1775 | "id": "6Tznkr3PIawL" 1776 | }, 1777 | "execution_count": null, 1778 | "outputs": [] 1779 | }, 1780 | { 1781 | "cell_type": "code", 1782 | "source": [ 1783 | "processed_text_splits[120].page_content" 1784 | ], 1785 | "metadata": { 1786 | "id": "91wesl_N4yYm" 1787 | }, 1788 | "execution_count": null, 1789 | "outputs": [] 1790 | }, 1791 | { 1792 | "cell_type": "code", 1793 | "source": [ 1794 | 
"print(len(processed_text_splits))" 1795 | ], 1796 | "metadata": { 1797 | "id": "El9WlHtHCiTv" 1798 | }, 1799 | "execution_count": null, 1800 | "outputs": [] 1801 | }, 1802 | { 1803 | "cell_type": "code", 1804 | "source": [ 1805 | "# Creating Embeddings of the sentences and storing them into Graph DB\n", 1806 | "from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n", 1807 | "\n", 1808 | "model_name = \"BAAI/bge-base-en-v1.5\"\n", 1809 | "model_kwargs = {\"device\": \"cuda\"}\n", 1810 | "encode_kwargs = {\"normalize_embeddings\": True}\n", 1811 | "embeddings = HuggingFaceBgeEmbeddings(\n", 1812 | " model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs\n", 1813 | ")" 1814 | ], 1815 | "metadata": { 1816 | "id": "KqOudIQtIayw" 1817 | }, 1818 | "execution_count": null, 1819 | "outputs": [] 1820 | }, 1821 | { 1822 | "cell_type": "markdown", 1823 | "source": [ 1824 | "**Load Neo4j Graph:**" 1825 | ], 1826 | "metadata": { 1827 | "id": "WaGxQILaCwQv" 1828 | } 1829 | }, 1830 | { 1831 | "cell_type": "code", 1832 | "source": [ 1833 | "from langchain.graphs import Neo4jGraph\n", 1834 | "\n", 1835 | "graph = Neo4jGraph(\n", 1836 | " url=os.environ[\"NEO4J_URI\"],\n", 1837 | " username=os.environ[\"NEO4J_USERNAME\"],\n", 1838 | " password=os.environ[\"NEO4J_PASSWORD\"]\n", 1839 | ")" 1840 | ], 1841 | "metadata": { 1842 | "id": "v__99eeVDVdB" 1843 | }, 1844 | "execution_count": null, 1845 | "outputs": [] 1846 | }, 1847 | { 1848 | "cell_type": "markdown", 1849 | "source": [ 1850 | "**Creating a new custom Index using Cypher:**" 1851 | ], 1852 | "metadata": { 1853 | "id": "KasRmCh0bWJP" 1854 | } 1855 | }, 1856 | { 1857 | "cell_type": "code", 1858 | "source": [ 1859 | "# Create New index with custom embedding model and dimensions\n", 1860 | "# I have already created\n", 1861 | "'''\n", 1862 | "graph.query(\"\"\"\n", 1863 | "CALL db.index.vector.createNodeIndex(\n", 1864 | " 'KG-Enhanced-QnA-Biomedical',\n", 1865 | " 'text_splits',\n", 1866 | " 
'embeddings',\n", 1867 | " 768,\n", 1868 | " 'cosine'\n", 1869 | ")\n", 1870 | "\"\"\")\n", 1871 | "'''" 1872 | ], 1873 | "metadata": { 1874 | "id": "Lhzqh0uKbVJu" 1875 | }, 1876 | "execution_count": null, 1877 | "outputs": [] 1878 | }, 1879 | { 1880 | "cell_type": "markdown", 1881 | "source": [ 1882 | "**Show Created Vector Index:**" 1883 | ], 1884 | "metadata": { 1885 | "id": "z3ayEjLyHTAm" 1886 | } 1887 | }, 1888 | { 1889 | "cell_type": "code", 1890 | "source": [ 1891 | "from neo4j import GraphDatabase\n", 1892 | "uri = os.environ[\"NEO4J_URI\"]\n", 1893 | "username = os.environ[\"NEO4J_USERNAME\"]\n", 1894 | "password = os.environ[\"NEO4J_PASSWORD\"]\n", 1895 | "\n", 1896 | "driver = GraphDatabase.driver(uri, auth=(username, password))\n", 1897 | "session = driver.session()\n", 1898 | "\n", 1899 | "result = session.run(\"SHOW VECTOR INDEXES\")\n", 1900 | "\n", 1901 | "for record in result:\n", 1902 | " print(record)" 1903 | ], 1904 | "metadata": { 1905 | "id": "SFLExy9OxptZ" 1906 | }, 1907 | "execution_count": null, 1908 | "outputs": [] 1909 | }, 1910 | { 1911 | "cell_type": "code", 1912 | "source": [ 1913 | "''' chunks = [{'text': document.page_content, 'embedding': embeddings.embed_query(document.page_content)}\n", 1914 | " for document in documents if len(document.page_content) > 50] '''" 1915 | ], 1916 | "metadata": { 1917 | "id": "ii7wVWOmJYBb" 1918 | }, 1919 | "execution_count": null, 1920 | "outputs": [] 1921 | }, 1922 | { 1923 | "cell_type": "code", 1924 | "source": [ 1925 | "'''\n", 1926 | "graph.query(\"\"\"\n", 1927 | "UNWIND $data AS row\n", 1928 | "CREATE (c:Chunk {text: row.text})\n", 1929 | "WITH c, row\n", 1930 | "CALL db.create.setVectorProperty(c, 'embedding', row.embedding)\n", 1931 | "YIELD node\n", 1932 | "RETURN distinct 'done'\n", 1933 | "\"\"\", {'data': chunks})\n", 1934 | "'''" 1935 | ], 1936 | "metadata": { 1937 | "id": "B0KQnPT8Lxqi" 1938 | }, 1939 | "execution_count": null, 1940 | "outputs": [] 1941 | }, 1942 | { 1943 | "cell_type": 
"code", 1944 | "source": [ 1945 | "'''\n", 1946 | "vector_search = \"\"\"\n", 1947 | "WITH $embedding AS e\n", 1948 | "CALL db.index.vector.queryNodes('KG-Enhanced-QnA-Biomedical',$k, e) yield node, score\n", 1949 | "RETURN node.text AS result\n", 1950 | "ORDER BY score DESC\n", 1951 | "LIMIT 3\n", 1952 | "\"\"\"\n", 1953 | "'''" 1954 | ], 1955 | "metadata": { 1956 | "id": "0Tbwl91LjKd4" 1957 | }, 1958 | "execution_count": null, 1959 | "outputs": [] 1960 | }, 1961 | { 1962 | "cell_type": "code", 1963 | "source": [ 1964 | "# Instantiate Neo4j vector from documents\n", 1965 | "neo4j_vector = Neo4jVector.from_documents(\n", 1966 | " processed_text_splits,\n", 1967 | " embeddings,\n", 1968 | " index_name='KG-Enhanced-QnA-Biomedical',\n", 1969 | " url=os.environ[\"NEO4J_URI\"],\n", 1970 | " username=os.environ[\"NEO4J_USERNAME\"],\n", 1971 | " password=os.environ[\"NEO4J_PASSWORD\"]\n", 1972 | ")" 1973 | ], 1974 | "metadata": { 1975 | "id": "HHjtYv6JDVfg" 1976 | }, 1977 | "execution_count": null, 1978 | "outputs": [] 1979 | }, 1980 | { 1981 | "cell_type": "code", 1982 | "source": [ 1983 | "# Performing Similarity Search\n", 1984 | "query = \"How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\"\n", 1985 | "vector_results = neo4j_vector.similarity_search(query, k=2)\n", 1986 | "\n", 1987 | "for i, res in enumerate(vector_results):\n", 1988 | " print(res.page_content)\n", 1989 | " if i != len(vector_results) - 1:\n", 1990 | " print()\n", 1991 | "vector_result = vector_results[0].page_content" 1992 | ], 1993 | "metadata": { 1994 | "id": "UAV-XTaHJIjT" 1995 | }, 1996 | "execution_count": null, 1997 | "outputs": [] 1998 | }, 1999 | { 2000 | "cell_type": "code", 2001 | "source": [ 2002 | "from langchain.chains import GraphCypherQAChain\n", 2003 | "from langchain.graphs import Neo4jGraph" 2004 | ], 2005 | "metadata": { 2006 | "id": "Az7n09kuW4k4" 2007 | 
}, 2008 | "execution_count": null, 2009 | "outputs": [] 2010 | }, 2011 | { 2012 | "cell_type": "code", 2013 | "source": [ 2014 | "from langchain.chains.base import Chain\n", 2015 | "from langchain.chains.llm import LLMChain\n", 2016 | "from langchain.chat_models import ChatOpenAI\n", 2017 | "from langchain.chains.question_answering.stuff_prompt import CHAT_PROMPT\n", 2018 | "from langchain.callbacks.manager import CallbackManagerForChainRun\n", 2019 | "from typing import Any, Dict, List\n", 2020 | "from pydantic import Field" 2021 | ], 2022 | "metadata": { 2023 | "id": "uE_4FkZtMyaL" 2024 | }, 2025 | "execution_count": null, 2026 | "outputs": [] 2027 | }, 2028 | { 2029 | "cell_type": "code", 2030 | "source": [ 2031 | "vector_search = \"\"\"\n", 2032 | "WITH $embedding AS e\n", 2033 | "CALL db.index.vector.queryNodes('KG-Enhanced-QnA-Biomedical',$k, e) yield node, score\n", 2034 | "RETURN node.text AS result\n", 2035 | "ORDER BY score DESC\n", 2036 | "LIMIT 3\n", 2037 | "\"\"\"" 2038 | ], 2039 | "metadata": { 2040 | "id": "jqnt60GeNB3O" 2041 | }, 2042 | "execution_count": null, 2043 | "outputs": [] 2044 | }, 2045 | { 2046 | "cell_type": "code", 2047 | "source": [ 2048 | "print(graph.schema)" 2049 | ], 2050 | "metadata": { 2051 | "id": "yf3jH9ZGXPUN" 2052 | }, 2053 | "execution_count": null, 2054 | "outputs": [] 2055 | }, 2056 | { 2057 | "cell_type": "code", 2058 | "source": [ 2059 | "class Neo4jVectorChain(Chain):\n", 2060 | " graph: Neo4jGraph = Field(exclude=True)\n", 2061 | " input_key: str = \"query\"\n", 2062 | " output_key: str = \"result\"\n", 2063 | " embeddings: HuggingFaceBgeEmbeddings = HuggingFaceBgeEmbeddings()\n", 2064 | " qa_chain: LLMChain = LLMChain(llm=llm, prompt=CHAT_PROMPT)\n", 2065 | "\n", 2066 | " @property\n", 2067 | " def input_keys(self) -> List[str]:\n", 2068 | " return [self.input_key]\n", 2069 | "\n", 2070 | " @property\n", 2071 | " def output_keys(self) -> List[str]:\n", 2072 | " _output_keys = [self.output_key]\n", 2073 | " return 
_output_keys\n", 2074 | "\n", 2075 | " def _call(self, inputs: Dict[str, str], run_manager, k=3) -> Dict[str, Any]:\n", 2076 | " question = inputs[self.input_key]\n", 2077 | " embedding = self.embeddings.embed_query(question)\n", 2078 | "\n", 2079 | " context = self.graph.query(vector_search, {'embedding': embedding, 'k': 3})\n", 2080 | " context = [el['result'] for el in context]\n", 2081 | "\n", 2082 | " result = self.qa_chain({\"question\": question, \"context\": context})\n", 2083 | " final_result = result[self.qa_chain.output_key]\n", 2084 | " return {self.output_key: final_result}" 2085 | ], 2086 | "metadata": { 2087 | "id": "wYZQ44hsNT4y" 2088 | }, 2089 | "execution_count": null, 2090 | "outputs": [] 2091 | }, 2092 | { 2093 | "cell_type": "code", 2094 | "source": [ 2095 | "chain = Neo4jVectorChain(graph=graph, embeddings=embeddings, verbose=True)" 2096 | ], 2097 | "metadata": { 2098 | "id": "tksLTpsqOGGq" 2099 | }, 2100 | "execution_count": null, 2101 | "outputs": [] 2102 | }, 2103 | { 2104 | "cell_type": "code", 2105 | "source": [ 2106 | "graph_result = chain.run(\"How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\")" 2107 | ], 2108 | "metadata": { 2109 | "id": "TS4Nwf6pONk1" 2110 | }, 2111 | "execution_count": null, 2112 | "outputs": [] 2113 | }, 2114 | { 2115 | "cell_type": "code", 2116 | "source": [ 2117 | "chain = GraphCypherQAChain.from_llm(\n", 2118 | " cypher_llm=llm,\n", 2119 | " qa_llm=llm,\n", 2120 | " graph=graph,\n", 2121 | " verbose=True,\n", 2122 | " return_intermediate_steps=True,\n", 2123 | " validate_cypher=True\n", 2124 | ")" 2125 | ], 2126 | "metadata": { 2127 | "id": "2Q-g7wGhXR0V" 2128 | }, 2129 | "execution_count": null, 2130 | "outputs": [] 2131 | }, 2132 | { 2133 | "cell_type": "code", 2134 | "source": [ 2135 | "graph_result = chain.run(\"How can we enhance the specificity and efficiency of CRISPR/Cas9 
gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\")" 2136 | ], 2137 | "metadata": { 2138 | "id": "JUxlLbCEXczj" 2139 | }, 2140 | "execution_count": null, 2141 | "outputs": [] 2142 | } 2143 | ] 2144 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Retrieval Augmented Generation with LangChain and Neo4J Graph DB 2 | --------------------------------------------------------------------------------