├── .gitignore
├── LICENSE
├── Neo4j_and_LangChain_for_Enhanced_Question_Answering.ipynb
├── RAG_with_Graph_Database.ipynb
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Mohd Kaif
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Neo4j_and_LangChain_for_Enhanced_Question_Answering.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": []
  7 |     },
  8 |     "kernelspec": {
  9 |       "name": "python3",
 10 |       "display_name": "Python 3"
 11 |     },
 12 |     "language_info": {
 13 |       "name": "python"
 14 |     }
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "cell_type": "markdown",
 19 |       "source": [
 20 |         "## Integrating Unstructured and Graph Knowledge with Neo4j and LangChain for Enhanced Question Answering"
 21 |       ],
 22 |       "metadata": {
 23 |         "id": "LXzvg_0MUzhA"
 24 |       }
 25 |     },
 26 |     {
 27 |       "cell_type": "markdown",
 28 |       "source": [
 29 |         "\n",
 30 |         "\n",
 31 |         "#### Installing Dependencies"
 32 |       ],
 33 |       "metadata": {
 34 |         "id": "b0AVqZ9XVC9Z"
 35 |       }
 36 |     },
 37 |     {
 38 |       "cell_type": "code",
 39 |       "source": [
 40 |         "! pip install -qU \\\n",
 41 |         "       transformers \\\n",
 42 |         "       datasets \\\n",
 43 |         "       langchain \\\n",
 44 |         "       openai \\\n",
 45 |         "       wikipedia \\\n",
 46 |         "       tiktoken \\\n",
 47 |         "       neo4j \\\n",
 48 |         "       python-dotenv"
 49 |       ],
 50 |       "metadata": {
 51 |         "colab": {
 52 |           "base_uri": "https://localhost:8080/"
 53 |         },
 54 |         "id": "F52G_upjVIGt",
 55 |         "outputId": "9e26d2c5-6294-4c3f-de4f-5a51e8961003"
 56 |       },
 57 |       "execution_count": 1,
 58 |       "outputs": [
 59 |         {
 60 |           "output_type": "stream",
 61 |           "name": "stdout",
 62 |           "text": [
 63 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m49.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 64 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m493.7/493.7 kB\u001b[0m \u001b[31m27.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 65 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.9/1.9 MB\u001b[0m \u001b[31m86.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 66 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.0/77.0 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 67 |             "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
 68 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m90.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 69 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m192.4/192.4 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 70 |             "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
 71 |             "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
 72 |             "  Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
 73 |             "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
 74 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m29.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 75 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m96.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 76 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m77.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 77 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 78 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 79 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.3/43.3 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 80 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 81 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 82 |             "\u001b[?25h  Building wheel for wikipedia (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
 83 |             "  Building wheel for neo4j (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
 84 |             "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
 85 |             "llmx 0.0.15a0 requires cohere, which is not installed.\u001b[0m\u001b[31m\n",
 86 |             "\u001b[0m"
 87 |           ]
 88 |         }
 89 |       ]
 90 |     },
 91 |     {
 92 |       "cell_type": "markdown",
 93 |       "source": [
 94 |         "#### Importing Packages"
 95 |       ],
 96 |       "metadata": {
 97 |         "id": "B_5pjB2WX_DZ"
 98 |       }
 99 |     },
100 |     {
101 |       "cell_type": "code",
102 |       "source": [
103 |         "import os\n",
104 |         "import re\n",
105 |         "from langchain.vectorstores.neo4j_vector import Neo4jVector\n",
106 |         "from langchain.document_loaders import WikipediaLoader\n",
107 |         "from langchain.embeddings.openai import OpenAIEmbeddings\n",
108 |         "from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter\n",
109 |         "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
110 |         "from dotenv import load_dotenv"
111 |       ],
112 |       "metadata": {
113 |         "id": "R-h-iIDmYFGh"
114 |       },
115 |       "execution_count": 26,
116 |       "outputs": []
117 |     },
118 |     {
119 |       "cell_type": "markdown",
120 |       "source": [
121 |         "#### Setting API Keys in Environment Variables"
122 |       ],
123 |       "metadata": {
124 |         "id": "_JY_gy3BqptG"
125 |       }
126 |     },
127 |     {
128 |       "cell_type": "code",
129 |       "source": [
130 |         "# Secrets come from the environment / a local .env file (already git-ignored); never hard-code them here.\n",
131 |         "load_dotenv()\n",
132 |         "for _name in (\"OPENAI_API_KEY\", \"NEO4J_URI\", \"NEO4J_USERNAME\", \"NEO4J_PASSWORD\"):\n",
133 |         "    if not os.getenv(_name):\n",
134 |         "        raise RuntimeError(f\"{_name} is not set; add it to .env or export it before running\")"
135 |       ],
136 |       "metadata": {
137 |         "id": "BIHTHxNtYGRN"
138 |       },
139 |       "execution_count": 27,
140 |       "outputs": []
141 |     },
142 |     {
143 |       "cell_type": "code",
144 |       "source": [
145 |         "print(\"OPENAI_API_KEY set:\", bool(os.getenv('OPENAI_API_KEY')))\n",
146 |         "print(os.getenv(\"NEO4J_URI\"))\n",
147 |         "print(os.getenv(\"NEO4J_USERNAME\"))\n",
148 |         "print(\"NEO4J_PASSWORD set:\", bool(os.getenv('NEO4J_PASSWORD')))"
149 |       ],
150 |       "metadata": {
151 |         "colab": {
152 |           "base_uri": "https://localhost:8080/"
153 |         },
154 |         "id": "lhhy1qO5orHa",
155 |         "outputId": "a99bdcfc-56e3-43c1-8926-562af2f19e44"
156 |       },
157 |       "execution_count": 28,
158 |       "outputs": [
159 |         {
160 |           "output_type": "stream",
161 |           "name": "stdout",
162 |           "text": [
163 |             "OPENAI_API_KEY set: True\n",
164 |             "neo4j+s://817ac93a.databases.neo4j.io\n",
165 |             "neo4j\n",
166 |             "NEO4J_PASSWORD set: True\n"
167 |           ]
167 |           ]
168 |         }
169 |       ]
170 |     },
171 |     {
172 |       "cell_type": "markdown",
173 |       "source": [
174 |         "#### Data Preprocessing"
175 |       ],
176 |       "metadata": {
177 |         "id": "QiTvOmA8rgeZ"
178 |       }
179 |     },
180 |     {
181 |       "cell_type": "code",
182 |       "source": [
183 |         "from transformers import AutoTokenizer\n",
184 |         "\n",
185 |         "# Define the tokenizer using \"bert-base-uncased\"\n",
186 |         "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n",
187 |         "\n",
188 |         "# Function to calculate the number of tokens in a text\n",
189 |         "def bert_len(text):\n",
190 |         "    tokens = tokenizer.encode(text)\n",
191 |         "    return len(tokens)\n",
192 |         "\n",
193 |         "# Example usage\n",
194 |         "input_text = \"This is a sample sentence for tokenization.\"\n",
195 |         "num_tokens = bert_len(input_text)\n",
196 |         "print(f\"Number of tokens: {num_tokens}\")"
197 |       ],
198 |       "metadata": {
199 |         "id": "lUF9fXmvYO1h",
200 |         "colab": {
201 |           "base_uri": "https://localhost:8080/"
202 |         },
203 |         "outputId": "a5b6c25c-5672-455d-e785-4c20148aa013"
204 |       },
205 |       "execution_count": 29,
206 |       "outputs": [
207 |         {
208 |           "output_type": "stream",
209 |           "name": "stdout",
210 |           "text": [
211 |             "Number of tokens: 11\n"
212 |           ]
213 |         }
214 |       ]
215 |     },
216 |     {
217 |       "cell_type": "code",
218 |       "source": [
219 |         "# Load Wikipedia articles related to \"Leonhard Euler\"\n",
220 |         "raw_documents = WikipediaLoader(query=\"Leonhard Euler\").load()\n",
221 |         "\n",
222 |         "# Define a text splitter with specific parameters\n",
223 |         "text_splitter = RecursiveCharacterTextSplitter(\n",
224 |         "    chunk_size=200, chunk_overlap=20, length_function=bert_len, separators=['\\n\\n', '\\n', ' ', '']\n",
225 |         ")\n",
226 |         "\n",
227 |         "# Split the content of the first Wikipedia article into smaller documents\n",
228 |         "documents = text_splitter.create_documents([raw_documents[0].page_content])"
229 |       ],
230 |       "metadata": {
231 |         "id": "YQqG0o7KbmqZ",
232 |         "colab": {
233 |           "base_uri": "https://localhost:8080/"
234 |         },
235 |         "outputId": "7b8e48d5-df80-4d1a-d192-11ce439fc62a"
236 |       },
237 |       "execution_count": 30,
238 |       "outputs": [
239 |         {
240 |           "output_type": "stream",
241 |           "name": "stderr",
242 |           "text": [
243 |             "Token indices sequence length is longer than the specified maximum sequence length for this model (736 > 512). Running this sequence through the model will result in indexing errors\n"
244 |           ]
245 |         }
246 |       ]
247 |     },
248 |     {
249 |       "cell_type": "code",
250 |       "source": [
251 |         "print(len(documents))"
252 |       ],
253 |       "metadata": {
254 |         "id": "c68Duv2Nbqqk",
255 |         "colab": {
256 |           "base_uri": "https://localhost:8080/"
257 |         },
258 |         "outputId": "4510be6f-af0c-4c08-e814-da06bf75f7f2"
259 |       },
260 |       "execution_count": 31,
261 |       "outputs": [
262 |         {
263 |           "output_type": "stream",
264 |           "name": "stdout",
265 |           "text": [
266 |             "18\n"
267 |           ]
268 |         }
269 |       ]
270 |     },
271 |     {
272 |       "cell_type": "markdown",
273 |       "source": [
274 |         "#### Initializing the Neo4j Graph Database"
275 |       ],
276 |       "metadata": {
277 |         "id": "X043ugczr0X5"
278 |       }
279 |     },
280 |     {
281 |       "cell_type": "code",
282 |       "source": [
283 |         "# Instantiate Neo4j vector from documents\n",
284 |         "neo4j_vector = Neo4jVector.from_documents(\n",
285 |         "    documents,\n",
286 |         "    OpenAIEmbeddings(),\n",
287 |         "    url=os.environ[\"NEO4J_URI\"],\n",
288 |         "    username=os.environ[\"NEO4J_USERNAME\"],\n",
289 |         "    password=os.environ[\"NEO4J_PASSWORD\"]\n",
290 |         ")"
291 |       ],
292 |       "metadata": {
293 |         "id": "RSHWwlbJcCi2"
294 |       },
295 |       "execution_count": 32,
296 |       "outputs": []
297 |     },
298 |     {
299 |       "cell_type": "markdown",
300 |       "source": [
301 |         "#### Performing Similarity Search on Ingested Documents"
302 |       ],
303 |       "metadata": {
304 |         "id": "34Fm9UgHwWdG"
305 |       }
306 |     },
307 |     {
308 |       "cell_type": "code",
309 |       "source": [
310 |         "# Define the query.\n",
311 |         "query = \"Who were the siblings of Leonhard Euler?\"\n",
312 |         "\n",
313 |         "# Execute the query, get top 2 results.\n",
314 |         "vector_results = neo4j_vector.similarity_search(query, k=2)\n",
315 |         "\n",
316 |         "# Print search results with separation.\n",
317 |         "for i, res in enumerate(vector_results):\n",
318 |         "    print(res.page_content)\n",
319 |         "    if i != len(vector_results) - 1:\n",
320 |         "        print()\n",
321 |         "\n",
322 |         "# Store the content of the most similar result.\n",
323 |         "vector_result = vector_results[0].page_content"
324 |       ],
325 |       "metadata": {
326 |         "colab": {
327 |           "base_uri": "https://localhost:8080/"
328 |         },
329 |         "id": "qp8bl2hjruzg",
330 |         "outputId": "b5922d9e-7f16-4250-f917-9af03d361fa4"
331 |       },
332 |       "execution_count": 33,
333 |       "outputs": [
334 |         {
335 |           "output_type": "stream",
336 |           "name": "stdout",
337 |           "text": [
338 |             "== Early life ==\n",
339 |             "Leonhard Euler was born on 15 April 1707, in Basel to Paul III Euler, a pastor of the Reformed Church, and Marguerite (née Brucker), whose ancestors include a number of well-known scholars in the classics. He was the oldest of four children, having two younger sisters, An\n",
340 |             "\n",
341 |             "== Early life ==\n",
342 |             "Leonhard Euler was born on 15 April 1707, in Basel to Paul III Euler, a pastor of the Reformed Church, and Marguerite (née Brucker), whose ancestors include a number of well-known scholars in the classics. He was the oldest of four children, having two younger sisters, An\n"
343 |           ]
344 |         }
345 |       ]
346 |     },
347 |     {
348 |       "cell_type": "markdown",
349 |       "source": [
350 |         "#### Building Knowledge Graph"
351 |       ],
352 |       "metadata": {
353 |         "id": "yLCySJqcxV3W"
354 |       }
355 |     },
356 |     {
357 |       "cell_type": "code",
358 |       "source": [
359 |         "# Necessary Libraries to setup the Neo4j DB QuestionAnswering Chain\n",
360 |         "from langchain.chat_models import ChatOpenAI\n",
361 |         "from langchain.chains import GraphCypherQAChain\n",
362 |         "from langchain.graphs import Neo4jGraph"
363 |       ],
364 |       "metadata": {
365 |         "id": "EOHIAZrLxh8N"
366 |       },
367 |       "execution_count": 34,
368 |       "outputs": []
369 |     },
370 |     {
371 |       "cell_type": "code",
372 |       "source": [
373 |         "# Create a Neo4jGraph object by connecting to a Neo4j database.\n",
374 |         "graph = Neo4jGraph(\n",
375 |         "    url=os.environ[\"NEO4J_URI\"],\n",
376 |         "    username=os.environ[\"NEO4J_USERNAME\"],\n",
377 |         "    password=os.environ[\"NEO4J_PASSWORD\"]\n",
378 |         ")"
379 |       ],
380 |       "metadata": {
381 |         "id": "N_sqke-SzfEQ"
382 |       },
383 |       "execution_count": 35,
384 |       "outputs": []
385 |     },
386 |     {
387 |       "cell_type": "code",
388 |       "source": [
389 |         "# Print the schema of the Neo4j graph.\n",
390 |         "print(graph.schema)"
391 |       ],
392 |       "metadata": {
393 |         "colab": {
394 |           "base_uri": "https://localhost:8080/"
395 |         },
396 |         "id": "oHFGTHCEz_UD",
397 |         "outputId": "eaf7ba8f-7188-4b5a-f4b3-f786f4a457ae"
398 |       },
399 |       "execution_count": 36,
400 |       "outputs": [
401 |         {
402 |           "output_type": "stream",
403 |           "name": "stdout",
404 |           "text": [
405 |             "\n",
406 |             "        Node properties are the following:\n",
407 |             "        [{'labels': 'Chunk', 'properties': [{'property': 'id', 'type': 'STRING'}, {'property': 'embedding', 'type': 'LIST'}, {'property': 'text', 'type': 'STRING'}]}]\n",
408 |             "        Relationship properties are the following:\n",
409 |             "        []\n",
410 |             "        The relationships are the following:\n",
411 |             "        []\n",
412 |             "        \n"
413 |           ]
414 |         }
415 |       ]
416 |     },
417 |     {
418 |       "cell_type": "code",
419 |       "source": [
420 |         "# Create a question-answering chain using an OpenAI chat model and a Neo4j graph, with verbose mode enabled.\n",
421 |         "chain = GraphCypherQAChain.from_llm(\n",
422 |         "    ChatOpenAI(temperature=0), graph=graph, verbose=True\n",
423 |         ")"
424 |       ],
425 |       "metadata": {
426 |         "id": "wN_9M9fi0OV8"
427 |       },
428 |       "execution_count": 37,
429 |       "outputs": []
430 |     },
431 |     {
432 |       "cell_type": "code",
433 |       "source": [
434 |         "# Use the question-answering chain to query the Neo4j graph.\n",
435 |         "graph_result = chain.run(\"Who were the siblings of Leonhard Euler?\")"
436 |       ],
437 |       "metadata": {
438 |         "colab": {
439 |           "base_uri": "https://localhost:8080/"
440 |         },
441 |         "id": "Iw0foDNr0c-v",
442 |         "outputId": "8a776932-7768-4191-d2a4-37c2163b9d2e"
443 |       },
444 |       "execution_count": 38,
445 |       "outputs": [
446 |         {
447 |           "output_type": "stream",
448 |           "name": "stdout",
449 |           "text": [
450 |             "\n",
451 |             "\n",
452 |             "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
453 |             "Generated Cypher:\n",
454 |             "\u001b[32;1m\u001b[1;3mMATCH (euler:Chunk {text: 'Leonhard Euler'})-[:SIBLING]->(sibling:Chunk)\n",
455 |             "RETURN sibling.text\u001b[0m\n",
456 |             "Full Context:\n",
457 |             "\u001b[32;1m\u001b[1;3m[]\u001b[0m\n",
458 |             "\n",
459 |             "\u001b[1m> Finished chain.\u001b[0m\n"
460 |           ]
461 |         }
462 |       ]
463 |     },
464 |     {
465 |       "cell_type": "code",
466 |       "source": [
467 |         "graph_result"
468 |       ],
469 |       "metadata": {
470 |         "colab": {
471 |           "base_uri": "https://localhost:8080/",
472 |           "height": 37
473 |         },
474 |         "id": "ljHsyK3z0pAf",
475 |         "outputId": "7d95b141-ce27-4404-9e09-1f12c07f3ab8"
476 |       },
477 |       "execution_count": 39,
478 |       "outputs": [
479 |         {
480 |           "output_type": "execute_result",
481 |           "data": {
482 |             "text/plain": [
483 |               "\"I'm sorry, but I don't have any information about the siblings of Leonhard Euler.\""
484 |             ],
485 |             "application/vnd.google.colaboratory.intrinsic+json": {
486 |               "type": "string"
487 |             }
488 |           },
489 |           "metadata": {},
490 |           "execution_count": 39
491 |         }
492 |       ]
493 |     },
494 |     {
495 |       "cell_type": "code",
496 |       "source": [],
497 |       "metadata": {
498 |         "id": "Ii1H4spv00ba"
499 |       },
500 |       "execution_count": null,
501 |       "outputs": []
502 |     }
503 |   ]
504 | }
505 | 


--------------------------------------------------------------------------------
/RAG_with_Graph_Database.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "nbformat": 4,
   3 |   "nbformat_minor": 0,
   4 |   "metadata": {
   5 |     "colab": {
   6 |       "provenance": [],
   7 |       "gpuType": "T4",
   8 |       "include_colab_link": true
   9 |     },
  10 |     "kernelspec": {
  11 |       "name": "python3",
  12 |       "display_name": "Python 3"
  13 |     },
  14 |     "language_info": {
  15 |       "name": "python"
  16 |     },
  17 |     "widgets": {
  18 |       "application/vnd.jupyter.widget-state+json": {
  19 |         "a5f9427673584f24b42922ebcf8714f4": {
  20 |           "model_module": "@jupyter-widgets/controls",
  21 |           "model_name": "VBoxModel",
  22 |           "model_module_version": "1.5.0",
  23 |           "state": {
  24 |             "_dom_classes": [],
  25 |             "_model_module": "@jupyter-widgets/controls",
  26 |             "_model_module_version": "1.5.0",
  27 |             "_model_name": "VBoxModel",
  28 |             "_view_count": null,
  29 |             "_view_module": "@jupyter-widgets/controls",
  30 |             "_view_module_version": "1.5.0",
  31 |             "_view_name": "VBoxView",
  32 |             "box_style": "",
  33 |             "children": [
  34 |               "IPY_MODEL_78051657194346bc99fb58409ed67870",
  35 |               "IPY_MODEL_f716d2a99aae4bf4b09349c5d7fc695a",
  36 |               "IPY_MODEL_2f2d4b6731c042cf8b1d64db40f6234a",
  37 |               "IPY_MODEL_1d686d4c60474afd907b722e20af1452",
  38 |               "IPY_MODEL_3ada03b7a08244a797a4178bbe935318"
  39 |             ],
  40 |             "layout": "IPY_MODEL_5b26c881dd42490eb144e255ae685fcd"
  41 |           }
  42 |         },
  43 |         "78051657194346bc99fb58409ed67870": {
  44 |           "model_module": "@jupyter-widgets/controls",
  45 |           "model_name": "HTMLModel",
  46 |           "model_module_version": "1.5.0",
  47 |           "state": {
  48 |             "_dom_classes": [],
  49 |             "_model_module": "@jupyter-widgets/controls",
  50 |             "_model_module_version": "1.5.0",
  51 |             "_model_name": "HTMLModel",
  52 |             "_view_count": null,
  53 |             "_view_module": "@jupyter-widgets/controls",
  54 |             "_view_module_version": "1.5.0",
  55 |             "_view_name": "HTMLView",
  56 |             "description": "",
  57 |             "description_tooltip": null,
  58 |             "layout": "IPY_MODEL_8b78231bf8a94374b6cf468e9fa85929",
  59 |             "placeholder": "​",
  60 |             "style": "IPY_MODEL_aecb1c13c7b4493ba613d690bdd2707a",
  61 |             "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
  62 |           }
  63 |         },
  64 |         "f716d2a99aae4bf4b09349c5d7fc695a": {
  65 |           "model_module": "@jupyter-widgets/controls",
  66 |           "model_name": "PasswordModel",
  67 |           "model_module_version": "1.5.0",
  68 |           "state": {
  69 |             "_dom_classes": [],
  70 |             "_model_module": "@jupyter-widgets/controls",
  71 |             "_model_module_version": "1.5.0",
  72 |             "_model_name": "PasswordModel",
  73 |             "_view_count": null,
  74 |             "_view_module": "@jupyter-widgets/controls",
  75 |             "_view_module_version": "1.5.0",
  76 |             "_view_name": "PasswordView",
  77 |             "continuous_update": true,
  78 |             "description": "Token:",
  79 |             "description_tooltip": null,
  80 |             "disabled": false,
  81 |             "layout": "IPY_MODEL_50b3cb1532554487b840a0a8539be7dc",
  82 |             "placeholder": "​",
  83 |             "style": "IPY_MODEL_0516db4733d84ca1a04be0d90f3cbfa9",
  84 |             "value": ""
  85 |           }
  86 |         },
  87 |         "2f2d4b6731c042cf8b1d64db40f6234a": {
  88 |           "model_module": "@jupyter-widgets/controls",
  89 |           "model_name": "CheckboxModel",
  90 |           "model_module_version": "1.5.0",
  91 |           "state": {
  92 |             "_dom_classes": [],
  93 |             "_model_module": "@jupyter-widgets/controls",
  94 |             "_model_module_version": "1.5.0",
  95 |             "_model_name": "CheckboxModel",
  96 |             "_view_count": null,
  97 |             "_view_module": "@jupyter-widgets/controls",
  98 |             "_view_module_version": "1.5.0",
  99 |             "_view_name": "CheckboxView",
 100 |             "description": "Add token as git credential?",
 101 |             "description_tooltip": null,
 102 |             "disabled": false,
 103 |             "indent": true,
 104 |             "layout": "IPY_MODEL_ef0ef461e11a4c9f9c7106709388c9ea",
 105 |             "style": "IPY_MODEL_bc0abd1fc56e4b418d551936ccbf724d",
 106 |             "value": true
 107 |           }
 108 |         },
 109 |         "1d686d4c60474afd907b722e20af1452": {
 110 |           "model_module": "@jupyter-widgets/controls",
 111 |           "model_name": "ButtonModel",
 112 |           "model_module_version": "1.5.0",
 113 |           "state": {
 114 |             "_dom_classes": [],
 115 |             "_model_module": "@jupyter-widgets/controls",
 116 |             "_model_module_version": "1.5.0",
 117 |             "_model_name": "ButtonModel",
 118 |             "_view_count": null,
 119 |             "_view_module": "@jupyter-widgets/controls",
 120 |             "_view_module_version": "1.5.0",
 121 |             "_view_name": "ButtonView",
 122 |             "button_style": "",
 123 |             "description": "Login",
 124 |             "disabled": false,
 125 |             "icon": "",
 126 |             "layout": "IPY_MODEL_febb82295cce487abe735f9ecb72796e",
 127 |             "style": "IPY_MODEL_2dd16270e324409aa8a5e5f8d9464665",
 128 |             "tooltip": ""
 129 |           }
 130 |         },
 131 |         "3ada03b7a08244a797a4178bbe935318": {
 132 |           "model_module": "@jupyter-widgets/controls",
 133 |           "model_name": "HTMLModel",
 134 |           "model_module_version": "1.5.0",
 135 |           "state": {
 136 |             "_dom_classes": [],
 137 |             "_model_module": "@jupyter-widgets/controls",
 138 |             "_model_module_version": "1.5.0",
 139 |             "_model_name": "HTMLModel",
 140 |             "_view_count": null,
 141 |             "_view_module": "@jupyter-widgets/controls",
 142 |             "_view_module_version": "1.5.0",
 143 |             "_view_name": "HTMLView",
 144 |             "description": "",
 145 |             "description_tooltip": null,
 146 |             "layout": "IPY_MODEL_202446995dd6467fab421b26fdd4189d",
 147 |             "placeholder": "​",
 148 |             "style": "IPY_MODEL_e771c4cb03be41b2bf7c0ee82e255d5a",
 149 |             "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
 150 |           }
 151 |         },
 152 |         "5b26c881dd42490eb144e255ae685fcd": {
 153 |           "model_module": "@jupyter-widgets/base",
 154 |           "model_name": "LayoutModel",
 155 |           "model_module_version": "1.2.0",
 156 |           "state": {
 157 |             "_model_module": "@jupyter-widgets/base",
 158 |             "_model_module_version": "1.2.0",
 159 |             "_model_name": "LayoutModel",
 160 |             "_view_count": null,
 161 |             "_view_module": "@jupyter-widgets/base",
 162 |             "_view_module_version": "1.2.0",
 163 |             "_view_name": "LayoutView",
 164 |             "align_content": null,
 165 |             "align_items": "center",
 166 |             "align_self": null,
 167 |             "border": null,
 168 |             "bottom": null,
 169 |             "display": "flex",
 170 |             "flex": null,
 171 |             "flex_flow": "column",
 172 |             "grid_area": null,
 173 |             "grid_auto_columns": null,
 174 |             "grid_auto_flow": null,
 175 |             "grid_auto_rows": null,
 176 |             "grid_column": null,
 177 |             "grid_gap": null,
 178 |             "grid_row": null,
 179 |             "grid_template_areas": null,
 180 |             "grid_template_columns": null,
 181 |             "grid_template_rows": null,
 182 |             "height": null,
 183 |             "justify_content": null,
 184 |             "justify_items": null,
 185 |             "left": null,
 186 |             "margin": null,
 187 |             "max_height": null,
 188 |             "max_width": null,
 189 |             "min_height": null,
 190 |             "min_width": null,
 191 |             "object_fit": null,
 192 |             "object_position": null,
 193 |             "order": null,
 194 |             "overflow": null,
 195 |             "overflow_x": null,
 196 |             "overflow_y": null,
 197 |             "padding": null,
 198 |             "right": null,
 199 |             "top": null,
 200 |             "visibility": null,
 201 |             "width": "50%"
 202 |           }
 203 |         },
 204 |         "8b78231bf8a94374b6cf468e9fa85929": {
 205 |           "model_module": "@jupyter-widgets/base",
 206 |           "model_name": "LayoutModel",
 207 |           "model_module_version": "1.2.0",
 208 |           "state": {
 209 |             "_model_module": "@jupyter-widgets/base",
 210 |             "_model_module_version": "1.2.0",
 211 |             "_model_name": "LayoutModel",
 212 |             "_view_count": null,
 213 |             "_view_module": "@jupyter-widgets/base",
 214 |             "_view_module_version": "1.2.0",
 215 |             "_view_name": "LayoutView",
 216 |             "align_content": null,
 217 |             "align_items": null,
 218 |             "align_self": null,
 219 |             "border": null,
 220 |             "bottom": null,
 221 |             "display": null,
 222 |             "flex": null,
 223 |             "flex_flow": null,
 224 |             "grid_area": null,
 225 |             "grid_auto_columns": null,
 226 |             "grid_auto_flow": null,
 227 |             "grid_auto_rows": null,
 228 |             "grid_column": null,
 229 |             "grid_gap": null,
 230 |             "grid_row": null,
 231 |             "grid_template_areas": null,
 232 |             "grid_template_columns": null,
 233 |             "grid_template_rows": null,
 234 |             "height": null,
 235 |             "justify_content": null,
 236 |             "justify_items": null,
 237 |             "left": null,
 238 |             "margin": null,
 239 |             "max_height": null,
 240 |             "max_width": null,
 241 |             "min_height": null,
 242 |             "min_width": null,
 243 |             "object_fit": null,
 244 |             "object_position": null,
 245 |             "order": null,
 246 |             "overflow": null,
 247 |             "overflow_x": null,
 248 |             "overflow_y": null,
 249 |             "padding": null,
 250 |             "right": null,
 251 |             "top": null,
 252 |             "visibility": null,
 253 |             "width": null
 254 |           }
 255 |         },
 256 |         "aecb1c13c7b4493ba613d690bdd2707a": {
 257 |           "model_module": "@jupyter-widgets/controls",
 258 |           "model_name": "DescriptionStyleModel",
 259 |           "model_module_version": "1.5.0",
 260 |           "state": {
 261 |             "_model_module": "@jupyter-widgets/controls",
 262 |             "_model_module_version": "1.5.0",
 263 |             "_model_name": "DescriptionStyleModel",
 264 |             "_view_count": null,
 265 |             "_view_module": "@jupyter-widgets/base",
 266 |             "_view_module_version": "1.2.0",
 267 |             "_view_name": "StyleView",
 268 |             "description_width": ""
 269 |           }
 270 |         },
 271 |         "50b3cb1532554487b840a0a8539be7dc": {
 272 |           "model_module": "@jupyter-widgets/base",
 273 |           "model_name": "LayoutModel",
 274 |           "model_module_version": "1.2.0",
 275 |           "state": {
 276 |             "_model_module": "@jupyter-widgets/base",
 277 |             "_model_module_version": "1.2.0",
 278 |             "_model_name": "LayoutModel",
 279 |             "_view_count": null,
 280 |             "_view_module": "@jupyter-widgets/base",
 281 |             "_view_module_version": "1.2.0",
 282 |             "_view_name": "LayoutView",
 283 |             "align_content": null,
 284 |             "align_items": null,
 285 |             "align_self": null,
 286 |             "border": null,
 287 |             "bottom": null,
 288 |             "display": null,
 289 |             "flex": null,
 290 |             "flex_flow": null,
 291 |             "grid_area": null,
 292 |             "grid_auto_columns": null,
 293 |             "grid_auto_flow": null,
 294 |             "grid_auto_rows": null,
 295 |             "grid_column": null,
 296 |             "grid_gap": null,
 297 |             "grid_row": null,
 298 |             "grid_template_areas": null,
 299 |             "grid_template_columns": null,
 300 |             "grid_template_rows": null,
 301 |             "height": null,
 302 |             "justify_content": null,
 303 |             "justify_items": null,
 304 |             "left": null,
 305 |             "margin": null,
 306 |             "max_height": null,
 307 |             "max_width": null,
 308 |             "min_height": null,
 309 |             "min_width": null,
 310 |             "object_fit": null,
 311 |             "object_position": null,
 312 |             "order": null,
 313 |             "overflow": null,
 314 |             "overflow_x": null,
 315 |             "overflow_y": null,
 316 |             "padding": null,
 317 |             "right": null,
 318 |             "top": null,
 319 |             "visibility": null,
 320 |             "width": null
 321 |           }
 322 |         },
 323 |         "0516db4733d84ca1a04be0d90f3cbfa9": {
 324 |           "model_module": "@jupyter-widgets/controls",
 325 |           "model_name": "DescriptionStyleModel",
 326 |           "model_module_version": "1.5.0",
 327 |           "state": {
 328 |             "_model_module": "@jupyter-widgets/controls",
 329 |             "_model_module_version": "1.5.0",
 330 |             "_model_name": "DescriptionStyleModel",
 331 |             "_view_count": null,
 332 |             "_view_module": "@jupyter-widgets/base",
 333 |             "_view_module_version": "1.2.0",
 334 |             "_view_name": "StyleView",
 335 |             "description_width": ""
 336 |           }
 337 |         },
 338 |         "ef0ef461e11a4c9f9c7106709388c9ea": {
 339 |           "model_module": "@jupyter-widgets/base",
 340 |           "model_name": "LayoutModel",
 341 |           "model_module_version": "1.2.0",
 342 |           "state": {
 343 |             "_model_module": "@jupyter-widgets/base",
 344 |             "_model_module_version": "1.2.0",
 345 |             "_model_name": "LayoutModel",
 346 |             "_view_count": null,
 347 |             "_view_module": "@jupyter-widgets/base",
 348 |             "_view_module_version": "1.2.0",
 349 |             "_view_name": "LayoutView",
 350 |             "align_content": null,
 351 |             "align_items": null,
 352 |             "align_self": null,
 353 |             "border": null,
 354 |             "bottom": null,
 355 |             "display": null,
 356 |             "flex": null,
 357 |             "flex_flow": null,
 358 |             "grid_area": null,
 359 |             "grid_auto_columns": null,
 360 |             "grid_auto_flow": null,
 361 |             "grid_auto_rows": null,
 362 |             "grid_column": null,
 363 |             "grid_gap": null,
 364 |             "grid_row": null,
 365 |             "grid_template_areas": null,
 366 |             "grid_template_columns": null,
 367 |             "grid_template_rows": null,
 368 |             "height": null,
 369 |             "justify_content": null,
 370 |             "justify_items": null,
 371 |             "left": null,
 372 |             "margin": null,
 373 |             "max_height": null,
 374 |             "max_width": null,
 375 |             "min_height": null,
 376 |             "min_width": null,
 377 |             "object_fit": null,
 378 |             "object_position": null,
 379 |             "order": null,
 380 |             "overflow": null,
 381 |             "overflow_x": null,
 382 |             "overflow_y": null,
 383 |             "padding": null,
 384 |             "right": null,
 385 |             "top": null,
 386 |             "visibility": null,
 387 |             "width": null
 388 |           }
 389 |         },
 390 |         "bc0abd1fc56e4b418d551936ccbf724d": {
 391 |           "model_module": "@jupyter-widgets/controls",
 392 |           "model_name": "DescriptionStyleModel",
 393 |           "model_module_version": "1.5.0",
 394 |           "state": {
 395 |             "_model_module": "@jupyter-widgets/controls",
 396 |             "_model_module_version": "1.5.0",
 397 |             "_model_name": "DescriptionStyleModel",
 398 |             "_view_count": null,
 399 |             "_view_module": "@jupyter-widgets/base",
 400 |             "_view_module_version": "1.2.0",
 401 |             "_view_name": "StyleView",
 402 |             "description_width": ""
 403 |           }
 404 |         },
 405 |         "febb82295cce487abe735f9ecb72796e": {
 406 |           "model_module": "@jupyter-widgets/base",
 407 |           "model_name": "LayoutModel",
 408 |           "model_module_version": "1.2.0",
 409 |           "state": {
 410 |             "_model_module": "@jupyter-widgets/base",
 411 |             "_model_module_version": "1.2.0",
 412 |             "_model_name": "LayoutModel",
 413 |             "_view_count": null,
 414 |             "_view_module": "@jupyter-widgets/base",
 415 |             "_view_module_version": "1.2.0",
 416 |             "_view_name": "LayoutView",
 417 |             "align_content": null,
 418 |             "align_items": null,
 419 |             "align_self": null,
 420 |             "border": null,
 421 |             "bottom": null,
 422 |             "display": null,
 423 |             "flex": null,
 424 |             "flex_flow": null,
 425 |             "grid_area": null,
 426 |             "grid_auto_columns": null,
 427 |             "grid_auto_flow": null,
 428 |             "grid_auto_rows": null,
 429 |             "grid_column": null,
 430 |             "grid_gap": null,
 431 |             "grid_row": null,
 432 |             "grid_template_areas": null,
 433 |             "grid_template_columns": null,
 434 |             "grid_template_rows": null,
 435 |             "height": null,
 436 |             "justify_content": null,
 437 |             "justify_items": null,
 438 |             "left": null,
 439 |             "margin": null,
 440 |             "max_height": null,
 441 |             "max_width": null,
 442 |             "min_height": null,
 443 |             "min_width": null,
 444 |             "object_fit": null,
 445 |             "object_position": null,
 446 |             "order": null,
 447 |             "overflow": null,
 448 |             "overflow_x": null,
 449 |             "overflow_y": null,
 450 |             "padding": null,
 451 |             "right": null,
 452 |             "top": null,
 453 |             "visibility": null,
 454 |             "width": null
 455 |           }
 456 |         },
 457 |         "2dd16270e324409aa8a5e5f8d9464665": {
 458 |           "model_module": "@jupyter-widgets/controls",
 459 |           "model_name": "ButtonStyleModel",
 460 |           "model_module_version": "1.5.0",
 461 |           "state": {
 462 |             "_model_module": "@jupyter-widgets/controls",
 463 |             "_model_module_version": "1.5.0",
 464 |             "_model_name": "ButtonStyleModel",
 465 |             "_view_count": null,
 466 |             "_view_module": "@jupyter-widgets/base",
 467 |             "_view_module_version": "1.2.0",
 468 |             "_view_name": "StyleView",
 469 |             "button_color": null,
 470 |             "font_weight": ""
 471 |           }
 472 |         },
 473 |         "202446995dd6467fab421b26fdd4189d": {
 474 |           "model_module": "@jupyter-widgets/base",
 475 |           "model_name": "LayoutModel",
 476 |           "model_module_version": "1.2.0",
 477 |           "state": {
 478 |             "_model_module": "@jupyter-widgets/base",
 479 |             "_model_module_version": "1.2.0",
 480 |             "_model_name": "LayoutModel",
 481 |             "_view_count": null,
 482 |             "_view_module": "@jupyter-widgets/base",
 483 |             "_view_module_version": "1.2.0",
 484 |             "_view_name": "LayoutView",
 485 |             "align_content": null,
 486 |             "align_items": null,
 487 |             "align_self": null,
 488 |             "border": null,
 489 |             "bottom": null,
 490 |             "display": null,
 491 |             "flex": null,
 492 |             "flex_flow": null,
 493 |             "grid_area": null,
 494 |             "grid_auto_columns": null,
 495 |             "grid_auto_flow": null,
 496 |             "grid_auto_rows": null,
 497 |             "grid_column": null,
 498 |             "grid_gap": null,
 499 |             "grid_row": null,
 500 |             "grid_template_areas": null,
 501 |             "grid_template_columns": null,
 502 |             "grid_template_rows": null,
 503 |             "height": null,
 504 |             "justify_content": null,
 505 |             "justify_items": null,
 506 |             "left": null,
 507 |             "margin": null,
 508 |             "max_height": null,
 509 |             "max_width": null,
 510 |             "min_height": null,
 511 |             "min_width": null,
 512 |             "object_fit": null,
 513 |             "object_position": null,
 514 |             "order": null,
 515 |             "overflow": null,
 516 |             "overflow_x": null,
 517 |             "overflow_y": null,
 518 |             "padding": null,
 519 |             "right": null,
 520 |             "top": null,
 521 |             "visibility": null,
 522 |             "width": null
 523 |           }
 524 |         },
 525 |         "e771c4cb03be41b2bf7c0ee82e255d5a": {
 526 |           "model_module": "@jupyter-widgets/controls",
 527 |           "model_name": "DescriptionStyleModel",
 528 |           "model_module_version": "1.5.0",
 529 |           "state": {
 530 |             "_model_module": "@jupyter-widgets/controls",
 531 |             "_model_module_version": "1.5.0",
 532 |             "_model_name": "DescriptionStyleModel",
 533 |             "_view_count": null,
 534 |             "_view_module": "@jupyter-widgets/base",
 535 |             "_view_module_version": "1.2.0",
 536 |             "_view_name": "StyleView",
 537 |             "description_width": ""
 538 |           }
 539 |         },
 540 |         "f73c306cce8d4be1aea3ad912e9ef5dc": {
 541 |           "model_module": "@jupyter-widgets/controls",
 542 |           "model_name": "HBoxModel",
 543 |           "model_module_version": "1.5.0",
 544 |           "state": {
 545 |             "_dom_classes": [],
 546 |             "_model_module": "@jupyter-widgets/controls",
 547 |             "_model_module_version": "1.5.0",
 548 |             "_model_name": "HBoxModel",
 549 |             "_view_count": null,
 550 |             "_view_module": "@jupyter-widgets/controls",
 551 |             "_view_module_version": "1.5.0",
 552 |             "_view_name": "HBoxView",
 553 |             "box_style": "",
 554 |             "children": [
 555 |               "IPY_MODEL_6f7ec6e81fbc4babab80f5b352caedc8",
 556 |               "IPY_MODEL_6d830be2c25a4e499ed8f57758638e23",
 557 |               "IPY_MODEL_50deede7561347e8af1e0b213d33c3de"
 558 |             ],
 559 |             "layout": "IPY_MODEL_55b60bfe36b14fc6837959b6b9f56fac"
 560 |           }
 561 |         },
 562 |         "6f7ec6e81fbc4babab80f5b352caedc8": {
 563 |           "model_module": "@jupyter-widgets/controls",
 564 |           "model_name": "HTMLModel",
 565 |           "model_module_version": "1.5.0",
 566 |           "state": {
 567 |             "_dom_classes": [],
 568 |             "_model_module": "@jupyter-widgets/controls",
 569 |             "_model_module_version": "1.5.0",
 570 |             "_model_name": "HTMLModel",
 571 |             "_view_count": null,
 572 |             "_view_module": "@jupyter-widgets/controls",
 573 |             "_view_module_version": "1.5.0",
 574 |             "_view_name": "HTMLView",
 575 |             "description": "",
 576 |             "description_tooltip": null,
 577 |             "layout": "IPY_MODEL_24db6f406a024e6f8c78ef83752fe336",
 578 |             "placeholder": "​",
 579 |             "style": "IPY_MODEL_42e137910a8b45859ae3517d5bd0cb17",
 580 |             "value": "Loading checkpoint shards: 100%"
 581 |           }
 582 |         },
 583 |         "6d830be2c25a4e499ed8f57758638e23": {
 584 |           "model_module": "@jupyter-widgets/controls",
 585 |           "model_name": "FloatProgressModel",
 586 |           "model_module_version": "1.5.0",
 587 |           "state": {
 588 |             "_dom_classes": [],
 589 |             "_model_module": "@jupyter-widgets/controls",
 590 |             "_model_module_version": "1.5.0",
 591 |             "_model_name": "FloatProgressModel",
 592 |             "_view_count": null,
 593 |             "_view_module": "@jupyter-widgets/controls",
 594 |             "_view_module_version": "1.5.0",
 595 |             "_view_name": "ProgressView",
 596 |             "bar_style": "success",
 597 |             "description": "",
 598 |             "description_tooltip": null,
 599 |             "layout": "IPY_MODEL_fc1fe6d6b77145828e33cc228c694082",
 600 |             "max": 4,
 601 |             "min": 0,
 602 |             "orientation": "horizontal",
 603 |             "style": "IPY_MODEL_93c80c4fcf01411e8f70d4c7daf23423",
 604 |             "value": 4
 605 |           }
 606 |         },
 607 |         "50deede7561347e8af1e0b213d33c3de": {
 608 |           "model_module": "@jupyter-widgets/controls",
 609 |           "model_name": "HTMLModel",
 610 |           "model_module_version": "1.5.0",
 611 |           "state": {
 612 |             "_dom_classes": [],
 613 |             "_model_module": "@jupyter-widgets/controls",
 614 |             "_model_module_version": "1.5.0",
 615 |             "_model_name": "HTMLModel",
 616 |             "_view_count": null,
 617 |             "_view_module": "@jupyter-widgets/controls",
 618 |             "_view_module_version": "1.5.0",
 619 |             "_view_name": "HTMLView",
 620 |             "description": "",
 621 |             "description_tooltip": null,
 622 |             "layout": "IPY_MODEL_52d69bc57f3c4299b9aad7c536735f99",
 623 |             "placeholder": "​",
 624 |             "style": "IPY_MODEL_78de396d6b2d4ce384faf393b84b73a2",
 625 |             "value": " 4/4 [01:25&lt;00:00, 18.80s/it]"
 626 |           }
 627 |         },
 628 |         "55b60bfe36b14fc6837959b6b9f56fac": {
 629 |           "model_module": "@jupyter-widgets/base",
 630 |           "model_name": "LayoutModel",
 631 |           "model_module_version": "1.2.0",
 632 |           "state": {
 633 |             "_model_module": "@jupyter-widgets/base",
 634 |             "_model_module_version": "1.2.0",
 635 |             "_model_name": "LayoutModel",
 636 |             "_view_count": null,
 637 |             "_view_module": "@jupyter-widgets/base",
 638 |             "_view_module_version": "1.2.0",
 639 |             "_view_name": "LayoutView",
 640 |             "align_content": null,
 641 |             "align_items": null,
 642 |             "align_self": null,
 643 |             "border": null,
 644 |             "bottom": null,
 645 |             "display": null,
 646 |             "flex": null,
 647 |             "flex_flow": null,
 648 |             "grid_area": null,
 649 |             "grid_auto_columns": null,
 650 |             "grid_auto_flow": null,
 651 |             "grid_auto_rows": null,
 652 |             "grid_column": null,
 653 |             "grid_gap": null,
 654 |             "grid_row": null,
 655 |             "grid_template_areas": null,
 656 |             "grid_template_columns": null,
 657 |             "grid_template_rows": null,
 658 |             "height": null,
 659 |             "justify_content": null,
 660 |             "justify_items": null,
 661 |             "left": null,
 662 |             "margin": null,
 663 |             "max_height": null,
 664 |             "max_width": null,
 665 |             "min_height": null,
 666 |             "min_width": null,
 667 |             "object_fit": null,
 668 |             "object_position": null,
 669 |             "order": null,
 670 |             "overflow": null,
 671 |             "overflow_x": null,
 672 |             "overflow_y": null,
 673 |             "padding": null,
 674 |             "right": null,
 675 |             "top": null,
 676 |             "visibility": null,
 677 |             "width": null
 678 |           }
 679 |         },
 680 |         "24db6f406a024e6f8c78ef83752fe336": {
 681 |           "model_module": "@jupyter-widgets/base",
 682 |           "model_name": "LayoutModel",
 683 |           "model_module_version": "1.2.0",
 684 |           "state": {
 685 |             "_model_module": "@jupyter-widgets/base",
 686 |             "_model_module_version": "1.2.0",
 687 |             "_model_name": "LayoutModel",
 688 |             "_view_count": null,
 689 |             "_view_module": "@jupyter-widgets/base",
 690 |             "_view_module_version": "1.2.0",
 691 |             "_view_name": "LayoutView",
 692 |             "align_content": null,
 693 |             "align_items": null,
 694 |             "align_self": null,
 695 |             "border": null,
 696 |             "bottom": null,
 697 |             "display": null,
 698 |             "flex": null,
 699 |             "flex_flow": null,
 700 |             "grid_area": null,
 701 |             "grid_auto_columns": null,
 702 |             "grid_auto_flow": null,
 703 |             "grid_auto_rows": null,
 704 |             "grid_column": null,
 705 |             "grid_gap": null,
 706 |             "grid_row": null,
 707 |             "grid_template_areas": null,
 708 |             "grid_template_columns": null,
 709 |             "grid_template_rows": null,
 710 |             "height": null,
 711 |             "justify_content": null,
 712 |             "justify_items": null,
 713 |             "left": null,
 714 |             "margin": null,
 715 |             "max_height": null,
 716 |             "max_width": null,
 717 |             "min_height": null,
 718 |             "min_width": null,
 719 |             "object_fit": null,
 720 |             "object_position": null,
 721 |             "order": null,
 722 |             "overflow": null,
 723 |             "overflow_x": null,
 724 |             "overflow_y": null,
 725 |             "padding": null,
 726 |             "right": null,
 727 |             "top": null,
 728 |             "visibility": null,
 729 |             "width": null
 730 |           }
 731 |         },
 732 |         "42e137910a8b45859ae3517d5bd0cb17": {
 733 |           "model_module": "@jupyter-widgets/controls",
 734 |           "model_name": "DescriptionStyleModel",
 735 |           "model_module_version": "1.5.0",
 736 |           "state": {
 737 |             "_model_module": "@jupyter-widgets/controls",
 738 |             "_model_module_version": "1.5.0",
 739 |             "_model_name": "DescriptionStyleModel",
 740 |             "_view_count": null,
 741 |             "_view_module": "@jupyter-widgets/base",
 742 |             "_view_module_version": "1.2.0",
 743 |             "_view_name": "StyleView",
 744 |             "description_width": ""
 745 |           }
 746 |         },
 747 |         "fc1fe6d6b77145828e33cc228c694082": {
 748 |           "model_module": "@jupyter-widgets/base",
 749 |           "model_name": "LayoutModel",
 750 |           "model_module_version": "1.2.0",
 751 |           "state": {
 752 |             "_model_module": "@jupyter-widgets/base",
 753 |             "_model_module_version": "1.2.0",
 754 |             "_model_name": "LayoutModel",
 755 |             "_view_count": null,
 756 |             "_view_module": "@jupyter-widgets/base",
 757 |             "_view_module_version": "1.2.0",
 758 |             "_view_name": "LayoutView",
 759 |             "align_content": null,
 760 |             "align_items": null,
 761 |             "align_self": null,
 762 |             "border": null,
 763 |             "bottom": null,
 764 |             "display": null,
 765 |             "flex": null,
 766 |             "flex_flow": null,
 767 |             "grid_area": null,
 768 |             "grid_auto_columns": null,
 769 |             "grid_auto_flow": null,
 770 |             "grid_auto_rows": null,
 771 |             "grid_column": null,
 772 |             "grid_gap": null,
 773 |             "grid_row": null,
 774 |             "grid_template_areas": null,
 775 |             "grid_template_columns": null,
 776 |             "grid_template_rows": null,
 777 |             "height": null,
 778 |             "justify_content": null,
 779 |             "justify_items": null,
 780 |             "left": null,
 781 |             "margin": null,
 782 |             "max_height": null,
 783 |             "max_width": null,
 784 |             "min_height": null,
 785 |             "min_width": null,
 786 |             "object_fit": null,
 787 |             "object_position": null,
 788 |             "order": null,
 789 |             "overflow": null,
 790 |             "overflow_x": null,
 791 |             "overflow_y": null,
 792 |             "padding": null,
 793 |             "right": null,
 794 |             "top": null,
 795 |             "visibility": null,
 796 |             "width": null
 797 |           }
 798 |         },
 799 |         "93c80c4fcf01411e8f70d4c7daf23423": {
 800 |           "model_module": "@jupyter-widgets/controls",
 801 |           "model_name": "ProgressStyleModel",
 802 |           "model_module_version": "1.5.0",
 803 |           "state": {
 804 |             "_model_module": "@jupyter-widgets/controls",
 805 |             "_model_module_version": "1.5.0",
 806 |             "_model_name": "ProgressStyleModel",
 807 |             "_view_count": null,
 808 |             "_view_module": "@jupyter-widgets/base",
 809 |             "_view_module_version": "1.2.0",
 810 |             "_view_name": "StyleView",
 811 |             "bar_color": null,
 812 |             "description_width": ""
 813 |           }
 814 |         },
 815 |         "52d69bc57f3c4299b9aad7c536735f99": {
 816 |           "model_module": "@jupyter-widgets/base",
 817 |           "model_name": "LayoutModel",
 818 |           "model_module_version": "1.2.0",
 819 |           "state": {
 820 |             "_model_module": "@jupyter-widgets/base",
 821 |             "_model_module_version": "1.2.0",
 822 |             "_model_name": "LayoutModel",
 823 |             "_view_count": null,
 824 |             "_view_module": "@jupyter-widgets/base",
 825 |             "_view_module_version": "1.2.0",
 826 |             "_view_name": "LayoutView",
 827 |             "align_content": null,
 828 |             "align_items": null,
 829 |             "align_self": null,
 830 |             "border": null,
 831 |             "bottom": null,
 832 |             "display": null,
 833 |             "flex": null,
 834 |             "flex_flow": null,
 835 |             "grid_area": null,
 836 |             "grid_auto_columns": null,
 837 |             "grid_auto_flow": null,
 838 |             "grid_auto_rows": null,
 839 |             "grid_column": null,
 840 |             "grid_gap": null,
 841 |             "grid_row": null,
 842 |             "grid_template_areas": null,
 843 |             "grid_template_columns": null,
 844 |             "grid_template_rows": null,
 845 |             "height": null,
 846 |             "justify_content": null,
 847 |             "justify_items": null,
 848 |             "left": null,
 849 |             "margin": null,
 850 |             "max_height": null,
 851 |             "max_width": null,
 852 |             "min_height": null,
 853 |             "min_width": null,
 854 |             "object_fit": null,
 855 |             "object_position": null,
 856 |             "order": null,
 857 |             "overflow": null,
 858 |             "overflow_x": null,
 859 |             "overflow_y": null,
 860 |             "padding": null,
 861 |             "right": null,
 862 |             "top": null,
 863 |             "visibility": null,
 864 |             "width": null
 865 |           }
 866 |         },
 867 |         "78de396d6b2d4ce384faf393b84b73a2": {
 868 |           "model_module": "@jupyter-widgets/controls",
 869 |           "model_name": "DescriptionStyleModel",
 870 |           "model_module_version": "1.5.0",
 871 |           "state": {
 872 |             "_model_module": "@jupyter-widgets/controls",
 873 |             "_model_module_version": "1.5.0",
 874 |             "_model_name": "DescriptionStyleModel",
 875 |             "_view_count": null,
 876 |             "_view_module": "@jupyter-widgets/base",
 877 |             "_view_module_version": "1.2.0",
 878 |             "_view_name": "StyleView",
 879 |             "description_width": ""
 880 |           }
 881 |         },
 882 |         "88f08961299f438ea00dd88d61ec99d2": {
 883 |           "model_module": "@jupyter-widgets/controls",
 884 |           "model_name": "HBoxModel",
 885 |           "model_module_version": "1.5.0",
 886 |           "state": {
 887 |             "_dom_classes": [],
 888 |             "_model_module": "@jupyter-widgets/controls",
 889 |             "_model_module_version": "1.5.0",
 890 |             "_model_name": "HBoxModel",
 891 |             "_view_count": null,
 892 |             "_view_module": "@jupyter-widgets/controls",
 893 |             "_view_module_version": "1.5.0",
 894 |             "_view_name": "HBoxView",
 895 |             "box_style": "",
 896 |             "children": [
 897 |               "IPY_MODEL_4ab68fde0d5845e498b23536ee61e828",
 898 |               "IPY_MODEL_476045c378f942a6b3066e6ffd8e93b6",
 899 |               "IPY_MODEL_31faf6bbc65340f6ae0bae2b20f825f0"
 900 |             ],
 901 |             "layout": "IPY_MODEL_d7a88f0f41e64fbb9eb04c208276d5e9"
 902 |           }
 903 |         },
 904 |         "4ab68fde0d5845e498b23536ee61e828": {
 905 |           "model_module": "@jupyter-widgets/controls",
 906 |           "model_name": "HTMLModel",
 907 |           "model_module_version": "1.5.0",
 908 |           "state": {
 909 |             "_dom_classes": [],
 910 |             "_model_module": "@jupyter-widgets/controls",
 911 |             "_model_module_version": "1.5.0",
 912 |             "_model_name": "HTMLModel",
 913 |             "_view_count": null,
 914 |             "_view_module": "@jupyter-widgets/controls",
 915 |             "_view_module_version": "1.5.0",
 916 |             "_view_name": "HTMLView",
 917 |             "description": "",
 918 |             "description_tooltip": null,
 919 |             "layout": "IPY_MODEL_26d4da72a16b4381aef9244f59c536b7",
 920 |             "placeholder": "​",
 921 |             "style": "IPY_MODEL_d97e2daa67f8430fbe62fd64cf753f73",
 922 |             "value": "generation_config.json: 100%"
 923 |           }
 924 |         },
 925 |         "476045c378f942a6b3066e6ffd8e93b6": {
 926 |           "model_module": "@jupyter-widgets/controls",
 927 |           "model_name": "FloatProgressModel",
 928 |           "model_module_version": "1.5.0",
 929 |           "state": {
 930 |             "_dom_classes": [],
 931 |             "_model_module": "@jupyter-widgets/controls",
 932 |             "_model_module_version": "1.5.0",
 933 |             "_model_name": "FloatProgressModel",
 934 |             "_view_count": null,
 935 |             "_view_module": "@jupyter-widgets/controls",
 936 |             "_view_module_version": "1.5.0",
 937 |             "_view_name": "ProgressView",
 938 |             "bar_style": "success",
 939 |             "description": "",
 940 |             "description_tooltip": null,
 941 |             "layout": "IPY_MODEL_9ba5c9837ffa48a1a98375cf8f7ea58c",
 942 |             "max": 137,
 943 |             "min": 0,
 944 |             "orientation": "horizontal",
 945 |             "style": "IPY_MODEL_7b0b028a1df640d49ec7b6a3ea8ed5cd",
 946 |             "value": 137
 947 |           }
 948 |         },
 949 |         "31faf6bbc65340f6ae0bae2b20f825f0": {
 950 |           "model_module": "@jupyter-widgets/controls",
 951 |           "model_name": "HTMLModel",
 952 |           "model_module_version": "1.5.0",
 953 |           "state": {
 954 |             "_dom_classes": [],
 955 |             "_model_module": "@jupyter-widgets/controls",
 956 |             "_model_module_version": "1.5.0",
 957 |             "_model_name": "HTMLModel",
 958 |             "_view_count": null,
 959 |             "_view_module": "@jupyter-widgets/controls",
 960 |             "_view_module_version": "1.5.0",
 961 |             "_view_name": "HTMLView",
 962 |             "description": "",
 963 |             "description_tooltip": null,
 964 |             "layout": "IPY_MODEL_694328c260294586bf00c5f8904b26a2",
 965 |             "placeholder": "​",
 966 |             "style": "IPY_MODEL_e444efde30f146efba56fa1d0bcee1fd",
 967 |             "value": " 137/137 [00:00&lt;00:00, 9.27kB/s]"
 968 |           }
 969 |         },
 970 |         "d7a88f0f41e64fbb9eb04c208276d5e9": {
 971 |           "model_module": "@jupyter-widgets/base",
 972 |           "model_name": "LayoutModel",
 973 |           "model_module_version": "1.2.0",
 974 |           "state": {
 975 |             "_model_module": "@jupyter-widgets/base",
 976 |             "_model_module_version": "1.2.0",
 977 |             "_model_name": "LayoutModel",
 978 |             "_view_count": null,
 979 |             "_view_module": "@jupyter-widgets/base",
 980 |             "_view_module_version": "1.2.0",
 981 |             "_view_name": "LayoutView",
 982 |             "align_content": null,
 983 |             "align_items": null,
 984 |             "align_self": null,
 985 |             "border": null,
 986 |             "bottom": null,
 987 |             "display": null,
 988 |             "flex": null,
 989 |             "flex_flow": null,
 990 |             "grid_area": null,
 991 |             "grid_auto_columns": null,
 992 |             "grid_auto_flow": null,
 993 |             "grid_auto_rows": null,
 994 |             "grid_column": null,
 995 |             "grid_gap": null,
 996 |             "grid_row": null,
 997 |             "grid_template_areas": null,
 998 |             "grid_template_columns": null,
 999 |             "grid_template_rows": null,
1000 |             "height": null,
1001 |             "justify_content": null,
1002 |             "justify_items": null,
1003 |             "left": null,
1004 |             "margin": null,
1005 |             "max_height": null,
1006 |             "max_width": null,
1007 |             "min_height": null,
1008 |             "min_width": null,
1009 |             "object_fit": null,
1010 |             "object_position": null,
1011 |             "order": null,
1012 |             "overflow": null,
1013 |             "overflow_x": null,
1014 |             "overflow_y": null,
1015 |             "padding": null,
1016 |             "right": null,
1017 |             "top": null,
1018 |             "visibility": null,
1019 |             "width": null
1020 |           }
1021 |         },
1022 |         "26d4da72a16b4381aef9244f59c536b7": {
1023 |           "model_module": "@jupyter-widgets/base",
1024 |           "model_name": "LayoutModel",
1025 |           "model_module_version": "1.2.0",
1026 |           "state": {
1027 |             "_model_module": "@jupyter-widgets/base",
1028 |             "_model_module_version": "1.2.0",
1029 |             "_model_name": "LayoutModel",
1030 |             "_view_count": null,
1031 |             "_view_module": "@jupyter-widgets/base",
1032 |             "_view_module_version": "1.2.0",
1033 |             "_view_name": "LayoutView",
1034 |             "align_content": null,
1035 |             "align_items": null,
1036 |             "align_self": null,
1037 |             "border": null,
1038 |             "bottom": null,
1039 |             "display": null,
1040 |             "flex": null,
1041 |             "flex_flow": null,
1042 |             "grid_area": null,
1043 |             "grid_auto_columns": null,
1044 |             "grid_auto_flow": null,
1045 |             "grid_auto_rows": null,
1046 |             "grid_column": null,
1047 |             "grid_gap": null,
1048 |             "grid_row": null,
1049 |             "grid_template_areas": null,
1050 |             "grid_template_columns": null,
1051 |             "grid_template_rows": null,
1052 |             "height": null,
1053 |             "justify_content": null,
1054 |             "justify_items": null,
1055 |             "left": null,
1056 |             "margin": null,
1057 |             "max_height": null,
1058 |             "max_width": null,
1059 |             "min_height": null,
1060 |             "min_width": null,
1061 |             "object_fit": null,
1062 |             "object_position": null,
1063 |             "order": null,
1064 |             "overflow": null,
1065 |             "overflow_x": null,
1066 |             "overflow_y": null,
1067 |             "padding": null,
1068 |             "right": null,
1069 |             "top": null,
1070 |             "visibility": null,
1071 |             "width": null
1072 |           }
1073 |         },
1074 |         "d97e2daa67f8430fbe62fd64cf753f73": {
1075 |           "model_module": "@jupyter-widgets/controls",
1076 |           "model_name": "DescriptionStyleModel",
1077 |           "model_module_version": "1.5.0",
1078 |           "state": {
1079 |             "_model_module": "@jupyter-widgets/controls",
1080 |             "_model_module_version": "1.5.0",
1081 |             "_model_name": "DescriptionStyleModel",
1082 |             "_view_count": null,
1083 |             "_view_module": "@jupyter-widgets/base",
1084 |             "_view_module_version": "1.2.0",
1085 |             "_view_name": "StyleView",
1086 |             "description_width": ""
1087 |           }
1088 |         },
1089 |         "9ba5c9837ffa48a1a98375cf8f7ea58c": {
1090 |           "model_module": "@jupyter-widgets/base",
1091 |           "model_name": "LayoutModel",
1092 |           "model_module_version": "1.2.0",
1093 |           "state": {
1094 |             "_model_module": "@jupyter-widgets/base",
1095 |             "_model_module_version": "1.2.0",
1096 |             "_model_name": "LayoutModel",
1097 |             "_view_count": null,
1098 |             "_view_module": "@jupyter-widgets/base",
1099 |             "_view_module_version": "1.2.0",
1100 |             "_view_name": "LayoutView",
1101 |             "align_content": null,
1102 |             "align_items": null,
1103 |             "align_self": null,
1104 |             "border": null,
1105 |             "bottom": null,
1106 |             "display": null,
1107 |             "flex": null,
1108 |             "flex_flow": null,
1109 |             "grid_area": null,
1110 |             "grid_auto_columns": null,
1111 |             "grid_auto_flow": null,
1112 |             "grid_auto_rows": null,
1113 |             "grid_column": null,
1114 |             "grid_gap": null,
1115 |             "grid_row": null,
1116 |             "grid_template_areas": null,
1117 |             "grid_template_columns": null,
1118 |             "grid_template_rows": null,
1119 |             "height": null,
1120 |             "justify_content": null,
1121 |             "justify_items": null,
1122 |             "left": null,
1123 |             "margin": null,
1124 |             "max_height": null,
1125 |             "max_width": null,
1126 |             "min_height": null,
1127 |             "min_width": null,
1128 |             "object_fit": null,
1129 |             "object_position": null,
1130 |             "order": null,
1131 |             "overflow": null,
1132 |             "overflow_x": null,
1133 |             "overflow_y": null,
1134 |             "padding": null,
1135 |             "right": null,
1136 |             "top": null,
1137 |             "visibility": null,
1138 |             "width": null
1139 |           }
1140 |         },
1141 |         "7b0b028a1df640d49ec7b6a3ea8ed5cd": {
1142 |           "model_module": "@jupyter-widgets/controls",
1143 |           "model_name": "ProgressStyleModel",
1144 |           "model_module_version": "1.5.0",
1145 |           "state": {
1146 |             "_model_module": "@jupyter-widgets/controls",
1147 |             "_model_module_version": "1.5.0",
1148 |             "_model_name": "ProgressStyleModel",
1149 |             "_view_count": null,
1150 |             "_view_module": "@jupyter-widgets/base",
1151 |             "_view_module_version": "1.2.0",
1152 |             "_view_name": "StyleView",
1153 |             "bar_color": null,
1154 |             "description_width": ""
1155 |           }
1156 |         },
1157 |         "694328c260294586bf00c5f8904b26a2": {
1158 |           "model_module": "@jupyter-widgets/base",
1159 |           "model_name": "LayoutModel",
1160 |           "model_module_version": "1.2.0",
1161 |           "state": {
1162 |             "_model_module": "@jupyter-widgets/base",
1163 |             "_model_module_version": "1.2.0",
1164 |             "_model_name": "LayoutModel",
1165 |             "_view_count": null,
1166 |             "_view_module": "@jupyter-widgets/base",
1167 |             "_view_module_version": "1.2.0",
1168 |             "_view_name": "LayoutView",
1169 |             "align_content": null,
1170 |             "align_items": null,
1171 |             "align_self": null,
1172 |             "border": null,
1173 |             "bottom": null,
1174 |             "display": null,
1175 |             "flex": null,
1176 |             "flex_flow": null,
1177 |             "grid_area": null,
1178 |             "grid_auto_columns": null,
1179 |             "grid_auto_flow": null,
1180 |             "grid_auto_rows": null,
1181 |             "grid_column": null,
1182 |             "grid_gap": null,
1183 |             "grid_row": null,
1184 |             "grid_template_areas": null,
1185 |             "grid_template_columns": null,
1186 |             "grid_template_rows": null,
1187 |             "height": null,
1188 |             "justify_content": null,
1189 |             "justify_items": null,
1190 |             "left": null,
1191 |             "margin": null,
1192 |             "max_height": null,
1193 |             "max_width": null,
1194 |             "min_height": null,
1195 |             "min_width": null,
1196 |             "object_fit": null,
1197 |             "object_position": null,
1198 |             "order": null,
1199 |             "overflow": null,
1200 |             "overflow_x": null,
1201 |             "overflow_y": null,
1202 |             "padding": null,
1203 |             "right": null,
1204 |             "top": null,
1205 |             "visibility": null,
1206 |             "width": null
1207 |           }
1208 |         },
1209 |         "e444efde30f146efba56fa1d0bcee1fd": {
1210 |           "model_module": "@jupyter-widgets/controls",
1211 |           "model_name": "DescriptionStyleModel",
1212 |           "model_module_version": "1.5.0",
1213 |           "state": {
1214 |             "_model_module": "@jupyter-widgets/controls",
1215 |             "_model_module_version": "1.5.0",
1216 |             "_model_name": "DescriptionStyleModel",
1217 |             "_view_count": null,
1218 |             "_view_module": "@jupyter-widgets/base",
1219 |             "_view_module_version": "1.2.0",
1220 |             "_view_name": "StyleView",
1221 |             "description_width": ""
1222 |           }
1223 |         }
1224 |       }
1225 |     },
1226 |     "accelerator": "GPU"
1227 |   },
1228 |   "cells": [
1229 |     {
1230 |       "cell_type": "markdown",
1231 |       "metadata": {
1232 |         "id": "view-in-github",
1233 |         "colab_type": "text"
1234 |       },
1235 |       "source": [
1236 |         "<a href=\"https://colab.research.google.com/github/KaifAhmad1/RAG-with-KnowledgeGraph/blob/main/RAG_with_Graph_Database.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
1237 |       ]
1238 |     },
1239 |     {
1240 |       "cell_type": "markdown",
1241 |       "source": [
1242 |         "**Installing Dependencies:**"
1243 |       ],
1244 |       "metadata": {
1245 |         "id": "98YkxTMk8MkF"
1246 |       }
1247 |     },
1248 |     {
1249 |       "cell_type": "code",
1250 |       "execution_count": 13,
1251 |       "metadata": {
1252 |         "id": "Pw24LkQ27bML",
1253 |         "outputId": "a1444f5f-0dd0-4783-b8a0-f6b96d8135d1",
1254 |         "colab": {
1255 |           "base_uri": "https://localhost:8080/"
1256 |         }
1257 |       },
1258 |       "outputs": [
1259 |         {
1260 |           "output_type": "stream",
1261 |           "name": "stdout",
1262 |           "text": [
1263 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m670.2/670.2 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
1264 |             "\u001b[?25hRequirement already satisfied: flash-attn in /usr/local/lib/python3.10/dist-packages (2.5.5)\n",
1265 |             "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from flash-attn) (2.1.0)\n",
1266 |             "Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from flash-attn) (0.7.0)\n",
1267 |             "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from flash-attn) (23.2)\n",
1268 |             "Requirement already satisfied: ninja in /usr/local/lib/python3.10/dist-packages (from flash-attn) (1.11.1.1)\n",
1269 |             "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.13.1)\n",
1270 |             "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (4.9.0)\n",
1271 |             "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (1.12)\n",
1272 |             "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.2.1)\n",
1273 |             "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.1.3)\n",
1274 |             "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2023.6.0)\n",
1275 |             "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
1276 |             "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
1277 |             "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
1278 |             "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (8.9.2.26)\n",
1279 |             "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.3.1)\n",
1280 |             "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (11.0.2.54)\n",
1281 |             "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (10.3.2.106)\n",
1282 |             "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (11.4.5.107)\n",
1283 |             "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.0.106)\n",
1284 |             "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2.18.1)\n",
1285 |             "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
1286 |             "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2.1.0)\n",
1287 |             "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->flash-attn) (12.3.101)\n",
1288 |             "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->flash-attn) (2.1.5)\n",
1289 |             "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->flash-attn) (1.3.0)\n"
1290 |           ]
1291 |         }
1292 |       ],
1293 |       "source": [
1294 |         "!pip install -qU transformers\n",
1295 |         "!pip install -qU langchain\n",
1296 |         "!pip install -qU huggingface_hub\n",
1297 |         "!pip install -qU tiktoken\n",
1298 |         "!pip install -qU neo4j\n",
1299 |         "!pip install -qU python-dotenv\n",
1300 |         "!pip install -qU accelerate\n",
1301 |         "!pip install -qU sentence_transformers\n",
1302 |         "!pip install -qU  bitsandbytes\n",
1303 |         "!pip install -qU  optimum\n",
1304 |         "!pip install -qU unstructured unstructured[pdf]\n",
1305 |         "!pip install flash-attn --no-build-isolation"
1306 |       ]
1307 |     },
1308 |     {
1309 |       "cell_type": "code",
1310 |       "source": [
1311 |         "import os\n",
1312 |         "import re\n",
1313 |         "from langchain.vectorstores.neo4j_vector import Neo4jVector\n",
1314 |         "from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter\n",
1315 |         "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
1316 |         "from dotenv import load_dotenv"
1317 |       ],
1318 |       "metadata": {
1319 |         "id": "wu99tQMp8cYs"
1320 |       },
1321 |       "execution_count": 14,
1322 |       "outputs": []
1323 |     },
1324 |     {
1325 |       "cell_type": "markdown",
1326 |       "source": [
1327 |         "**Setting API Credentials in Environment Variables:**"
1328 |       ],
1329 |       "metadata": {
1330 |         "id": "ec1_j3db9-On"
1331 |       }
1332 |     },
1333 |     {
1334 |       "cell_type": "code",
1335 |       "source": [
1336 |         "from google.colab import drive\n",
1337 |         "from huggingface_hub import notebook_login\n",
1338 |         "notebook_login()\n",
1339 |         "load_dotenv()\n",
1340 |         "os.environ[\"NEO4J_URI\"] = 'neo4j+s://d5dffe81.databases.neo4j.io'\n",
1341 |         "os.environ[\"NEO4J_USERNAME\"] = 'neo4j'\n",
1342 |         "os.environ[\"NEO4J_PASSWORD\"] = 'C8A_mt9s8yar3i44Xi1bVbkrFVK3aCXE1w5cQvHv6LM'\n",
1343 |         "os.environ['NEO4J_URL'] = \"bolt://server_ip:7687\"\n",
1344 |         "drive.mount('/content/drive')"
1345 |       ],
1346 |       "metadata": {
1347 |         "id": "TV8qynpS8ndS",
1348 |         "outputId": "ce43227b-a99f-4863-98a7-c4ac47624dd1",
1349 |         "colab": {
1350 |           "base_uri": "https://localhost:8080/",
1351 |           "height": 348,
1352 |           "referenced_widgets": [
1353 |             "a5f9427673584f24b42922ebcf8714f4",
1354 |             "78051657194346bc99fb58409ed67870",
1355 |             "f716d2a99aae4bf4b09349c5d7fc695a",
1356 |             "2f2d4b6731c042cf8b1d64db40f6234a",
1357 |             "1d686d4c60474afd907b722e20af1452",
1358 |             "3ada03b7a08244a797a4178bbe935318",
1359 |             "5b26c881dd42490eb144e255ae685fcd",
1360 |             "8b78231bf8a94374b6cf468e9fa85929",
1361 |             "aecb1c13c7b4493ba613d690bdd2707a",
1362 |             "50b3cb1532554487b840a0a8539be7dc",
1363 |             "0516db4733d84ca1a04be0d90f3cbfa9",
1364 |             "ef0ef461e11a4c9f9c7106709388c9ea",
1365 |             "bc0abd1fc56e4b418d551936ccbf724d",
1366 |             "febb82295cce487abe735f9ecb72796e",
1367 |             "2dd16270e324409aa8a5e5f8d9464665",
1368 |             "202446995dd6467fab421b26fdd4189d",
1369 |             "e771c4cb03be41b2bf7c0ee82e255d5a"
1370 |           ]
1371 |         }
1372 |       },
1373 |       "execution_count": 15,
1374 |       "outputs": [
1375 |         {
1376 |           "output_type": "display_data",
1377 |           "data": {
1378 |             "text/plain": [
1379 |               "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
1380 |             ],
1381 |             "application/vnd.jupyter.widget-view+json": {
1382 |               "version_major": 2,
1383 |               "version_minor": 0,
1384 |               "model_id": "a5f9427673584f24b42922ebcf8714f4"
1385 |             }
1386 |           },
1387 |           "metadata": {}
1388 |         },
1389 |         {
1390 |           "output_type": "stream",
1391 |           "name": "stdout",
1392 |           "text": [
1393 |             "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
1394 |           ]
1395 |         }
1396 |       ]
1397 |     },
1398 |     {
1399 |       "cell_type": "markdown",
1400 |       "source": [
1401 |         "**Loading Model in Notebook:**"
1402 |       ],
1403 |       "metadata": {
1404 |         "id": "k1HiUGtY-hVg"
1405 |       }
1406 |     },
1407 |     {
1408 |       "cell_type": "code",
1409 |       "source": [
1410 |         "import torch\n",
1411 |         "from torch import cuda, bfloat16\n",
1412 |         "import transformers\n",
1413 |         "model_id = 'google/gemma-7b'\n",
1414 |         "device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'"
1415 |       ],
1416 |       "metadata": {
1417 |         "id": "Wb_P1tFB86E4"
1418 |       },
1419 |       "execution_count": 16,
1420 |       "outputs": []
1421 |     },
1422 |     {
1423 |       "cell_type": "code",
1424 |       "source": [
1425 |         "# Begin initializing the Hugging Face objects (this requires an access token)\n",
1426 |         "model_config = transformers.AutoConfig.from_pretrained(\n",
1427 |         "    model_id,\n",
1428 |         ")"
1429 |       ],
1430 |       "metadata": {
1431 |         "id": "8-5HISb4-qgq"
1432 |       },
1433 |       "execution_count": 17,
1434 |       "outputs": []
1435 |     },
1436 |     {
1437 |       "cell_type": "code",
1438 |       "source": [
1439 |         "# BnB Configuration\n",
1440 |         "bnb_config = transformers.BitsAndBytesConfig(\n",
1441 |         "    load_in_4bit=True,\n",
1442 |         "    bnb_4bit_quant_type='nf4',\n",
1443 |         "    bnb_4bit_use_double_quant=True,\n",
1444 |         "    bnb_4bit_compute_dtype=bfloat16\n",
1445 |         ")"
1446 |       ],
1447 |       "metadata": {
1448 |         "id": "C45XOIQEAVDz"
1449 |       },
1450 |       "execution_count": 18,
1451 |       "outputs": []
1452 |     },
1453 |     {
1454 |       "cell_type": "code",
1455 |       "source": [
1456 |         "model = transformers.AutoModelForCausalLM.from_pretrained(\n",
1457 |         "    model_id,\n",
1458 |         "    config=model_config,\n",
1459 |         "    device_map='auto',\n",
1460 |         "    quantization_config=bnb_config,\n",
1461 |         "    low_cpu_mem_usage=True\n",
1462 |         ")"
1463 |       ],
1464 |       "metadata": {
1465 |         "colab": {
1466 |           "base_uri": "https://localhost:8080/",
1467 |           "height": 81,
1468 |           "referenced_widgets": [
1469 |             "f73c306cce8d4be1aea3ad912e9ef5dc",
1470 |             "6f7ec6e81fbc4babab80f5b352caedc8",
1471 |             "6d830be2c25a4e499ed8f57758638e23",
1472 |             "50deede7561347e8af1e0b213d33c3de",
1473 |             "55b60bfe36b14fc6837959b6b9f56fac",
1474 |             "24db6f406a024e6f8c78ef83752fe336",
1475 |             "42e137910a8b45859ae3517d5bd0cb17",
1476 |             "fc1fe6d6b77145828e33cc228c694082",
1477 |             "93c80c4fcf01411e8f70d4c7daf23423",
1478 |             "52d69bc57f3c4299b9aad7c536735f99",
1479 |             "78de396d6b2d4ce384faf393b84b73a2",
1480 |             "88f08961299f438ea00dd88d61ec99d2",
1481 |             "4ab68fde0d5845e498b23536ee61e828",
1482 |             "476045c378f942a6b3066e6ffd8e93b6",
1483 |             "31faf6bbc65340f6ae0bae2b20f825f0",
1484 |             "d7a88f0f41e64fbb9eb04c208276d5e9",
1485 |             "26d4da72a16b4381aef9244f59c536b7",
1486 |             "d97e2daa67f8430fbe62fd64cf753f73",
1487 |             "9ba5c9837ffa48a1a98375cf8f7ea58c",
1488 |             "7b0b028a1df640d49ec7b6a3ea8ed5cd",
1489 |             "694328c260294586bf00c5f8904b26a2",
1490 |             "e444efde30f146efba56fa1d0bcee1fd"
1491 |           ]
1492 |         },
1493 |         "id": "JSb__D2C-uMx",
1494 |         "outputId": "2f34fe24-fc5d-46f4-d7b6-622ac0af285f"
1495 |       },
1496 |       "execution_count": 19,
1497 |       "outputs": [
1498 |         {
1499 |           "output_type": "display_data",
1500 |           "data": {
1501 |             "text/plain": [
1502 |               "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
1503 |             ],
1504 |             "application/vnd.jupyter.widget-view+json": {
1505 |               "version_major": 2,
1506 |               "version_minor": 0,
1507 |               "model_id": "f73c306cce8d4be1aea3ad912e9ef5dc"
1508 |             }
1509 |           },
1510 |           "metadata": {}
1511 |         },
1512 |         {
1513 |           "output_type": "display_data",
1514 |           "data": {
1515 |             "text/plain": [
1516 |               "generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]"
1517 |             ],
1518 |             "application/vnd.jupyter.widget-view+json": {
1519 |               "version_major": 2,
1520 |               "version_minor": 0,
1521 |               "model_id": "88f08961299f438ea00dd88d61ec99d2"
1522 |             }
1523 |           },
1524 |           "metadata": {}
1525 |         }
1526 |       ]
1527 |     },
1528 |     {
1529 |       "cell_type": "code",
1530 |       "source": [
1531 |         "# Switch the model to evaluation mode and show what the model looks like:\n",
1532 |         "model.eval()"
1533 |       ],
1534 |       "metadata": {
1535 |         "id": "nILHQLiu-zIs"
1536 |       },
1537 |       "execution_count": null,
1538 |       "outputs": []
1539 |     },
1540 |     {
1541 |       "cell_type": "code",
1542 |       "source": [
1543 |         "tokenizer = transformers.AutoTokenizer.from_pretrained(\n",
1544 |         "    model_id,\n",
1545 |         ")"
1546 |       ],
1547 |       "metadata": {
1548 |         "id": "HoHZiP3SFJHE"
1549 |       },
1550 |       "execution_count": null,
1551 |       "outputs": []
1552 |     },
1553 |     {
1554 |       "cell_type": "code",
1555 |       "source": [
1556 |         "def bytes_to_giga_bytes(bytes):\n",
1557 |         "  return bytes / 1024 / 1024 / 1024"
1558 |       ],
1559 |       "metadata": {
1560 |         "id": "_725TDgc6oit"
1561 |       },
1562 |       "execution_count": null,
1563 |       "outputs": []
1564 |     },
1565 |     {
1566 |       "cell_type": "code",
1567 |       "source": [
1568 |         "bytes_to_giga_bytes(torch.cuda.max_memory_allocated())"
1569 |       ],
1570 |       "metadata": {
1571 |         "id": "-6YYkQq_6sgT"
1572 |       },
1573 |       "execution_count": null,
1574 |       "outputs": []
1575 |     },
1576 |     {
1577 |       "cell_type": "code",
1578 |       "source": [
1579 |         "import gc\n",
1580 |         "import torch\n",
1581 |         "\n",
1582 |         "def flush():\n",
1583 |         "  gc.collect()\n",
1584 |         "  torch.cuda.empty_cache()\n",
1585 |         "  torch.cuda.reset_peak_memory_stats()"
1586 |       ],
1587 |       "metadata": {
1588 |         "id": "G6J7mgxF7Jit"
1589 |       },
1590 |       "execution_count": null,
1591 |       "outputs": []
1592 |     },
1593 |     {
1594 |       "cell_type": "code",
1595 |       "source": [
1596 |         "flush()"
1597 |       ],
1598 |       "metadata": {
1599 |         "id": "nnbBYqzc7ZBk"
1600 |       },
1601 |       "execution_count": null,
1602 |       "outputs": []
1603 |     },
1604 |     {
1605 |       "cell_type": "code",
1606 |       "source": [
1607 |         "# List of strings representing stop signals or markers\n",
1608 |         "stop_list = ['\\nHuman:', '\\n```\\n']\n",
1609 |         "stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]\n",
1610 |         "stop_token_ids"
1611 |       ],
1612 |       "metadata": {
1613 |         "id": "YTICB6xsGxw-"
1614 |       },
1615 |       "execution_count": null,
1616 |       "outputs": []
1617 |     },
1618 |     {
1619 |       "cell_type": "code",
1620 |       "source": [
1621 |         "# Convert token IDs to LongTensor objects\n",
1622 |         "import torch\n",
1623 |         "stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]\n",
1624 |         "stop_token_ids"
1625 |       ],
1626 |       "metadata": {
1627 |         "id": "7p6mgjI9HOcD"
1628 |       },
1629 |       "execution_count": null,
1630 |       "outputs": []
1631 |     },
1632 |     {
1633 |       "cell_type": "markdown",
1634 |       "source": [
1635 |         "**Stopping Criteria for Text Generation:**"
1636 |       ],
1637 |       "metadata": {
1638 |         "id": "XNiM5KxWH9qV"
1639 |       }
1640 |     },
1641 |     {
1642 |       "cell_type": "code",
1643 |       "source": [
1644 |         "from transformers import StoppingCriteria, StoppingCriteriaList\n",
1645 |         "\n",
1646 |         "# Custom stopping criterion: halt generation once the output ends with any of the stop sequences\n",
1647 |         "class StopOnTokens(StoppingCriteria):\n",
1648 |         "    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:\n",
1649 |         "        for stop_ids in stop_token_ids:\n",
1650 |         "            if torch.equal(input_ids[0][-len(stop_ids):], stop_ids):\n",
1651 |         "                return True\n",
1652 |         "        return False\n",
1653 |         "\n",
1654 |         "stopping_criteria = StoppingCriteriaList([StopOnTokens()])"
1655 |       ],
1656 |       "metadata": {
1657 |         "id": "UeHmT-GzHerh"
1658 |       },
1659 |       "execution_count": null,
1660 |       "outputs": []
1661 |     },
1662 |     {
1663 |       "cell_type": "markdown",
1664 |       "source": [
1665 |         "**Testing the Hugging Face Pipeline:**"
1666 |       ],
1667 |       "metadata": {
1668 |         "id": "4DRI1tSuOPpd"
1669 |       }
1670 |     },
1671 |     {
1672 |       "cell_type": "code",
1673 |       "source": [
1674 |         "# Set up text generation pipeline\n",
1675 |         "generate_text = transformers.pipeline(\n",
1676 |         "    model=model,\n",
1677 |         "    tokenizer=tokenizer,\n",
1678 |         "    return_full_text=True,\n",
1679 |         "    task='text-generation',\n",
1680 |         "    stopping_criteria=stopping_criteria,\n",
1681 |         "    temperature=0.3,\n",
1682 |         "    max_new_tokens=512,\n",
1683 |         "    repetition_penalty=1.1\n",
1684 |         ")"
1685 |       ],
1686 |       "metadata": {
1687 |         "id": "g3A-YXDjHrHu"
1688 |       },
1689 |       "execution_count": null,
1690 |       "outputs": []
1691 |     },
1692 |     {
1693 |       "cell_type": "code",
1694 |       "source": [
1695 |         "result = generate_text(\"What are the primary mechanisms underlying antibiotic resistance, and how can we develop strategies to combat it?\")\n",
1696 |         "print(result)"
1697 |       ],
1698 |       "metadata": {
1699 |         "id": "uTRpa0_mInOA"
1700 |       },
1701 |       "execution_count": null,
1702 |       "outputs": []
1703 |     },
1704 |     {
1705 |       "cell_type": "code",
1706 |       "source": [
1707 |         "from langchain.llms import HuggingFacePipeline\n",
1708 |         "\n",
1709 |         "llm = HuggingFacePipeline(pipeline=generate_text)\n",
1710 |         "llm(prompt=\"How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\")"
1711 |       ],
1712 |       "metadata": {
1713 |         "id": "Uh3unyirTtTa"
1714 |       },
1715 |       "execution_count": null,
1716 |       "outputs": []
1717 |     },
1718 |     {
1719 |       "cell_type": "markdown",
1720 |       "source": [
1721 |         "**Loading Document Data:**"
1722 |       ],
1723 |       "metadata": {
1724 |         "id": "kBNYzCfyOJRz"
1725 |       }
1726 |     },
1727 |     {
1728 |       "cell_type": "code",
1729 |       "source": [
1730 |         "from langchain_community.document_loaders import DirectoryLoader\n",
1731 |         "loader = DirectoryLoader('/content/drive/MyDrive/BioMedical-Dataset', glob=\"**/*.pdf\")\n",
1732 |         "documents = loader.load()"
1733 |       ],
1734 |       "metadata": {
1735 |         "id": "knhECj2bOUkh"
1736 |       },
1737 |       "execution_count": null,
1738 |       "outputs": []
1739 |     },
1740 |     {
1741 |       "cell_type": "code",
1742 |       "source": [
1743 |         "print(len(documents))"
1744 |       ],
1745 |       "metadata": {
1746 |         "id": "T0Yp2a8Goeqh"
1747 |       },
1748 |       "execution_count": null,
1749 |       "outputs": []
1750 |     },
1751 |     {
1752 |       "cell_type": "code",
1753 |       "source": [
1754 |         "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
1755 |         "\n",
1756 |         "MARKDOWN_SEPARATORS = [\n",
1757 |         "    \"\\n#{1,6} \",\n",
1758 |         "    \"```\\n\",\n",
1759 |         "    \"\\n\\\\*\\\\*\\\\*+\\n\",\n",
1760 |         "    \"\\n---+\\n\",\n",
1761 |         "    \"\\n___+\\n\",\n",
1762 |         "    \"\\n\\n\",\n",
1763 |         "    \"\\n\",\n",
1764 |         "    \" \",\n",
1765 |         "    \"\",\n",
1766 |         "]\n",
1767 |         "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,\n",
1768 |         "                                               chunk_overlap=30,\n",
1769 |         "                                               add_start_index=True,\n",
1770 |         "                                               separators=MARKDOWN_SEPARATORS)\n",
1771 |         "\n",
1772 |         "processed_text_splits = text_splitter.split_documents(documents)"
1773 |       ],
1774 |       "metadata": {
1775 |         "id": "6Tznkr3PIawL"
1776 |       },
1777 |       "execution_count": null,
1778 |       "outputs": []
1779 |     },
1780 |     {
1781 |       "cell_type": "code",
1782 |       "source": [
1783 |         "processed_text_splits[120].page_content"
1784 |       ],
1785 |       "metadata": {
1786 |         "id": "91wesl_N4yYm"
1787 |       },
1788 |       "execution_count": null,
1789 |       "outputs": []
1790 |     },
1791 |     {
1792 |       "cell_type": "code",
1793 |       "source": [
1794 |         "print(len(processed_text_splits))"
1795 |       ],
1796 |       "metadata": {
1797 |         "id": "El9WlHtHCiTv"
1798 |       },
1799 |       "execution_count": null,
1800 |       "outputs": []
1801 |     },
1802 |     {
1803 |       "cell_type": "code",
1804 |       "source": [
1805 |         "# Set up the embedding model used to embed the text chunks before they are stored in the graph DB\n",
1806 |         "from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n",
1807 |         "\n",
1808 |         "model_name = \"BAAI/bge-base-en-v1.5\"\n",
1809 |         "model_kwargs = {\"device\": \"cuda\"}\n",
1810 |         "encode_kwargs = {\"normalize_embeddings\": True}\n",
1811 |         "embeddings = HuggingFaceBgeEmbeddings(\n",
1812 |         "    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs\n",
1813 |         ")"
1814 |       ],
1815 |       "metadata": {
1816 |         "id": "KqOudIQtIayw"
1817 |       },
1818 |       "execution_count": null,
1819 |       "outputs": []
1820 |     },
1821 |     {
1822 |       "cell_type": "markdown",
1823 |       "source": [
1824 |         "**Load Neo4j Graph:**"
1825 |       ],
1826 |       "metadata": {
1827 |         "id": "WaGxQILaCwQv"
1828 |       }
1829 |     },
1830 |     {
1831 |       "cell_type": "code",
1832 |       "source": [
1833 |         "from langchain.graphs import Neo4jGraph\n",
1834 |         "\n",
1835 |         "graph = Neo4jGraph(\n",
1836 |         "    url=os.environ[\"NEO4J_URI\"],\n",
1837 |         "    username=os.environ[\"NEO4J_USERNAME\"],\n",
1838 |         "    password=os.environ[\"NEO4J_PASSWORD\"]\n",
1839 |         ")"
1840 |       ],
1841 |       "metadata": {
1842 |         "id": "v__99eeVDVdB"
1843 |       },
1844 |       "execution_count": null,
1845 |       "outputs": []
1846 |     },
1847 |     {
1848 |       "cell_type": "markdown",
1849 |       "source": [
1850 |         "**Creating a new custom Index using Cypher:**"
1851 |       ],
1852 |       "metadata": {
1853 |         "id": "KasRmCh0bWJP"
1854 |       }
1855 |     },
1856 |     {
1857 |       "cell_type": "code",
1858 |       "source": [
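1859 |         "# Create a new vector index sized for the custom embedding model (768 dimensions, cosine similarity).\n",
1860 |         "# Left disabled (wrapped in a string literal) because the index has already been created.\n",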
1861 |         "'''\n",
1862 |         "graph.query(\"\"\"\n",
1863 |         "CALL db.index.vector.createNodeIndex(\n",
1864 |         "  'KG-Enhanced-QnA-Biomedical',\n",
1865 |         "  'text_splits',\n",
1866 |         "  'embeddings',\n",
1867 |         "   768,\n",
1868 |         "   'cosine'\n",
1869 |         ")\n",
1870 |         "\"\"\")\n",
1871 |         "'''"
1872 |       ],
1873 |       "metadata": {
1874 |         "id": "Lhzqh0uKbVJu"
1875 |       },
1876 |       "execution_count": null,
1877 |       "outputs": []
1878 |     },
1879 |     {
1880 |       "cell_type": "markdown",
1881 |       "source": [
1882 |         "**Show Created Vector Index:**"
1883 |       ],
1884 |       "metadata": {
1885 |         "id": "z3ayEjLyHTAm"
1886 |       }
1887 |     },
1888 |     {
1889 |       "cell_type": "code",
1890 |       "source": [
1891 |         "from neo4j import GraphDatabase\n",
1892 |         "uri = os.environ[\"NEO4J_URI\"]\n",
1893 |         "username = os.environ[\"NEO4J_USERNAME\"]\n",
1894 |         "password = os.environ[\"NEO4J_PASSWORD\"]\n",
1895 |         "\n",
1896 |         "driver = GraphDatabase.driver(uri, auth=(username, password))\n",
1897 |         "session = driver.session()\n",
1898 |         "\n",
1899 |         "result = session.run(\"SHOW VECTOR INDEXES\")\n",
1900 |         "\n",
1901 |         "for record in result:\n",
1902 |         "   print(record)"
1903 |       ],
1904 |       "metadata": {
1905 |         "id": "SFLExy9OxptZ"
1906 |       },
1907 |       "execution_count": null,
1908 |       "outputs": []
1909 |     },
1910 |     {
1911 |       "cell_type": "code",
1912 |       "source": [
1913 |         "''' chunks = [{'text': document.page_content, 'embedding': embeddings.embed_query(document.page_content)}\n",
1914 |         "          for document in documents if len(document.page_content) >  50]  '''"
1915 |       ],
1916 |       "metadata": {
1917 |         "id": "ii7wVWOmJYBb"
1918 |       },
1919 |       "execution_count": null,
1920 |       "outputs": []
1921 |     },
1922 |     {
1923 |       "cell_type": "code",
1924 |       "source": [
1925 |         "'''\n",
1926 |         "graph.query(\"\"\"\n",
1927 |         "UNWIND $data AS row\n",
1928 |         "CREATE (c:Chunk {text: row.text})\n",
1929 |         "WITH c, row\n",
1930 |         "CALL db.create.setVectorProperty(c, 'embedding', row.embedding)\n",
1931 |         "YIELD node\n",
1932 |         "RETURN distinct 'done'\n",
1933 |         "\"\"\", {'data': chunks})\n",
1934 |         "'''"
1935 |       ],
1936 |       "metadata": {
1937 |         "id": "B0KQnPT8Lxqi"
1938 |       },
1939 |       "execution_count": null,
1940 |       "outputs": []
1941 |     },
1942 |     {
1943 |       "cell_type": "code",
1944 |       "source": [
1945 |         "'''\n",
1946 |         "vector_search = \"\"\"\n",
1947 |         "WITH $embedding AS e\n",
1948 |         "CALL db.index.vector.queryNodes('KG-Enhanced-QnA-Biomedical',$k, e) yield node, score\n",
1949 |         "RETURN node.text AS result\n",
1950 |         "ORDER BY score DESC\n",
1951 |         "LIMIT 3\n",
1952 |         "\"\"\"\n",
1953 |         "'''"
1954 |       ],
1955 |       "metadata": {
1956 |         "id": "0Tbwl91LjKd4"
1957 |       },
1958 |       "execution_count": null,
1959 |       "outputs": []
1960 |     },
1961 |     {
1962 |       "cell_type": "code",
1963 |       "source": [
1964 |         "# Instantiate Neo4j vector from documents\n",
1965 |         "neo4j_vector = Neo4jVector.from_documents(\n",
1966 |         "    processed_text_splits,\n",
1967 |         "    embeddings,\n",
1968 |         "    index_name='KG-Enhanced-QnA-Biomedical',\n",
1969 |         "     url=os.environ[\"NEO4J_URI\"],\n",
1970 |         "    username=os.environ[\"NEO4J_USERNAME\"],\n",
1971 |         "    password=os.environ[\"NEO4J_PASSWORD\"]\n",
1972 |         ")"
1973 |       ],
1974 |       "metadata": {
1975 |         "id": "HHjtYv6JDVfg"
1976 |       },
1977 |       "execution_count": null,
1978 |       "outputs": []
1979 |     },
1980 |     {
1981 |       "cell_type": "code",
1982 |       "source": [
1983 |         "# Performing Similarity Search\n",
1984 |         "query = \"How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\"\n",
1985 |         "vector_results = neo4j_vector.similarity_search(query, k=2)\n",
1986 |         "\n",
1987 |         "for i, res in enumerate(vector_results):\n",
1988 |         "    print(res.page_content)\n",
1989 |         "    if i != len(vector_results) - 1:\n",
1990 |         "        print()\n",
1991 |         "vector_result = vector_results[0].page_content"
1992 |       ],
1993 |       "metadata": {
1994 |         "id": "UAV-XTaHJIjT"
1995 |       },
1996 |       "execution_count": null,
1997 |       "outputs": []
1998 |     },
1999 |     {
2000 |       "cell_type": "code",
2001 |       "source": [
2002 |         "from langchain.chains import GraphCypherQAChain\n",
2003 |         "from langchain.graphs import Neo4jGraph"
2004 |       ],
2005 |       "metadata": {
2006 |         "id": "Az7n09kuW4k4"
2007 |       },
2008 |       "execution_count": null,
2009 |       "outputs": []
2010 |     },
2011 |     {
2012 |       "cell_type": "code",
2013 |       "source": [
2014 |         "from langchain.chains.base import Chain\n",
2015 |         "from langchain.chains.llm import LLMChain\n",
2016 |         "from langchain.chat_models import ChatOpenAI\n",
2017 |         "from langchain.chains.question_answering.stuff_prompt import CHAT_PROMPT\n",
2018 |         "from langchain.callbacks.manager import CallbackManagerForChainRun\n",
2019 |         "from typing import Any, Dict, List\n",
2020 |         "from pydantic import Field"
2021 |       ],
2022 |       "metadata": {
2023 |         "id": "uE_4FkZtMyaL"
2024 |       },
2025 |       "execution_count": null,
2026 |       "outputs": []
2027 |     },
2028 |     {
2029 |       "cell_type": "code",
2030 |       "source": [
2031 |         "vector_search = \"\"\"\n",
2032 |         "WITH $embedding AS e\n",
2033 |         "CALL db.index.vector.queryNodes('KG-Enhanced-QnA-Biomedical',$k, e) yield node, score\n",
2034 |         "RETURN node.text AS result\n",
2035 |         "ORDER BY score DESC\n",
2036 |         "LIMIT 3\n",
2037 |         "\"\"\""
2038 |       ],
2039 |       "metadata": {
2040 |         "id": "jqnt60GeNB3O"
2041 |       },
2042 |       "execution_count": null,
2043 |       "outputs": []
2044 |     },
2045 |     {
2046 |       "cell_type": "code",
2047 |       "source": [
2048 |         "print(graph.schema)"
2049 |       ],
2050 |       "metadata": {
2051 |         "id": "yf3jH9ZGXPUN"
2052 |       },
2053 |       "execution_count": null,
2054 |       "outputs": []
2055 |     },
2056 |     {
2057 |       "cell_type": "code",
2058 |       "source": [
2059 |         "class Neo4jVectorChain(Chain):\n",
2060 |         "    graph: Neo4jGraph = Field(exclude=True)\n",
2061 |         "    input_key: str = \"query\"\n",
2062 |         "    output_key: str = \"result\"\n",
2063 |         "    embeddings: HuggingFaceBgeEmbeddings = HuggingFaceBgeEmbeddings()\n",
2064 |         "    qa_chain: LLMChain = LLMChain(llm=llm, prompt=CHAT_PROMPT)\n",
2065 |         "\n",
2066 |         "    @property\n",
2067 |         "    def input_keys(self) -> List[str]:\n",
2068 |         "        return [self.input_key]\n",
2069 |         "\n",
2070 |         "    @property\n",
2071 |         "    def output_keys(self) -> List[str]:\n",
2072 |         "        _output_keys = [self.output_key]\n",
2073 |         "        return _output_keys\n",
2074 |         "\n",
2075 |         "    def _call(self, inputs: Dict[str, str], run_manager, k=3) -> Dict[str, Any]:\n",
2076 |         "        question = inputs[self.input_key]\n",
2077 |         "        embedding = self.embeddings.embed_query(question)\n",
2078 |         "\n",
2079 |         "        context = self.graph.query(vector_search, {'embedding': embedding, 'k': 3})\n",
2080 |         "        context = [el['result'] for el in context]\n",
2081 |         "\n",
2082 |         "        result = self.qa_chain({\"question\": question, \"context\": context})\n",
2083 |         "        final_result = result[self.qa_chain.output_key]\n",
2084 |         "        return {self.output_key: final_result}"
2085 |       ],
2086 |       "metadata": {
2087 |         "id": "wYZQ44hsNT4y"
2088 |       },
2089 |       "execution_count": null,
2090 |       "outputs": []
2091 |     },
2092 |     {
2093 |       "cell_type": "code",
2094 |       "source": [
2095 |         "chain = Neo4jVectorChain(graph=graph, embeddings=embeddings, verbose=True)"
2096 |       ],
2097 |       "metadata": {
2098 |         "id": "tksLTpsqOGGq"
2099 |       },
2100 |       "execution_count": null,
2101 |       "outputs": []
2102 |     },
2103 |     {
2104 |       "cell_type": "code",
2105 |       "source": [
2106 |         "graph_result = chain.run(\"How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\")"
2107 |       ],
2108 |       "metadata": {
2109 |         "id": "TS4Nwf6pONk1"
2110 |       },
2111 |       "execution_count": null,
2112 |       "outputs": []
2113 |     },
2114 |     {
2115 |       "cell_type": "code",
2116 |       "source": [
2117 |         "chain = GraphCypherQAChain.from_llm(\n",
2118 |         "    cypher_llm=llm,\n",
2119 |         "    qa_llm=llm,\n",
2120 |         "    graph=graph,\n",
2121 |         "    verbose=True,\n",
2122 |         "    return_intermediate_steps=True,\n",
2123 |         "    validate_cypher=True\n",
2124 |         ")"
2125 |       ],
2126 |       "metadata": {
2127 |         "id": "2Q-g7wGhXR0V"
2128 |       },
2129 |       "execution_count": null,
2130 |       "outputs": []
2131 |     },
2132 |     {
2133 |       "cell_type": "code",
2134 |       "source": [
2135 |         "graph_result = chain.run(\"How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\")"
2136 |       ],
2137 |       "metadata": {
2138 |         "id": "JUxlLbCEXczj"
2139 |       },
2140 |       "execution_count": null,
2141 |       "outputs": []
2142 |     }
2143 |   ]
2144 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Retrieval-Augmented Generation with LangChain and Neo4j Graph DB
2 | 


--------------------------------------------------------------------------------