├── .gitignore
├── LICENSE
├── Neo4j_and_LangChain_for_Enhanced_Question_Answering.ipynb
├── RAG_with_Graph_Database.ipynb
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Mohd Kaif
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Neo4j_and_LangChain_for_Enhanced_Question_Answering.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "source": [
20 | "## Integrating Unstructured and Graph Knowledge with Neo4j and LangChain for Enhanced Question Answering"
21 | ],
22 | "metadata": {
23 | "id": "LXzvg_0MUzhA"
24 | }
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "source": [
29 | "\n",
30 | "\n",
31 | "#### Installing Dependencies"
32 | ],
33 | "metadata": {
34 | "id": "b0AVqZ9XVC9Z"
35 | }
36 | },
37 | {
38 | "cell_type": "code",
39 | "source": [
40 | "! pip install -qU \\\n",
41 | " transformers \\\n",
42 | " datasets \\\n",
43 | " langchain \\\n",
44 | " openai \\\n",
45 | " wikipedia \\\n",
46 | " tiktoken \\\n",
47 | " neo4j \\\n",
48 | " python-dotenv"
49 | ],
50 | "metadata": {
51 | "colab": {
52 | "base_uri": "https://localhost:8080/"
53 | },
54 | "id": "F52G_upjVIGt",
55 | "outputId": "9e26d2c5-6294-4c3f-de4f-5a51e8961003"
56 | },
57 | "execution_count": 1,
58 | "outputs": [
59 | {
60 | "output_type": "stream",
61 | "name": "stdout",
62 | "text": [
63 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m49.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
64 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m493.7/493.7 kB\u001b[0m \u001b[31m27.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
65 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.9/1.9 MB\u001b[0m \u001b[31m86.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
66 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.0/77.0 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
67 | "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
68 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m90.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
69 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m192.4/192.4 kB\u001b[0m \u001b[31m19.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
70 | "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
71 | " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
72 | " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
73 | " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
74 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m29.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
75 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m96.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
76 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m77.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
77 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
78 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
79 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.3/43.3 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
80 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
81 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
82 | "\u001b[?25h Building wheel for wikipedia (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
83 | " Building wheel for neo4j (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
84 | "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
85 | "llmx 0.0.15a0 requires cohere, which is not installed.\u001b[0m\u001b[31m\n",
86 | "\u001b[0m"
87 | ]
88 | }
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "source": [
94 | "#### Importing Packages"
95 | ],
96 | "metadata": {
97 | "id": "B_5pjB2WX_DZ"
98 | }
99 | },
100 | {
101 | "cell_type": "code",
102 | "source": [
103 | "import os\n",
104 | "import re\n",
105 | "from langchain.vectorstores.neo4j_vector import Neo4jVector\n",
106 | "from langchain.document_loaders import WikipediaLoader\n",
107 | "from langchain.embeddings.openai import OpenAIEmbeddings\n",
108 | "from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter\n",
109 | "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
110 | "from dotenv import load_dotenv"
111 | ],
112 | "metadata": {
113 | "id": "R-h-iIDmYFGh"
114 | },
115 | "execution_count": 26,
116 | "outputs": []
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "source": [
121 | "#### Setting API Keys in Environment Variables"
122 | ],
123 | "metadata": {
124 | "id": "_JY_gy3BqptG"
125 | }
126 | },
127 | {
128 | "cell_type": "code",
129 | "source": [
130 | "from getpass import getpass\n",
131 | "# Credentials come from a git-ignored .env file; any that are missing are prompted for, so no secret is stored in the notebook.\n",
132 | "load_dotenv()\n",
133 | "for _name in (\"OPENAI_API_KEY\", \"NEO4J_URI\", \"NEO4J_USERNAME\", \"NEO4J_PASSWORD\"):\n",
134 | "    os.environ[_name] = os.getenv(_name) or getpass(f\"{_name}: \")"
135 | ],
136 | "metadata": {
137 | "id": "BIHTHxNtYGRN"
138 | },
139 | "execution_count": 27,
140 | "outputs": []
141 | },
142 | {
143 | "cell_type": "code",
144 | "source": [
145 | "# Confirm each setting is present without echoing secret values into the saved notebook output.\n",
146 | "for _name in (\"OPENAI_API_KEY\", \"NEO4J_URI\", \"NEO4J_USERNAME\", \"NEO4J_PASSWORD\"):\n",
147 | "    _state = \"set\" if os.getenv(_name) else \"MISSING\"\n",
148 | "    print(f\"{_name}: {_state}\")"
149 | ],
150 | "metadata": {
151 | "colab": {
152 | "base_uri": "https://localhost:8080/"
153 | },
154 | "id": "lhhy1qO5orHa",
155 | "outputId": "a99bdcfc-56e3-43c1-8926-562af2f19e44"
156 | },
157 | "execution_count": 28,
158 | "outputs": [
159 | {
160 | "output_type": "stream",
161 | "name": "stdout",
162 | "text": [
163 | "OPENAI_API_KEY: set\n",
164 | "NEO4J_URI: set\n",
165 | "NEO4J_USERNAME: set\n",
166 | "NEO4J_PASSWORD: set\n"
167 | ]
168 | }
169 | ]
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "source": [
174 | "#### Data Preprocessing"
175 | ],
176 | "metadata": {
177 | "id": "QiTvOmA8rgeZ"
178 | }
179 | },
180 | {
181 | "cell_type": "code",
182 | "source": [
183 | "from transformers import AutoTokenizer\n",
184 | "\n",
185 | "# Define the tokenizer using \"bert-base-uncased\"\n",
186 | "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n",
187 | "\n",
188 | "# Function to calculate the number of tokens in a text\n",
189 | "def bert_len(text):\n",
190 | " tokens = tokenizer.encode(text)\n",
191 | " return len(tokens)\n",
192 | "\n",
193 | "# Example usage\n",
194 | "input_text = \"This is a sample sentence for tokenization.\"\n",
195 | "num_tokens = bert_len(input_text)\n",
196 | "print(f\"Number of tokens: {num_tokens}\")"
197 | ],
198 | "metadata": {
199 | "id": "lUF9fXmvYO1h",
200 | "colab": {
201 | "base_uri": "https://localhost:8080/"
202 | },
203 | "outputId": "a5b6c25c-5672-455d-e785-4c20148aa013"
204 | },
205 | "execution_count": 29,
206 | "outputs": [
207 | {
208 | "output_type": "stream",
209 | "name": "stdout",
210 | "text": [
211 | "Number of tokens: 11\n"
212 | ]
213 | }
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "source": [
219 | "# Load Wikipedia articles related to \"Leonhard Euler\"\n",
220 | "raw_documents = WikipediaLoader(query=\"Leonhard Euler\").load()\n",
221 | "\n",
222 | "# Define a text splitter with specific parameters\n",
223 | "text_splitter = RecursiveCharacterTextSplitter(\n",
224 | " chunk_size=200, chunk_overlap=20, length_function=bert_len, separators=['\\n\\n', '\\n', ' ', '']\n",
225 | ")\n",
226 | "\n",
227 | "# Split the content of the first Wikipedia article into smaller documents\n",
228 | "documents = text_splitter.create_documents([raw_documents[0].page_content])"
229 | ],
230 | "metadata": {
231 | "id": "YQqG0o7KbmqZ",
232 | "colab": {
233 | "base_uri": "https://localhost:8080/"
234 | },
235 | "outputId": "7b8e48d5-df80-4d1a-d192-11ce439fc62a"
236 | },
237 | "execution_count": 30,
238 | "outputs": [
239 | {
240 | "output_type": "stream",
241 | "name": "stderr",
242 | "text": [
243 | "Token indices sequence length is longer than the specified maximum sequence length for this model (736 > 512). Running this sequence through the model will result in indexing errors\n"
244 | ]
245 | }
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "source": [
251 | "print(len(documents))"
252 | ],
253 | "metadata": {
254 | "id": "c68Duv2Nbqqk",
255 | "colab": {
256 | "base_uri": "https://localhost:8080/"
257 | },
258 | "outputId": "4510be6f-af0c-4c08-e814-da06bf75f7f2"
259 | },
260 | "execution_count": 31,
261 | "outputs": [
262 | {
263 | "output_type": "stream",
264 | "name": "stdout",
265 | "text": [
266 | "18\n"
267 | ]
268 | }
269 | ]
270 | },
271 | {
272 | "cell_type": "markdown",
273 | "source": [
274 | "#### Initializing the Neo4j Graph Database"
275 | ],
276 | "metadata": {
277 | "id": "X043ugczr0X5"
278 | }
279 | },
280 | {
281 | "cell_type": "code",
282 | "source": [
283 | "# Instantiate Neo4j vector from documents\n",
284 | "neo4j_vector = Neo4jVector.from_documents(\n",
285 | " documents,\n",
286 | " OpenAIEmbeddings(),\n",
287 | " url=os.environ[\"NEO4J_URI\"],\n",
288 | " username=os.environ[\"NEO4J_USERNAME\"],\n",
289 | " password=os.environ[\"NEO4J_PASSWORD\"]\n",
290 | ")"
291 | ],
292 | "metadata": {
293 | "id": "RSHWwlbJcCi2"
294 | },
295 | "execution_count": 32,
296 | "outputs": []
297 | },
298 | {
299 | "cell_type": "markdown",
300 | "source": [
301 | "#### Performing Similarity Search on Ingested Documents"
302 | ],
303 | "metadata": {
304 | "id": "34Fm9UgHwWdG"
305 | }
306 | },
307 | {
308 | "cell_type": "code",
309 | "source": [
310 | "# Define the query.\n",
311 | "query = \"Who were the siblings of Leonhard Euler?\"\n",
312 | "\n",
313 | "# Execute the query, get top 2 results.\n",
314 | "vector_results = neo4j_vector.similarity_search(query, k=2)\n",
315 | "\n",
316 | "# Print search results with separation.\n",
317 | "for i, res in enumerate(vector_results):\n",
318 | " print(res.page_content)\n",
319 | " if i != len(vector_results) - 1:\n",
320 | " print()\n",
321 | "\n",
322 | "# Store the content of the most similar result.\n",
323 | "vector_result = vector_results[0].page_content"
324 | ],
325 | "metadata": {
326 | "colab": {
327 | "base_uri": "https://localhost:8080/"
328 | },
329 | "id": "qp8bl2hjruzg",
330 | "outputId": "b5922d9e-7f16-4250-f917-9af03d361fa4"
331 | },
332 | "execution_count": 33,
333 | "outputs": [
334 | {
335 | "output_type": "stream",
336 | "name": "stdout",
337 | "text": [
338 | "== Early life ==\n",
339 | "Leonhard Euler was born on 15 April 1707, in Basel to Paul III Euler, a pastor of the Reformed Church, and Marguerite (née Brucker), whose ancestors include a number of well-known scholars in the classics. He was the oldest of four children, having two younger sisters, An\n",
340 | "\n",
341 | "== Early life ==\n",
342 | "Leonhard Euler was born on 15 April 1707, in Basel to Paul III Euler, a pastor of the Reformed Church, and Marguerite (née Brucker), whose ancestors include a number of well-known scholars in the classics. He was the oldest of four children, having two younger sisters, An\n"
343 | ]
344 | }
345 | ]
346 | },
347 | {
348 | "cell_type": "markdown",
349 | "source": [
350 | "#### Building Knowledge Graph"
351 | ],
352 | "metadata": {
353 | "id": "yLCySJqcxV3W"
354 | }
355 | },
356 | {
357 | "cell_type": "code",
358 | "source": [
359 | "# Necessary Libraries to setup the Neo4j DB QuestionAnswering Chain\n",
360 | "from langchain.chat_models import ChatOpenAI\n",
361 | "from langchain.chains import GraphCypherQAChain\n",
362 | "from langchain.graphs import Neo4jGraph"
363 | ],
364 | "metadata": {
365 | "id": "EOHIAZrLxh8N"
366 | },
367 | "execution_count": 34,
368 | "outputs": []
369 | },
370 | {
371 | "cell_type": "code",
372 | "source": [
373 | "# Create a Neo4jGraph object by connecting to a Neo4j database.\n",
374 | "graph = Neo4jGraph(\n",
375 | " url=os.environ[\"NEO4J_URI\"],\n",
376 | " username=os.environ[\"NEO4J_USERNAME\"],\n",
377 | " password=os.environ[\"NEO4J_PASSWORD\"]\n",
378 | ")"
379 | ],
380 | "metadata": {
381 | "id": "N_sqke-SzfEQ"
382 | },
383 | "execution_count": 35,
384 | "outputs": []
385 | },
386 | {
387 | "cell_type": "code",
388 | "source": [
389 | "# Print the schema of the Neo4j graph.\n",
390 | "print(graph.schema)"
391 | ],
392 | "metadata": {
393 | "colab": {
394 | "base_uri": "https://localhost:8080/"
395 | },
396 | "id": "oHFGTHCEz_UD",
397 | "outputId": "eaf7ba8f-7188-4b5a-f4b3-f786f4a457ae"
398 | },
399 | "execution_count": 36,
400 | "outputs": [
401 | {
402 | "output_type": "stream",
403 | "name": "stdout",
404 | "text": [
405 | "\n",
406 | " Node properties are the following:\n",
407 | " [{'labels': 'Chunk', 'properties': [{'property': 'id', 'type': 'STRING'}, {'property': 'embedding', 'type': 'LIST'}, {'property': 'text', 'type': 'STRING'}]}]\n",
408 | " Relationship properties are the following:\n",
409 | " []\n",
410 | " The relationships are the following:\n",
411 | " []\n",
412 | " \n"
413 | ]
414 | }
415 | ]
416 | },
417 | {
418 | "cell_type": "code",
419 | "source": [
420 | "# Create a question-answering chain over the Neo4j graph using an OpenAI chat model (ChatOpenAI), with verbose mode enabled.\n",
421 | "chain = GraphCypherQAChain.from_llm(\n",
422 | " ChatOpenAI(temperature=0), graph=graph, verbose=True\n",
423 | ")"
424 | ],
425 | "metadata": {
426 | "id": "wN_9M9fi0OV8"
427 | },
428 | "execution_count": 37,
429 | "outputs": []
430 | },
431 | {
432 | "cell_type": "code",
433 | "source": [
434 | "# Use the question-answering chain to query the Neo4j graph.\n",
435 | "graph_result = chain.run(\"Who were the siblings of Leonhard Euler?\")"
436 | ],
437 | "metadata": {
438 | "colab": {
439 | "base_uri": "https://localhost:8080/"
440 | },
441 | "id": "Iw0foDNr0c-v",
442 | "outputId": "8a776932-7768-4191-d2a4-37c2163b9d2e"
443 | },
444 | "execution_count": 38,
445 | "outputs": [
446 | {
447 | "output_type": "stream",
448 | "name": "stdout",
449 | "text": [
450 | "\n",
451 | "\n",
452 | "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
453 | "Generated Cypher:\n",
454 | "\u001b[32;1m\u001b[1;3mMATCH (euler:Chunk {text: 'Leonhard Euler'})-[:SIBLING]->(sibling:Chunk)\n",
455 | "RETURN sibling.text\u001b[0m\n",
456 | "Full Context:\n",
457 | "\u001b[32;1m\u001b[1;3m[]\u001b[0m\n",
458 | "\n",
459 | "\u001b[1m> Finished chain.\u001b[0m\n"
460 | ]
461 | }
462 | ]
463 | },
464 | {
465 | "cell_type": "code",
466 | "source": [
467 | "graph_result"
468 | ],
469 | "metadata": {
470 | "colab": {
471 | "base_uri": "https://localhost:8080/",
472 | "height": 37
473 | },
474 | "id": "ljHsyK3z0pAf",
475 | "outputId": "7d95b141-ce27-4404-9e09-1f12c07f3ab8"
476 | },
477 | "execution_count": 39,
478 | "outputs": [
479 | {
480 | "output_type": "execute_result",
481 | "data": {
482 | "text/plain": [
483 | "\"I'm sorry, but I don't have any information about the siblings of Leonhard Euler.\""
484 | ],
485 | "application/vnd.google.colaboratory.intrinsic+json": {
486 | "type": "string"
487 | }
488 | },
489 | "metadata": {},
490 | "execution_count": 39
491 | }
492 | ]
493 | },
494 | {
495 | "cell_type": "code",
496 | "source": [],
497 | "metadata": {
498 | "id": "Ii1H4spv00ba"
499 | },
500 | "execution_count": null,
501 | "outputs": []
502 | }
503 | ]
504 | }
505 |
--------------------------------------------------------------------------------
/RAG_with_Graph_Database.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "gpuType": "T4",
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | },
17 | "widgets": {
18 | "application/vnd.jupyter.widget-state+json": {
19 | "a5f9427673584f24b42922ebcf8714f4": {
20 | "model_module": "@jupyter-widgets/controls",
21 | "model_name": "VBoxModel",
22 | "model_module_version": "1.5.0",
23 | "state": {
24 | "_dom_classes": [],
25 | "_model_module": "@jupyter-widgets/controls",
26 | "_model_module_version": "1.5.0",
27 | "_model_name": "VBoxModel",
28 | "_view_count": null,
29 | "_view_module": "@jupyter-widgets/controls",
30 | "_view_module_version": "1.5.0",
31 | "_view_name": "VBoxView",
32 | "box_style": "",
33 | "children": [
34 | "IPY_MODEL_78051657194346bc99fb58409ed67870",
35 | "IPY_MODEL_f716d2a99aae4bf4b09349c5d7fc695a",
36 | "IPY_MODEL_2f2d4b6731c042cf8b1d64db40f6234a",
37 | "IPY_MODEL_1d686d4c60474afd907b722e20af1452",
38 | "IPY_MODEL_3ada03b7a08244a797a4178bbe935318"
39 | ],
40 | "layout": "IPY_MODEL_5b26c881dd42490eb144e255ae685fcd"
41 | }
42 | },
43 | "78051657194346bc99fb58409ed67870": {
44 | "model_module": "@jupyter-widgets/controls",
45 | "model_name": "HTMLModel",
46 | "model_module_version": "1.5.0",
47 | "state": {
48 | "_dom_classes": [],
49 | "_model_module": "@jupyter-widgets/controls",
50 | "_model_module_version": "1.5.0",
51 | "_model_name": "HTMLModel",
52 | "_view_count": null,
53 | "_view_module": "@jupyter-widgets/controls",
54 | "_view_module_version": "1.5.0",
55 | "_view_name": "HTMLView",
56 | "description": "",
57 | "description_tooltip": null,
58 | "layout": "IPY_MODEL_8b78231bf8a94374b6cf468e9fa85929",
59 | "placeholder": "",
60 | "style": "IPY_MODEL_aecb1c13c7b4493ba613d690bdd2707a",
61 | "value": "\nCopy a token from your Hugging Face\ntokens page and paste it below.\nImmediately click login after copying\nyour token or it might be stored in plain text in this notebook file. "
62 | }
63 | },
64 | "f716d2a99aae4bf4b09349c5d7fc695a": {
65 | "model_module": "@jupyter-widgets/controls",
66 | "model_name": "PasswordModel",
67 | "model_module_version": "1.5.0",
68 | "state": {
69 | "_dom_classes": [],
70 | "_model_module": "@jupyter-widgets/controls",
71 | "_model_module_version": "1.5.0",
72 | "_model_name": "PasswordModel",
73 | "_view_count": null,
74 | "_view_module": "@jupyter-widgets/controls",
75 | "_view_module_version": "1.5.0",
76 | "_view_name": "PasswordView",
77 | "continuous_update": true,
78 | "description": "Token:",
79 | "description_tooltip": null,
80 | "disabled": false,
81 | "layout": "IPY_MODEL_50b3cb1532554487b840a0a8539be7dc",
82 | "placeholder": "",
83 | "style": "IPY_MODEL_0516db4733d84ca1a04be0d90f3cbfa9",
84 | "value": ""
85 | }
86 | },
87 | "2f2d4b6731c042cf8b1d64db40f6234a": {
88 | "model_module": "@jupyter-widgets/controls",
89 | "model_name": "CheckboxModel",
90 | "model_module_version": "1.5.0",
91 | "state": {
92 | "_dom_classes": [],
93 | "_model_module": "@jupyter-widgets/controls",
94 | "_model_module_version": "1.5.0",
95 | "_model_name": "CheckboxModel",
96 | "_view_count": null,
97 | "_view_module": "@jupyter-widgets/controls",
98 | "_view_module_version": "1.5.0",
99 | "_view_name": "CheckboxView",
100 | "description": "Add token as git credential?",
101 | "description_tooltip": null,
102 | "disabled": false,
103 | "indent": true,
104 | "layout": "IPY_MODEL_ef0ef461e11a4c9f9c7106709388c9ea",
105 | "style": "IPY_MODEL_bc0abd1fc56e4b418d551936ccbf724d",
106 | "value": true
107 | }
108 | },
109 | "1d686d4c60474afd907b722e20af1452": {
110 | "model_module": "@jupyter-widgets/controls",
111 | "model_name": "ButtonModel",
112 | "model_module_version": "1.5.0",
113 | "state": {
114 | "_dom_classes": [],
115 | "_model_module": "@jupyter-widgets/controls",
116 | "_model_module_version": "1.5.0",
117 | "_model_name": "ButtonModel",
118 | "_view_count": null,
119 | "_view_module": "@jupyter-widgets/controls",
120 | "_view_module_version": "1.5.0",
121 | "_view_name": "ButtonView",
122 | "button_style": "",
123 | "description": "Login",
124 | "disabled": false,
125 | "icon": "",
126 | "layout": "IPY_MODEL_febb82295cce487abe735f9ecb72796e",
127 | "style": "IPY_MODEL_2dd16270e324409aa8a5e5f8d9464665",
128 | "tooltip": ""
129 | }
130 | },
131 | "3ada03b7a08244a797a4178bbe935318": {
132 | "model_module": "@jupyter-widgets/controls",
133 | "model_name": "HTMLModel",
134 | "model_module_version": "1.5.0",
135 | "state": {
136 | "_dom_classes": [],
137 | "_model_module": "@jupyter-widgets/controls",
138 | "_model_module_version": "1.5.0",
139 | "_model_name": "HTMLModel",
140 | "_view_count": null,
141 | "_view_module": "@jupyter-widgets/controls",
142 | "_view_module_version": "1.5.0",
143 | "_view_name": "HTMLView",
144 | "description": "",
145 | "description_tooltip": null,
146 | "layout": "IPY_MODEL_202446995dd6467fab421b26fdd4189d",
147 | "placeholder": "",
148 | "style": "IPY_MODEL_e771c4cb03be41b2bf7c0ee82e255d5a",
149 | "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. "
150 | }
151 | },
152 | "5b26c881dd42490eb144e255ae685fcd": {
153 | "model_module": "@jupyter-widgets/base",
154 | "model_name": "LayoutModel",
155 | "model_module_version": "1.2.0",
156 | "state": {
157 | "_model_module": "@jupyter-widgets/base",
158 | "_model_module_version": "1.2.0",
159 | "_model_name": "LayoutModel",
160 | "_view_count": null,
161 | "_view_module": "@jupyter-widgets/base",
162 | "_view_module_version": "1.2.0",
163 | "_view_name": "LayoutView",
164 | "align_content": null,
165 | "align_items": "center",
166 | "align_self": null,
167 | "border": null,
168 | "bottom": null,
169 | "display": "flex",
170 | "flex": null,
171 | "flex_flow": "column",
172 | "grid_area": null,
173 | "grid_auto_columns": null,
174 | "grid_auto_flow": null,
175 | "grid_auto_rows": null,
176 | "grid_column": null,
177 | "grid_gap": null,
178 | "grid_row": null,
179 | "grid_template_areas": null,
180 | "grid_template_columns": null,
181 | "grid_template_rows": null,
182 | "height": null,
183 | "justify_content": null,
184 | "justify_items": null,
185 | "left": null,
186 | "margin": null,
187 | "max_height": null,
188 | "max_width": null,
189 | "min_height": null,
190 | "min_width": null,
191 | "object_fit": null,
192 | "object_position": null,
193 | "order": null,
194 | "overflow": null,
195 | "overflow_x": null,
196 | "overflow_y": null,
197 | "padding": null,
198 | "right": null,
199 | "top": null,
200 | "visibility": null,
201 | "width": "50%"
202 | }
203 | },
204 | "8b78231bf8a94374b6cf468e9fa85929": {
205 | "model_module": "@jupyter-widgets/base",
206 | "model_name": "LayoutModel",
207 | "model_module_version": "1.2.0",
208 | "state": {
209 | "_model_module": "@jupyter-widgets/base",
210 | "_model_module_version": "1.2.0",
211 | "_model_name": "LayoutModel",
212 | "_view_count": null,
213 | "_view_module": "@jupyter-widgets/base",
214 | "_view_module_version": "1.2.0",
215 | "_view_name": "LayoutView",
216 | "align_content": null,
217 | "align_items": null,
218 | "align_self": null,
219 | "border": null,
220 | "bottom": null,
221 | "display": null,
222 | "flex": null,
223 | "flex_flow": null,
224 | "grid_area": null,
225 | "grid_auto_columns": null,
226 | "grid_auto_flow": null,
227 | "grid_auto_rows": null,
228 | "grid_column": null,
229 | "grid_gap": null,
230 | "grid_row": null,
231 | "grid_template_areas": null,
232 | "grid_template_columns": null,
233 | "grid_template_rows": null,
234 | "height": null,
235 | "justify_content": null,
236 | "justify_items": null,
237 | "left": null,
238 | "margin": null,
239 | "max_height": null,
240 | "max_width": null,
241 | "min_height": null,
242 | "min_width": null,
243 | "object_fit": null,
244 | "object_position": null,
245 | "order": null,
246 | "overflow": null,
247 | "overflow_x": null,
248 | "overflow_y": null,
249 | "padding": null,
250 | "right": null,
251 | "top": null,
252 | "visibility": null,
253 | "width": null
254 | }
255 | },
256 | "aecb1c13c7b4493ba613d690bdd2707a": {
257 | "model_module": "@jupyter-widgets/controls",
258 | "model_name": "DescriptionStyleModel",
259 | "model_module_version": "1.5.0",
260 | "state": {
261 | "_model_module": "@jupyter-widgets/controls",
262 | "_model_module_version": "1.5.0",
263 | "_model_name": "DescriptionStyleModel",
264 | "_view_count": null,
265 | "_view_module": "@jupyter-widgets/base",
266 | "_view_module_version": "1.2.0",
267 | "_view_name": "StyleView",
268 | "description_width": ""
269 | }
270 | },
271 | "50b3cb1532554487b840a0a8539be7dc": {
272 | "model_module": "@jupyter-widgets/base",
273 | "model_name": "LayoutModel",
274 | "model_module_version": "1.2.0",
275 | "state": {
276 | "_model_module": "@jupyter-widgets/base",
277 | "_model_module_version": "1.2.0",
278 | "_model_name": "LayoutModel",
279 | "_view_count": null,
280 | "_view_module": "@jupyter-widgets/base",
281 | "_view_module_version": "1.2.0",
282 | "_view_name": "LayoutView",
283 | "align_content": null,
284 | "align_items": null,
285 | "align_self": null,
286 | "border": null,
287 | "bottom": null,
288 | "display": null,
289 | "flex": null,
290 | "flex_flow": null,
291 | "grid_area": null,
292 | "grid_auto_columns": null,
293 | "grid_auto_flow": null,
294 | "grid_auto_rows": null,
295 | "grid_column": null,
296 | "grid_gap": null,
297 | "grid_row": null,
298 | "grid_template_areas": null,
299 | "grid_template_columns": null,
300 | "grid_template_rows": null,
301 | "height": null,
302 | "justify_content": null,
303 | "justify_items": null,
304 | "left": null,
305 | "margin": null,
306 | "max_height": null,
307 | "max_width": null,
308 | "min_height": null,
309 | "min_width": null,
310 | "object_fit": null,
311 | "object_position": null,
312 | "order": null,
313 | "overflow": null,
314 | "overflow_x": null,
315 | "overflow_y": null,
316 | "padding": null,
317 | "right": null,
318 | "top": null,
319 | "visibility": null,
320 | "width": null
321 | }
322 | },
323 | "0516db4733d84ca1a04be0d90f3cbfa9": {
324 | "model_module": "@jupyter-widgets/controls",
325 | "model_name": "DescriptionStyleModel",
326 | "model_module_version": "1.5.0",
327 | "state": {
328 | "_model_module": "@jupyter-widgets/controls",
329 | "_model_module_version": "1.5.0",
330 | "_model_name": "DescriptionStyleModel",
331 | "_view_count": null,
332 | "_view_module": "@jupyter-widgets/base",
333 | "_view_module_version": "1.2.0",
334 | "_view_name": "StyleView",
335 | "description_width": ""
336 | }
337 | },
338 | "ef0ef461e11a4c9f9c7106709388c9ea": {
339 | "model_module": "@jupyter-widgets/base",
340 | "model_name": "LayoutModel",
341 | "model_module_version": "1.2.0",
342 | "state": {
343 | "_model_module": "@jupyter-widgets/base",
344 | "_model_module_version": "1.2.0",
345 | "_model_name": "LayoutModel",
346 | "_view_count": null,
347 | "_view_module": "@jupyter-widgets/base",
348 | "_view_module_version": "1.2.0",
349 | "_view_name": "LayoutView",
350 | "align_content": null,
351 | "align_items": null,
352 | "align_self": null,
353 | "border": null,
354 | "bottom": null,
355 | "display": null,
356 | "flex": null,
357 | "flex_flow": null,
358 | "grid_area": null,
359 | "grid_auto_columns": null,
360 | "grid_auto_flow": null,
361 | "grid_auto_rows": null,
362 | "grid_column": null,
363 | "grid_gap": null,
364 | "grid_row": null,
365 | "grid_template_areas": null,
366 | "grid_template_columns": null,
367 | "grid_template_rows": null,
368 | "height": null,
369 | "justify_content": null,
370 | "justify_items": null,
371 | "left": null,
372 | "margin": null,
373 | "max_height": null,
374 | "max_width": null,
375 | "min_height": null,
376 | "min_width": null,
377 | "object_fit": null,
378 | "object_position": null,
379 | "order": null,
380 | "overflow": null,
381 | "overflow_x": null,
382 | "overflow_y": null,
383 | "padding": null,
384 | "right": null,
385 | "top": null,
386 | "visibility": null,
387 | "width": null
388 | }
389 | },
390 | "bc0abd1fc56e4b418d551936ccbf724d": {
391 | "model_module": "@jupyter-widgets/controls",
392 | "model_name": "DescriptionStyleModel",
393 | "model_module_version": "1.5.0",
394 | "state": {
395 | "_model_module": "@jupyter-widgets/controls",
396 | "_model_module_version": "1.5.0",
397 | "_model_name": "DescriptionStyleModel",
398 | "_view_count": null,
399 | "_view_module": "@jupyter-widgets/base",
400 | "_view_module_version": "1.2.0",
401 | "_view_name": "StyleView",
402 | "description_width": ""
403 | }
404 | },
405 | "febb82295cce487abe735f9ecb72796e": {
406 | "model_module": "@jupyter-widgets/base",
407 | "model_name": "LayoutModel",
408 | "model_module_version": "1.2.0",
409 | "state": {
410 | "_model_module": "@jupyter-widgets/base",
411 | "_model_module_version": "1.2.0",
412 | "_model_name": "LayoutModel",
413 | "_view_count": null,
414 | "_view_module": "@jupyter-widgets/base",
415 | "_view_module_version": "1.2.0",
416 | "_view_name": "LayoutView",
417 | "align_content": null,
418 | "align_items": null,
419 | "align_self": null,
420 | "border": null,
421 | "bottom": null,
422 | "display": null,
423 | "flex": null,
424 | "flex_flow": null,
425 | "grid_area": null,
426 | "grid_auto_columns": null,
427 | "grid_auto_flow": null,
428 | "grid_auto_rows": null,
429 | "grid_column": null,
430 | "grid_gap": null,
431 | "grid_row": null,
432 | "grid_template_areas": null,
433 | "grid_template_columns": null,
434 | "grid_template_rows": null,
435 | "height": null,
436 | "justify_content": null,
437 | "justify_items": null,
438 | "left": null,
439 | "margin": null,
440 | "max_height": null,
441 | "max_width": null,
442 | "min_height": null,
443 | "min_width": null,
444 | "object_fit": null,
445 | "object_position": null,
446 | "order": null,
447 | "overflow": null,
448 | "overflow_x": null,
449 | "overflow_y": null,
450 | "padding": null,
451 | "right": null,
452 | "top": null,
453 | "visibility": null,
454 | "width": null
455 | }
456 | },
457 | "2dd16270e324409aa8a5e5f8d9464665": {
458 | "model_module": "@jupyter-widgets/controls",
459 | "model_name": "ButtonStyleModel",
460 | "model_module_version": "1.5.0",
461 | "state": {
462 | "_model_module": "@jupyter-widgets/controls",
463 | "_model_module_version": "1.5.0",
464 | "_model_name": "ButtonStyleModel",
465 | "_view_count": null,
466 | "_view_module": "@jupyter-widgets/base",
467 | "_view_module_version": "1.2.0",
468 | "_view_name": "StyleView",
469 | "button_color": null,
470 | "font_weight": ""
471 | }
472 | },
473 | "202446995dd6467fab421b26fdd4189d": {
474 | "model_module": "@jupyter-widgets/base",
475 | "model_name": "LayoutModel",
476 | "model_module_version": "1.2.0",
477 | "state": {
478 | "_model_module": "@jupyter-widgets/base",
479 | "_model_module_version": "1.2.0",
480 | "_model_name": "LayoutModel",
481 | "_view_count": null,
482 | "_view_module": "@jupyter-widgets/base",
483 | "_view_module_version": "1.2.0",
484 | "_view_name": "LayoutView",
485 | "align_content": null,
486 | "align_items": null,
487 | "align_self": null,
488 | "border": null,
489 | "bottom": null,
490 | "display": null,
491 | "flex": null,
492 | "flex_flow": null,
493 | "grid_area": null,
494 | "grid_auto_columns": null,
495 | "grid_auto_flow": null,
496 | "grid_auto_rows": null,
497 | "grid_column": null,
498 | "grid_gap": null,
499 | "grid_row": null,
500 | "grid_template_areas": null,
501 | "grid_template_columns": null,
502 | "grid_template_rows": null,
503 | "height": null,
504 | "justify_content": null,
505 | "justify_items": null,
506 | "left": null,
507 | "margin": null,
508 | "max_height": null,
509 | "max_width": null,
510 | "min_height": null,
511 | "min_width": null,
512 | "object_fit": null,
513 | "object_position": null,
514 | "order": null,
515 | "overflow": null,
516 | "overflow_x": null,
517 | "overflow_y": null,
518 | "padding": null,
519 | "right": null,
520 | "top": null,
521 | "visibility": null,
522 | "width": null
523 | }
524 | },
525 | "e771c4cb03be41b2bf7c0ee82e255d5a": {
526 | "model_module": "@jupyter-widgets/controls",
527 | "model_name": "DescriptionStyleModel",
528 | "model_module_version": "1.5.0",
529 | "state": {
530 | "_model_module": "@jupyter-widgets/controls",
531 | "_model_module_version": "1.5.0",
532 | "_model_name": "DescriptionStyleModel",
533 | "_view_count": null,
534 | "_view_module": "@jupyter-widgets/base",
535 | "_view_module_version": "1.2.0",
536 | "_view_name": "StyleView",
537 | "description_width": ""
538 | }
539 | },
540 | "f73c306cce8d4be1aea3ad912e9ef5dc": {
541 | "model_module": "@jupyter-widgets/controls",
542 | "model_name": "HBoxModel",
543 | "model_module_version": "1.5.0",
544 | "state": {
545 | "_dom_classes": [],
546 | "_model_module": "@jupyter-widgets/controls",
547 | "_model_module_version": "1.5.0",
548 | "_model_name": "HBoxModel",
549 | "_view_count": null,
550 | "_view_module": "@jupyter-widgets/controls",
551 | "_view_module_version": "1.5.0",
552 | "_view_name": "HBoxView",
553 | "box_style": "",
554 | "children": [
555 | "IPY_MODEL_6f7ec6e81fbc4babab80f5b352caedc8",
556 | "IPY_MODEL_6d830be2c25a4e499ed8f57758638e23",
557 | "IPY_MODEL_50deede7561347e8af1e0b213d33c3de"
558 | ],
559 | "layout": "IPY_MODEL_55b60bfe36b14fc6837959b6b9f56fac"
560 | }
561 | },
562 | "6f7ec6e81fbc4babab80f5b352caedc8": {
563 | "model_module": "@jupyter-widgets/controls",
564 | "model_name": "HTMLModel",
565 | "model_module_version": "1.5.0",
566 | "state": {
567 | "_dom_classes": [],
568 | "_model_module": "@jupyter-widgets/controls",
569 | "_model_module_version": "1.5.0",
570 | "_model_name": "HTMLModel",
571 | "_view_count": null,
572 | "_view_module": "@jupyter-widgets/controls",
573 | "_view_module_version": "1.5.0",
574 | "_view_name": "HTMLView",
575 | "description": "",
576 | "description_tooltip": null,
577 | "layout": "IPY_MODEL_24db6f406a024e6f8c78ef83752fe336",
578 | "placeholder": "",
579 | "style": "IPY_MODEL_42e137910a8b45859ae3517d5bd0cb17",
580 | "value": "Loading checkpoint shards: 100%"
581 | }
582 | },
583 | "6d830be2c25a4e499ed8f57758638e23": {
584 | "model_module": "@jupyter-widgets/controls",
585 | "model_name": "FloatProgressModel",
586 | "model_module_version": "1.5.0",
587 | "state": {
588 | "_dom_classes": [],
589 | "_model_module": "@jupyter-widgets/controls",
590 | "_model_module_version": "1.5.0",
591 | "_model_name": "FloatProgressModel",
592 | "_view_count": null,
593 | "_view_module": "@jupyter-widgets/controls",
594 | "_view_module_version": "1.5.0",
595 | "_view_name": "ProgressView",
596 | "bar_style": "success",
597 | "description": "",
598 | "description_tooltip": null,
599 | "layout": "IPY_MODEL_fc1fe6d6b77145828e33cc228c694082",
600 | "max": 4,
601 | "min": 0,
602 | "orientation": "horizontal",
603 | "style": "IPY_MODEL_93c80c4fcf01411e8f70d4c7daf23423",
604 | "value": 4
605 | }
606 | },
607 | "50deede7561347e8af1e0b213d33c3de": {
608 | "model_module": "@jupyter-widgets/controls",
609 | "model_name": "HTMLModel",
610 | "model_module_version": "1.5.0",
611 | "state": {
612 | "_dom_classes": [],
613 | "_model_module": "@jupyter-widgets/controls",
614 | "_model_module_version": "1.5.0",
615 | "_model_name": "HTMLModel",
616 | "_view_count": null,
617 | "_view_module": "@jupyter-widgets/controls",
618 | "_view_module_version": "1.5.0",
619 | "_view_name": "HTMLView",
620 | "description": "",
621 | "description_tooltip": null,
622 | "layout": "IPY_MODEL_52d69bc57f3c4299b9aad7c536735f99",
623 | "placeholder": "",
624 | "style": "IPY_MODEL_78de396d6b2d4ce384faf393b84b73a2",
625 | "value": " 4/4 [01:25<00:00, 18.80s/it]"
626 | }
627 | },
628 | "55b60bfe36b14fc6837959b6b9f56fac": {
629 | "model_module": "@jupyter-widgets/base",
630 | "model_name": "LayoutModel",
631 | "model_module_version": "1.2.0",
632 | "state": {
633 | "_model_module": "@jupyter-widgets/base",
634 | "_model_module_version": "1.2.0",
635 | "_model_name": "LayoutModel",
636 | "_view_count": null,
637 | "_view_module": "@jupyter-widgets/base",
638 | "_view_module_version": "1.2.0",
639 | "_view_name": "LayoutView",
640 | "align_content": null,
641 | "align_items": null,
642 | "align_self": null,
643 | "border": null,
644 | "bottom": null,
645 | "display": null,
646 | "flex": null,
647 | "flex_flow": null,
648 | "grid_area": null,
649 | "grid_auto_columns": null,
650 | "grid_auto_flow": null,
651 | "grid_auto_rows": null,
652 | "grid_column": null,
653 | "grid_gap": null,
654 | "grid_row": null,
655 | "grid_template_areas": null,
656 | "grid_template_columns": null,
657 | "grid_template_rows": null,
658 | "height": null,
659 | "justify_content": null,
660 | "justify_items": null,
661 | "left": null,
662 | "margin": null,
663 | "max_height": null,
664 | "max_width": null,
665 | "min_height": null,
666 | "min_width": null,
667 | "object_fit": null,
668 | "object_position": null,
669 | "order": null,
670 | "overflow": null,
671 | "overflow_x": null,
672 | "overflow_y": null,
673 | "padding": null,
674 | "right": null,
675 | "top": null,
676 | "visibility": null,
677 | "width": null
678 | }
679 | },
680 | "24db6f406a024e6f8c78ef83752fe336": {
681 | "model_module": "@jupyter-widgets/base",
682 | "model_name": "LayoutModel",
683 | "model_module_version": "1.2.0",
684 | "state": {
685 | "_model_module": "@jupyter-widgets/base",
686 | "_model_module_version": "1.2.0",
687 | "_model_name": "LayoutModel",
688 | "_view_count": null,
689 | "_view_module": "@jupyter-widgets/base",
690 | "_view_module_version": "1.2.0",
691 | "_view_name": "LayoutView",
692 | "align_content": null,
693 | "align_items": null,
694 | "align_self": null,
695 | "border": null,
696 | "bottom": null,
697 | "display": null,
698 | "flex": null,
699 | "flex_flow": null,
700 | "grid_area": null,
701 | "grid_auto_columns": null,
702 | "grid_auto_flow": null,
703 | "grid_auto_rows": null,
704 | "grid_column": null,
705 | "grid_gap": null,
706 | "grid_row": null,
707 | "grid_template_areas": null,
708 | "grid_template_columns": null,
709 | "grid_template_rows": null,
710 | "height": null,
711 | "justify_content": null,
712 | "justify_items": null,
713 | "left": null,
714 | "margin": null,
715 | "max_height": null,
716 | "max_width": null,
717 | "min_height": null,
718 | "min_width": null,
719 | "object_fit": null,
720 | "object_position": null,
721 | "order": null,
722 | "overflow": null,
723 | "overflow_x": null,
724 | "overflow_y": null,
725 | "padding": null,
726 | "right": null,
727 | "top": null,
728 | "visibility": null,
729 | "width": null
730 | }
731 | },
732 | "42e137910a8b45859ae3517d5bd0cb17": {
733 | "model_module": "@jupyter-widgets/controls",
734 | "model_name": "DescriptionStyleModel",
735 | "model_module_version": "1.5.0",
736 | "state": {
737 | "_model_module": "@jupyter-widgets/controls",
738 | "_model_module_version": "1.5.0",
739 | "_model_name": "DescriptionStyleModel",
740 | "_view_count": null,
741 | "_view_module": "@jupyter-widgets/base",
742 | "_view_module_version": "1.2.0",
743 | "_view_name": "StyleView",
744 | "description_width": ""
745 | }
746 | },
747 | "fc1fe6d6b77145828e33cc228c694082": {
748 | "model_module": "@jupyter-widgets/base",
749 | "model_name": "LayoutModel",
750 | "model_module_version": "1.2.0",
751 | "state": {
752 | "_model_module": "@jupyter-widgets/base",
753 | "_model_module_version": "1.2.0",
754 | "_model_name": "LayoutModel",
755 | "_view_count": null,
756 | "_view_module": "@jupyter-widgets/base",
757 | "_view_module_version": "1.2.0",
758 | "_view_name": "LayoutView",
759 | "align_content": null,
760 | "align_items": null,
761 | "align_self": null,
762 | "border": null,
763 | "bottom": null,
764 | "display": null,
765 | "flex": null,
766 | "flex_flow": null,
767 | "grid_area": null,
768 | "grid_auto_columns": null,
769 | "grid_auto_flow": null,
770 | "grid_auto_rows": null,
771 | "grid_column": null,
772 | "grid_gap": null,
773 | "grid_row": null,
774 | "grid_template_areas": null,
775 | "grid_template_columns": null,
776 | "grid_template_rows": null,
777 | "height": null,
778 | "justify_content": null,
779 | "justify_items": null,
780 | "left": null,
781 | "margin": null,
782 | "max_height": null,
783 | "max_width": null,
784 | "min_height": null,
785 | "min_width": null,
786 | "object_fit": null,
787 | "object_position": null,
788 | "order": null,
789 | "overflow": null,
790 | "overflow_x": null,
791 | "overflow_y": null,
792 | "padding": null,
793 | "right": null,
794 | "top": null,
795 | "visibility": null,
796 | "width": null
797 | }
798 | },
799 | "93c80c4fcf01411e8f70d4c7daf23423": {
800 | "model_module": "@jupyter-widgets/controls",
801 | "model_name": "ProgressStyleModel",
802 | "model_module_version": "1.5.0",
803 | "state": {
804 | "_model_module": "@jupyter-widgets/controls",
805 | "_model_module_version": "1.5.0",
806 | "_model_name": "ProgressStyleModel",
807 | "_view_count": null,
808 | "_view_module": "@jupyter-widgets/base",
809 | "_view_module_version": "1.2.0",
810 | "_view_name": "StyleView",
811 | "bar_color": null,
812 | "description_width": ""
813 | }
814 | },
815 | "52d69bc57f3c4299b9aad7c536735f99": {
816 | "model_module": "@jupyter-widgets/base",
817 | "model_name": "LayoutModel",
818 | "model_module_version": "1.2.0",
819 | "state": {
820 | "_model_module": "@jupyter-widgets/base",
821 | "_model_module_version": "1.2.0",
822 | "_model_name": "LayoutModel",
823 | "_view_count": null,
824 | "_view_module": "@jupyter-widgets/base",
825 | "_view_module_version": "1.2.0",
826 | "_view_name": "LayoutView",
827 | "align_content": null,
828 | "align_items": null,
829 | "align_self": null,
830 | "border": null,
831 | "bottom": null,
832 | "display": null,
833 | "flex": null,
834 | "flex_flow": null,
835 | "grid_area": null,
836 | "grid_auto_columns": null,
837 | "grid_auto_flow": null,
838 | "grid_auto_rows": null,
839 | "grid_column": null,
840 | "grid_gap": null,
841 | "grid_row": null,
842 | "grid_template_areas": null,
843 | "grid_template_columns": null,
844 | "grid_template_rows": null,
845 | "height": null,
846 | "justify_content": null,
847 | "justify_items": null,
848 | "left": null,
849 | "margin": null,
850 | "max_height": null,
851 | "max_width": null,
852 | "min_height": null,
853 | "min_width": null,
854 | "object_fit": null,
855 | "object_position": null,
856 | "order": null,
857 | "overflow": null,
858 | "overflow_x": null,
859 | "overflow_y": null,
860 | "padding": null,
861 | "right": null,
862 | "top": null,
863 | "visibility": null,
864 | "width": null
865 | }
866 | },
867 | "78de396d6b2d4ce384faf393b84b73a2": {
868 | "model_module": "@jupyter-widgets/controls",
869 | "model_name": "DescriptionStyleModel",
870 | "model_module_version": "1.5.0",
871 | "state": {
872 | "_model_module": "@jupyter-widgets/controls",
873 | "_model_module_version": "1.5.0",
874 | "_model_name": "DescriptionStyleModel",
875 | "_view_count": null,
876 | "_view_module": "@jupyter-widgets/base",
877 | "_view_module_version": "1.2.0",
878 | "_view_name": "StyleView",
879 | "description_width": ""
880 | }
881 | },
882 | "88f08961299f438ea00dd88d61ec99d2": {
883 | "model_module": "@jupyter-widgets/controls",
884 | "model_name": "HBoxModel",
885 | "model_module_version": "1.5.0",
886 | "state": {
887 | "_dom_classes": [],
888 | "_model_module": "@jupyter-widgets/controls",
889 | "_model_module_version": "1.5.0",
890 | "_model_name": "HBoxModel",
891 | "_view_count": null,
892 | "_view_module": "@jupyter-widgets/controls",
893 | "_view_module_version": "1.5.0",
894 | "_view_name": "HBoxView",
895 | "box_style": "",
896 | "children": [
897 | "IPY_MODEL_4ab68fde0d5845e498b23536ee61e828",
898 | "IPY_MODEL_476045c378f942a6b3066e6ffd8e93b6",
899 | "IPY_MODEL_31faf6bbc65340f6ae0bae2b20f825f0"
900 | ],
901 | "layout": "IPY_MODEL_d7a88f0f41e64fbb9eb04c208276d5e9"
902 | }
903 | },
904 | "4ab68fde0d5845e498b23536ee61e828": {
905 | "model_module": "@jupyter-widgets/controls",
906 | "model_name": "HTMLModel",
907 | "model_module_version": "1.5.0",
908 | "state": {
909 | "_dom_classes": [],
910 | "_model_module": "@jupyter-widgets/controls",
911 | "_model_module_version": "1.5.0",
912 | "_model_name": "HTMLModel",
913 | "_view_count": null,
914 | "_view_module": "@jupyter-widgets/controls",
915 | "_view_module_version": "1.5.0",
916 | "_view_name": "HTMLView",
917 | "description": "",
918 | "description_tooltip": null,
919 | "layout": "IPY_MODEL_26d4da72a16b4381aef9244f59c536b7",
920 | "placeholder": "",
921 | "style": "IPY_MODEL_d97e2daa67f8430fbe62fd64cf753f73",
922 | "value": "generation_config.json: 100%"
923 | }
924 | },
925 | "476045c378f942a6b3066e6ffd8e93b6": {
926 | "model_module": "@jupyter-widgets/controls",
927 | "model_name": "FloatProgressModel",
928 | "model_module_version": "1.5.0",
929 | "state": {
930 | "_dom_classes": [],
931 | "_model_module": "@jupyter-widgets/controls",
932 | "_model_module_version": "1.5.0",
933 | "_model_name": "FloatProgressModel",
934 | "_view_count": null,
935 | "_view_module": "@jupyter-widgets/controls",
936 | "_view_module_version": "1.5.0",
937 | "_view_name": "ProgressView",
938 | "bar_style": "success",
939 | "description": "",
940 | "description_tooltip": null,
941 | "layout": "IPY_MODEL_9ba5c9837ffa48a1a98375cf8f7ea58c",
942 | "max": 137,
943 | "min": 0,
944 | "orientation": "horizontal",
945 | "style": "IPY_MODEL_7b0b028a1df640d49ec7b6a3ea8ed5cd",
946 | "value": 137
947 | }
948 | },
949 | "31faf6bbc65340f6ae0bae2b20f825f0": {
950 | "model_module": "@jupyter-widgets/controls",
951 | "model_name": "HTMLModel",
952 | "model_module_version": "1.5.0",
953 | "state": {
954 | "_dom_classes": [],
955 | "_model_module": "@jupyter-widgets/controls",
956 | "_model_module_version": "1.5.0",
957 | "_model_name": "HTMLModel",
958 | "_view_count": null,
959 | "_view_module": "@jupyter-widgets/controls",
960 | "_view_module_version": "1.5.0",
961 | "_view_name": "HTMLView",
962 | "description": "",
963 | "description_tooltip": null,
964 | "layout": "IPY_MODEL_694328c260294586bf00c5f8904b26a2",
965 | "placeholder": "",
966 | "style": "IPY_MODEL_e444efde30f146efba56fa1d0bcee1fd",
967 | "value": " 137/137 [00:00<00:00, 9.27kB/s]"
968 | }
969 | },
970 | "d7a88f0f41e64fbb9eb04c208276d5e9": {
971 | "model_module": "@jupyter-widgets/base",
972 | "model_name": "LayoutModel",
973 | "model_module_version": "1.2.0",
974 | "state": {
975 | "_model_module": "@jupyter-widgets/base",
976 | "_model_module_version": "1.2.0",
977 | "_model_name": "LayoutModel",
978 | "_view_count": null,
979 | "_view_module": "@jupyter-widgets/base",
980 | "_view_module_version": "1.2.0",
981 | "_view_name": "LayoutView",
982 | "align_content": null,
983 | "align_items": null,
984 | "align_self": null,
985 | "border": null,
986 | "bottom": null,
987 | "display": null,
988 | "flex": null,
989 | "flex_flow": null,
990 | "grid_area": null,
991 | "grid_auto_columns": null,
992 | "grid_auto_flow": null,
993 | "grid_auto_rows": null,
994 | "grid_column": null,
995 | "grid_gap": null,
996 | "grid_row": null,
997 | "grid_template_areas": null,
998 | "grid_template_columns": null,
999 | "grid_template_rows": null,
1000 | "height": null,
1001 | "justify_content": null,
1002 | "justify_items": null,
1003 | "left": null,
1004 | "margin": null,
1005 | "max_height": null,
1006 | "max_width": null,
1007 | "min_height": null,
1008 | "min_width": null,
1009 | "object_fit": null,
1010 | "object_position": null,
1011 | "order": null,
1012 | "overflow": null,
1013 | "overflow_x": null,
1014 | "overflow_y": null,
1015 | "padding": null,
1016 | "right": null,
1017 | "top": null,
1018 | "visibility": null,
1019 | "width": null
1020 | }
1021 | },
1022 | "26d4da72a16b4381aef9244f59c536b7": {
1023 | "model_module": "@jupyter-widgets/base",
1024 | "model_name": "LayoutModel",
1025 | "model_module_version": "1.2.0",
1026 | "state": {
1027 | "_model_module": "@jupyter-widgets/base",
1028 | "_model_module_version": "1.2.0",
1029 | "_model_name": "LayoutModel",
1030 | "_view_count": null,
1031 | "_view_module": "@jupyter-widgets/base",
1032 | "_view_module_version": "1.2.0",
1033 | "_view_name": "LayoutView",
1034 | "align_content": null,
1035 | "align_items": null,
1036 | "align_self": null,
1037 | "border": null,
1038 | "bottom": null,
1039 | "display": null,
1040 | "flex": null,
1041 | "flex_flow": null,
1042 | "grid_area": null,
1043 | "grid_auto_columns": null,
1044 | "grid_auto_flow": null,
1045 | "grid_auto_rows": null,
1046 | "grid_column": null,
1047 | "grid_gap": null,
1048 | "grid_row": null,
1049 | "grid_template_areas": null,
1050 | "grid_template_columns": null,
1051 | "grid_template_rows": null,
1052 | "height": null,
1053 | "justify_content": null,
1054 | "justify_items": null,
1055 | "left": null,
1056 | "margin": null,
1057 | "max_height": null,
1058 | "max_width": null,
1059 | "min_height": null,
1060 | "min_width": null,
1061 | "object_fit": null,
1062 | "object_position": null,
1063 | "order": null,
1064 | "overflow": null,
1065 | "overflow_x": null,
1066 | "overflow_y": null,
1067 | "padding": null,
1068 | "right": null,
1069 | "top": null,
1070 | "visibility": null,
1071 | "width": null
1072 | }
1073 | },
1074 | "d97e2daa67f8430fbe62fd64cf753f73": {
1075 | "model_module": "@jupyter-widgets/controls",
1076 | "model_name": "DescriptionStyleModel",
1077 | "model_module_version": "1.5.0",
1078 | "state": {
1079 | "_model_module": "@jupyter-widgets/controls",
1080 | "_model_module_version": "1.5.0",
1081 | "_model_name": "DescriptionStyleModel",
1082 | "_view_count": null,
1083 | "_view_module": "@jupyter-widgets/base",
1084 | "_view_module_version": "1.2.0",
1085 | "_view_name": "StyleView",
1086 | "description_width": ""
1087 | }
1088 | },
1089 | "9ba5c9837ffa48a1a98375cf8f7ea58c": {
1090 | "model_module": "@jupyter-widgets/base",
1091 | "model_name": "LayoutModel",
1092 | "model_module_version": "1.2.0",
1093 | "state": {
1094 | "_model_module": "@jupyter-widgets/base",
1095 | "_model_module_version": "1.2.0",
1096 | "_model_name": "LayoutModel",
1097 | "_view_count": null,
1098 | "_view_module": "@jupyter-widgets/base",
1099 | "_view_module_version": "1.2.0",
1100 | "_view_name": "LayoutView",
1101 | "align_content": null,
1102 | "align_items": null,
1103 | "align_self": null,
1104 | "border": null,
1105 | "bottom": null,
1106 | "display": null,
1107 | "flex": null,
1108 | "flex_flow": null,
1109 | "grid_area": null,
1110 | "grid_auto_columns": null,
1111 | "grid_auto_flow": null,
1112 | "grid_auto_rows": null,
1113 | "grid_column": null,
1114 | "grid_gap": null,
1115 | "grid_row": null,
1116 | "grid_template_areas": null,
1117 | "grid_template_columns": null,
1118 | "grid_template_rows": null,
1119 | "height": null,
1120 | "justify_content": null,
1121 | "justify_items": null,
1122 | "left": null,
1123 | "margin": null,
1124 | "max_height": null,
1125 | "max_width": null,
1126 | "min_height": null,
1127 | "min_width": null,
1128 | "object_fit": null,
1129 | "object_position": null,
1130 | "order": null,
1131 | "overflow": null,
1132 | "overflow_x": null,
1133 | "overflow_y": null,
1134 | "padding": null,
1135 | "right": null,
1136 | "top": null,
1137 | "visibility": null,
1138 | "width": null
1139 | }
1140 | },
1141 | "7b0b028a1df640d49ec7b6a3ea8ed5cd": {
1142 | "model_module": "@jupyter-widgets/controls",
1143 | "model_name": "ProgressStyleModel",
1144 | "model_module_version": "1.5.0",
1145 | "state": {
1146 | "_model_module": "@jupyter-widgets/controls",
1147 | "_model_module_version": "1.5.0",
1148 | "_model_name": "ProgressStyleModel",
1149 | "_view_count": null,
1150 | "_view_module": "@jupyter-widgets/base",
1151 | "_view_module_version": "1.2.0",
1152 | "_view_name": "StyleView",
1153 | "bar_color": null,
1154 | "description_width": ""
1155 | }
1156 | },
1157 | "694328c260294586bf00c5f8904b26a2": {
1158 | "model_module": "@jupyter-widgets/base",
1159 | "model_name": "LayoutModel",
1160 | "model_module_version": "1.2.0",
1161 | "state": {
1162 | "_model_module": "@jupyter-widgets/base",
1163 | "_model_module_version": "1.2.0",
1164 | "_model_name": "LayoutModel",
1165 | "_view_count": null,
1166 | "_view_module": "@jupyter-widgets/base",
1167 | "_view_module_version": "1.2.0",
1168 | "_view_name": "LayoutView",
1169 | "align_content": null,
1170 | "align_items": null,
1171 | "align_self": null,
1172 | "border": null,
1173 | "bottom": null,
1174 | "display": null,
1175 | "flex": null,
1176 | "flex_flow": null,
1177 | "grid_area": null,
1178 | "grid_auto_columns": null,
1179 | "grid_auto_flow": null,
1180 | "grid_auto_rows": null,
1181 | "grid_column": null,
1182 | "grid_gap": null,
1183 | "grid_row": null,
1184 | "grid_template_areas": null,
1185 | "grid_template_columns": null,
1186 | "grid_template_rows": null,
1187 | "height": null,
1188 | "justify_content": null,
1189 | "justify_items": null,
1190 | "left": null,
1191 | "margin": null,
1192 | "max_height": null,
1193 | "max_width": null,
1194 | "min_height": null,
1195 | "min_width": null,
1196 | "object_fit": null,
1197 | "object_position": null,
1198 | "order": null,
1199 | "overflow": null,
1200 | "overflow_x": null,
1201 | "overflow_y": null,
1202 | "padding": null,
1203 | "right": null,
1204 | "top": null,
1205 | "visibility": null,
1206 | "width": null
1207 | }
1208 | },
1209 | "e444efde30f146efba56fa1d0bcee1fd": {
1210 | "model_module": "@jupyter-widgets/controls",
1211 | "model_name": "DescriptionStyleModel",
1212 | "model_module_version": "1.5.0",
1213 | "state": {
1214 | "_model_module": "@jupyter-widgets/controls",
1215 | "_model_module_version": "1.5.0",
1216 | "_model_name": "DescriptionStyleModel",
1217 | "_view_count": null,
1218 | "_view_module": "@jupyter-widgets/base",
1219 | "_view_module_version": "1.2.0",
1220 | "_view_name": "StyleView",
1221 | "description_width": ""
1222 | }
1223 | }
1224 | }
1225 | },
1226 | "accelerator": "GPU"
1227 | },
1228 | "cells": [
1229 | {
1230 | "cell_type": "markdown",
1231 | "metadata": {
1232 | "id": "view-in-github",
1233 | "colab_type": "text"
1234 | },
1235 | "source": [
1236 | "
"
1237 | ]
1238 | },
1239 | {
1240 | "cell_type": "markdown",
1241 | "source": [
1242 | "**Installing Dependencies:**"
1243 | ],
1244 | "metadata": {
1245 | "id": "98YkxTMk8MkF"
1246 | }
1247 | },
1248 | {
1249 | "cell_type": "code",
1250 | "execution_count": 13,
1251 | "metadata": {
1252 | "id": "Pw24LkQ27bML",
1253 | "outputId": "a1444f5f-0dd0-4783-b8a0-f6b96d8135d1",
1254 | "colab": {
1255 | "base_uri": "https://localhost:8080/"
1256 | }
1257 | },
1258 | "outputs": [
1259 | {
1260 | "output_type": "stream",
1261 | "name": "stdout",
1262 | "text": [
1263 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m670.2/670.2 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
1264 | "\u001b[?25hRequirement already satisfied: flash-attn in /usr/local/lib/python3.10/dist-packages (2.5.5)\n",
1265 | "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from flash-attn) (2.1.0)\n",
1266 | "Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from flash-attn) (0.7.0)\n",
1267 | "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from flash-attn) (23.2)\n",
1268 | "Requirement already satisfied: ninja in /usr/local/lib/python3.10/dist-packages (from flash-attn) (1.11.1.1)\n",
1269 | "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.13.1)\n",
1270 | "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (4.9.0)\n",
1271 | "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (1.12)\n",
1272 | "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.2.1)\n",
1273 | "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.1.3)\n",
1274 | "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2023.6.0)\n",
1275 | "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
1276 | "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
1277 | "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
1278 | "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (8.9.2.26)\n",
1279 | "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.3.1)\n",
1280 | "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (11.0.2.54)\n",
1281 | "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (10.3.2.106)\n",
1282 | "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (11.4.5.107)\n",
1283 | "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.0.106)\n",
1284 | "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2.18.1)\n",
1285 | "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (12.1.105)\n",
1286 | "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2.1.0)\n",
1287 | "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->flash-attn) (12.3.101)\n",
1288 | "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->flash-attn) (2.1.5)\n",
1289 | "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->flash-attn) (1.3.0)\n"
1290 | ]
1291 | }
1292 | ],
1293 | "source": [
1294 | "!pip install -qU transformers\n",
1295 | "!pip install -qU langchain\n",
1296 | "!pip install -qU huggingface_hub\n",
1297 | "!pip install -qU tiktoken\n",
1298 | "!pip install -qU neo4j\n",
1299 | "!pip install -qU python-dotenv\n",
1300 | "!pip install -qU accelerate\n",
1301 | "!pip install -qU sentence_transformers\n",
1302 | "!pip install -qU bitsandbytes\n",
1303 | "!pip install -qU optimum\n",
1304 | "!pip install -qU unstructured unstructured[pdf]\n",
1305 | "!pip install flash-attn --no-build-isolation"
1306 | ]
1307 | },
1308 | {
1309 | "cell_type": "code",
1310 | "source": [
1311 | "import os\n",
1312 | "import re\n",
1313 | "from langchain.vectorstores.neo4j_vector import Neo4jVector\n",
1314 | "from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter\n",
1315 | "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
1316 | "from dotenv import load_dotenv"
1317 | ],
1318 | "metadata": {
1319 | "id": "wu99tQMp8cYs"
1320 | },
1321 | "execution_count": 14,
1322 | "outputs": []
1323 | },
1324 | {
1325 | "cell_type": "markdown",
1326 | "source": [
1327 | "**Setting Credentials and Connection Settings in Environment Variables:**"
1328 | ],
1329 | "metadata": {
1330 | "id": "ec1_j3db9-On"
1331 | }
1332 | },
1333 | {
1334 | "cell_type": "code",
1335 | "source": [
1336 | "from google.colab import drive\n",
1337 | "from huggingface_hub import notebook_login\n",
1338 | "notebook_login()\n",
1339 | "load_dotenv()\n",
1340 | "os.environ[\"NEO4J_URI\"] = 'neo4j+s://d5dffe81.databases.neo4j.io'\n",
1341 | "os.environ[\"NEO4J_USERNAME\"] = 'neo4j'\n",
1342 | "os.environ[\"NEO4J_PASSWORD\"] = 'C8A_mt9s8yar3i44Xi1bVbkrFVK3aCXE1w5cQvHv6LM'\n",
1343 | "os.environ['NEO4J_URL'] = \"bolt://server_ip:7687\"\n",
1344 | "drive.mount('/content/drive')"
1345 | ],
1346 | "metadata": {
1347 | "id": "TV8qynpS8ndS",
1348 | "outputId": "ce43227b-a99f-4863-98a7-c4ac47624dd1",
1349 | "colab": {
1350 | "base_uri": "https://localhost:8080/",
1351 | "height": 348,
1352 | "referenced_widgets": [
1353 | "a5f9427673584f24b42922ebcf8714f4",
1354 | "78051657194346bc99fb58409ed67870",
1355 | "f716d2a99aae4bf4b09349c5d7fc695a",
1356 | "2f2d4b6731c042cf8b1d64db40f6234a",
1357 | "1d686d4c60474afd907b722e20af1452",
1358 | "3ada03b7a08244a797a4178bbe935318",
1359 | "5b26c881dd42490eb144e255ae685fcd",
1360 | "8b78231bf8a94374b6cf468e9fa85929",
1361 | "aecb1c13c7b4493ba613d690bdd2707a",
1362 | "50b3cb1532554487b840a0a8539be7dc",
1363 | "0516db4733d84ca1a04be0d90f3cbfa9",
1364 | "ef0ef461e11a4c9f9c7106709388c9ea",
1365 | "bc0abd1fc56e4b418d551936ccbf724d",
1366 | "febb82295cce487abe735f9ecb72796e",
1367 | "2dd16270e324409aa8a5e5f8d9464665",
1368 | "202446995dd6467fab421b26fdd4189d",
1369 | "e771c4cb03be41b2bf7c0ee82e255d5a"
1370 | ]
1371 | }
1372 | },
1373 | "execution_count": 15,
1374 | "outputs": [
1375 | {
1376 | "output_type": "display_data",
1377 | "data": {
1378 | "text/plain": [
1379 | "VBox(children=(HTML(value='
bool:\n",
1649 | " for stop_ids in stop_token_ids:\n",
1650 | " if torch.equal(input_ids[0][-len(stop_ids):], stop_ids):\n",
1651 | " return True\n",
1652 | " return False\n",
1653 | "\n",
1654 | "stopping_criteria = StoppingCriteriaList([StopOnTokens()])"
1655 | ],
1656 | "metadata": {
1657 | "id": "UeHmT-GzHerh"
1658 | },
1659 | "execution_count": null,
1660 | "outputs": []
1661 | },
1662 | {
1663 | "cell_type": "markdown",
1664 | "source": [
1665 | "**Testing the Hugging Face Pipeline:**"
1666 | ],
1667 | "metadata": {
1668 | "id": "4DRI1tSuOPpd"
1669 | }
1670 | },
1671 | {
1672 | "cell_type": "code",
1673 | "source": [
1674 | "# Set up text generation pipeline\n",
1675 | "generate_text = transformers.pipeline(\n",
1676 | " model=model,\n",
1677 | " tokenizer=tokenizer,\n",
1678 | " return_full_text=True,\n",
1679 | " task='text-generation',\n",
1680 | " stopping_criteria=stopping_criteria,\n",
1681 | " temperature=0.3,\n",
1682 | " max_new_tokens=512,\n",
1683 | " repetition_penalty=1.1\n",
1684 | ")"
1685 | ],
1686 | "metadata": {
1687 | "id": "g3A-YXDjHrHu"
1688 | },
1689 | "execution_count": null,
1690 | "outputs": []
1691 | },
1692 | {
1693 | "cell_type": "code",
1694 | "source": [
1695 | "result = generate_text(\"What are the primary mechanisms underlying antibiotic resistance, and how can we develop strategies to combat it?\")\n",
1696 | "print(result)"
1697 | ],
1698 | "metadata": {
1699 | "id": "uTRpa0_mInOA"
1700 | },
1701 | "execution_count": null,
1702 | "outputs": []
1703 | },
1704 | {
1705 | "cell_type": "code",
1706 | "source": [
1707 | "from langchain.llms import HuggingFacePipeline\n",
1708 | "\n",
1709 | "llm = HuggingFacePipeline(pipeline=generate_text)\n",
1710 | "llm(prompt=\"How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\")"
1711 | ],
1712 | "metadata": {
1713 | "id": "Uh3unyirTtTa"
1714 | },
1715 | "execution_count": null,
1716 | "outputs": []
1717 | },
1718 | {
1719 | "cell_type": "markdown",
1720 | "source": [
1721 | "**Loading Document Data:**"
1722 | ],
1723 | "metadata": {
1724 | "id": "kBNYzCfyOJRz"
1725 | }
1726 | },
1727 | {
1728 | "cell_type": "code",
1729 | "source": [
1730 | "from langchain_community.document_loaders import DirectoryLoader\n",
1731 | "loader = DirectoryLoader('/content/drive/MyDrive/BioMedical-Dataset', glob=\"**/*.pdf\")\n",
1732 | "documents = loader.load()"
1733 | ],
1734 | "metadata": {
1735 | "id": "knhECj2bOUkh"
1736 | },
1737 | "execution_count": null,
1738 | "outputs": []
1739 | },
1740 | {
1741 | "cell_type": "code",
1742 | "source": [
1743 | "print(len(documents))"
1744 | ],
1745 | "metadata": {
1746 | "id": "T0Yp2a8Goeqh"
1747 | },
1748 | "execution_count": null,
1749 | "outputs": []
1750 | },
1751 | {
1752 | "cell_type": "code",
1753 | "source": [
1754 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
1755 | "\n",
1756 | "MARKDOWN_SEPARATORS = [\n",
1757 | " \"\\n#{1,6} \",\n",
1758 | " \"```\\n\",\n",
1759 | " \"\\n\\\\*\\\\*\\\\*+\\n\",\n",
1760 | " \"\\n---+\\n\",\n",
1761 | " \"\\n___+\\n\",\n",
1762 | " \"\\n\\n\",\n",
1763 | " \"\\n\",\n",
1764 | " \" \",\n",
1765 | " \"\",\n",
1766 | "]\n",
1767 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,\n",
1768 | " chunk_overlap=30,\n",
1769 | " add_start_index=True,\n",
1770 | " separators=MARKDOWN_SEPARATORS)\n",
1771 | "\n",
1772 | "processed_text_splits = text_splitter.split_documents(documents)"
1773 | ],
1774 | "metadata": {
1775 | "id": "6Tznkr3PIawL"
1776 | },
1777 | "execution_count": null,
1778 | "outputs": []
1779 | },
1780 | {
1781 | "cell_type": "code",
1782 | "source": [
1783 | "processed_text_splits[120].page_content"
1784 | ],
1785 | "metadata": {
1786 | "id": "91wesl_N4yYm"
1787 | },
1788 | "execution_count": null,
1789 | "outputs": []
1790 | },
1791 | {
1792 | "cell_type": "code",
1793 | "source": [
1794 | "print(len(processed_text_splits))"
1795 | ],
1796 | "metadata": {
1797 | "id": "El9WlHtHCiTv"
1798 | },
1799 | "execution_count": null,
1800 | "outputs": []
1801 | },
1802 | {
1803 | "cell_type": "code",
1804 | "source": [
1805 | "# Creating embeddings of the sentences and storing them in the graph DB\n",
1806 | "from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n",
1807 | "\n",
1808 | "model_name = \"BAAI/bge-base-en-v1.5\"\n",
1809 | "model_kwargs = {\"device\": \"cuda\"}\n",
1810 | "encode_kwargs = {\"normalize_embeddings\": True}\n",
1811 | "embeddings = HuggingFaceBgeEmbeddings(\n",
1812 | " model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs\n",
1813 | ")"
1814 | ],
1815 | "metadata": {
1816 | "id": "KqOudIQtIayw"
1817 | },
1818 | "execution_count": null,
1819 | "outputs": []
1820 | },
1821 | {
1822 | "cell_type": "markdown",
1823 | "source": [
1824 | "**Load Neo4j Graph:**"
1825 | ],
1826 | "metadata": {
1827 | "id": "WaGxQILaCwQv"
1828 | }
1829 | },
1830 | {
1831 | "cell_type": "code",
1832 | "source": [
1833 | "from langchain.graphs import Neo4jGraph\n",
1834 | "\n",
1835 | "graph = Neo4jGraph(\n",
1836 | " url=os.environ[\"NEO4J_URI\"],\n",
1837 | " username=os.environ[\"NEO4J_USERNAME\"],\n",
1838 | " password=os.environ[\"NEO4J_PASSWORD\"]\n",
1839 | ")"
1840 | ],
1841 | "metadata": {
1842 | "id": "v__99eeVDVdB"
1843 | },
1844 | "execution_count": null,
1845 | "outputs": []
1846 | },
1847 | {
1848 | "cell_type": "markdown",
1849 | "source": [
1850 | "**Creating a new custom Index using Cypher:**"
1851 | ],
1852 | "metadata": {
1853 | "id": "KasRmCh0bWJP"
1854 | }
1855 | },
1856 | {
1857 | "cell_type": "code",
1858 | "source": [
1859 | "# Create New index with custom embedding model and dimensions\n",
1860 | "# (This index has already been created, so the call below is left disabled inside a string literal.)\n",
1861 | "'''\n",
1862 | "graph.query(\"\"\"\n",
1863 | "CALL db.index.vector.createNodeIndex(\n",
1864 | " 'KG-Enhanced-QnA-Biomedical',\n",
1865 | " 'text_splits',\n",
1866 | " 'embeddings',\n",
1867 | " 768,\n",
1868 | " 'cosine'\n",
1869 | ")\n",
1870 | "\"\"\")\n",
1871 | "'''"
1872 | ],
1873 | "metadata": {
1874 | "id": "Lhzqh0uKbVJu"
1875 | },
1876 | "execution_count": null,
1877 | "outputs": []
1878 | },
1879 | {
1880 | "cell_type": "markdown",
1881 | "source": [
1882 | "**Show Created Vector Index:**"
1883 | ],
1884 | "metadata": {
1885 | "id": "z3ayEjLyHTAm"
1886 | }
1887 | },
1888 | {
1889 | "cell_type": "code",
1890 | "source": [
1891 | "from neo4j import GraphDatabase\n",
1892 | "uri = os.environ[\"NEO4J_URI\"]\n",
1893 | "username = os.environ[\"NEO4J_USERNAME\"]\n",
1894 | "password = os.environ[\"NEO4J_PASSWORD\"]\n",
1895 | "\n",
1896 | "driver = GraphDatabase.driver(uri, auth=(username, password))\n",
1897 | "session = driver.session()\n",
1898 | "\n",
1899 | "result = session.run(\"SHOW VECTOR INDEXES\")\n",
1900 | "\n",
1901 | "for record in result:\n",
1902 | " print(record)"
1903 | ],
1904 | "metadata": {
1905 | "id": "SFLExy9OxptZ"
1906 | },
1907 | "execution_count": null,
1908 | "outputs": []
1909 | },
1910 | {
1911 | "cell_type": "code",
1912 | "source": [
1913 | "''' chunks = [{'text': document.page_content, 'embedding': embeddings.embed_query(document.page_content)}\n",
1914 | " for document in documents if len(document.page_content) > 50] '''"
1915 | ],
1916 | "metadata": {
1917 | "id": "ii7wVWOmJYBb"
1918 | },
1919 | "execution_count": null,
1920 | "outputs": []
1921 | },
1922 | {
1923 | "cell_type": "code",
1924 | "source": [
1925 | "'''\n",
1926 | "graph.query(\"\"\"\n",
1927 | "UNWIND $data AS row\n",
1928 | "CREATE (c:Chunk {text: row.text})\n",
1929 | "WITH c, row\n",
1930 | "CALL db.create.setVectorProperty(c, 'embedding', row.embedding)\n",
1931 | "YIELD node\n",
1932 | "RETURN distinct 'done'\n",
1933 | "\"\"\", {'data': chunks})\n",
1934 | "'''"
1935 | ],
1936 | "metadata": {
1937 | "id": "B0KQnPT8Lxqi"
1938 | },
1939 | "execution_count": null,
1940 | "outputs": []
1941 | },
1942 | {
1943 | "cell_type": "code",
1944 | "source": [
1945 | "'''\n",
1946 | "vector_search = \"\"\"\n",
1947 | "WITH $embedding AS e\n",
1948 | "CALL db.index.vector.queryNodes('KG-Enhanced-QnA-Biomedical',$k, e) yield node, score\n",
1949 | "RETURN node.text AS result\n",
1950 | "ORDER BY score DESC\n",
1951 | "LIMIT 3\n",
1952 | "\"\"\"\n",
1953 | "'''"
1954 | ],
1955 | "metadata": {
1956 | "id": "0Tbwl91LjKd4"
1957 | },
1958 | "execution_count": null,
1959 | "outputs": []
1960 | },
1961 | {
1962 | "cell_type": "code",
1963 | "source": [
1964 | "# Instantiate Neo4j vector from documents\n",
1965 | "neo4j_vector = Neo4jVector.from_documents(\n",
1966 | " processed_text_splits,\n",
1967 | " embeddings,\n",
1968 | " index_name='KG-Enhanced-QnA-Biomedical',\n",
1969 | " url=os.environ[\"NEO4J_URI\"],\n",
1970 | " username=os.environ[\"NEO4J_USERNAME\"],\n",
1971 | " password=os.environ[\"NEO4J_PASSWORD\"]\n",
1972 | ")"
1973 | ],
1974 | "metadata": {
1975 | "id": "HHjtYv6JDVfg"
1976 | },
1977 | "execution_count": null,
1978 | "outputs": []
1979 | },
1980 | {
1981 | "cell_type": "code",
1982 | "source": [
1983 | "# Performing Similarity Search\n",
1984 | "query = \"How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\"\n",
1985 | "vector_results = neo4j_vector.similarity_search(query, k=2)\n",
1986 | "\n",
1987 | "for i, res in enumerate(vector_results):\n",
1988 | " print(res.page_content)\n",
1989 | " if i != len(vector_results) - 1:\n",
1990 | " print()\n",
1991 | "vector_result = vector_results[0].page_content"
1992 | ],
1993 | "metadata": {
1994 | "id": "UAV-XTaHJIjT"
1995 | },
1996 | "execution_count": null,
1997 | "outputs": []
1998 | },
1999 | {
2000 | "cell_type": "code",
2001 | "source": [
2002 | "from langchain.chains import GraphCypherQAChain\n",
2003 | "from langchain.graphs import Neo4jGraph"
2004 | ],
2005 | "metadata": {
2006 | "id": "Az7n09kuW4k4"
2007 | },
2008 | "execution_count": null,
2009 | "outputs": []
2010 | },
2011 | {
2012 | "cell_type": "code",
2013 | "source": [
2014 | "from langchain.chains.base import Chain\n",
2015 | "from langchain.chains.llm import LLMChain\n",
2016 | "from langchain.chat_models import ChatOpenAI\n",
2017 | "from langchain.chains.question_answering.stuff_prompt import CHAT_PROMPT\n",
2018 | "from langchain.callbacks.manager import CallbackManagerForChainRun\n",
2019 | "from typing import Any, Dict, List\n",
2020 | "from pydantic import Field"
2021 | ],
2022 | "metadata": {
2023 | "id": "uE_4FkZtMyaL"
2024 | },
2025 | "execution_count": null,
2026 | "outputs": []
2027 | },
2028 | {
2029 | "cell_type": "code",
2030 | "source": [
2031 | "vector_search = \"\"\"\n",
2032 | "WITH $embedding AS e\n",
2033 | "CALL db.index.vector.queryNodes('KG-Enhanced-QnA-Biomedical',$k, e) yield node, score\n",
2034 | "RETURN node.text AS result\n",
2035 | "ORDER BY score DESC\n",
2036 | "LIMIT 3\n",
2037 | "\"\"\""
2038 | ],
2039 | "metadata": {
2040 | "id": "jqnt60GeNB3O"
2041 | },
2042 | "execution_count": null,
2043 | "outputs": []
2044 | },
2045 | {
2046 | "cell_type": "code",
2047 | "source": [
2048 | "print(graph.schema)"
2049 | ],
2050 | "metadata": {
2051 | "id": "yf3jH9ZGXPUN"
2052 | },
2053 | "execution_count": null,
2054 | "outputs": []
2055 | },
2056 | {
2057 | "cell_type": "code",
2058 | "source": [
2059 | "class Neo4jVectorChain(Chain):\n",
2060 | " graph: Neo4jGraph = Field(exclude=True)\n",
2061 | " input_key: str = \"query\"\n",
2062 | " output_key: str = \"result\"\n",
2063 | " embeddings: HuggingFaceBgeEmbeddings = HuggingFaceBgeEmbeddings()\n",
2064 | " qa_chain: LLMChain = LLMChain(llm=llm, prompt=CHAT_PROMPT)\n",
2065 | "\n",
2066 | " @property\n",
2067 | " def input_keys(self) -> List[str]:\n",
2068 | " return [self.input_key]\n",
2069 | "\n",
2070 | " @property\n",
2071 | " def output_keys(self) -> List[str]:\n",
2072 | " _output_keys = [self.output_key]\n",
2073 | " return _output_keys\n",
2074 | "\n",
2075 | " def _call(self, inputs: Dict[str, str], run_manager, k=3) -> Dict[str, Any]:\n",
2076 | " question = inputs[self.input_key]\n",
2077 | " embedding = self.embeddings.embed_query(question)\n",
2078 | "\n",
2079 | " context = self.graph.query(vector_search, {'embedding': embedding, 'k': 3})\n",
2080 | " context = [el['result'] for el in context]\n",
2081 | "\n",
2082 | " result = self.qa_chain({\"question\": question, \"context\": context})\n",
2083 | " final_result = result[self.qa_chain.output_key]\n",
2084 | " return {self.output_key: final_result}"
2085 | ],
2086 | "metadata": {
2087 | "id": "wYZQ44hsNT4y"
2088 | },
2089 | "execution_count": null,
2090 | "outputs": []
2091 | },
2092 | {
2093 | "cell_type": "code",
2094 | "source": [
2095 | "chain = Neo4jVectorChain(graph=graph, embeddings=embeddings, verbose=True)"
2096 | ],
2097 | "metadata": {
2098 | "id": "tksLTpsqOGGq"
2099 | },
2100 | "execution_count": null,
2101 | "outputs": []
2102 | },
2103 | {
2104 | "cell_type": "code",
2105 | "source": [
2106 | "graph_result = chain.run(\"How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\")"
2107 | ],
2108 | "metadata": {
2109 | "id": "TS4Nwf6pONk1"
2110 | },
2111 | "execution_count": null,
2112 | "outputs": []
2113 | },
2114 | {
2115 | "cell_type": "code",
2116 | "source": [
2117 | "chain = GraphCypherQAChain.from_llm(\n",
2118 | " cypher_llm=llm,\n",
2119 | " qa_llm=llm,\n",
2120 | " graph=graph,\n",
2121 | " verbose=True,\n",
2122 | " return_intermediate_steps=True,\n",
2123 | " validate_cypher=True\n",
2124 | ")"
2125 | ],
2126 | "metadata": {
2127 | "id": "2Q-g7wGhXR0V"
2128 | },
2129 | "execution_count": null,
2130 | "outputs": []
2131 | },
2132 | {
2133 | "cell_type": "code",
2134 | "source": [
2135 | "graph_result = chain.run(\"How can we enhance the specificity and efficiency of CRISPR/Cas9 gene-editing technology to minimize off-target effects and increase its potential for therapeutic applications?\")"
2136 | ],
2137 | "metadata": {
2138 | "id": "JUxlLbCEXczj"
2139 | },
2140 | "execution_count": null,
2141 | "outputs": []
2142 | }
2143 | ]
2144 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Retrieval-Augmented Generation with LangChain and Neo4j Graph DB
2 |
--------------------------------------------------------------------------------