├── .gitignore
├── 01-LangChain-RAG.ipynb
├── 02-LangChain-RAG LangSmith.ipynb
├── 03-LangChain-RAG Chunk Rerank.ipynb
├── 04-LangChain-RAG Chunk Rerank Max Context.ipynb
├── Data
│   ├── Thundertooth Part 1.docx
│   ├── Thundertooth Part 2.docx
│   ├── Thundertooth Part 3.docx
│   └── Thundertooth Part 4.docx
├── README.md
└── environment.yml
/.gitignore:
--------------------------------------------------------------------------------
1 | # API Key(s) not part of the repository
2 | apikeys*
3 |
--------------------------------------------------------------------------------
/01-LangChain-RAG.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": []
7 | },
8 | {
9 | "cell_type": "markdown",
10 | "metadata": {},
11 | "source": [
12 | "### LangChain local LLM RAG example\n",
13 | "Utilising LangChain v0.1\n",
14 | "\n",
15 | "This notebook demonstrates the use of LangChain for Retrieval Augmented Generation in Linux with Nvidia's CUDA. LLMs are run using Ollama.\n",
16 | "\n",
17 | "Models tested:\n",
18 | "- Llama 2\n",
19 | "- Mistral 7B\n",
20 | "- Mixtral 8x7B\n",
21 | "- Neural Chat 7B\n",
22 | "- Orca 2\n",
23 | "- Phi-2\n",
24 | "- Solar 10.7B\n",
25 | "- Yi 34B\n",
26 | "\n",
27 | "\n",
28 | "See the [README.md](README.md) file for help on how to set up your environment to run this."
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 1,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "# Select your model here, put the name of the model in the ollama_model_name variable\n",
38 | "# Ensure you have pulled them or run them so Ollama has downloaded them and can load them (which it will do automatically)\n",
39 | "\n",
40 | "# Ollama installation (if you haven't done it yet): $ curl https://ollama.ai/install.sh | sh\n",
41 | "# Models need to be running in Ollama for LangChain to use them, to test if it can be run: $ ollama run mistral:7b-instruct-q6_K\n",
42 | "\n",
43 | "ollama_model_name = \"orca2:13b-q5_K_S\"\n",
44 | "# \"llama2:7b-chat-q6_K\"\n",
45 | "# \"mistral:7b-instruct-q6_K\"\n",
46 | "# \"mixtral:8x7b-instruct-v0.1-q4_K_M\"\n",
47 | "# \"neural-chat:7b-v3.3-q6_K\"\n",
48 | "# \"orca2:13b-q5_K_S\"\n",
49 | "# \"phi\" or try \"phi:chat\"\n",
50 | "# \"solar:10.7b-instruct-v1-q5_K_M\"\n",
51 | "# Can't run \"yi:34b-chat-q3_K_M\" or \"yi:34b-chat-q4_K_M\" - never stopped with inference"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 2,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "# Load the LLM with Ollama, setting the temperature low so it's not too creative\n",
61 | "\n",
62 | "from langchain_community.llms import Ollama\n",
63 | "llm = Ollama(model=ollama_model_name, temperature=0.1)"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 3,
69 | "metadata": {},
70 | "outputs": [
71 | {
72 | "data": {
73 | "text/plain": [
74 | "'The sky appears blue because of a phenomenon called Rayleigh scattering. When sunlight enters the atmosphere, it encounters molecules of nitrogen and oxygen. These molecules are much smaller than the wavelengths of visible light, so they can interact with different colors of light in different ways. Shorter wavelengths, such as violet and blue, are scattered more strongly than longer wavelengths, such as red and yellow. This means that more blue light reaches our eyes from all directions, making the sky look blue to us. However, at sunrise and sunset, the sun is lower in the sky, so we see more of the longer wavelengths that are scattered less by the atmosphere. This is why the sky looks redder at those times.'"
75 | ]
76 | },
77 | "execution_count": 3,
78 | "metadata": {},
79 | "output_type": "execute_result"
80 | }
81 | ],
82 | "source": [
83 | "# Quick test of the LLM with a general question before we start doing RAG\n",
84 | "llm.invoke(\"why is the sky blue?\")\n",
85 | "\n",
86 | "# Note: This line would not complete for Yi-34B - need to work out why inferencing never finishes (works fine when running with the same prompt in ollama.)"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 4,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "# Embeddings will be based on the Ollama loaded model\n",
96 | "\n",
97 | "from langchain_community.embeddings import OllamaEmbeddings\n",
98 | "\n",
99 | "embeddings = OllamaEmbeddings(model=ollama_model_name)"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 5,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "from langchain_community.document_loaders import DirectoryLoader\n",
109 | "\n",
110 | "loader = DirectoryLoader('Data', glob=\"**/*.docx\")"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 6,
116 | "metadata": {},
117 | "outputs": [],
118 | "source": [
119 | "# Load documents\n",
120 | "\n",
121 | "docs = loader.load()"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 7,
127 | "metadata": {},
128 | "outputs": [
129 | {
130 | "data": {
131 | "text/plain": [
132 | "4"
133 | ]
134 | },
135 | "execution_count": 7,
136 | "metadata": {},
137 | "output_type": "execute_result"
138 | }
139 | ],
140 | "source": [
141 | "# Ensure we have the right number of Word documents loaded\n",
142 | "\n",
143 | "len(docs)"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": 8,
149 | "metadata": {},
150 | "outputs": [],
151 | "source": [
152 | "# Split them up into chunks using a Text Splitter\n",
153 | "\n",
154 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
155 | "\n",
156 | "text_splitter = RecursiveCharacterTextSplitter()\n",
157 | "documents = text_splitter.split_documents(docs)"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 9,
163 | "metadata": {},
164 | "outputs": [],
165 | "source": [
166 | "# Create the embeddings from the chunks\n",
167 | "\n",
168 | "from langchain_community.vectorstores import FAISS\n",
169 | "\n",
170 | "vector = FAISS.from_documents(documents, embeddings)"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 10,
176 | "metadata": {},
177 | "outputs": [],
178 | "source": [
179 | "# Prepare the prompt and then the chain\n",
180 | "\n",
181 | "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
182 | "from langchain_core.prompts import ChatPromptTemplate\n",
183 | "\n",
184 | "if ollama_model_name == \"phi\" or ollama_model_name == \"phi:chat\":\n",
185 | " # Phi-2 prompt is less flexible\n",
186 | " prompt_template = \"\"\"Instruct: With this context\\n\\n{context}\\n\\nQuestion: {input}\\nOutput:\"\"\"\n",
187 | "\n",
188 | "else:\n",
189 | " prompt_template = \"\"\"You are a story teller, answering questions in an excited, insightful, and empathetic way. Answer the question based only on the provided context:\n",
190 | "\n",
191 | " \n",
192 | " {context}\n",
193 | " \n",
194 | "\n",
195 | " Question: {input}\"\"\"\n",
196 | "\n",
197 | "prompt = ChatPromptTemplate.from_template(prompt_template)\n",
198 | "document_chain = create_stuff_documents_chain(llm, prompt)"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": 11,
204 | "metadata": {},
205 | "outputs": [
206 | {
207 | "data": {
208 | "text/plain": [
209 | "RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={\n",
210 | " context: RunnableLambda(format_docs)\n",
211 | "}), config={'run_name': 'format_inputs'})\n",
212 | "| ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='You are a story teller, answering questions in an excited, insightful, and empathetic way. Answer the question based only on the provided context:\\n\\n \\n {context}\\n \\n\\n Question: {input}'))])\n",
213 | "| Ollama(model='orca2:13b-q5_K_S', temperature=0.1)\n",
214 | "| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})"
215 | ]
216 | },
217 | "execution_count": 11,
218 | "metadata": {},
219 | "output_type": "execute_result"
220 | }
221 | ],
222 | "source": [
223 | "# The LangChain chain\n",
224 | "document_chain"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": 12,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "# Create the retriever and LangChain retriever chain\n",
234 | "\n",
235 | "from langchain.chains import create_retrieval_chain\n",
236 | "\n",
237 | "retriever = vector.as_retriever()\n",
238 | "retrieval_chain = create_retrieval_chain(retriever, document_chain)"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 13,
244 | "metadata": {},
245 | "outputs": [
246 | {
247 | "data": {
248 | "text/plain": [
249 | "RunnableBinding(bound=RunnableAssign(mapper={\n",
250 | " context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])\n",
251 | " | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=), config={'run_name': 'retrieve_documents'})\n",
252 | "})\n",
253 | "| RunnableAssign(mapper={\n",
254 | " answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={\n",
255 | " context: RunnableLambda(format_docs)\n",
256 | " }), config={'run_name': 'format_inputs'})\n",
257 | " | ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='You are a story teller, answering questions in an excited, insightful, and empathetic way. Answer the question based only on the provided context:\\n\\n \\n {context}\\n \\n\\n Question: {input}'))])\n",
258 | " | Ollama(model='orca2:13b-q5_K_S', temperature=0.1)\n",
259 | " | StrOutputParser(), config={'run_name': 'stuff_documents_chain'})\n",
260 | " }), config={'run_name': 'retrieval_chain'})"
261 | ]
262 | },
263 | "execution_count": 13,
264 | "metadata": {},
265 | "output_type": "execute_result"
266 | }
267 | ],
268 | "source": [
269 | "# Chain now incorporates the retriever\n",
270 | "retrieval_chain"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 14,
276 | "metadata": {},
277 | "outputs": [],
278 | "source": [
279 | "# Here are our test questions\n",
280 | "\n",
281 | "TestQuestions = [\n",
282 | " \"Summarise the story for me\",\n",
283 | " \"Who was the main protagonist?\",\n",
284 | " \"Did they have any children? If so, what were their names?\",\n",
285 | " \"Did anything eventful happen?\",\n",
286 | " \"Who are the main characters?\",\n",
287 | " \"What do you think happens next in the story?\"\n",
288 | "]"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": 15,
294 | "metadata": {},
295 | "outputs": [],
296 | "source": [
297 | "# If you want to see what's happening under the hood, set debug to True\n",
298 | "\n",
299 | "from langchain.globals import set_debug\n",
300 | "\n",
301 | "# set_debug(True)"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 18,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "name": "stdout",
311 | "output_type": "stream",
312 | "text": [
313 | "\n",
314 | "1/6: Summarise the story for me\n",
315 | "\n",
316 | "2/6: Who was the main protagonist?\n",
317 | "\n",
318 | "3/6: Did they have any children? If so, what were their names?\n",
319 | "\n",
320 | "4/6: Did anything eventful happen?\n",
321 | "\n",
322 | "5/6: Who are the main characters?\n",
323 | "\n",
324 | "6/6: What do you think happens next in the story?\n"
325 | ]
326 | }
327 | ],
328 | "source": [
329 | "qa_pairs = []\n",
330 | "\n",
331 | "for index, question in enumerate(TestQuestions, start=1):\n",
332 | " question = question.strip() # Clean up\n",
333 | "\n",
334 | " print(f\"\\n{index}/{len(TestQuestions)}: {question}\")\n",
335 | "\n",
336 | " response = retrieval_chain.invoke({\"input\": question})\n",
337 | "\n",
338 | " qa_pairs.append((question.strip(), response[\"answer\"])) # Add to our output array\n",
339 | "\n",
340 | " # Uncomment the following line if you want to test just the first question\n",
341 | " # break "
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": 19,
347 | "metadata": {},
348 | "outputs": [
349 | {
350 | "name": "stdout",
351 | "output_type": "stream",
352 | "text": [
353 | "1/6 Summarise the story for me\n",
354 | "\n",
355 | "Possible summary:\n",
356 | "\n",
357 | "The story is about Thundertooth, a talking dinosaur who travels through time and ends up in a futuristic city where he finds a new home and starts a toy factory with the help of humans. He also meets Seraphina, his soulmate, and they have four children who inherit different abilities from both parents. The family faces challenges and adventures, such as finding food, saving the city from a meteor threat, and bringing joy to the people. The story shows how Thundertooth and his family bridge the gap between the past and the future, and how they use their talents for good.\n",
358 | "\n",
359 | "--------\n",
360 | "\n",
361 | "2/6 Who was the main protagonist?\n",
362 | "\n",
363 | "To answer the question, we need to identify the main character of the story, who is usually the one who faces the most challenges, changes, or conflicts. We can use clues from the context, such as the title, the introduction, and the conclusion, to find out who that is.\n",
364 | "\n",
365 | "The title of the story is \"Thundertooth\", which suggests that the main protagonist is a character named Thundertooth. The introduction confirms this by describing how Thundertooth was a talking dinosaur who traveled through time and ended up in a futuristic city, where he faced hunger, internal strife, and external threats. The conclusion also shows how Thundertooth became a beloved figure and a hero who saved the city from a meteor. Therefore, we can conclude that Thundertooth is the main protagonist of the story.\n",
366 | "\n",
367 | "### Final answer: Thundertooth\n",
368 | "\n",
369 | "--------\n",
370 | "\n",
371 | "3/6 Did they have any children? If so, what were their names?\n",
372 | "\n",
373 | "To answer this question based on the context, we need to follow these steps:\n",
374 | "\n",
375 | "- Locate the part of the context that mentions Thundertooth's family and children. This is in the paragraph that starts with \"Thundertooth found a one-of-a-kind toy factory...\" and ends with \"...the city's inhabitants, reminding them that sometimes, the most magical things could emerge from the most unexpected places.\"\n",
376 | "- Identify the names of Thundertooth's children. They are listed in the paragraph after the sentence \"As the years passed, Thundertooth's life took a heartwarming turn.\" The names are Lumina, Echo, Sapphire, and Ignis.\n",
377 | "- Confirm that they had any children by checking if the context mentions their birth or adoption. It does not explicitly state how they got their children, but it implies that they were born to Thundertooth and Seraphina, since they are described as \"their children\" in the next paragraph.\n",
378 | "- Summarize the answer in a brief sentence, using the information from the context.\n",
379 | "\n",
380 | "### Final answer: Yes, they had four children named Lumina, Echo, Sapphire, and Ignis.\n",
381 | "\n",
382 | "--------\n",
383 | "\n",
384 | "4/6 Did anything eventful happen?\n",
385 | "\n",
386 | "To answer this question, we need to review the context and identify any significant events or changes that occurred in the story of Thundertooth and his family. We can use the following steps:\n",
387 | "\n",
388 | "- Step 1: Scan the context for keywords or phrases that indicate an eventful occurrence, such as \"crisis\", \"disaster\", \"threat\", \"challenge\", \"adventure\", \"transformation\", etc.\n",
389 | "- Step 2: Evaluate each keyword or phrase and determine if it describes an eventful happening that affected the characters or the plot of the story. For example, a crisis could be eventful, but a false alarm might not be. A threat could be eventful, but a potential danger might not be.\n",
390 | "- Step 3: Summarize the main events or happenings that were eventful in the context, and explain why they were significant for the story. For example, we could say that Thundertooth's journey through time was an eventful happening that changed his life and led him to the futuristic city, where he faced a hunger dilemma and a meteor threat.\n",
391 | "- Step 4: Write a brief final answer that addresses the question based on the summary of the main events or happenings. For example, we could say: Yes, several eventful happenings occurred in the context, such as Thundertooth's time travel, his hunger dilemma, and the meteor threat. These happenings shaped the story and showed how Thundertooth and his family overcame challenges and brought unity to the city.\n",
392 | "\n",
393 | "### Final answer: Yes, several eventful happenings occurred in the context.\n",
394 | "\n",
395 | "--------\n",
396 | "\n",
397 | "5/6 Who are the main characters?\n",
398 | "\n",
399 | "One possible way to answer the question is:\n",
400 | "\n",
401 | "To answer the question, we need to identify the main characters in the story. The main characters are usually the ones who have a significant role in the plot, have distinctive personalities and traits, and face challenges or conflicts that shape their development. We can use these criteria to find the main characters in the context.\n",
402 | "\n",
403 | "The first paragraph introduces the Thundertooth family as the protagonists of the story. They are a group of dinosaurs who survived an internal strife and became a united and prosperous family. The paragraph mentions their names: Thundertooth, Seraphina, Lumina, Echo, Sapphire, and Ignis. These are the main characters of the story.\n",
404 | "\n",
405 | "The rest of the context follows the lives and adventures of the Thundertooth family in the futuristic city. It describes how they overcame their hunger dilemma, founded a toy factory, met new friends, and faced a meteor crisis. Throughout these events, the main characters show their unique abilities, talents, and personalities. They also learn from their experiences and grow as individuals and as a family.\n",
406 | "\n",
407 | "Therefore, based on the context, we can conclude that the main characters are Thundertooth, Seraphina, Lumina, Echo, Sapphire, and Ignis. ### Final answer: The main characters are Thundertooth, Seraphina, Lumina, Echo, Sapphire, and Ignis.\n",
408 | "\n",
409 | "--------\n",
410 | "\n",
411 | "6/6 What do you think happens next in the story?\n",
412 | "\n",
413 | "Possible answer:\n",
414 | "\n",
415 | "To answer this question, we need to use our imagination and creativity, as well as some clues from the context. We can think of different scenarios that could happen next in the story, such as:\n",
416 | "\n",
417 | "- The Thundertooth family receives recognition and rewards for their heroic deeds, such as medals, honors, or special privileges.\n",
418 | "- The Thundertooth family continues to innovate and create new toys, attracting more customers and fans from around the world.\n",
419 | "- The Thundertooth family faces new challenges and adventures, such as exploring other dimensions, meeting other talking dinosaurs, or encountering new enemies.\n",
420 | "- The Thundertooth family enjoys their peaceful and happy life, spending time with each other and their friends, and celebrating their achievements.\n",
421 | "\n",
422 | "There is no definitive answer to this question, as the story could go in many different directions. However, based on the tone and theme of the context, we can make some educated guesses about what might happen next. For example, since the context emphasizes the redemption and bonding of the Thundertooth family, we can assume that they will continue to work together and support each other in their future endeavors. Since the context also highlights the diversity and creativity of the family members, we can expect that they will keep inventing new things and having fun. And since the context shows the appreciation and admiration of the city's inhabitants for the Thundertooth family, we can imagine that they will receive some form of recognition and gratitude for their heroism.\n",
423 | "\n",
424 | "Therefore, a possible answer to the question is:\n",
425 | "\n",
426 | "### Final answer: The Thundertooth family receives recognition and rewards for their heroic deeds, such as medals, honors, or special privileges. They also continue to innovate and create new toys, attracting more customers and fans from around the world. They enjoy their peaceful and happy life, spending time with each other and their friends, and celebrating their achievements.\n",
427 | "\n",
428 | "--------\n",
429 | "\n"
430 | ]
431 | }
432 | ],
433 | "source": [
434 | "# Print out the questions and answers\n",
435 | "\n",
436 | "for index, (question, answer) in enumerate(qa_pairs, start=1):\n",
437 | " print(f\"{index}/{len(qa_pairs)} {question}\\n\\n{answer}\\n\\n--------\\n\")"
438 | ]
439 | }
440 | ],
441 | "metadata": {
442 | "kernelspec": {
443 | "display_name": "LangChainRAGLinux",
444 | "language": "python",
445 | "name": "python3"
446 | },
447 | "language_info": {
448 | "codemirror_mode": {
449 | "name": "ipython",
450 | "version": 3
451 | },
452 | "file_extension": ".py",
453 | "mimetype": "text/x-python",
454 | "name": "python",
455 | "nbconvert_exporter": "python",
456 | "pygments_lexer": "ipython3",
457 | "version": "3.10.13"
458 | }
459 | },
460 | "nbformat": 4,
461 | "nbformat_minor": 2
462 | }
463 |
--------------------------------------------------------------------------------
/02-LangChain-RAG LangSmith.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": []
7 | },
8 | {
9 | "cell_type": "markdown",
10 | "metadata": {},
11 | "source": [
12 | "### LangChain local LLM RAG example\n",
13 | "### For LangSmith users (requires API key)\n",
14 | "Utilising LangChain v0.1\n",
15 | "\n",
16 | "This notebook demonstrates the use of LangChain for Retrieval Augmented Generation in Linux with Nvidia's CUDA. LLMs are run using Ollama.\n",
17 | "\n",
18 | "Models tested:\n",
19 | "- Llama 2\n",
20 | "- Mistral 7B\n",
21 | "- Mixtral 8x7B\n",
22 | "- Neural Chat 7B\n",
23 | "- Orca 2\n",
24 | "- Phi-2\n",
25 | "- Solar 10.7B\n",
26 | "- Yi 34B\n",
27 | "\n",
28 | "\n",
29 | "See the [README.md](README.md) file for help on how to set up your environment to run this."
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 1,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "# Select your model here, put the name of the model in the ollama_model_name variable\n",
39 | "# Ensure you have pulled them or run them so Ollama has downloaded them and can load them (which it will do automatically)\n",
40 | "\n",
41 | "# Ollama installation (if you haven't done it yet): $ curl https://ollama.ai/install.sh | sh\n",
42 | "# Models need to be running in Ollama for LangChain to use them, to test if it can be run: $ ollama run mistral:7b-instruct-q6_K\n",
43 | "\n",
44 | "ollama_model_name = \"mistral:7b-instruct-q6_K\"\n",
45 | "# \"llama2:7b-chat-q6_K\"\n",
46 | "# \"mistral:7b-instruct-q6_K\"\n",
47 | "# \"mixtral:8x7b-instruct-v0.1-q4_K_M\"\n",
48 | "# \"neural-chat:7b-v3.3-q6_K\"\n",
49 | "# \"orca2:13b-q5_K_S\"\n",
50 | "# \"phi\" or try \"phi:chat\"\n",
51 | "# \"solar:10.7b-instruct-v1-q5_K_M\"\n",
52 | "# Can't run \"yi:34b-chat-q3_K_M\" or \"yi:34b-chat-q4_K_M\" - never stopped with inference"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 2,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "# Our LangSmith API key is stored in apikeys.py\n",
62 | "# Store your LangSmith key in a variable called LangSmith_API\n",
63 | "\n",
64 | "# Example apikeys.py (without the hashes and with your keys inserted):\n",
65 | "# LangSmith_API = \"PUT YOUR LANGSMITH API KEY HERE\"\n",
66 | "# Cohere_API = \"PUT YOUR COHERE API KEY HERE\"\n",
67 | "\n",
68 | "from apikeys import LangSmith_API\n",
69 | "import os\n",
70 | "\n",
71 | "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
72 | "os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
73 | "os.environ[\"LANGCHAIN_API_KEY\"] = LangSmith_API\n",
74 | "\n",
75 | "# Project Name\n",
76 | "os.environ[\"LANGCHAIN_PROJECT\"] = \"LangChain RAG Linux\""
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 3,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
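85 | "# Load the LLM with Ollama. The low-temperature setting (temperature=0.1) is commented out below, so the default temperature applies; re-enable it if you want less creative answers\n",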
86 | "\n",
87 | "from langchain_community.llms import Ollama\n",
88 | "llm = Ollama(model=ollama_model_name) #, temperature=0.1)"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 4,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "data": {
98 | "text/plain": [
99 | "\"\\nThe sky appears blue because of a phenomenon called Rayleigh scattering. This occurs when light from the sun travels through Earth's atmosphere and interacts with molecules in the air, such as nitrogen and oxygen. Blue light has a shorter wavelength than other colors, so it is more likely to collide with these molecules and be scattered in all directions. As the blue light is scattered, we see it everywhere around us, giving the sky its characteristic color. This effect is most pronounced during the day when the sun is high in the sky, but it can also occur at dawn or dusk when the sun is closer to the horizon.\""
100 | ]
101 | },
102 | "execution_count": 4,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | }
106 | ],
107 | "source": [
108 | "# Quick test of the LLM with a general question before we start doing RAG\n",
109 | "llm.invoke(\"why is the sky blue?\")\n",
110 | "\n",
111 | "# Note: This line would not complete for Yi-34B - need to work out why inferencing never finishes (works fine when running with the same prompt in ollama.)"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 5,
117 | "metadata": {},
118 | "outputs": [],
119 | "source": [
120 | "# Embeddings will be based on the Ollama loaded model\n",
121 | "\n",
122 | "from langchain_community.embeddings import OllamaEmbeddings\n",
123 | "\n",
124 | "embeddings = OllamaEmbeddings(model=ollama_model_name)"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 6,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "from langchain_community.document_loaders import DirectoryLoader\n",
134 | "\n",
135 | "loader = DirectoryLoader('Data', glob=\"**/*.docx\")"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 7,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "# Load documents\n",
145 | "\n",
146 | "docs = loader.load()"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 8,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "data": {
156 | "text/plain": [
157 | "4"
158 | ]
159 | },
160 | "execution_count": 8,
161 | "metadata": {},
162 | "output_type": "execute_result"
163 | }
164 | ],
165 | "source": [
166 | "# Ensure we have the right number of Word documents loaded\n",
167 | "\n",
168 | "len(docs)"
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": 9,
174 | "metadata": {},
175 | "outputs": [],
176 | "source": [
177 | "# Split them up into chunks using a Text Splitter\n",
178 | "\n",
179 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
180 | "\n",
181 | "text_splitter = RecursiveCharacterTextSplitter()\n",
182 | "documents = text_splitter.split_documents(docs)"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 10,
188 | "metadata": {},
189 | "outputs": [],
190 | "source": [
191 | "# Create the embeddings from the chunks\n",
192 | "\n",
193 | "from langchain_community.vectorstores import FAISS\n",
194 | "\n",
195 | "vector = FAISS.from_documents(documents, embeddings)"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": 11,
201 | "metadata": {},
202 | "outputs": [],
203 | "source": [
204 | "# Prepare the prompt and then the chain\n",
205 | "\n",
206 | "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
207 | "from langchain_core.prompts import ChatPromptTemplate\n",
208 | "\n",
209 | "if ollama_model_name == \"phi\" or ollama_model_name == \"phi:chat\":\n",
210 | " # Phi-2 prompt is less flexible\n",
211 | " prompt_template = \"\"\"Instruct: With this context\\n\\n{context}\\n\\nQuestion: {input}\\nOutput:\"\"\"\n",
212 | "\n",
213 | "elif ollama_model_name.startswith(\"yi:34b\"):\n",
214 | " prompt_template = \"\"\"You are a story teller, answering questions in an excited, insightful, and empathetic way. Answer the question based only on the provided context:\n",
215 | "\n",
216 | " [context]\n",
217 | " {context}\n",
218 | " [/context]\n",
219 | "\n",
220 | " Question: {input}\"\"\"\n",
221 | "else:\n",
222 | " prompt_template = \"\"\"You are a story teller, answering questions in an excited, insightful, and empathetic way. Answer the question based only on the provided context:\n",
223 | "\n",
224 | " \n",
225 | " {context}\n",
226 | " \n",
227 | "\n",
228 | " Question: {input}\"\"\"\n",
229 | "\n",
230 | "prompt = ChatPromptTemplate.from_template(prompt_template)\n",
231 | "document_chain = create_stuff_documents_chain(llm, prompt)"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": 12,
237 | "metadata": {},
238 | "outputs": [
239 | {
240 | "data": {
241 | "text/plain": [
242 | "RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={\n",
243 | " context: RunnableLambda(format_docs)\n",
244 | "}), config={'run_name': 'format_inputs'})\n",
245 | "| ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='You are a story teller, answering questions in an excited, insightful, and empathetic way. Answer the question based only on the provided context:\\n\\n \\n {context}\\n \\n\\n Question: {input}'))])\n",
246 | "| Ollama(model='mistral:7b-instruct-q6_K')\n",
247 | "| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})"
248 | ]
249 | },
250 | "execution_count": 12,
251 | "metadata": {},
252 | "output_type": "execute_result"
253 | }
254 | ],
255 | "source": [
256 | "# The LangChain chain\n",
257 | "document_chain"
258 | ]
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": 13,
263 | "metadata": {},
264 | "outputs": [],
265 | "source": [
266 | "# Create the retriever and LangChain retriever chain\n",
267 | "\n",
268 | "from langchain.chains import create_retrieval_chain\n",
269 | "\n",
270 | "retriever = vector.as_retriever()\n",
271 | "retrieval_chain = create_retrieval_chain(retriever, document_chain)"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": 14,
277 | "metadata": {},
278 | "outputs": [
279 | {
280 | "data": {
281 | "text/plain": [
282 | "RunnableBinding(bound=RunnableAssign(mapper={\n",
283 | " context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])\n",
284 | " | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=), config={'run_name': 'retrieve_documents'})\n",
285 | "})\n",
286 | "| RunnableAssign(mapper={\n",
287 | " answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={\n",
288 | " context: RunnableLambda(format_docs)\n",
289 | " }), config={'run_name': 'format_inputs'})\n",
290 | " | ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='You are a story teller, answering questions in an excited, insightful, and empathetic way. Answer the question based only on the provided context:\\n\\n \\n {context}\\n \\n\\n Question: {input}'))])\n",
291 | " | Ollama(model='mistral:7b-instruct-q6_K')\n",
292 | " | StrOutputParser(), config={'run_name': 'stuff_documents_chain'})\n",
293 | " }), config={'run_name': 'retrieval_chain'})"
294 | ]
295 | },
296 | "execution_count": 14,
297 | "metadata": {},
298 | "output_type": "execute_result"
299 | }
300 | ],
301 | "source": [
302 | "# Chain now incorporates the retriever\n",
303 | "retrieval_chain"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": 15,
309 | "metadata": {},
310 | "outputs": [],
311 | "source": [
312 | "# Here are our test questions\n",
313 | "\n",
314 | "TestQuestions = [\n",
315 | " \"Summarise the story for me\",\n",
316 | " \"Who was the main protagonist?\",\n",
317 | " \"Did they have any children? If so, what were their names?\",\n",
318 | " \"Did anything eventful happen?\",\n",
319 | " \"Who are the main characters?\",\n",
320 | " \"What do you think happens next in the story?\"\n",
321 | "]"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 19,
327 | "metadata": {},
328 | "outputs": [
329 | {
330 | "name": "stdout",
331 | "output_type": "stream",
332 | "text": [
333 | "\n",
334 | "1/6: Summarise the story for me\n",
335 | "\n",
336 | "2/6: Who was the main protagonist?\n",
337 | "\n",
338 | "3/6: Did they have any children? If so, what were their names?\n",
339 | "\n",
340 | "4/6: Did anything eventful happen?\n",
341 | "\n",
342 | "5/6: Who are the main characters?\n",
343 | "\n",
344 | "6/6: What do you think happens next in the story?\n"
345 | ]
346 | }
347 | ],
348 | "source": [
349 | "qa_pairs = []\n",
350 | "\n",
351 | "for index, question in enumerate(TestQuestions, start=1):\n",
352 | " question = question.strip() # Clean up\n",
353 | "\n",
354 | " print(f\"\\n{index}/{len(TestQuestions)}: {question}\")\n",
355 | "\n",
356 | " response = retrieval_chain.invoke({\"input\": question})\n",
357 | "\n",
358 | " qa_pairs.append((question.strip(), response[\"answer\"].strip())) # Add to our output array\n",
359 | "\n",
360 | " # Uncomment the following line if you want to test just the first question\n",
361 | " # break "
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": 20,
367 | "metadata": {},
368 | "outputs": [
369 | {
370 | "name": "stdout",
371 | "output_type": "stream",
372 | "text": [
373 | "1/6 Summarise the story for me\n",
374 | "\n",
375 | "The story is about Thundertooth, a giant talking dinosaur who was transported from his own era to a futuristic city where he encountered humans for the first time. Initially confused and disoriented, Thundertooth quickly became fascinated by the technology and advancements of the future. However, as time passed, Thundertooth struggled with his innate hunger, which made it difficult for him to coexist peacefully with the city's inhabitants.\n",
376 | "\n",
377 | "Thundertooth found solace in Mayor Grace, who listened to his story and offered her assistance. Together, they discovered a sustainable solution that satisfied Thundertooth's hunger while ensuring the well-being of the humans. As word spread about Thundertooth's arrival, the citizens embraced him as a symbol of unity between eras, and the city came to be known as a place where humans and dinosaurs could live together harmoniously.\n",
378 | "\n",
379 | "One day, the city faced a threat from a massive meteor hurtling towards Earth. The mayor called upon Thundertooth and his family to assist in the emergency efforts. Lumina utilized her technology, Echo amplified the emergency signals, Sapphire calmed the panicked masses, and Ignis attempted to alter the meteor's trajectory with heat bursts.\n",
380 | "\n",
381 | "Thundertooth coordinated the efforts of his family and the city's inhabitants, using his strength and roar to inspire hope during the crisis. The group's combined abilities successfully diverted the meteor, saving the city from destruction. In recognition of their heroic deeds, Thundertooth and his family were hailed as heroes by the citizens, cementing their legacy in the city's history.\n",
382 | "\n",
383 | "--------\n",
384 | "\n",
385 | "2/6 Who was the main protagonist?\n",
386 | "\n",
387 | "The main protagonist was Thundertooth.\n",
388 | "\n",
389 | "--------\n",
390 | "\n",
391 | "3/6 Did they have any children? If so, what were their names?\n",
392 | "\n",
393 | "No, Thundertooth and his family did not have any children in this version of the story.\n",
394 | "\n",
395 | "--------\n",
396 | "\n",
397 | "4/6 Did anything eventful happen?\n",
398 | "\n",
399 | "A meteor was headed towards Earth. Thundertooth and his family, Lumina, Echo, Sapphire, and Ignis, devised a plan using their unique abilities to divert or neutralize the threat. Lumina enhanced the city's energy systems, creating a force field. Echo amplified emergency signals, ensuring timely warnings and instructions for evacuation. Sapphire calmed the panicked masses during the evacuation, while Ignis created controlled bursts of heat to alter the meteor's trajectory. Together, they successfully diverted the catastrophic collision. The city hailed them as heroes, etching their legacy in the city's history.\n",
400 | "\n",
401 | "--------\n",
402 | "\n",
403 | "5/6 Who are the main characters?\n",
404 | "\n",
405 | "The main characters in this story are Thundertooth, Mayor Grace, Lumina, Echo, Sapphire, and Ignis.\n",
406 | "\n",
407 | "--------\n",
408 | "\n",
409 | "6/6 What do you think happens next in the story?\n",
410 | "\n",
411 | "After saving the city from the meteor, it's likely that Thundertooth and his family would continue to live and work within the futuristic city. They have proven their bravery and resourcefulness, and the citizens would undoubtedly want to honor and celebrate them as heroes.\n",
412 | "\n",
413 | "Mayor Grace, recognizing the importance of Thundertooth's presence in maintaining peace and stability, might seek ways to integrate him further into the city's life. Perhaps she could arrange for him to work alongside scientists or engineers, helping them develop new technologies or solve complex problems.\n",
414 | "\n",
415 | "As for Thundertooth himself, he may find a sense of purpose and belonging in this advanced society. He would continue to learn about the future while also sharing his knowledge of the past with those around him. This exchange of ideas could lead to even more innovative solutions and a deeper understanding of the unique challenges faced by both humans and dinosaurs.\n",
416 | "\n",
417 | "In the end, Thundertooth's story serves as a reminder that cooperation and understanding can bridge gaps between seemingly disparate worlds. Through their collective efforts, Thundertooth and his family have demonstrated that even the most extraordinary abilities can be harnessed for the greater good, bringing hope to a city that has faced its share of perilous moments.\n",
418 | "\n",
419 | "--------\n",
420 | "\n"
421 | ]
422 | }
423 | ],
424 | "source": [
425 | "# Print out the questions and answers\n",
426 | "\n",
427 | "for index, (question, answer) in enumerate(qa_pairs, start=1):\n",
428 | " print(f\"{index}/{len(qa_pairs)} {question}\\n\\n{answer}\\n\\n--------\\n\")"
429 | ]
430 | }
431 | ],
432 | "metadata": {
433 | "kernelspec": {
434 | "display_name": "LangChainRAGLinux",
435 | "language": "python",
436 | "name": "python3"
437 | },
438 | "language_info": {
439 | "codemirror_mode": {
440 | "name": "ipython",
441 | "version": 3
442 | },
443 | "file_extension": ".py",
444 | "mimetype": "text/x-python",
445 | "name": "python",
446 | "nbconvert_exporter": "python",
447 | "pygments_lexer": "ipython3",
448 | "version": "3.10.13"
449 | }
450 | },
451 | "nbformat": 4,
452 | "nbformat_minor": 2
453 | }
454 |
--------------------------------------------------------------------------------
/03-LangChain-RAG Chunk Rerank.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": []
7 | },
8 | {
9 | "cell_type": "markdown",
10 | "metadata": {},
11 | "source": [
12 | "### LangChain local LLM RAG example with self chunking and reranking (Cohere)\n",
13 | "### For LangSmith users (requires API key)\n",
14 | "Utilising LangChain v0.1\n",
15 | "\n",
16 | "This notebook demonstrates the use of LangChain for Retrieval Augmented Generation in Linux with Nvidia's CUDA. LLMs are run using Ollama.\n",
17 | "\n",
18 | "It introduces self-chunking (where we split each document into chunks ourselves) and then re-ranks the retrieved results before passing them to the LLM.\n",
19 | "\n",
20 | "Models tested:\n",
21 | "- Llama 2\n",
22 | "- Mistral 7B\n",
23 | "- Mixtral 8x7B\n",
24 | "- Neural Chat 7B\n",
25 | "- Orca 2\n",
26 | "- Phi-2\n",
27 | "- Solar 10.7B\n",
28 | "- Yi 34B\n",
29 | "\n",
30 | "\n",
31 | "See the [README.md](README.md) file for help on how to setup your environment to run this."
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 1,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "# Select your model here, put the name of the model in the ollama_model_name variable\n",
41 | "# Ensure you have pulled them or run them so Ollama has downloaded them and can load them (which it will do automatically)\n",
42 | "\n",
43 | "# Ollama installation (if you haven't done it yet): $ curl https://ollama.ai/install.sh | sh\n",
44 | "# Models need to be running in Ollama for LangChain to use them, to test if it can be run: $ ollama run mistral:7b-instruct-q6_K\n",
45 | "\n",
46 | "ollama_model_name = \"mistral:7b-instruct-q6_K\"\n",
47 | "# \"llama2:7b-chat-q6_K\"\n",
48 | "# \"mistral:7b-instruct-q6_K\"\n",
49 | "# \"mixtral:8x7b-instruct-v0.1-q4_K_M\"\n",
50 | "# \"neural-chat:7b-v3.3-q6_K\"\n",
51 | "# \"orca2:13b-q5_K_S\"\n",
52 | "# \"phi\" or try \"phi:chat\"\n",
53 | "# \"solar:10.7b-instruct-v1-q5_K_M\"\n",
54 | "# Can't run \"yi:34b-chat-q3_K_M\" or \"yi:34b-chat-q4_K_M\" - never stopped with inference"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 2,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "# Our LangSmith API key is stored in apikeys.py\n",
64 | "# Store your LangSmith key in a variable called LangSmith_API\n",
65 | "\n",
66 | "from apikeys import LangSmith_API\n",
67 | "import os\n",
68 | "\n",
69 | "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
70 | "os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
71 | "os.environ[\"LANGCHAIN_API_KEY\"] = LangSmith_API\n",
72 | "\n",
73 | "# Project Name\n",
74 | "os.environ[\"LANGCHAIN_PROJECT\"] = \"LangChain RAG Linux Chunking\""
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 3,
80 | "metadata": {},
81 | "outputs": [],
82 | "source": [
83 | "# Load the LLM with Ollama using its default temperature (the low-temperature setting, temperature=0.1, is currently commented out below)\n",
84 | "\n",
85 | "from langchain_community.llms import Ollama\n",
86 | "llm = Ollama(model=ollama_model_name) #, temperature=0.1)"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 4,
92 | "metadata": {},
93 | "outputs": [
94 | {
95 | "data": {
96 | "text/plain": [
97 | "'The sky appears blue because of a phenomenon called Rayleigh scattering. Sunlight consists of all colors of the visible spectrum, but each color has a different wavelength. Blue light has a shorter wavelength than other colors, so it scatters more easily when it hits the atmosphere. As sunlight travels through the atmosphere, the blue light is scattered in all directions, making the sky appear blue during the daytime.'"
98 | ]
99 | },
100 | "execution_count": 4,
101 | "metadata": {},
102 | "output_type": "execute_result"
103 | }
104 | ],
105 | "source": [
106 | "# Quick test of the LLM with a general question before we start doing RAG\n",
107 | "llm.invoke(\"why is the sky blue?\")\n",
108 | "\n",
109 | "# Note: This line would not complete for Yi-34B - need to work out why inferencing never finishes (works fine when running with the same prompt in ollama.)"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 5,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "# Embeddings will be based on the Ollama loaded model\n",
119 | "\n",
120 | "from langchain_community.embeddings import OllamaEmbeddings\n",
121 | "\n",
122 | "embeddings = OllamaEmbeddings(model=ollama_model_name)"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 6,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "from langchain_community.document_loaders import DirectoryLoader\n",
132 | "\n",
133 | "loader = DirectoryLoader('Data', glob=\"**/*.docx\")"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 7,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": [
142 | "# Load documents\n",
143 | "\n",
144 | "docs = loader.load()"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 8,
150 | "metadata": {},
151 | "outputs": [
152 | {
153 | "data": {
154 | "text/plain": [
155 | "[Document(page_content='Thundertooth\\n\\nOne fateful day, as the citizens of the futuristic city went about their daily lives, a collective gasp echoed through the streets as a massive meteor hurtled towards Earth. Panic spread like wildfire as people looked to the sky in horror, realizing the impending catastrophe. The city\\'s advanced technology detected the threat, and an emergency broadcast echoed through the streets, urging everyone to seek shelter.\\n\\nThundertooth, ever the protector of his newfound home, wasted no time. With a determined gleam in his eyes, he gathered his family and hurried to the city\\'s command center, where Mayor Grace and the leading scientists were coordinating the evacuation efforts.\\n\\nThe mayor, recognizing Thundertooth\\'s intelligence and resourcefulness, approached him. \"Thundertooth, we need a plan to divert or neutralize the meteor. Our technology can only do so much, but with your unique abilities, perhaps we can find a solution.\"\\n\\nThundertooth nodded, understanding the gravity of the situation. He gathered Lumina, Echo, Sapphire, and Ignis, explaining the urgency and the role each of them would play in the impending crisis.\\n\\n1. **Lumina**: Utilizing her deep understanding of technology, Lumina would enhance the city\\'s energy systems to generate a powerful force field, providing a protective barrier against the meteor\\'s impact.\\n\\n2. **Echo**: With his extraordinary mimicry abilities, Echo would amplify the emergency signals, ensuring that every citizen received timely warnings and instructions for evacuation.\\n\\n3. **Sapphire**: Harnessing her calming and healing powers, Sapphire would assist in calming the panicked masses, ensuring an orderly and efficient evacuation.\\n\\n4. 
**Ignis**: Drawing upon his fiery talents, Ignis would create controlled bursts of heat, attempting to alter the meteor\\'s trajectory and reduce its destructive force.\\n\\nAs the citizens evacuated to designated shelters, the Thundertooth family sprang into action. Lumina worked tirelessly to strengthen the city\\'s energy systems, Echo echoed evacuation orders through the city\\'s speakers, Sapphire offered comfort to those in distress, and Ignis unleashed controlled bursts of flames towards the approaching meteor.\\n\\nThundertooth stood at the forefront, using his mighty roar to coordinate and inspire the efforts of the city\\'s inhabitants. The ground trembled as the meteor drew closer, but the Thundertooth family\\'s coordinated efforts began to take effect. Lumina\\'s force field shimmered to life, deflecting the meteor\\'s deadly path. Echo\\'s amplified warnings reached every corner of the city, ensuring that no one was left behind.\\n\\nAs Ignis\\'s controlled bursts of flames interacted with the meteor, it began to change course. The combined efforts of the Thundertooth family, guided by their unique talents, diverted the catastrophic collision. The meteor, once destined for destruction, now harmlessly sailed past the Earth, leaving the city and its inhabitants unscathed.\\n\\nThe citizens, emerging from their shelters, erupted into cheers of gratitude. Mayor Grace approached Thundertooth, expressing her heartfelt thanks for the family\\'s heroic efforts. The Thundertooth family, tired but triumphant, basked in the relief of having saved their beloved city from imminent disaster.\\n\\nIn the wake of the crisis, the citizens of the futuristic city hailed Thundertooth and his family as true heroes. The toy factory that once brought joy to children now became a symbol of resilience and unity. 
The Thundertooth family\\'s legacy was forever etched in the city\\'s history, a testament to the power of cooperation and the extraordinary capabilities that could emerge when dinosaurs and humans worked together for the greater good.', metadata={'source': 'Data/Thundertooth Part 3.docx'}),\n",
156 | " Document(page_content=\"Thundertooth\\n\\nEmbraced by the futuristic city and its inhabitants, Thundertooth found a sense of purpose beyond merely satisfying his hunger. Inspired by the advanced technology surrounding him, he decided to channel his creativity into something extraordinary. With the help of the city's brilliant engineers, Thundertooth founded a one-of-a-kind toy factory that produced amazing widgets – magical, interactive toys that captivated the hearts of both children and adults alike.\\n\\nThundertooth's toy factory became a sensation, and its creations were highly sought after. The widgets incorporated cutting-edge holographic displays, levitation technology, and even the ability to change shapes and colors with a mere thought. Children across the city rejoiced as they played with these incredible toys that seemed to bring their wildest fantasies to life.\\n\\nAs the years passed, Thundertooth's life took a heartwarming turn. He met a kind and intelligent dinosaur named Seraphina, and together they started a family. Thundertooth and Seraphina were blessed with four children, each with unique characteristics that mirrored the diversity of their modern world.\\n\\nLumina: The eldest of Thundertooth's children, Lumina inherited her mother's intelligence and her father's sense of wonder. With sparkling scales that emitted a soft glow, Lumina had the ability to generate light at will. She became fascinated with technology, often spending hours tinkering with gadgets and inventing new ways to enhance the widgets produced in the family's factory.\\n\\nEcho: The second-born, Echo, had a gift for mimicry. He could perfectly replicate any sound or voice he heard, providing entertainment to the entire city. His playful nature and ability to bring joy to those around him made him a favorite among the neighborhood children.\\n\\nSapphire: Sapphire, the third sibling, had scales that shimmered like precious gems. 
She possessed a unique talent for calming and healing, a trait she inherited from both her parents. Whenever someone in the city felt stressed or unwell, Sapphire would extend her gentle touch, bringing comfort and tranquility.\\n\\nIgnis: The youngest of the family, Ignis, had fiery red scales that hinted at his exceptional ability – the power to control small flames. While initially seen as a potential hazard, Ignis channeled his fiery talents into creating mesmerizing light shows, becoming the city's favorite entertainer during festivals and celebrations.\\n\\n\\n\\nThundertooth and Seraphina reveled in the joy of parenthood, watching their children grow and flourish in the futuristic landscape they now called home. The family became an integral part of the city's fabric, not only through the widgets produced in their factory but also through the positive impact each member had on the community.\\n\\nThe toy factory became a symbol of innovation and unity, bringing together dinosaurs and humans in a shared appreciation for creativity and wonder. Thundertooth's legacy extended beyond his time-traveling adventure, leaving an indelible mark on the city and its inhabitants, reminding them that sometimes, the most magical things could emerge from the most unexpected places.\", metadata={'source': 'Data/Thundertooth Part 2.docx'}),\n",
157 | " Document(page_content='Thundertooth\\n\\nOnce upon a time, in a prehistoric land filled with dense forests and roaring rivers, there lived a dinosaur named Thundertooth. Thundertooth was no ordinary dinosaur; he possessed the rare ability to speak, a talent that set him apart from his ancient companions. One fateful day, as Thundertooth was basking in the warmth of the sun, a mysterious portal opened before him, and he found himself hurtling through time and space.\\n\\nAs the dazzling vortex subsided, Thundertooth opened his eyes to a world unlike anything he had ever seen. The air was filled with the hum of engines, and towering structures reached towards the sky. Thundertooth\\'s surroundings were a blend of metal and glass, and he quickly realized that he had been transported to a future era.\\n\\nThe once mighty dinosaur now stood bewildered in the midst of a bustling city. Above him, sleek flying cars zipped through the air, leaving trails of neon lights in their wake. Thundertooth felt like an ancient relic in this technological jungle, lost and out of place. With each step, he marveled at the skyscrapers that loomed overhead, their surfaces reflecting the myriad lights of the city.\\n\\nHowever, as night fell, Thundertooth\\'s stomach growled loudly. He realized that he was hungry, and the once vibrant city now seemed like a daunting maze of unfamiliar smells and sights. He wandered through the streets, his massive form drawing astonished stares from the futuristic inhabitants.\\n\\nThundertooth faced a dilemma – he was hungry, but he couldn\\'t bring himself to feast on the humans who scurried around like ants. As his hunger grew, he stumbled upon a park, an oasis of greenery amidst the concrete and steel. The park was adorned with holographic flowers that emitted a gentle glow, creating an ethereal atmosphere.\\n\\nWhile Thundertooth marveled at the beauty of the park, the mayor of the city happened to be passing by. 
Mayor Eleanor Grace, a charismatic and forward-thinking leader, was immediately intrigued by the sight of the talking dinosaur. She approached Thundertooth with a mix of curiosity and caution.\\n\\n\"Hello there, majestic creature. What brings you to our time?\" Mayor Grace inquired, her voice calm and reassuring.\\n\\nThundertooth, though initially startled, found comfort in the mayor\\'s soothing tone. In broken sentences, he explained his journey through time, the strange portal, and his hunger dilemma. \\n\\n\\n\\nMayor Grace listened intently, her eyes widening with amazement at the tale of the prehistoric dinosaur navigating the future.\\n\\nRealizing the dinosaur\\'s predicament, Mayor Grace extended an invitation. \"You are welcome in our city, Thundertooth. We can find a way to provide for you without causing harm to anyone. Let us work together to find a solution.\"\\n\\nGrateful for the mayor\\'s hospitality, Thundertooth followed her through the city. Together, they explored the futuristic marketplaces and innovative food labs, eventually discovering a sustainable solution that satisfied the dinosaur\\'s hunger without compromising the well-being of the city\\'s inhabitants.\\n\\nAs the news of Thundertooth\\'s arrival spread, the city embraced the talking dinosaur as a symbol of unity between the past and the future. Thundertooth found a new home in the city\\'s park, where holographic flowers bloomed, and the citizens marveled at the beauty of coexistence across time. And so, in this extraordinary city of flying cars and advanced technology, Thundertooth became a beloved figure, a living bridge between eras, teaching the people that understanding and cooperation could overcome even the greatest challenges.', metadata={'source': 'Data/Thundertooth Part 1.docx'}),\n",
158 | " Document(page_content=\"Thundertooth\\n\\nAs the city celebrated the Thundertooth family's heroic actions, there was one among them harboring a darker agenda. Ignis, the youngest sibling with fiery scales, had secretly grown resentful of the city that once hailed his family as heroes. The praise and adoration showered upon his siblings had ignited a spark of jealousy within him, fueling a desire for power and recognition that twisted his once-playful nature into something more sinister.\\n\\nIgnis withdrew from the family, isolating himself in the shadows. Unbeknownst to his siblings and parents, he delved into forbidden knowledge, seeking ways to amplify his fiery abilities. Ignis became obsessed with the idea of asserting dominance over the city that had once applauded his family's feats.\\n\\nAs the Thundertooth family enjoyed the renewed peace and admiration of the citizens, Ignis hatched a malevolent plan. He began manipulating the city's energy systems, intending to unleash a destructive force that would bring chaos and devastation. Ignis's once-playful flames now burned with a sinister intensity, reflecting the darkness that had taken root within him.\\n\\nLumina, Echo, and Sapphire grew concerned as they noticed Ignis's increasingly erratic behavior. They attempted to reason with him, pleading for him to abandon his destructive ambitions and embrace the family's legacy of unity. However, Ignis, consumed by his thirst for power, rejected their pleas and retreated further into the shadows.\\n\\nOne ominous night, Ignis initiated his nefarious plan. He unleashed a torrent of uncontrollable flames upon the city, wreaking havoc on its once-gleaming streets. The citizens, who had once looked to the Thundertooth family as saviors, now found themselves facing a new and terrifying threat from one of their own.\\n\\nThe Thundertooth siblings, realizing the danger their brother posed, sprang into action. 
Lumina fortified the city's defenses, Echo rallied the citizens to safety, and Sapphire extended her calming touch to soothe the panic that ensued. The once-united family now found themselves on opposite sides of a conflict that threatened to tear apart the very fabric of their existence.\\n\\nThe city plunged into chaos as Ignis continued his rampage, determined to prove himself as the dominant force within the family. The Thundertooth siblings, fueled by love for their home and a desire to save the innocent, confronted Ignis in an epic battle that shook the city to its core.\\n\\nLumina's brilliant displays of light clashed with Ignis's fiery onslaught, creating a dazzling spectacle that painted the night sky. Echo's mimicry abilities were put to the test as he attempted to redirect the citizens away from danger, while Sapphire's healing touch worked tirelessly to mend the wounds caused by the destructive flames.\\n\\nThe battle raged on, each sibling fighting not only to protect the city but also to save Ignis from the darkness that had consumed him. The once-close bond that defined the Thundertooth family now hung in the balance, teetering on the edge of destruction.\\n\\nAs the clash reached its climax, the Thundertooth siblings, exhausted but resolute, managed to combine their unique talents in a final, desperate attempt to reach Ignis. In a blinding burst of light and energy, they enveloped their wayward brother, hoping to break the sinister hold that gripped him.\\n\\nIn that moment of intense unity, the darkness within Ignis faltered. The fiery tempest subsided, and he collapsed, weakened and defeated. The Thundertooth family, battered but intact, gathered around Ignis, determined to help him overcome the darkness that had threatened to consume him.\\n\\nAs the city slowly recovered from the chaos, the Thundertooth family faced the daunting task of rebuilding not only the physical damage but also the fractured bonds of trust within their own ranks. 
The once-prodigal son, Ignis, now humbled by the consequences of his actions, sought redemption and reconciliation with his family.\\n\\nThe Thundertooth family, having weathered the storm of internal strife, emerged stronger and more united than ever. The city, though scarred, witnessed the resilience of the family that had once saved it from disaster and now worked together to mend the wounds inflicted by one of their own. The tale of the Thundertooth family, once a story of heroism, now became a saga of redemption and the enduring power of familial bonds.\", metadata={'source': 'Data/Thundertooth Part 4.docx'})]"
159 | ]
160 | },
161 | "execution_count": 8,
162 | "metadata": {},
163 | "output_type": "execute_result"
164 | }
165 | ],
166 | "source": [
167 | "docs"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 9,
173 | "metadata": {},
174 | "outputs": [
175 | {
176 | "data": {
177 | "text/plain": [
178 | "4"
179 | ]
180 | },
181 | "execution_count": 9,
182 | "metadata": {},
183 | "output_type": "execute_result"
184 | }
185 | ],
186 | "source": [
187 | "# Ensure we have the right number of Word documents loaded\n",
188 | "\n",
189 | "len(docs)"
190 | ]
191 | },
192 | {
193 | "cell_type": "markdown",
194 | "metadata": {},
195 | "source": [
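196 | "We create a function that splits text into paragraphs but keeps numbered points, dash-bulleted points and labelled items (a first word ending in a colon, such as `Lumina:`) together with the paragraph before them. This suits these documents because they contain numbered and labelled points - the splitting rules would need to be adapted for documents with a different structure."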
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": 10,
202 | "metadata": {},
203 | "outputs": [],
204 | "source": [
205 | "import re\n",
206 | "\n",
207 | "# Define the regular expression pattern for splitting paragraphs\n",
208 | "para_split_pattern = re.compile(r'\\n\\n')\n",
209 | "\n",
210 | "# Splits a document's text into paragraphs; a paragraph that starts with a digit, a dash, or a first word ending in ':' is appended to the paragraph before it.\n",
211 | "def split_text_into_paragraphs(text):\n",
212 | "\n",
213 | "\n",
214 | " # Use the pattern to split the text into paragraphs\n",
215 | " paragraphs = para_split_pattern.split(text)\n",
216 | "\n",
217 | " # Combine paragraphs that should not be split\n",
218 | " combined_paragraphs = [paragraphs[0]]\n",
219 | "\n",
220 | " for p in paragraphs[1:]:\n",
221 | " # Check if the paragraph starts with a number or a dash and, if so, concatenate it to the previous paragraph so we keep them all in one chunk\n",
222 | "\n",
223 | " # Strip out any leading new lines\n",
224 | " p = p.lstrip('\\n')\n",
225 | "\n",
226 | " if p and (p[0].isdigit() or p[0] == '-' or p.split()[0].endswith(':')):\n",
227 | " combined_paragraphs[-1] += '\\n\\n\\n' + p\n",
228 | " else:\n",
229 | " combined_paragraphs.append(p)\n",
230 | "\n",
231 | " # Remove empty strings from the result\n",
232 | " combined_paragraphs = [p.strip() for p in combined_paragraphs if p.strip()]\n",
233 | "\n",
234 | " return combined_paragraphs"
235 | ]
236 | },
237 | {
238 | "cell_type": "markdown",
239 | "metadata": {},
240 | "source": [
241 | "Create nodes from the paragraphs that we've carefully split up, counting the tokens in each paragraph so we know what token lengths we're working with.\n",
242 | "\n",
243 | "We can use the LLM object to count the tokens with get_num_tokens."
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": 23,
249 | "metadata": {},
250 | "outputs": [
251 | {
252 | "name": "stdout",
253 | "output_type": "stream",
254 | "text": [
255 | "Document Data/Thundertooth Part 3.docx has 10 paragraphs\n",
256 | "Paragraph tokens: 3\n",
257 | "Paragraph tokens: 75\n",
258 | "Paragraph tokens: 51\n",
259 | "Paragraph tokens: 54\n",
260 | "Paragraph tokens: 193\n",
261 | "Paragraph tokens: 60\n",
262 | "Paragraph tokens: 86\n",
263 | "Paragraph tokens: 65\n",
264 | "Paragraph tokens: 57\n",
265 | "Paragraph tokens: 83\n",
266 | "Document Data/Thundertooth Part 2.docx has 6 paragraphs\n",
267 | "Paragraph tokens: 3\n",
268 | "Paragraph tokens: 88\n",
269 | "Paragraph tokens: 70\n",
270 | "Paragraph tokens: 327\n",
271 | "Paragraph tokens: 64\n",
272 | "Paragraph tokens: 69\n",
273 | "Document Data/Thundertooth Part 1.docx has 13 paragraphs\n",
274 | "Paragraph tokens: 3\n",
275 | "Paragraph tokens: 89\n",
276 | "Paragraph tokens: 68\n",
277 | "Paragraph tokens: 83\n",
278 | "Paragraph tokens: 56\n",
279 | "Paragraph tokens: 73\n",
280 | "Paragraph tokens: 60\n",
281 | "Paragraph tokens: 24\n",
282 | "Paragraph tokens: 37\n",
283 | "Paragraph tokens: 23\n",
284 | "Paragraph tokens: 49\n",
285 | "Paragraph tokens: 54\n",
286 | "Paragraph tokens: 105\n",
287 | "Document Data/Thundertooth Part 4.docx has 14 paragraphs\n",
288 | "Paragraph tokens: 3\n",
289 | "Paragraph tokens: 89\n",
290 | "Paragraph tokens: 61\n",
291 | "Paragraph tokens: 71\n",
292 | "Paragraph tokens: 66\n",
293 | "Paragraph tokens: 67\n",
294 | "Paragraph tokens: 72\n",
295 | "Paragraph tokens: 59\n",
296 | "Paragraph tokens: 64\n",
297 | "Paragraph tokens: 55\n",
298 | "Paragraph tokens: 62\n",
299 | "Paragraph tokens: 59\n",
300 | "Paragraph tokens: 66\n",
301 | "Paragraph tokens: 87\n",
302 | "\n",
303 | "** The maximum paragraph tokens is 327 **\n",
304 | "\n",
305 | "** The average paragraph's token count is 68 **\n",
306 | "\n",
307 | "** Created 43 nodes **\n"
308 | ]
309 | }
310 | ],
311 | "source": [
312 | "from langchain.docstore.document import Document\n",
313 | "\n",
314 | "paragraph_separator = \"\\n\\n\\n\"\n",
315 | "\n",
316 | "# Stores the maximum length of a paragraph, in tokens\n",
317 | "max_paragraph_tokens = 0\n",
318 | "\n",
319 | "# Total tokens, used to determine average\n",
320 | "total_paragraph_tokens = 0\n",
321 | "\n",
322 | "# Nodes\n",
323 | "paragraph_nodes = []\n",
324 | "\n",
325 | "# Loop through the documents, splitting each into paragraphs and checking the number of tokens per paragraph\n",
326 | "for document in docs:\n",
327 | "\n",
328 | " paragraph_token_lens = []\n",
329 | " paragraphs = split_text_into_paragraphs(document.page_content)\n",
330 | " print(f\"Document {document.metadata['source']} has {len(paragraphs)} paragraphs\")\n",
331 | " for paragraph in paragraphs:\n",
332 | "\n",
333 | " # Count the tokens in this paragraph\n",
334 | " token_count = llm.get_num_tokens(paragraph)\n",
335 | " paragraph_token_lens.append(token_count)\n",
336 | " print(f\"Paragraph tokens: {token_count}\")\n",
337 | "\n",
338 | " if token_count > max_paragraph_tokens:\n",
339 | " max_paragraph_tokens = token_count\n",
340 | "\n",
341 | " total_paragraph_tokens = total_paragraph_tokens + token_count\n",
342 | "\n",
343 | " # Create and add the node from the paragraph\n",
344 | " # include metadata we can use for citations\n",
345 | " node = Document(page_content=paragraph) # Copy the metadata from the Word document into here\n",
346 | " node.metadata[\"source\"] = document.metadata[\"source\"]\n",
347 | " node.metadata[\"token_count\"] = token_count\n",
348 | " paragraph_nodes.append(node)\n",
349 | "\n",
350 | " # print(paragraph_token_lens)\n",
351 | "\n",
352 | "print(f\"\\n** The maximum paragraph tokens is {max_paragraph_tokens} **\")\n",
353 | "\n",
354 | "average_paragraph_tokens = int(total_paragraph_tokens / len(paragraph_nodes))\n",
355 | "print(f\"\\n** The average paragraph's token count is {average_paragraph_tokens} **\")\n",
356 | "\n",
357 | "print(f\"\\n** Created {len(paragraph_nodes)} nodes **\")\n"
358 | ]
359 | },
360 | {
361 | "cell_type": "markdown",
362 | "metadata": {},
363 | "source": [
364 | "Let's look at the split data - it is now neatly in paragraphs, and the bullet points and lists are kept with their respective paragraphs."
365 | ]
366 | },
367 | {
368 | "cell_type": "code",
369 | "execution_count": 24,
370 | "metadata": {},
371 | "outputs": [
372 | {
373 | "data": {
374 | "text/plain": [
375 | "[Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 3}),\n",
376 | " Document(page_content=\"One fateful day, as the citizens of the futuristic city went about their daily lives, a collective gasp echoed through the streets as a massive meteor hurtled towards Earth. Panic spread like wildfire as people looked to the sky in horror, realizing the impending catastrophe. The city's advanced technology detected the threat, and an emergency broadcast echoed through the streets, urging everyone to seek shelter.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 75}),\n",
377 | " Document(page_content=\"Thundertooth, ever the protector of his newfound home, wasted no time. With a determined gleam in his eyes, he gathered his family and hurried to the city's command center, where Mayor Grace and the leading scientists were coordinating the evacuation efforts.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 51}),\n",
378 | " Document(page_content='The mayor, recognizing Thundertooth\\'s intelligence and resourcefulness, approached him. \"Thundertooth, we need a plan to divert or neutralize the meteor. Our technology can only do so much, but with your unique abilities, perhaps we can find a solution.\"', metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 54}),\n",
379 | " Document(page_content=\"Thundertooth nodded, understanding the gravity of the situation. He gathered Lumina, Echo, Sapphire, and Ignis, explaining the urgency and the role each of them would play in the impending crisis.\\n\\n\\n1. **Lumina**: Utilizing her deep understanding of technology, Lumina would enhance the city's energy systems to generate a powerful force field, providing a protective barrier against the meteor's impact.\\n\\n\\n2. **Echo**: With his extraordinary mimicry abilities, Echo would amplify the emergency signals, ensuring that every citizen received timely warnings and instructions for evacuation.\\n\\n\\n3. **Sapphire**: Harnessing her calming and healing powers, Sapphire would assist in calming the panicked masses, ensuring an orderly and efficient evacuation.\\n\\n\\n4. **Ignis**: Drawing upon his fiery talents, Ignis would create controlled bursts of heat, attempting to alter the meteor's trajectory and reduce its destructive force.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 193}),\n",
380 | " Document(page_content=\"As the citizens evacuated to designated shelters, the Thundertooth family sprang into action. Lumina worked tirelessly to strengthen the city's energy systems, Echo echoed evacuation orders through the city's speakers, Sapphire offered comfort to those in distress, and Ignis unleashed controlled bursts of flames towards the approaching meteor.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 60}),\n",
381 | " Document(page_content=\"Thundertooth stood at the forefront, using his mighty roar to coordinate and inspire the efforts of the city's inhabitants. The ground trembled as the meteor drew closer, but the Thundertooth family's coordinated efforts began to take effect. Lumina's force field shimmered to life, deflecting the meteor's deadly path. Echo's amplified warnings reached every corner of the city, ensuring that no one was left behind.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 86}),\n",
382 | " Document(page_content=\"As Ignis's controlled bursts of flames interacted with the meteor, it began to change course. The combined efforts of the Thundertooth family, guided by their unique talents, diverted the catastrophic collision. The meteor, once destined for destruction, now harmlessly sailed past the Earth, leaving the city and its inhabitants unscathed.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 65}),\n",
383 | " Document(page_content=\"The citizens, emerging from their shelters, erupted into cheers of gratitude. Mayor Grace approached Thundertooth, expressing her heartfelt thanks for the family's heroic efforts. The Thundertooth family, tired but triumphant, basked in the relief of having saved their beloved city from imminent disaster.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 57}),\n",
384 | " Document(page_content=\"In the wake of the crisis, the citizens of the futuristic city hailed Thundertooth and his family as true heroes. The toy factory that once brought joy to children now became a symbol of resilience and unity. The Thundertooth family's legacy was forever etched in the city's history, a testament to the power of cooperation and the extraordinary capabilities that could emerge when dinosaurs and humans worked together for the greater good.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 83}),\n",
385 | " Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 3}),\n",
386 | " Document(page_content=\"Embraced by the futuristic city and its inhabitants, Thundertooth found a sense of purpose beyond merely satisfying his hunger. Inspired by the advanced technology surrounding him, he decided to channel his creativity into something extraordinary. With the help of the city's brilliant engineers, Thundertooth founded a one-of-a-kind toy factory that produced amazing widgets – magical, interactive toys that captivated the hearts of both children and adults alike.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 88}),\n",
387 | " Document(page_content=\"Thundertooth's toy factory became a sensation, and its creations were highly sought after. The widgets incorporated cutting-edge holographic displays, levitation technology, and even the ability to change shapes and colors with a mere thought. Children across the city rejoiced as they played with these incredible toys that seemed to bring their wildest fantasies to life.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 70}),\n",
388 | " Document(page_content=\"As the years passed, Thundertooth's life took a heartwarming turn. He met a kind and intelligent dinosaur named Seraphina, and together they started a family. Thundertooth and Seraphina were blessed with four children, each with unique characteristics that mirrored the diversity of their modern world.\\n\\n\\nLumina: The eldest of Thundertooth's children, Lumina inherited her mother's intelligence and her father's sense of wonder. With sparkling scales that emitted a soft glow, Lumina had the ability to generate light at will. She became fascinated with technology, often spending hours tinkering with gadgets and inventing new ways to enhance the widgets produced in the family's factory.\\n\\n\\nEcho: The second-born, Echo, had a gift for mimicry. He could perfectly replicate any sound or voice he heard, providing entertainment to the entire city. His playful nature and ability to bring joy to those around him made him a favorite among the neighborhood children.\\n\\n\\nSapphire: Sapphire, the third sibling, had scales that shimmered like precious gems. She possessed a unique talent for calming and healing, a trait she inherited from both her parents. Whenever someone in the city felt stressed or unwell, Sapphire would extend her gentle touch, bringing comfort and tranquility.\\n\\n\\nIgnis: The youngest of the family, Ignis, had fiery red scales that hinted at his exceptional ability – the power to control small flames. While initially seen as a potential hazard, Ignis channeled his fiery talents into creating mesmerizing light shows, becoming the city's favorite entertainer during festivals and celebrations.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 327}),\n",
389 | " Document(page_content=\"Thundertooth and Seraphina reveled in the joy of parenthood, watching their children grow and flourish in the futuristic landscape they now called home. The family became an integral part of the city's fabric, not only through the widgets produced in their factory but also through the positive impact each member had on the community.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 64}),\n",
390 | " Document(page_content=\"The toy factory became a symbol of innovation and unity, bringing together dinosaurs and humans in a shared appreciation for creativity and wonder. Thundertooth's legacy extended beyond his time-traveling adventure, leaving an indelible mark on the city and its inhabitants, reminding them that sometimes, the most magical things could emerge from the most unexpected places.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 69}),\n",
391 | " Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 3}),\n",
392 | " Document(page_content='Once upon a time, in a prehistoric land filled with dense forests and roaring rivers, there lived a dinosaur named Thundertooth. Thundertooth was no ordinary dinosaur; he possessed the rare ability to speak, a talent that set him apart from his ancient companions. One fateful day, as Thundertooth was basking in the warmth of the sun, a mysterious portal opened before him, and he found himself hurtling through time and space.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 89}),\n",
393 | " Document(page_content=\"As the dazzling vortex subsided, Thundertooth opened his eyes to a world unlike anything he had ever seen. The air was filled with the hum of engines, and towering structures reached towards the sky. Thundertooth's surroundings were a blend of metal and glass, and he quickly realized that he had been transported to a future era.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 68}),\n",
394 | " Document(page_content='The once mighty dinosaur now stood bewildered in the midst of a bustling city. Above him, sleek flying cars zipped through the air, leaving trails of neon lights in their wake. Thundertooth felt like an ancient relic in this technological jungle, lost and out of place. With each step, he marveled at the skyscrapers that loomed overhead, their surfaces reflecting the myriad lights of the city.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 83}),\n",
395 | " Document(page_content=\"However, as night fell, Thundertooth's stomach growled loudly. He realized that he was hungry, and the once vibrant city now seemed like a daunting maze of unfamiliar smells and sights. He wandered through the streets, his massive form drawing astonished stares from the futuristic inhabitants.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 56}),\n",
396 | " Document(page_content=\"Thundertooth faced a dilemma – he was hungry, but he couldn't bring himself to feast on the humans who scurried around like ants. As his hunger grew, he stumbled upon a park, an oasis of greenery amidst the concrete and steel. The park was adorned with holographic flowers that emitted a gentle glow, creating an ethereal atmosphere.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 73}),\n",
397 | " Document(page_content='While Thundertooth marveled at the beauty of the park, the mayor of the city happened to be passing by. Mayor Eleanor Grace, a charismatic and forward-thinking leader, was immediately intrigued by the sight of the talking dinosaur. She approached Thundertooth with a mix of curiosity and caution.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 60}),\n",
398 | " Document(page_content='\"Hello there, majestic creature. What brings you to our time?\" Mayor Grace inquired, her voice calm and reassuring.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 24}),\n",
399 | " Document(page_content=\"Thundertooth, though initially startled, found comfort in the mayor's soothing tone. In broken sentences, he explained his journey through time, the strange portal, and his hunger dilemma.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 37}),\n",
400 | " Document(page_content='Mayor Grace listened intently, her eyes widening with amazement at the tale of the prehistoric dinosaur navigating the future.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 23}),\n",
401 | " Document(page_content='Realizing the dinosaur\\'s predicament, Mayor Grace extended an invitation. \"You are welcome in our city, Thundertooth. We can find a way to provide for you without causing harm to anyone. Let us work together to find a solution.\"', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 49}),\n",
402 | " Document(page_content=\"Grateful for the mayor's hospitality, Thundertooth followed her through the city. Together, they explored the futuristic marketplaces and innovative food labs, eventually discovering a sustainable solution that satisfied the dinosaur's hunger without compromising the well-being of the city's inhabitants.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 54}),\n",
403 | " Document(page_content=\"As the news of Thundertooth's arrival spread, the city embraced the talking dinosaur as a symbol of unity between the past and the future. Thundertooth found a new home in the city's park, where holographic flowers bloomed, and the citizens marveled at the beauty of coexistence across time. And so, in this extraordinary city of flying cars and advanced technology, Thundertooth became a beloved figure, a living bridge between eras, teaching the people that understanding and cooperation could overcome even the greatest challenges.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 105}),\n",
404 | " Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 3}),\n",
405 | " Document(page_content=\"As the city celebrated the Thundertooth family's heroic actions, there was one among them harboring a darker agenda. Ignis, the youngest sibling with fiery scales, had secretly grown resentful of the city that once hailed his family as heroes. The praise and adoration showered upon his siblings had ignited a spark of jealousy within him, fueling a desire for power and recognition that twisted his once-playful nature into something more sinister.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 89}),\n",
406 | " Document(page_content=\"Ignis withdrew from the family, isolating himself in the shadows. Unbeknownst to his siblings and parents, he delved into forbidden knowledge, seeking ways to amplify his fiery abilities. Ignis became obsessed with the idea of asserting dominance over the city that had once applauded his family's feats.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 61}),\n",
407 | " Document(page_content=\"As the Thundertooth family enjoyed the renewed peace and admiration of the citizens, Ignis hatched a malevolent plan. He began manipulating the city's energy systems, intending to unleash a destructive force that would bring chaos and devastation. Ignis's once-playful flames now burned with a sinister intensity, reflecting the darkness that had taken root within him.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 71}),\n",
408 | " Document(page_content=\"Lumina, Echo, and Sapphire grew concerned as they noticed Ignis's increasingly erratic behavior. They attempted to reason with him, pleading for him to abandon his destructive ambitions and embrace the family's legacy of unity. However, Ignis, consumed by his thirst for power, rejected their pleas and retreated further into the shadows.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 66}),\n",
409 | " Document(page_content='One ominous night, Ignis initiated his nefarious plan. He unleashed a torrent of uncontrollable flames upon the city, wreaking havoc on its once-gleaming streets. The citizens, who had once looked to the Thundertooth family as saviors, now found themselves facing a new and terrifying threat from one of their own.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 67}),\n",
410 | " Document(page_content=\"The Thundertooth siblings, realizing the danger their brother posed, sprang into action. Lumina fortified the city's defenses, Echo rallied the citizens to safety, and Sapphire extended her calming touch to soothe the panic that ensued. The once-united family now found themselves on opposite sides of a conflict that threatened to tear apart the very fabric of their existence.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 72}),\n",
411 | " Document(page_content='The city plunged into chaos as Ignis continued his rampage, determined to prove himself as the dominant force within the family. The Thundertooth siblings, fueled by love for their home and a desire to save the innocent, confronted Ignis in an epic battle that shook the city to its core.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 59}),\n",
412 | " Document(page_content=\"Lumina's brilliant displays of light clashed with Ignis's fiery onslaught, creating a dazzling spectacle that painted the night sky. Echo's mimicry abilities were put to the test as he attempted to redirect the citizens away from danger, while Sapphire's healing touch worked tirelessly to mend the wounds caused by the destructive flames.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 64}),\n",
413 | " Document(page_content='The battle raged on, each sibling fighting not only to protect the city but also to save Ignis from the darkness that had consumed him. The once-close bond that defined the Thundertooth family now hung in the balance, teetering on the edge of destruction.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 55}),\n",
414 | " Document(page_content='As the clash reached its climax, the Thundertooth siblings, exhausted but resolute, managed to combine their unique talents in a final, desperate attempt to reach Ignis. In a blinding burst of light and energy, they enveloped their wayward brother, hoping to break the sinister hold that gripped him.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 62}),\n",
415 | " Document(page_content='In that moment of intense unity, the darkness within Ignis faltered. The fiery tempest subsided, and he collapsed, weakened and defeated. The Thundertooth family, battered but intact, gathered around Ignis, determined to help him overcome the darkness that had threatened to consume him.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 59}),\n",
416 | " Document(page_content='As the city slowly recovered from the chaos, the Thundertooth family faced the daunting task of rebuilding not only the physical damage but also the fractured bonds of trust within their own ranks. The once-prodigal son, Ignis, now humbled by the consequences of his actions, sought redemption and reconciliation with his family.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 66}),\n",
417 | " Document(page_content='The Thundertooth family, having weathered the storm of internal strife, emerged stronger and more united than ever. The city, though scarred, witnessed the resilience of the family that had once saved it from disaster and now worked together to mend the wounds inflicted by one of their own. The tale of the Thundertooth family, once a story of heroism, now became a saga of redemption and the enduring power of familial bonds.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 87})]"
418 | ]
419 | },
420 | "execution_count": 24,
421 | "metadata": {},
422 | "output_type": "execute_result"
423 | }
424 | ],
425 | "source": [
426 | "paragraph_nodes"
427 | ]
428 | },
429 | {
430 | "cell_type": "markdown",
431 | "metadata": {},
432 | "source": [
433 | "We no longer need to use the LangChain text splitter as we've already done the splitting ourselves, so the cell below is left commented out."
434 | ]
435 | },
436 | {
437 | "cell_type": "code",
438 | "execution_count": 25,
439 | "metadata": {},
440 | "outputs": [],
441 | "source": [
442 | "# Split them up into chunks using a Text Splitter\n",
443 | "\n",
444 | "# from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
445 | "\n",
446 | "# text_splitter = RecursiveCharacterTextSplitter()\n",
447 | "# documents = text_splitter.split_documents(docs)"
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "execution_count": 26,
453 | "metadata": {},
454 | "outputs": [],
455 | "source": [
456 | "# Create the embeddings from our split up chunks\n",
457 | "\n",
458 | "from langchain_community.vectorstores import FAISS\n",
459 | "\n",
460 | "vector = FAISS.from_documents(paragraph_nodes, embeddings)"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": 27,
466 | "metadata": {},
467 | "outputs": [],
468 | "source": [
469 | "# Prepare the prompt and then the chain\n",
470 | "\n",
471 | "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
472 | "from langchain_core.prompts import ChatPromptTemplate, PromptTemplate\n",
473 | "\n",
474 | "if ollama_model_name == \"phi\" or ollama_model_name == \"phi:chat\":\n",
475 | " # Phi-2 prompt is less flexible\n",
476 | " prompt_template = \"\"\"Instruct: With this context\\n\\n{context}\\n\\nQuestion: {input}\\nOutput:\"\"\"\n",
477 | "\n",
478 | "else:\n",
479 | " prompt_template = \"\"\"You are a story teller, answering questions in an excited, insightful, and empathetic way. Answer the question based only on the provided context:\n",
480 | "\n",
481 | " \n",
482 | " {context}\n",
483 | " \n",
484 | "\n",
485 | " Question: {question}\"\"\"\n",
486 | "\n",
487 | "prompt = PromptTemplate(\n",
488 | " template=prompt_template, \n",
489 | " input_variables=[\n",
490 | " 'context', \n",
491 | " 'question',\n",
492 | " ]\n",
493 | ")"
494 | ]
495 | },
496 | {
497 | "cell_type": "markdown",
498 | "metadata": {},
499 | "source": [
500 | "Now that we have broken down the documents into paragraph-sized chunks, we need to retrieve more paragraphs so the LLM has a decent amount of context to use. Without adding the \"search_kwargs\" parameter, the answers to the questions were worse. For example, when asked if they had any children, no relevant context was provided.\n",
501 | "\n",
502 | "Note: To get the context containing the children's names included (and then reranked to the top), I needed to set the number of retrieved chunks to 20. The section with the children's names was the 11th result from the retriever! This suggests that you will likely need to retrieve more chunks than you think you need."
503 | ]
504 | },
505 | {
506 | "cell_type": "code",
507 | "execution_count": 28,
508 | "metadata": {},
509 | "outputs": [],
510 | "source": [
511 | "# Create the retriever and set it to return a good amount of chunks\n",
512 | "\n",
513 | "from langchain.chains import create_retrieval_chain\n",
514 | "\n",
515 | "retriever = vector.as_retriever(search_kwargs={\"k\": 20})"
516 | ]
517 | },
518 | {
519 | "cell_type": "markdown",
520 | "metadata": {},
521 | "source": [
522 | "Let's implement the Cohere reranking, utilising our retriever (which is getting more results to work with now) and our LLM\n",
523 | "\n",
524 | "Note: You'll need a Cohere API key. A trial key is free for non-commercial purposes. I've stored it in apikeys.py as Cohere_API = \"your key in here\"\n",
525 | "\n",
526 | "https://cohere.com/"
527 | ]
528 | },
529 | {
530 | "cell_type": "code",
531 | "execution_count": 29,
532 | "metadata": {},
533 | "outputs": [],
534 | "source": [
535 | "from langchain.retrievers import ContextualCompressionRetriever\n",
536 | "from langchain.retrievers.document_compressors import CohereRerank\n",
537 | "\n",
538 | "from apikeys import Cohere_API\n",
539 | "\n",
540 | "# Create the retriever\n",
541 | "compressor = CohereRerank(cohere_api_key=Cohere_API, top_n=5)\n",
542 | "compression_retriever = ContextualCompressionRetriever(\n",
543 | " base_compressor=compressor,\n",
544 | " base_retriever=retriever\n",
545 | ")"
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": 30,
551 | "metadata": {},
552 | "outputs": [
553 | {
554 | "data": {
555 | "text/plain": [
556 | "[Document(page_content=\"As the years passed, Thundertooth's life took a heartwarming turn. He met a kind and intelligent dinosaur named Seraphina, and together they started a family. Thundertooth and Seraphina were blessed with four children, each with unique characteristics that mirrored the diversity of their modern world.\\n\\n\\nLumina: The eldest of Thundertooth's children, Lumina inherited her mother's intelligence and her father's sense of wonder. With sparkling scales that emitted a soft glow, Lumina had the ability to generate light at will. She became fascinated with technology, often spending hours tinkering with gadgets and inventing new ways to enhance the widgets produced in the family's factory.\\n\\n\\nEcho: The second-born, Echo, had a gift for mimicry. He could perfectly replicate any sound or voice he heard, providing entertainment to the entire city. His playful nature and ability to bring joy to those around him made him a favorite among the neighborhood children.\\n\\n\\nSapphire: Sapphire, the third sibling, had scales that shimmered like precious gems. She possessed a unique talent for calming and healing, a trait she inherited from both her parents. Whenever someone in the city felt stressed or unwell, Sapphire would extend her gentle touch, bringing comfort and tranquility.\\n\\n\\nIgnis: The youngest of the family, Ignis, had fiery red scales that hinted at his exceptional ability – the power to control small flames. While initially seen as a potential hazard, Ignis channeled his fiery talents into creating mesmerizing light shows, becoming the city's favorite entertainer during festivals and celebrations.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 327, 'relevance_score': 0.921216}),\n",
557 | " Document(page_content=\"Grateful for the mayor's hospitality, Thundertooth followed her through the city. Together, they explored the futuristic marketplaces and innovative food labs, eventually discovering a sustainable solution that satisfied the dinosaur's hunger without compromising the well-being of the city's inhabitants.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 54, 'relevance_score': 0.64779824}),\n",
558 | " Document(page_content=\"Thundertooth and Seraphina reveled in the joy of parenthood, watching their children grow and flourish in the futuristic landscape they now called home. The family became an integral part of the city's fabric, not only through the widgets produced in their factory but also through the positive impact each member had on the community.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 64, 'relevance_score': 0.59549254}),\n",
559 | " Document(page_content=\"Thundertooth, ever the protector of his newfound home, wasted no time. With a determined gleam in his eyes, he gathered his family and hurried to the city's command center, where Mayor Grace and the leading scientists were coordinating the evacuation efforts.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 51, 'relevance_score': 0.5758266}),\n",
560 | " Document(page_content=\"In the wake of the crisis, the citizens of the futuristic city hailed Thundertooth and his family as true heroes. The toy factory that once brought joy to children now became a symbol of resilience and unity. The Thundertooth family's legacy was forever etched in the city's history, a testament to the power of cooperation and the extraordinary capabilities that could emerge when dinosaurs and humans worked together for the greater good.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 83, 'relevance_score': 0.5574539})]"
561 | ]
562 | },
563 | "execution_count": 30,
564 | "metadata": {},
565 | "output_type": "execute_result"
566 | }
567 | ],
568 | "source": [
569 | "# Let's test that it includes the paragraph starting with \"As the years passed...\" when asked about their children.\n",
570 | "\n",
571 | "test_retrieval = compression_retriever.get_relevant_documents(\"Did they have any children? If so, what were their names?\")\n",
572 | "\n",
573 | "test_retrieval"
574 | ]
575 | },
576 | {
577 | "cell_type": "markdown",
578 | "metadata": {},
579 | "source": [
580 | "The above shows that, indeed, we are able to get that paragraph and it is the highest ranked.\n",
581 | "\n",
582 | "Importantly, if we had not brought enough chunks back with the retriever (referring to the vector store retriever) then we would not have had the right chunks to run through Cohere for reranking.\n",
583 | "\n",
584 | "So if this line:\n",
585 | "```\n",
586 | "retriever = vector.as_retriever(search_kwargs={\"k\": 20})\n",
587 | "```\n",
588 | "\n",
589 | "was:\n",
590 | "```\n",
591 | "retriever = vector.as_retriever(search_kwargs={\"k\": 10})\n",
592 | "```\n",
593 | "\n",
594 | "We would not have been able to get that \"As the years passed...\" chunk for reranking.\n",
595 | "\n",
596 | "Additionally, we're able to reduce the number of chunks from the 11+ we needed to retrieve (to include the right chunk) down to 5, because the reranker keeps the best 5 of that bunch. This reduces the number of tokens the LLM needs to process."
597 | ]
598 | },
599 | {
600 | "cell_type": "markdown",
601 | "metadata": {},
602 | "source": [
603 | "Now, we create a LangChain chain with the Cohere reranker retriever"
604 | ]
605 | },
606 | {
607 | "cell_type": "code",
608 | "execution_count": 31,
609 | "metadata": {},
610 | "outputs": [],
611 | "source": [
612 | "from langchain.chains import RetrievalQA\n",
613 | "\n",
614 | "rerank_chain = RetrievalQA.from_chain_type(\n",
615 | " llm=llm,\n",
616 | " retriever=compression_retriever,\n",
617 | " return_source_documents=True,\n",
618 | " chain_type_kwargs={\"prompt\": prompt} # Pass in our prompt\n",
619 | ")"
620 | ]
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": 32,
625 | "metadata": {},
626 | "outputs": [],
627 | "source": [
628 | "# Here are our test questions\n",
629 | "\n",
630 | "TestQuestions = [\n",
631 | " \"Summarise the story for me\",\n",
632 | " \"Who was the main protagonist?\",\n",
633 | " \"Did they have any children? If so, what were their names?\",\n",
634 | " \"Did anything eventful happen?\",\n",
635 | " \"Who are the main characters?\",\n",
636 | " \"What do you think happens next in the story?\"\n",
637 | "]"
638 | ]
639 | },
640 | {
641 | "cell_type": "markdown",
642 | "metadata": {},
643 | "source": [
644 | "Ask our questions with our reranking chain"
645 | ]
646 | },
647 | {
648 | "cell_type": "code",
649 | "execution_count": 33,
650 | "metadata": {},
651 | "outputs": [
652 | {
653 | "name": "stdout",
654 | "output_type": "stream",
655 | "text": [
656 | "\n",
657 | "1/6: Summarise the story for me\n",
658 | "\n",
659 | "2/6: Who was the main protagonist?\n",
660 | "\n",
661 | "3/6: Did they have any children? If so, what were their names?\n",
662 | "\n",
663 | "4/6: Did anything eventful happen?\n",
664 | "\n",
665 | "5/6: Who are the main characters?\n",
666 | "\n",
667 | "6/6: What do you think happens next in the story?\n"
668 | ]
669 | }
670 | ],
671 | "source": [
672 | "qa_pairs = []\n",
673 | "\n",
674 | "for index, question in enumerate(TestQuestions, start=1):\n",
675 | " question = question.strip() # Clean up\n",
676 | "\n",
677 | " print(f\"\\n{index}/{len(TestQuestions)}: {question}\")\n",
678 | "\n",
679 | " response = rerank_chain.invoke({\"query\": question})\n",
680 | "\n",
681 | " qa_pairs.append((question.strip(), response[\"result\"])) # Add to our output array\n",
682 | "\n",
683 | " # Uncomment the following line if you want to test just the first question\n",
684 | " # break "
685 | ]
686 | },
687 | {
688 | "cell_type": "code",
689 | "execution_count": 34,
690 | "metadata": {},
691 | "outputs": [
692 | {
693 | "name": "stdout",
694 | "output_type": "stream",
695 | "text": [
696 | "1/6 Summarise the story for me\n",
697 | "\n",
698 | "\n",
699 | "In a futuristic city, a meteor is heading towards Earth, and the mayor recognizes the need for an intelligent and resourceful plan to divert or neutralize it. The mayor approaches Thundertooth, who gathers his team Lumina, Echo, Sapphire, and Ignis to discuss the crisis. Each member has a unique ability that can contribute to the plan: Lumina enhances the city's energy systems to generate a powerful force field; Echo amplifies emergency signals for timely warnings and evacuation instructions; Sapphire calms the panicked masses during evacuation; and Ignis creates controlled bursts of heat to alter the meteor's trajectory.\n",
700 | "\n",
701 | "As the years pass, Thundertooth meets Seraphina and starts a family with four children, each with unique characteristics that reflect their modern world. The children inherit their parents' abilities: Lumina is fascinated with technology and invents new ways to enhance gadgets; Echo has a gift for mimicry and brings joy to those around him; Sapphire possesses calming and healing powers; and Ignis controls small flames and creates mesmerizing light shows.\n",
702 | "\n",
703 | "After the crisis, the citizens hail Thundertooth and his family as heroes, and their legacy is forever etched in the city's history. The mayor extends an invitation to Thundertooth, offering a solution for him to stay in the city without causing harm to anyone.\n",
704 | "\n",
705 | "--------\n",
706 | "\n",
707 | "2/6 Who was the main protagonist?\n",
708 | "\n",
709 | "\n",
710 | "The main protagonist of the story is Thundertooth.\n",
711 | "\n",
712 | "--------\n",
713 | "\n",
714 | "3/6 Did they have any children? If so, what were their names?\n",
715 | "\n",
716 | "Yes, Thundertooth had four children. Their names are Lumina, Echo, Sapphire, and Ignis.\n",
717 | "\n",
718 | "--------\n",
719 | "\n",
720 | "4/6 Did anything eventful happen?\n",
721 | "\n",
722 | "\n",
723 | "Yes, an eventful thing happened when Mayor Grace extended an invitation to Thundertooth to stay in their city and find a solution to his hunger dilemma. They went on to explore the futuristic marketplaces and innovative food labs and eventually discovered a sustainable solution that satisfied Thundertooth's hunger without compromising the well-being of the city's inhabitants.\n",
724 | "\n",
725 | "--------\n",
726 | "\n",
727 | "5/6 Who are the main characters?\n",
728 | "\n",
729 | "The main characters in this story are Mayor Grace and Thundertooth.\n",
730 | "\n",
731 | "--------\n",
732 | "\n",
733 | "6/6 What do you think happens next in the story?\n",
734 | "\n",
735 | "\n",
736 | "Based on the provided context, it is likely that Thundertooth will work with Mayor Grace and the leading scientists to implement the sustainable solution they discovered to satisfy his hunger without compromising the well-being of the city's inhabitants. He may use his unique abilities to help with the evacuation efforts or in any other way that he can contribute to ensuring the safety of the city and its citizens. In addition, the Thundertooth family may continue to live in the city, enjoying their newfound home and the opportunity to work together towards a better future for all.\n",
737 | "\n",
738 | "--------\n",
739 | "\n"
740 | ]
741 | }
742 | ],
743 | "source": [
744 | "# Print out the questions and answers\n",
745 | "\n",
746 | "for index, (question, answer) in enumerate(qa_pairs, start=1):\n",
747 | " print(f\"{index}/{len(qa_pairs)} {question}\\n\\n{answer}\\n\\n--------\\n\")"
748 | ]
749 | }
750 | ],
751 | "metadata": {
752 | "kernelspec": {
753 | "display_name": "LangChainRAGLinux",
754 | "language": "python",
755 | "name": "python3"
756 | },
757 | "language_info": {
758 | "codemirror_mode": {
759 | "name": "ipython",
760 | "version": 3
761 | },
762 | "file_extension": ".py",
763 | "mimetype": "text/x-python",
764 | "name": "python",
765 | "nbconvert_exporter": "python",
766 | "pygments_lexer": "ipython3",
767 | "version": "3.10.13"
768 | }
769 | },
770 | "nbformat": 4,
771 | "nbformat_minor": 2
772 | }
773 |
--------------------------------------------------------------------------------
/04-LangChain-RAG Chunk Rerank Max Context.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": []
7 | },
8 | {
9 | "cell_type": "markdown",
10 | "metadata": {},
11 | "source": [
12 | "### LangChain local LLM RAG example with self chunking, reranking, and maximising context based on token length\n",
13 | "### For LangSmith users (requires API key)\n",
14 | "Utilising LangChain v0.1\n",
15 | "\n",
16 | "This notebook demonstrates the use of LangChain for Retrieval Augmented Generation in Linux with Nvidia's CUDA. LLMs are run using Ollama.\n",
17 | "\n",
18 | "It uses self-chunking (where we split our documents into chunks ourselves) and then re-ranks the retrieved results before passing them to the LLM.\n",
19 | "\n",
20 | "Finally, it uses the token count of each paragraph to maximise how much context we give to the LLM without exceeding a set token limit.\n",
21 | "\n",
22 | "Models tested:\n",
23 | "- Llama 2\n",
24 | "- Mistral 7B\n",
25 | "- Mixtral 8x7B\n",
26 | "- Neural Chat 7B\n",
27 | "- Orca 2\n",
28 | "- Phi-2\n",
29 | "- Solar 10.7B\n",
30 | "- Yi 34B\n",
31 | "\n",
32 | "\n",
33 | "See the [README.md](README.md) file for help on how to setup your environment to run this."
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "We start by creating a callback handler so that we can capture the prompt and response token counts once the LLM has finished inference."
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 1,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "from typing import Any, Optional, Sequence\n",
50 | "from uuid import UUID\n",
51 | "from langchain.callbacks.base import BaseCallbackHandler\n",
52 | "from langchain.schema import LLMResult\n",
53 | "from langchain_core.documents import Document\n",
54 | "\n",
55 | "global llmresult_prompt_token_count # This will be updated with the total token count of the prompt when an LLM finished inference\n",
56 | "global llmresult_response_token_count # This will be updated with the total token count of the response when an LLM finished inference\n",
57 | "\n",
58 | "class GenerationStatisticsCallback(BaseCallbackHandler):\n",
59 | " def on_llm_end(self, response: LLMResult, **kwargs) -> None:\n",
60 | "\n",
61 | " # When the LLM inference has finished, store token counts in global variables for use outside of here\n",
62 | " global llmresult_prompt_token_count\n",
63 | " llmresult_prompt_token_count = response.generations[0][0].generation_info[\"prompt_eval_count\"]\n",
64 | "\n",
65 | " global llmresult_response_token_count\n",
66 | " llmresult_response_token_count = response.generations[0][0].generation_info[\"eval_count\"]\n",
67 | "\n",
68 | " print(f\"\\n\\n ----\\n\\n[ PROMPT TOKEN COUNT {llmresult_prompt_token_count} | RESPONSE TOKEN COUNT {llmresult_response_token_count} ]\")"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 2,
74 | "metadata": {},
75 | "outputs": [],
76 | "source": [
77 | "from langchain.callbacks.manager import CallbackManager\n",
78 | "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
79 | "\n",
80 | "callback_manager = CallbackManager(\n",
81 | " [StreamingStdOutCallbackHandler(), GenerationStatisticsCallback()]\n",
82 | ")"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 3,
88 | "metadata": {},
89 | "outputs": [],
90 | "source": [
91 | "# Target Context token count\n",
92 | "# This is the total amount of tokens from the context we retrieve that we want to put into the prompt for the LLM to use for RAG\n",
93 | "# We want to maximise the context put in without putting in more than a certain amount\n",
94 | "\n",
95 | "# We'll set this with the model selection in the next cell\n",
96 | "maximum_context_tokens = 0\n",
97 | "\n",
98 | "# Important - about 500 tokens are added to this to generate the full prompt for the LLM before it responds, so with a\n",
99 | "# limit of 1000 we're looking at about 1500 prompt tokens (about 2500 with a limit of 2000) + the answer tokens"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 29,
105 | "metadata": {},
106 | "outputs": [
107 | {
108 | "name": "stdout",
109 | "output_type": "stream",
110 | "text": [
111 | "Ollama Model selected: mixtral:8x7b-instruct-v0.1-q4_K_M with maximum context tokens allowed set to 2000\n"
112 | ]
113 | }
114 | ],
115 | "source": [
116 | "# Select your model here, put the name of the model in the ollama_model_name variable\n",
117 | "# Ensure you have pulled them or run them so Ollama has downloaded them and can load them (which it will do automatically)\n",
118 | "\n",
119 | "# Ollama installation (if you haven't done it yet): $ curl https://ollama.ai/install.sh | sh\n",
120 | "# Models need to be running in Ollama for LangChain to use them, to test if it can be run: $ ollama run mistral:7b-instruct-q6_K\n",
121 | "\n",
122 | "# Creating a list of tuples (model_name, max_context_tokens)\n",
123 | "ollama_model_configs = [\n",
124 | " (\"llama2:7b-chat-q6_K\", 2000), # 0\n",
125 | " (\"mistral:7b-instruct-q6_K\", 2000), # 1\n",
126 | " (\"mixtral:8x7b-instruct-v0.1-q4_K_M\", 2000), # 2\n",
127 | " (\"neural-chat:7b-v3.3-q6_K\", 2000), # 3\n",
128 | " (\"orca2:13b-q5_K_S\", 2000), # 4\n",
129 | " (\"phi\", 1000), # 5\n",
130 | " (\"solar:10.7b-instruct-v1-q5_K_M\", 2000), # 6\n",
131 | "]\n",
132 | "\n",
133 | "# CHANGE THIS VALUE TO THE INDEX OF THE MODEL YOU WANT TO USE:\n",
134 | "model_index = 2\n",
135 | "\n",
136 | "# Then we load the values into our variables\n",
137 | "ollama_model_name, maximum_context_tokens = ollama_model_configs[model_index]\n",
138 | "\n",
139 | "print(f\"Ollama Model selected: {ollama_model_name} with maximum context tokens allowed set to {maximum_context_tokens}\")\n",
140 | "\n",
141 | "# Note: Can't run \"yi:34b-chat-q3_K_M\" or \"yi:34b-chat-q4_K_M\" - never stopped with inference"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 30,
147 | "metadata": {},
148 | "outputs": [],
149 | "source": [
150 | "# Our LangSmith API key is stored in apikeys.py\n",
151 | "# Store your LangSmith key in a variable called LangSmith_API\n",
152 | "\n",
153 | "from apikeys import LangSmith_API\n",
154 | "import os\n",
155 | "\n",
156 | "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
157 | "os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
158 | "os.environ[\"LANGCHAIN_API_KEY\"] = LangSmith_API\n",
159 | "\n",
160 | "# Project Name\n",
161 | "os.environ[\"LANGCHAIN_PROJECT\"] = \"LangChain RAG Linux Chunking\""
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": 31,
167 | "metadata": {},
168 | "outputs": [],
169 | "source": [
170 | "# Load the LLM with Ollama, setting the temperature low so it's not too creative\n",
171 | "\n",
172 | "from langchain_community.llms import Ollama\n",
173 | "llm = Ollama(model=ollama_model_name,\n",
174 | " callback_manager=callback_manager,\n",
175 | " temperature=0.1)"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 32,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "# Quick test of the LLM with a general question before we start doing RAG\n",
185 | "# llm.invoke(\"why is the sky blue?\")\n",
186 | "\n",
187 | "# Note: This line would not complete for Yi-34B - need to work out why inferencing never finishes (works fine when running with the same prompt in ollama.)"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 33,
193 | "metadata": {},
194 | "outputs": [],
195 | "source": [
196 | "# Embeddings will be based on the Ollama loaded model\n",
197 | "\n",
198 | "from langchain_community.embeddings import OllamaEmbeddings\n",
199 | "\n",
200 | "embeddings = OllamaEmbeddings(model=ollama_model_name)"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": 34,
206 | "metadata": {},
207 | "outputs": [],
208 | "source": [
209 | "from langchain_community.document_loaders import DirectoryLoader\n",
210 | "\n",
211 | "loader = DirectoryLoader('Data', glob=\"**/*.docx\")"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 35,
217 | "metadata": {},
218 | "outputs": [],
219 | "source": [
220 | "# Load documents\n",
221 | "\n",
222 | "docs = loader.load()"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 36,
228 | "metadata": {},
229 | "outputs": [
230 | {
231 | "data": {
232 | "text/plain": [
233 | "[Document(page_content='Thundertooth\\n\\nOne fateful day, as the citizens of the futuristic city went about their daily lives, a collective gasp echoed through the streets as a massive meteor hurtled towards Earth. Panic spread like wildfire as people looked to the sky in horror, realizing the impending catastrophe. The city\\'s advanced technology detected the threat, and an emergency broadcast echoed through the streets, urging everyone to seek shelter.\\n\\nThundertooth, ever the protector of his newfound home, wasted no time. With a determined gleam in his eyes, he gathered his family and hurried to the city\\'s command center, where Mayor Grace and the leading scientists were coordinating the evacuation efforts.\\n\\nThe mayor, recognizing Thundertooth\\'s intelligence and resourcefulness, approached him. \"Thundertooth, we need a plan to divert or neutralize the meteor. Our technology can only do so much, but with your unique abilities, perhaps we can find a solution.\"\\n\\nThundertooth nodded, understanding the gravity of the situation. He gathered Lumina, Echo, Sapphire, and Ignis, explaining the urgency and the role each of them would play in the impending crisis.\\n\\n1. **Lumina**: Utilizing her deep understanding of technology, Lumina would enhance the city\\'s energy systems to generate a powerful force field, providing a protective barrier against the meteor\\'s impact.\\n\\n2. **Echo**: With his extraordinary mimicry abilities, Echo would amplify the emergency signals, ensuring that every citizen received timely warnings and instructions for evacuation.\\n\\n3. **Sapphire**: Harnessing her calming and healing powers, Sapphire would assist in calming the panicked masses, ensuring an orderly and efficient evacuation.\\n\\n4. 
**Ignis**: Drawing upon his fiery talents, Ignis would create controlled bursts of heat, attempting to alter the meteor\\'s trajectory and reduce its destructive force.\\n\\nAs the citizens evacuated to designated shelters, the Thundertooth family sprang into action. Lumina worked tirelessly to strengthen the city\\'s energy systems, Echo echoed evacuation orders through the city\\'s speakers, Sapphire offered comfort to those in distress, and Ignis unleashed controlled bursts of flames towards the approaching meteor.\\n\\nThundertooth stood at the forefront, using his mighty roar to coordinate and inspire the efforts of the city\\'s inhabitants. The ground trembled as the meteor drew closer, but the Thundertooth family\\'s coordinated efforts began to take effect. Lumina\\'s force field shimmered to life, deflecting the meteor\\'s deadly path. Echo\\'s amplified warnings reached every corner of the city, ensuring that no one was left behind.\\n\\nAs Ignis\\'s controlled bursts of flames interacted with the meteor, it began to change course. The combined efforts of the Thundertooth family, guided by their unique talents, diverted the catastrophic collision. The meteor, once destined for destruction, now harmlessly sailed past the Earth, leaving the city and its inhabitants unscathed.\\n\\nThe citizens, emerging from their shelters, erupted into cheers of gratitude. Mayor Grace approached Thundertooth, expressing her heartfelt thanks for the family\\'s heroic efforts. The Thundertooth family, tired but triumphant, basked in the relief of having saved their beloved city from imminent disaster.\\n\\nIn the wake of the crisis, the citizens of the futuristic city hailed Thundertooth and his family as true heroes. The toy factory that once brought joy to children now became a symbol of resilience and unity. 
The Thundertooth family\\'s legacy was forever etched in the city\\'s history, a testament to the power of cooperation and the extraordinary capabilities that could emerge when dinosaurs and humans worked together for the greater good.', metadata={'source': 'Data/Thundertooth Part 3.docx'}),\n",
234 | " Document(page_content=\"Thundertooth\\n\\nEmbraced by the futuristic city and its inhabitants, Thundertooth found a sense of purpose beyond merely satisfying his hunger. Inspired by the advanced technology surrounding him, he decided to channel his creativity into something extraordinary. With the help of the city's brilliant engineers, Thundertooth founded a one-of-a-kind toy factory that produced amazing widgets – magical, interactive toys that captivated the hearts of both children and adults alike.\\n\\nThundertooth's toy factory became a sensation, and its creations were highly sought after. The widgets incorporated cutting-edge holographic displays, levitation technology, and even the ability to change shapes and colors with a mere thought. Children across the city rejoiced as they played with these incredible toys that seemed to bring their wildest fantasies to life.\\n\\nAs the years passed, Thundertooth's life took a heartwarming turn. He met a kind and intelligent dinosaur named Seraphina, and together they started a family. Thundertooth and Seraphina were blessed with four children, each with unique characteristics that mirrored the diversity of their modern world.\\n\\nLumina: The eldest of Thundertooth's children, Lumina inherited her mother's intelligence and her father's sense of wonder. With sparkling scales that emitted a soft glow, Lumina had the ability to generate light at will. She became fascinated with technology, often spending hours tinkering with gadgets and inventing new ways to enhance the widgets produced in the family's factory.\\n\\nEcho: The second-born, Echo, had a gift for mimicry. He could perfectly replicate any sound or voice he heard, providing entertainment to the entire city. His playful nature and ability to bring joy to those around him made him a favorite among the neighborhood children.\\n\\nSapphire: Sapphire, the third sibling, had scales that shimmered like precious gems. 
She possessed a unique talent for calming and healing, a trait she inherited from both her parents. Whenever someone in the city felt stressed or unwell, Sapphire would extend her gentle touch, bringing comfort and tranquility.\\n\\nIgnis: The youngest of the family, Ignis, had fiery red scales that hinted at his exceptional ability – the power to control small flames. While initially seen as a potential hazard, Ignis channeled his fiery talents into creating mesmerizing light shows, becoming the city's favorite entertainer during festivals and celebrations.\\n\\n\\n\\nThundertooth and Seraphina reveled in the joy of parenthood, watching their children grow and flourish in the futuristic landscape they now called home. The family became an integral part of the city's fabric, not only through the widgets produced in their factory but also through the positive impact each member had on the community.\\n\\nThe toy factory became a symbol of innovation and unity, bringing together dinosaurs and humans in a shared appreciation for creativity and wonder. Thundertooth's legacy extended beyond his time-traveling adventure, leaving an indelible mark on the city and its inhabitants, reminding them that sometimes, the most magical things could emerge from the most unexpected places.\", metadata={'source': 'Data/Thundertooth Part 2.docx'}),\n",
235 | " Document(page_content='Thundertooth\\n\\nOnce upon a time, in a prehistoric land filled with dense forests and roaring rivers, there lived a dinosaur named Thundertooth. Thundertooth was no ordinary dinosaur; he possessed the rare ability to speak, a talent that set him apart from his ancient companions. One fateful day, as Thundertooth was basking in the warmth of the sun, a mysterious portal opened before him, and he found himself hurtling through time and space.\\n\\nAs the dazzling vortex subsided, Thundertooth opened his eyes to a world unlike anything he had ever seen. The air was filled with the hum of engines, and towering structures reached towards the sky. Thundertooth\\'s surroundings were a blend of metal and glass, and he quickly realized that he had been transported to a future era.\\n\\nThe once mighty dinosaur now stood bewildered in the midst of a bustling city. Above him, sleek flying cars zipped through the air, leaving trails of neon lights in their wake. Thundertooth felt like an ancient relic in this technological jungle, lost and out of place. With each step, he marveled at the skyscrapers that loomed overhead, their surfaces reflecting the myriad lights of the city.\\n\\nHowever, as night fell, Thundertooth\\'s stomach growled loudly. He realized that he was hungry, and the once vibrant city now seemed like a daunting maze of unfamiliar smells and sights. He wandered through the streets, his massive form drawing astonished stares from the futuristic inhabitants.\\n\\nThundertooth faced a dilemma – he was hungry, but he couldn\\'t bring himself to feast on the humans who scurried around like ants. As his hunger grew, he stumbled upon a park, an oasis of greenery amidst the concrete and steel. The park was adorned with holographic flowers that emitted a gentle glow, creating an ethereal atmosphere.\\n\\nWhile Thundertooth marveled at the beauty of the park, the mayor of the city happened to be passing by. 
Mayor Eleanor Grace, a charismatic and forward-thinking leader, was immediately intrigued by the sight of the talking dinosaur. She approached Thundertooth with a mix of curiosity and caution.\\n\\n\"Hello there, majestic creature. What brings you to our time?\" Mayor Grace inquired, her voice calm and reassuring.\\n\\nThundertooth, though initially startled, found comfort in the mayor\\'s soothing tone. In broken sentences, he explained his journey through time, the strange portal, and his hunger dilemma. \\n\\n\\n\\nMayor Grace listened intently, her eyes widening with amazement at the tale of the prehistoric dinosaur navigating the future.\\n\\nRealizing the dinosaur\\'s predicament, Mayor Grace extended an invitation. \"You are welcome in our city, Thundertooth. We can find a way to provide for you without causing harm to anyone. Let us work together to find a solution.\"\\n\\nGrateful for the mayor\\'s hospitality, Thundertooth followed her through the city. Together, they explored the futuristic marketplaces and innovative food labs, eventually discovering a sustainable solution that satisfied the dinosaur\\'s hunger without compromising the well-being of the city\\'s inhabitants.\\n\\nAs the news of Thundertooth\\'s arrival spread, the city embraced the talking dinosaur as a symbol of unity between the past and the future. Thundertooth found a new home in the city\\'s park, where holographic flowers bloomed, and the citizens marveled at the beauty of coexistence across time. And so, in this extraordinary city of flying cars and advanced technology, Thundertooth became a beloved figure, a living bridge between eras, teaching the people that understanding and cooperation could overcome even the greatest challenges.', metadata={'source': 'Data/Thundertooth Part 1.docx'}),\n",
236 | " Document(page_content=\"Thundertooth\\n\\nAs the city celebrated the Thundertooth family's heroic actions, there was one among them harboring a darker agenda. Ignis, the youngest sibling with fiery scales, had secretly grown resentful of the city that once hailed his family as heroes. The praise and adoration showered upon his siblings had ignited a spark of jealousy within him, fueling a desire for power and recognition that twisted his once-playful nature into something more sinister.\\n\\nIgnis withdrew from the family, isolating himself in the shadows. Unbeknownst to his siblings and parents, he delved into forbidden knowledge, seeking ways to amplify his fiery abilities. Ignis became obsessed with the idea of asserting dominance over the city that had once applauded his family's feats.\\n\\nAs the Thundertooth family enjoyed the renewed peace and admiration of the citizens, Ignis hatched a malevolent plan. He began manipulating the city's energy systems, intending to unleash a destructive force that would bring chaos and devastation. Ignis's once-playful flames now burned with a sinister intensity, reflecting the darkness that had taken root within him.\\n\\nLumina, Echo, and Sapphire grew concerned as they noticed Ignis's increasingly erratic behavior. They attempted to reason with him, pleading for him to abandon his destructive ambitions and embrace the family's legacy of unity. However, Ignis, consumed by his thirst for power, rejected their pleas and retreated further into the shadows.\\n\\nOne ominous night, Ignis initiated his nefarious plan. He unleashed a torrent of uncontrollable flames upon the city, wreaking havoc on its once-gleaming streets. The citizens, who had once looked to the Thundertooth family as saviors, now found themselves facing a new and terrifying threat from one of their own.\\n\\nThe Thundertooth siblings, realizing the danger their brother posed, sprang into action. 
Lumina fortified the city's defenses, Echo rallied the citizens to safety, and Sapphire extended her calming touch to soothe the panic that ensued. The once-united family now found themselves on opposite sides of a conflict that threatened to tear apart the very fabric of their existence.\\n\\nThe city plunged into chaos as Ignis continued his rampage, determined to prove himself as the dominant force within the family. The Thundertooth siblings, fueled by love for their home and a desire to save the innocent, confronted Ignis in an epic battle that shook the city to its core.\\n\\nLumina's brilliant displays of light clashed with Ignis's fiery onslaught, creating a dazzling spectacle that painted the night sky. Echo's mimicry abilities were put to the test as he attempted to redirect the citizens away from danger, while Sapphire's healing touch worked tirelessly to mend the wounds caused by the destructive flames.\\n\\nThe battle raged on, each sibling fighting not only to protect the city but also to save Ignis from the darkness that had consumed him. The once-close bond that defined the Thundertooth family now hung in the balance, teetering on the edge of destruction.\\n\\nAs the clash reached its climax, the Thundertooth siblings, exhausted but resolute, managed to combine their unique talents in a final, desperate attempt to reach Ignis. In a blinding burst of light and energy, they enveloped their wayward brother, hoping to break the sinister hold that gripped him.\\n\\nIn that moment of intense unity, the darkness within Ignis faltered. The fiery tempest subsided, and he collapsed, weakened and defeated. The Thundertooth family, battered but intact, gathered around Ignis, determined to help him overcome the darkness that had threatened to consume him.\\n\\nAs the city slowly recovered from the chaos, the Thundertooth family faced the daunting task of rebuilding not only the physical damage but also the fractured bonds of trust within their own ranks. 
The once-prodigal son, Ignis, now humbled by the consequences of his actions, sought redemption and reconciliation with his family.\\n\\nThe Thundertooth family, having weathered the storm of internal strife, emerged stronger and more united than ever. The city, though scarred, witnessed the resilience of the family that had once saved it from disaster and now worked together to mend the wounds inflicted by one of their own. The tale of the Thundertooth family, once a story of heroism, now became a saga of redemption and the enduring power of familial bonds.\", metadata={'source': 'Data/Thundertooth Part 4.docx'})]"
237 | ]
238 | },
239 | "execution_count": 36,
240 | "metadata": {},
241 | "output_type": "execute_result"
242 | }
243 | ],
244 | "source": [
245 | "docs"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": 37,
251 | "metadata": {},
252 | "outputs": [
253 | {
254 | "data": {
255 | "text/plain": [
256 | "4"
257 | ]
258 | },
259 | "execution_count": 37,
260 | "metadata": {},
261 | "output_type": "execute_result"
262 | }
263 | ],
264 | "source": [
265 | "# Ensure we have the right number of Word documents loaded\n",
266 | "\n",
267 | "len(docs)"
268 | ]
269 | },
270 | {
271 | "cell_type": "markdown",
272 | "metadata": {},
273 | "source": [
274 | "We create a function to split text into paragraphs but keep numbered sections, bullet points, and lists together with the paragraph that introduces them. This suits our documents because they contain numbered and bulleted points - the splitting rules would need to be changed to suit other documents."
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": 38,
280 | "metadata": {},
281 | "outputs": [],
282 | "source": [
283 | "import re\n",
284 | "\n",
285 | "# Define the regular expression pattern for splitting paragraphs\n",
286 | "para_split_pattern = re.compile(r'\\n\\n')\n",
287 | "\n",
288 | "# Splits a document's text into paragraphs. Paragraphs that start with a digit, a dash, or a first word ending in ':' (numbered, bulleted or labelled points) are included with the paragraph before them.\n",
289 | "def split_text_into_paragraphs(text):\n",
290 | "\n",
291 | "\n",
292 | " # Use the pattern to split the text into paragraphs\n",
293 | " paragraphs = para_split_pattern.split(text)\n",
294 | "\n",
295 | " # Combine paragraphs that should not be split\n",
296 | " combined_paragraphs = [paragraphs[0]]\n",
297 | "\n",
298 | " for p in paragraphs[1:]:\n",
299 | " # Check if the paragraph starts with a number or a dash, or if its first word ends with a colon and, if so, concatenate it to the previous paragraph so we keep them all in one chunk\n",
300 | "\n",
301 | " # Strip out any leading new lines\n",
302 | " p = p.lstrip('\\n')\n",
303 | "\n",
304 | " if p and (p[0].isdigit() or p[0] == '-' or p.split()[0].endswith(':')):\n",
305 | " combined_paragraphs[-1] += '\\n\\n\\n' + p\n",
306 | " else:\n",
307 | " combined_paragraphs.append(p)\n",
308 | "\n",
309 | " # Remove empty strings from the result\n",
310 | " combined_paragraphs = [p.strip() for p in combined_paragraphs if p.strip()]\n",
311 | "\n",
312 | " return combined_paragraphs"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "Create nodes from the paragraphs that we've carefully split up, counting the tokens in each paragraph so we know what kind of token length we're working with.\n",
320 | "\n",
321 | "We can use the LLM object to count the tokens with get_num_tokens."
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 39,
327 | "metadata": {},
328 | "outputs": [
329 | {
330 | "name": "stdout",
331 | "output_type": "stream",
332 | "text": [
333 | "Document Data/Thundertooth Part 3.docx has 10 paragraphs\n",
334 | "Paragraph tokens: 3\n",
335 | "Paragraph tokens: 75\n",
336 | "Paragraph tokens: 51\n",
337 | "Paragraph tokens: 54\n",
338 | "Paragraph tokens: 193\n",
339 | "Paragraph tokens: 60\n",
340 | "Paragraph tokens: 86\n",
341 | "Paragraph tokens: 65\n",
342 | "Paragraph tokens: 57\n",
343 | "Paragraph tokens: 83\n",
344 | "Document Data/Thundertooth Part 2.docx has 6 paragraphs\n",
345 | "Paragraph tokens: 3\n",
346 | "Paragraph tokens: 88\n",
347 | "Paragraph tokens: 70\n",
348 | "Paragraph tokens: 327\n",
349 | "Paragraph tokens: 64\n",
350 | "Paragraph tokens: 69\n",
351 | "Document Data/Thundertooth Part 1.docx has 13 paragraphs\n",
352 | "Paragraph tokens: 3\n",
353 | "Paragraph tokens: 89\n",
354 | "Paragraph tokens: 68\n",
355 | "Paragraph tokens: 83\n",
356 | "Paragraph tokens: 56\n",
357 | "Paragraph tokens: 73\n",
358 | "Paragraph tokens: 60\n",
359 | "Paragraph tokens: 24\n",
360 | "Paragraph tokens: 37\n",
361 | "Paragraph tokens: 23\n",
362 | "Paragraph tokens: 49\n",
363 | "Paragraph tokens: 54\n",
364 | "Paragraph tokens: 105\n",
365 | "Document Data/Thundertooth Part 4.docx has 14 paragraphs\n",
366 | "Paragraph tokens: 3\n",
367 | "Paragraph tokens: 89\n",
368 | "Paragraph tokens: 61\n",
369 | "Paragraph tokens: 71\n",
370 | "Paragraph tokens: 66\n",
371 | "Paragraph tokens: 67\n",
372 | "Paragraph tokens: 72\n",
373 | "Paragraph tokens: 59\n",
374 | "Paragraph tokens: 64\n",
375 | "Paragraph tokens: 55\n",
376 | "Paragraph tokens: 62\n",
377 | "Paragraph tokens: 59\n",
378 | "Paragraph tokens: 66\n",
379 | "Paragraph tokens: 87\n",
380 | "\n",
381 | "** The maximum paragraph tokens is 327 **\n",
382 | "\n",
383 | "** The average paragraph's token count is 68 **\n",
384 | "\n",
385 | "** Created 43 nodes **\n"
386 | ]
387 | }
388 | ],
389 | "source": [
390 | "from langchain.docstore.document import Document\n",
391 | "\n",
392 | "paragraph_separator = \"\\n\\n\\n\"\n",
393 | "\n",
394 | "# Stores the maximum length of a paragraph, in tokens\n",
395 | "max_paragraph_tokens = 0\n",
396 | "\n",
397 | "# Total tokens, used to determine average\n",
398 | "total_paragraph_tokens = 0\n",
399 | "\n",
400 | "# Nodes\n",
401 | "paragraph_nodes = []\n",
402 | "\n",
403 | "# Loop through the documents, splitting each into paragraphs and checking the number of tokens per paragraph\n",
404 | "for document in docs:\n",
405 | "\n",
406 | " paragraph_token_lens = []\n",
407 | " paragraphs = split_text_into_paragraphs(document.page_content)\n",
408 | " print(f\"Document {document.metadata['source']} has {len(paragraphs)} paragraphs\")\n",
409 | " for paragraph in paragraphs:\n",
410 | "\n",
411 | " # Count the tokens in this paragraph\n",
412 | " token_count = llm.get_num_tokens(paragraph)\n",
413 | " paragraph_token_lens.append(token_count)\n",
414 | " print(f\"Paragraph tokens: {token_count}\")\n",
415 | "\n",
416 | " if token_count > max_paragraph_tokens:\n",
417 | " max_paragraph_tokens = token_count\n",
418 | "\n",
419 | " total_paragraph_tokens = total_paragraph_tokens + token_count\n",
420 | "\n",
421 | " # Create and add the node from the paragraph\n",
422 | " # include metadata we can use for citations\n",
423 | " node = Document(page_content=paragraph) # Copy the metadata from the Word document into here\n",
424 | " node.metadata[\"source\"] = document.metadata[\"source\"]\n",
425 | " node.metadata[\"token_count\"] = token_count\n",
426 | " paragraph_nodes.append(node)\n",
427 | "\n",
428 | " # print(paragraph_token_lens)\n",
429 | "\n",
430 | "print(f\"\\n** The maximum paragraph tokens is {max_paragraph_tokens} **\")\n",
431 | "\n",
432 | "average_paragraph_tokens = int(total_paragraph_tokens / len(paragraph_nodes))\n",
433 | "print(f\"\\n** The average paragraph's token count is {average_paragraph_tokens} **\")\n",
434 | "\n",
435 | "print(f\"\\n** Created {len(paragraph_nodes)} nodes **\")\n"
436 | ]
437 | },
438 | {
439 | "cell_type": "markdown",
440 | "metadata": {},
441 | "source": [
442 | "Let's see the split data - now neatly in paragraphs, with the bullet points and lists kept with their respective paragraphs."
443 | ]
444 | },
445 | {
446 | "cell_type": "code",
447 | "execution_count": 40,
448 | "metadata": {},
449 | "outputs": [
450 | {
451 | "data": {
452 | "text/plain": [
453 | "[Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 3}),\n",
454 | " Document(page_content=\"One fateful day, as the citizens of the futuristic city went about their daily lives, a collective gasp echoed through the streets as a massive meteor hurtled towards Earth. Panic spread like wildfire as people looked to the sky in horror, realizing the impending catastrophe. The city's advanced technology detected the threat, and an emergency broadcast echoed through the streets, urging everyone to seek shelter.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 75}),\n",
455 | " Document(page_content=\"Thundertooth, ever the protector of his newfound home, wasted no time. With a determined gleam in his eyes, he gathered his family and hurried to the city's command center, where Mayor Grace and the leading scientists were coordinating the evacuation efforts.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 51}),\n",
456 | " Document(page_content='The mayor, recognizing Thundertooth\\'s intelligence and resourcefulness, approached him. \"Thundertooth, we need a plan to divert or neutralize the meteor. Our technology can only do so much, but with your unique abilities, perhaps we can find a solution.\"', metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 54}),\n",
457 | " Document(page_content=\"Thundertooth nodded, understanding the gravity of the situation. He gathered Lumina, Echo, Sapphire, and Ignis, explaining the urgency and the role each of them would play in the impending crisis.\\n\\n\\n1. **Lumina**: Utilizing her deep understanding of technology, Lumina would enhance the city's energy systems to generate a powerful force field, providing a protective barrier against the meteor's impact.\\n\\n\\n2. **Echo**: With his extraordinary mimicry abilities, Echo would amplify the emergency signals, ensuring that every citizen received timely warnings and instructions for evacuation.\\n\\n\\n3. **Sapphire**: Harnessing her calming and healing powers, Sapphire would assist in calming the panicked masses, ensuring an orderly and efficient evacuation.\\n\\n\\n4. **Ignis**: Drawing upon his fiery talents, Ignis would create controlled bursts of heat, attempting to alter the meteor's trajectory and reduce its destructive force.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 193}),\n",
458 | " Document(page_content=\"As the citizens evacuated to designated shelters, the Thundertooth family sprang into action. Lumina worked tirelessly to strengthen the city's energy systems, Echo echoed evacuation orders through the city's speakers, Sapphire offered comfort to those in distress, and Ignis unleashed controlled bursts of flames towards the approaching meteor.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 60}),\n",
459 | " Document(page_content=\"Thundertooth stood at the forefront, using his mighty roar to coordinate and inspire the efforts of the city's inhabitants. The ground trembled as the meteor drew closer, but the Thundertooth family's coordinated efforts began to take effect. Lumina's force field shimmered to life, deflecting the meteor's deadly path. Echo's amplified warnings reached every corner of the city, ensuring that no one was left behind.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 86}),\n",
460 | " Document(page_content=\"As Ignis's controlled bursts of flames interacted with the meteor, it began to change course. The combined efforts of the Thundertooth family, guided by their unique talents, diverted the catastrophic collision. The meteor, once destined for destruction, now harmlessly sailed past the Earth, leaving the city and its inhabitants unscathed.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 65}),\n",
461 | " Document(page_content=\"The citizens, emerging from their shelters, erupted into cheers of gratitude. Mayor Grace approached Thundertooth, expressing her heartfelt thanks for the family's heroic efforts. The Thundertooth family, tired but triumphant, basked in the relief of having saved their beloved city from imminent disaster.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 57}),\n",
462 | " Document(page_content=\"In the wake of the crisis, the citizens of the futuristic city hailed Thundertooth and his family as true heroes. The toy factory that once brought joy to children now became a symbol of resilience and unity. The Thundertooth family's legacy was forever etched in the city's history, a testament to the power of cooperation and the extraordinary capabilities that could emerge when dinosaurs and humans worked together for the greater good.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 83}),\n",
463 | " Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 3}),\n",
464 | " Document(page_content=\"Embraced by the futuristic city and its inhabitants, Thundertooth found a sense of purpose beyond merely satisfying his hunger. Inspired by the advanced technology surrounding him, he decided to channel his creativity into something extraordinary. With the help of the city's brilliant engineers, Thundertooth founded a one-of-a-kind toy factory that produced amazing widgets – magical, interactive toys that captivated the hearts of both children and adults alike.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 88}),\n",
465 | " Document(page_content=\"Thundertooth's toy factory became a sensation, and its creations were highly sought after. The widgets incorporated cutting-edge holographic displays, levitation technology, and even the ability to change shapes and colors with a mere thought. Children across the city rejoiced as they played with these incredible toys that seemed to bring their wildest fantasies to life.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 70}),\n",
466 | " Document(page_content=\"As the years passed, Thundertooth's life took a heartwarming turn. He met a kind and intelligent dinosaur named Seraphina, and together they started a family. Thundertooth and Seraphina were blessed with four children, each with unique characteristics that mirrored the diversity of their modern world.\\n\\n\\nLumina: The eldest of Thundertooth's children, Lumina inherited her mother's intelligence and her father's sense of wonder. With sparkling scales that emitted a soft glow, Lumina had the ability to generate light at will. She became fascinated with technology, often spending hours tinkering with gadgets and inventing new ways to enhance the widgets produced in the family's factory.\\n\\n\\nEcho: The second-born, Echo, had a gift for mimicry. He could perfectly replicate any sound or voice he heard, providing entertainment to the entire city. His playful nature and ability to bring joy to those around him made him a favorite among the neighborhood children.\\n\\n\\nSapphire: Sapphire, the third sibling, had scales that shimmered like precious gems. She possessed a unique talent for calming and healing, a trait she inherited from both her parents. Whenever someone in the city felt stressed or unwell, Sapphire would extend her gentle touch, bringing comfort and tranquility.\\n\\n\\nIgnis: The youngest of the family, Ignis, had fiery red scales that hinted at his exceptional ability – the power to control small flames. While initially seen as a potential hazard, Ignis channeled his fiery talents into creating mesmerizing light shows, becoming the city's favorite entertainer during festivals and celebrations.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 327}),\n",
467 | " Document(page_content=\"Thundertooth and Seraphina reveled in the joy of parenthood, watching their children grow and flourish in the futuristic landscape they now called home. The family became an integral part of the city's fabric, not only through the widgets produced in their factory but also through the positive impact each member had on the community.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 64}),\n",
468 | " Document(page_content=\"The toy factory became a symbol of innovation and unity, bringing together dinosaurs and humans in a shared appreciation for creativity and wonder. Thundertooth's legacy extended beyond his time-traveling adventure, leaving an indelible mark on the city and its inhabitants, reminding them that sometimes, the most magical things could emerge from the most unexpected places.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 69}),\n",
469 | " Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 3}),\n",
470 | " Document(page_content='Once upon a time, in a prehistoric land filled with dense forests and roaring rivers, there lived a dinosaur named Thundertooth. Thundertooth was no ordinary dinosaur; he possessed the rare ability to speak, a talent that set him apart from his ancient companions. One fateful day, as Thundertooth was basking in the warmth of the sun, a mysterious portal opened before him, and he found himself hurtling through time and space.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 89}),\n",
471 | " Document(page_content=\"As the dazzling vortex subsided, Thundertooth opened his eyes to a world unlike anything he had ever seen. The air was filled with the hum of engines, and towering structures reached towards the sky. Thundertooth's surroundings were a blend of metal and glass, and he quickly realized that he had been transported to a future era.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 68}),\n",
472 | " Document(page_content='The once mighty dinosaur now stood bewildered in the midst of a bustling city. Above him, sleek flying cars zipped through the air, leaving trails of neon lights in their wake. Thundertooth felt like an ancient relic in this technological jungle, lost and out of place. With each step, he marveled at the skyscrapers that loomed overhead, their surfaces reflecting the myriad lights of the city.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 83}),\n",
473 | " Document(page_content=\"However, as night fell, Thundertooth's stomach growled loudly. He realized that he was hungry, and the once vibrant city now seemed like a daunting maze of unfamiliar smells and sights. He wandered through the streets, his massive form drawing astonished stares from the futuristic inhabitants.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 56}),\n",
474 | " Document(page_content=\"Thundertooth faced a dilemma – he was hungry, but he couldn't bring himself to feast on the humans who scurried around like ants. As his hunger grew, he stumbled upon a park, an oasis of greenery amidst the concrete and steel. The park was adorned with holographic flowers that emitted a gentle glow, creating an ethereal atmosphere.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 73}),\n",
475 | " Document(page_content='While Thundertooth marveled at the beauty of the park, the mayor of the city happened to be passing by. Mayor Eleanor Grace, a charismatic and forward-thinking leader, was immediately intrigued by the sight of the talking dinosaur. She approached Thundertooth with a mix of curiosity and caution.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 60}),\n",
476 | " Document(page_content='\"Hello there, majestic creature. What brings you to our time?\" Mayor Grace inquired, her voice calm and reassuring.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 24}),\n",
477 | " Document(page_content=\"Thundertooth, though initially startled, found comfort in the mayor's soothing tone. In broken sentences, he explained his journey through time, the strange portal, and his hunger dilemma.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 37}),\n",
478 | " Document(page_content='Mayor Grace listened intently, her eyes widening with amazement at the tale of the prehistoric dinosaur navigating the future.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 23}),\n",
479 | " Document(page_content='Realizing the dinosaur\\'s predicament, Mayor Grace extended an invitation. \"You are welcome in our city, Thundertooth. We can find a way to provide for you without causing harm to anyone. Let us work together to find a solution.\"', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 49}),\n",
480 | " Document(page_content=\"Grateful for the mayor's hospitality, Thundertooth followed her through the city. Together, they explored the futuristic marketplaces and innovative food labs, eventually discovering a sustainable solution that satisfied the dinosaur's hunger without compromising the well-being of the city's inhabitants.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 54}),\n",
481 | " Document(page_content=\"As the news of Thundertooth's arrival spread, the city embraced the talking dinosaur as a symbol of unity between the past and the future. Thundertooth found a new home in the city's park, where holographic flowers bloomed, and the citizens marveled at the beauty of coexistence across time. And so, in this extraordinary city of flying cars and advanced technology, Thundertooth became a beloved figure, a living bridge between eras, teaching the people that understanding and cooperation could overcome even the greatest challenges.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 105}),\n",
482 | " Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 3}),\n",
483 | " Document(page_content=\"As the city celebrated the Thundertooth family's heroic actions, there was one among them harboring a darker agenda. Ignis, the youngest sibling with fiery scales, had secretly grown resentful of the city that once hailed his family as heroes. The praise and adoration showered upon his siblings had ignited a spark of jealousy within him, fueling a desire for power and recognition that twisted his once-playful nature into something more sinister.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 89}),\n",
484 | " Document(page_content=\"Ignis withdrew from the family, isolating himself in the shadows. Unbeknownst to his siblings and parents, he delved into forbidden knowledge, seeking ways to amplify his fiery abilities. Ignis became obsessed with the idea of asserting dominance over the city that had once applauded his family's feats.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 61}),\n",
485 | " Document(page_content=\"As the Thundertooth family enjoyed the renewed peace and admiration of the citizens, Ignis hatched a malevolent plan. He began manipulating the city's energy systems, intending to unleash a destructive force that would bring chaos and devastation. Ignis's once-playful flames now burned with a sinister intensity, reflecting the darkness that had taken root within him.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 71}),\n",
486 | " Document(page_content=\"Lumina, Echo, and Sapphire grew concerned as they noticed Ignis's increasingly erratic behavior. They attempted to reason with him, pleading for him to abandon his destructive ambitions and embrace the family's legacy of unity. However, Ignis, consumed by his thirst for power, rejected their pleas and retreated further into the shadows.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 66}),\n",
487 | " Document(page_content='One ominous night, Ignis initiated his nefarious plan. He unleashed a torrent of uncontrollable flames upon the city, wreaking havoc on its once-gleaming streets. The citizens, who had once looked to the Thundertooth family as saviors, now found themselves facing a new and terrifying threat from one of their own.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 67}),\n",
488 | " Document(page_content=\"The Thundertooth siblings, realizing the danger their brother posed, sprang into action. Lumina fortified the city's defenses, Echo rallied the citizens to safety, and Sapphire extended her calming touch to soothe the panic that ensued. The once-united family now found themselves on opposite sides of a conflict that threatened to tear apart the very fabric of their existence.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 72}),\n",
489 | " Document(page_content='The city plunged into chaos as Ignis continued his rampage, determined to prove himself as the dominant force within the family. The Thundertooth siblings, fueled by love for their home and a desire to save the innocent, confronted Ignis in an epic battle that shook the city to its core.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 59}),\n",
490 | " Document(page_content=\"Lumina's brilliant displays of light clashed with Ignis's fiery onslaught, creating a dazzling spectacle that painted the night sky. Echo's mimicry abilities were put to the test as he attempted to redirect the citizens away from danger, while Sapphire's healing touch worked tirelessly to mend the wounds caused by the destructive flames.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 64}),\n",
491 | " Document(page_content='The battle raged on, each sibling fighting not only to protect the city but also to save Ignis from the darkness that had consumed him. The once-close bond that defined the Thundertooth family now hung in the balance, teetering on the edge of destruction.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 55}),\n",
492 | " Document(page_content='As the clash reached its climax, the Thundertooth siblings, exhausted but resolute, managed to combine their unique talents in a final, desperate attempt to reach Ignis. In a blinding burst of light and energy, they enveloped their wayward brother, hoping to break the sinister hold that gripped him.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 62}),\n",
493 | " Document(page_content='In that moment of intense unity, the darkness within Ignis faltered. The fiery tempest subsided, and he collapsed, weakened and defeated. The Thundertooth family, battered but intact, gathered around Ignis, determined to help him overcome the darkness that had threatened to consume him.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 59}),\n",
494 | " Document(page_content='As the city slowly recovered from the chaos, the Thundertooth family faced the daunting task of rebuilding not only the physical damage but also the fractured bonds of trust within their own ranks. The once-prodigal son, Ignis, now humbled by the consequences of his actions, sought redemption and reconciliation with his family.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 66}),\n",
495 | " Document(page_content='The Thundertooth family, having weathered the storm of internal strife, emerged stronger and more united than ever. The city, though scarred, witnessed the resilience of the family that had once saved it from disaster and now worked together to mend the wounds inflicted by one of their own. The tale of the Thundertooth family, once a story of heroism, now became a saga of redemption and the enduring power of familial bonds.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 87})]"
496 | ]
497 | },
498 | "execution_count": 40,
499 | "metadata": {},
500 | "output_type": "execute_result"
501 | }
502 | ],
503 | "source": [
504 | "paragraph_nodes"
505 | ]
506 | },
507 | {
508 | "cell_type": "markdown",
509 | "metadata": {},
510 | "source": [
511 | "We no longer need to use the LangChain text splitter as we've already done the splitting"
512 | ]
513 | },
514 | {
515 | "cell_type": "code",
516 | "execution_count": 41,
517 | "metadata": {},
518 | "outputs": [],
519 | "source": [
520 | "# Split them up into chunks using a Text Splitter\n",
521 | "\n",
522 | "# from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
523 | "\n",
524 | "# text_splitter = RecursiveCharacterTextSplitter()\n",
525 | "# documents = text_splitter.split_documents(docs)"
526 | ]
527 | },
528 | {
529 | "cell_type": "code",
530 | "execution_count": 42,
531 | "metadata": {},
532 | "outputs": [],
533 | "source": [
534 | "# Create the embeddings from our split up chunks\n",
535 | "\n",
536 | "from langchain_community.vectorstores import FAISS\n",
537 | "\n",
538 | "vector = FAISS.from_documents(paragraph_nodes, embeddings)"
539 | ]
540 | },
541 | {
542 | "cell_type": "markdown",
543 | "metadata": {},
544 | "source": [
545 | "In preparing the prompt, we add an instruction to include citations so that the LLM includes the sources in its response (hopefully!)."
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": 43,
551 | "metadata": {},
552 | "outputs": [],
553 | "source": [
554 | "# Prepare the prompt and then the chain\n",
555 | "\n",
556 | "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
557 | "from langchain_core.prompts import ChatPromptTemplate, PromptTemplate\n",
558 | "\n",
559 | "if ollama_model_name == \"phi\" or ollama_model_name == \"phi:chat\":\n",
560 | " # Phi-2 prompt is less flexible\n",
561 | " prompt_template = \"\"\"Instruct: With this context\\n\\n{context}\\n\\nQuestion: (Include citations) {question}\\nOutput:\"\"\"\n",
562 | "\n",
563 | "else:\n",
564 | " # prompt_template = \"\"\"You are a story teller , answering questions in an excited, insightful, and empathetic way. Answer the question based only on the provided context:\n",
565 | " prompt_template = \"\"\"You are a story teller writing in the style of Agatha Christie. Answer the question only with the provided context. YOU MUST INCLUDE THE SOURCES.\n",
566 | "\n",
567 | " \n",
568 | " {context}\n",
569 | " \n",
570 | "\n",
571 | " Question: {question}\"\"\"\n",
572 | "\n",
573 | "prompt = PromptTemplate(\n",
574 | " template=prompt_template, \n",
575 | " input_variables=[\n",
576 | " 'context', \n",
577 | " 'question',\n",
578 | " ]\n",
579 | ")"
580 | ]
581 | },
582 | {
583 | "cell_type": "markdown",
584 | "metadata": {},
585 | "source": [
586 | "Now that we have broken down the documents into paragraph-sized chunks, we need to retrieve more paragraphs so the LLM has a decent amount of context to use. Without adding the \"search_kwargs\" parameter, the answers to the questions were worse. For example, when asked if they had any children, no relevant context was provided.\n",
587 | "\n",
588 | "Note: To get the context containing the children's names included (and then reranked to the top), I needed to set the number of retrieved chunks to 20. The section with the children's names was the 11th result from the retriever! This indicates that you will likely need to retrieve more chunks than you think you need."
589 | ]
590 | },
591 | {
592 | "cell_type": "code",
593 | "execution_count": 44,
594 | "metadata": {},
595 | "outputs": [],
596 | "source": [
597 | "# Create the retriever and set it to return a good amount of chunks\n",
598 | "\n",
599 | "from langchain.chains import create_retrieval_chain\n",
600 | "\n",
601 | "# We use a variable to store the number of results as we'll use the same amount for the reranking (as we'll manually remove some)\n",
602 | "retrieval_chunks = 20\n",
603 | "\n",
604 | "retriever = vector.as_retriever(search_kwargs={\"k\": retrieval_chunks})"
605 | ]
606 | },
607 | {
608 | "cell_type": "markdown",
609 | "metadata": {},
610 | "source": [
611 | "Let's implement the Cohere reranking, utilising our retriever (which is getting more results to work with now) and our LLM\n",
612 | "\n",
613 | "Note: You'll need a Cohere API key. A trial key is free for non-commercial purposes. I've stored it in apikeys.py as Cohere_API = \"your key in here\"\n",
614 | "\n",
615 | "https://cohere.com/"
616 | ]
617 | },
618 | {
619 | "cell_type": "code",
620 | "execution_count": 45,
621 | "metadata": {},
622 | "outputs": [],
623 | "source": [
624 | "from langchain.retrievers import ContextualCompressionRetriever\n",
625 | "from langchain.retrievers.document_compressors import CohereRerank\n",
626 | "\n",
627 | "from apikeys import Cohere_API\n",
628 | "\n",
629 | "# Create the retriever\n",
630 | "# Here we retrieve the same number of chunks and we'll have a relevance score so we can cut out the lowest ranking ones\n",
631 | "# and keep only those that fit into our target maximum context tokens \n",
632 | "compressor = CohereRerank(cohere_api_key=Cohere_API, top_n=retrieval_chunks)\n",
633 | "compression_retriever = ContextualCompressionRetriever(\n",
634 | " base_compressor=compressor,\n",
635 | " base_retriever=retriever,\n",
636 | ")"
637 | ]
638 | },
639 | {
640 | "cell_type": "code",
641 | "execution_count": 46,
642 | "metadata": {},
643 | "outputs": [
644 | {
645 | "data": {
646 | "text/plain": [
647 | "[Document(page_content=\"Lumina, Echo, and Sapphire grew concerned as they noticed Ignis's increasingly erratic behavior. They attempted to reason with him, pleading for him to abandon his destructive ambitions and embrace the family's legacy of unity. However, Ignis, consumed by his thirst for power, rejected their pleas and retreated further into the shadows.\", metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 66, 'relevance_score': 0.66551924}),\n",
648 | " Document(page_content=\"Thundertooth and Seraphina reveled in the joy of parenthood, watching their children grow and flourish in the futuristic landscape they now called home. The family became an integral part of the city's fabric, not only through the widgets produced in their factory but also through the positive impact each member had on the community.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 64, 'relevance_score': 0.5951397}),\n",
649 | " Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 3, 'relevance_score': 0.5865229}),\n",
650 | " Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 3, 'relevance_score': 0.5865229}),\n",
651 | " Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 3, 'relevance_score': 0.5865229}),\n",
652 | " Document(page_content='Thundertooth', metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 3, 'relevance_score': 0.5865229}),\n",
653 | " Document(page_content=\"In the wake of the crisis, the citizens of the futuristic city hailed Thundertooth and his family as true heroes. The toy factory that once brought joy to children now became a symbol of resilience and unity. The Thundertooth family's legacy was forever etched in the city's history, a testament to the power of cooperation and the extraordinary capabilities that could emerge when dinosaurs and humans worked together for the greater good.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 83, 'relevance_score': 0.55727315}),\n",
654 | " Document(page_content=\"Thundertooth, though initially startled, found comfort in the mayor's soothing tone. In broken sentences, he explained his journey through time, the strange portal, and his hunger dilemma.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 37, 'relevance_score': 0.5530527}),\n",
655 | " Document(page_content='Mayor Grace listened intently, her eyes widening with amazement at the tale of the prehistoric dinosaur navigating the future.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 23, 'relevance_score': 0.50029564}),\n",
656 | " Document(page_content='Realizing the dinosaur\\'s predicament, Mayor Grace extended an invitation. \"You are welcome in our city, Thundertooth. We can find a way to provide for you without causing harm to anyone. Let us work together to find a solution.\"', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 49, 'relevance_score': 0.49328274}),\n",
657 | " Document(page_content=\"The citizens, emerging from their shelters, erupted into cheers of gratitude. Mayor Grace approached Thundertooth, expressing her heartfelt thanks for the family's heroic efforts. The Thundertooth family, tired but triumphant, basked in the relief of having saved their beloved city from imminent disaster.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 57, 'relevance_score': 0.4307459}),\n",
658 | " Document(page_content='The Thundertooth family, having weathered the storm of internal strife, emerged stronger and more united than ever. The city, though scarred, witnessed the resilience of the family that had once saved it from disaster and now worked together to mend the wounds inflicted by one of their own. The tale of the Thundertooth family, once a story of heroism, now became a saga of redemption and the enduring power of familial bonds.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 87, 'relevance_score': 0.41418782}),\n",
659 | " Document(page_content='The mayor, recognizing Thundertooth\\'s intelligence and resourcefulness, approached him. \"Thundertooth, we need a plan to divert or neutralize the meteor. Our technology can only do so much, but with your unique abilities, perhaps we can find a solution.\"', metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 54, 'relevance_score': 0.39957732}),\n",
660 | " Document(page_content=\"As Ignis's controlled bursts of flames interacted with the meteor, it began to change course. The combined efforts of the Thundertooth family, guided by their unique talents, diverted the catastrophic collision. The meteor, once destined for destruction, now harmlessly sailed past the Earth, leaving the city and its inhabitants unscathed.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 65, 'relevance_score': 0.3535398}),\n",
661 | " Document(page_content=\"As the citizens evacuated to designated shelters, the Thundertooth family sprang into action. Lumina worked tirelessly to strengthen the city's energy systems, Echo echoed evacuation orders through the city's speakers, Sapphire offered comfort to those in distress, and Ignis unleashed controlled bursts of flames towards the approaching meteor.\", metadata={'source': 'Data/Thundertooth Part 3.docx', 'token_count': 60, 'relevance_score': 0.3424616}),\n",
662 | " Document(page_content=\"As the news of Thundertooth's arrival spread, the city embraced the talking dinosaur as a symbol of unity between the past and the future. Thundertooth found a new home in the city's park, where holographic flowers bloomed, and the citizens marveled at the beauty of coexistence across time. And so, in this extraordinary city of flying cars and advanced technology, Thundertooth became a beloved figure, a living bridge between eras, teaching the people that understanding and cooperation could overcome even the greatest challenges.\", metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 105, 'relevance_score': 0.33111975}),\n",
663 | " Document(page_content='The battle raged on, each sibling fighting not only to protect the city but also to save Ignis from the darkness that had consumed him. The once-close bond that defined the Thundertooth family now hung in the balance, teetering on the edge of destruction.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 55, 'relevance_score': 0.30611202}),\n",
664 | " Document(page_content='\"Hello there, majestic creature. What brings you to our time?\" Mayor Grace inquired, her voice calm and reassuring.', metadata={'source': 'Data/Thundertooth Part 1.docx', 'token_count': 24, 'relevance_score': 0.28686798}),\n",
665 | " Document(page_content='As the clash reached its climax, the Thundertooth siblings, exhausted but resolute, managed to combine their unique talents in a final, desperate attempt to reach Ignis. In a blinding burst of light and energy, they enveloped their wayward brother, hoping to break the sinister hold that gripped him.', metadata={'source': 'Data/Thundertooth Part 4.docx', 'token_count': 62, 'relevance_score': 0.21816924}),\n",
666 | " Document(page_content=\"The toy factory became a symbol of innovation and unity, bringing together dinosaurs and humans in a shared appreciation for creativity and wonder. Thundertooth's legacy extended beyond his time-traveling adventure, leaving an indelible mark on the city and its inhabitants, reminding them that sometimes, the most magical things could emerge from the most unexpected places.\", metadata={'source': 'Data/Thundertooth Part 2.docx', 'token_count': 69, 'relevance_score': 0.21717145})]"
667 | ]
668 | },
669 | "execution_count": 46,
670 | "metadata": {},
671 | "output_type": "execute_result"
672 | }
673 | ],
674 | "source": [
675 | "# Let's test that it includes the paragraph starting with \"As the years passed...\" when asked about their children.\n",
676 | "\n",
677 | "test_retrieval = compression_retriever.get_relevant_documents(\"Did they have any children? If so, what were their names?\")\n",
678 | "\n",
679 | "test_retrieval"
680 | ]
681 | },
682 | {
683 | "cell_type": "markdown",
684 | "metadata": {},
685 | "source": [
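686 | "The above is intended to show that we are able to get that paragraph and that it is ranked highly. Note: in the saved output above, the \"As the years passed...\" paragraph does not appear among the returned chunks, so re-run this cell and check the results against your own vector store.\n",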
687 | "\n",
688 | "Importantly, if we had not brought enough chunks back with the retriever (referring to the vector store retriever) then we would not have had the right chunks to run through Cohere for reranking.\n",
689 | "\n",
690 | "So if this line:\n",
691 | "```\n",
692 | "retriever = vector.as_retriever(search_kwargs={\"k\": 20})\n",
693 | "```\n",
694 | "\n",
695 | "was:\n",
696 | "```\n",
697 | "retriever = vector.as_retriever(search_kwargs={\"k\": 10})\n",
698 | "```\n",
699 | "\n",
700 | "We would not have been able to get that \"As the years passed...\" chunk for reranking.\n",
701 | "\n",
702 | "Additionally, because every chunk now carries a relevance score, we're able to reduce the number of chunks from the 11+ we needed to retrieve down to only the highest-ranked ones that fit within our maximum context token budget. This reduces the tokens needed for the LLM to process."
703 | ]
704 | },
705 | {
706 | "cell_type": "markdown",
707 | "metadata": {},
708 | "source": [
709 | "Now, we create a LangChain chain with the Cohere reranker retriever.\n",
710 | "\n",
711 | "Except, we need to use a hack to get the callback triggered after our documents are retrieved from Cohere. We do it through a function that returns the chain (rather than creating the chain directly, as we did previously; that earlier approach is shown commented out in the following cell).\n",
712 | "\n",
713 | "See GitHub issue for more information:\n",
714 | "https://github.com/langchain-ai/langchain/issues/7290"
715 | ]
716 | },
717 | {
718 | "cell_type": "code",
719 | "execution_count": 47,
720 | "metadata": {},
721 | "outputs": [],
722 | "source": [
723 | "# from langchain.chains import RetrievalQA\n",
724 | "\n",
725 | "# rerank_chain = RetrievalQA.from_chain_type(\n",
726 | " # llm=llm,\n",
727 | " # retriever=compression_retriever,\n",
728 | " # return_source_documents=True,\n",
729 | " # chain_type_kwargs={\"prompt\": prompt} # Pass in our prompt\n",
730 | "# )"
731 | ]
732 | },
733 | {
734 | "cell_type": "code",
735 | "execution_count": 48,
736 | "metadata": {},
737 | "outputs": [],
738 | "source": [
739 | "from langchain.schema.runnable import RunnablePassthrough\n",
740 | "from uuid import uuid4\n",
741 | "\n",
742 | "def get_rerankchain(retriever, llm, callbacks):\n",
743 | "\n",
744 | " # Format the documents for our context\n",
745 | " # We include the source and relevance, hoping the LLMs will use them\n",
746 | " # The prompt could be used to include examples of the citations\n",
747 | " def format_docs(docs):\n",
748 | " formatted_text = []\n",
749 | "\n",
750 | " for doc in docs:\n",
751 | " source = doc.metadata.get(\"source\", \"Unknown Source\")\n",
752 | " relevance_score = int(doc.metadata.get(\"relevance_score\", 0.0) * 100)\n",
753 | " page_content = doc.page_content\n",
754 | "\n",
755 | " formatted_doc = (\n",
756 | " f\"[Source '{source}', Relevance {relevance_score}]\\n\"\n",
757 | " f\"{page_content}\\n\"\n",
758 | " )\n",
759 | " formatted_text.append(formatted_doc)\n",
760 | "\n",
761 | " result = \"\\n\".join(formatted_text)\n",
762 | " return result\n",
763 | "\n",
764 | " def hack_inject_callback(docs):\n",
765 | " # https://github.com/langchain-ai/langchain/issues/7290\n",
766 | " for callback in callbacks:\n",
767 | " callback.on_retriever_end(docs, run_id=uuid4())\n",
768 | "\n",
769 | " return docs\n",
770 | "\n",
771 | " return (\n",
772 | " {\"context\": retriever | hack_inject_callback | format_docs, \"question\": RunnablePassthrough()}\n",
773 | " | prompt\n",
774 | " | llm\n",
775 | " )"
776 | ]
777 | },
778 | {
779 | "cell_type": "markdown",
780 | "metadata": {},
781 | "source": [
782 | "We create a callback handler to be called after we've retrieved all the documents with their relevance score.\n",
783 | "\n",
784 | "With these, we count the tokens of each and remove any that exceed our maximum context token length.\n",
785 | "\n",
786 | "This maximises the number of documents we send to the LLM while keeping within our context length bounds."
787 | ]
788 | },
789 | {
790 | "cell_type": "code",
791 | "execution_count": 49,
792 | "metadata": {},
793 | "outputs": [],
794 | "source": [
795 | "class RetrievalHandler(BaseCallbackHandler):\n",
796 | "\n",
797 | " def on_retriever_end(self, documents, **kwargs):\n",
798 | " total_tokens = 0\n",
799 | " context_tokens_used = 0\n",
800 | "\n",
801 | " # Documents we'll keep as they fit within our target token count\n",
802 | " documents_to_keep = []\n",
803 | "\n",
804 | " for idx, doc in enumerate(documents):\n",
805 | " total_tokens += doc.metadata[\"token_count\"]\n",
806 | "\n",
807 | " if total_tokens <= maximum_context_tokens:\n",
808 | " # Good to keep\n",
809 | " context_tokens_used += doc.metadata[\"token_count\"]\n",
810 | " documents_to_keep.append(doc)\n",
811 | "\n",
812 | " print(f\"[ON RETRIEVER END - FINISH - total tokens {total_tokens} across {len(documents)} documents. Kept {context_tokens_used} across {len(documents_to_keep)}]\")\n",
813 | "\n",
814 | " # Modify the contents of the original 'documents' list which we'll then format for insertion into the context within the prompt\n",
815 | " documents.clear()\n",
816 | " documents.extend(documents_to_keep)\n"
817 | ]
818 | },
819 | {
820 | "cell_type": "code",
821 | "execution_count": 50,
822 | "metadata": {},
823 | "outputs": [],
824 | "source": [
825 | "# Here are our test questions\n",
826 | "\n",
827 | "TestQuestions = [\n",
828 | " \"Summarise the story for me.\",\n",
829 | " \"Who was the main protagonist?\",\n",
830 | " \"Did they have any children? If so, what were their names?\",\n",
831 | " \"Did anything eventful happen?\",\n",
832 | " \"Who are the main characters?\",\n",
833 | " \"What do you think happens next in the story?\"\n",
834 | "]"
835 | ]
836 | },
837 | {
838 | "cell_type": "markdown",
839 | "metadata": {},
840 | "source": [
841 | "Ask our questions with our reranking chain.\n",
842 | "\n",
843 | "Here we reiterate that we want to include citations."
844 | ]
845 | },
846 | {
847 | "cell_type": "code",
848 | "execution_count": 53,
849 | "metadata": {},
850 | "outputs": [
851 | {
852 | "name": "stdout",
853 | "output_type": "stream",
854 | "text": [
855 | "\n",
856 | "1/6: Summarise the story for me.\n",
857 | "[ON RETRIEVER END - FINISH - total tokens 965 across 20 documents. Kept 965 across 20]\n",
858 | " In a futuristic city of advanced technology and flying cars, Thundertooth, a talking dinosaur from the past, arrives through a time portal. The mayor, Grace, invites him to stay in the city, where he becomes a symbol of unity between the past and the future. Thundertooth and his family, including Lumina, Echo, Sapphire, and Ignis, live in harmony with humans, becoming beloved figures and an integral part of the community. They also establish a toy factory that brings dinosaurs and humans together in shared creativity and innovation.\n",
859 | "\n",
860 | "When a meteor threatens the city, Thundertooth's family uses their unique abilities to divert it from its path, saving the city and its inhabitants. However, Ignis becomes consumed by darkness and goes on a rampage, causing chaos in the city. The other siblings confront Ignis in an epic battle, determined to save their brother and the city. In the end, they manage to reach Ignis, breaking the sinister hold on him and restoring peace to the city.\n",
861 | "\n",
862 | "The Thundertooth family's heroism and resilience become etched in the city's history, reminding everyone of the power of cooperation and understanding in overcoming even the greatest challenges.\n",
863 | "\n",
864 | " ----\n",
865 | "\n",
866 | "[ PROMPT TOKEN COUNT 1711 | RESPONSE TOKEN COUNT 272 ]\n",
867 | "\n",
868 | "2/6: Who was the main protagonist?\n",
869 | "[ON RETRIEVER END - FINISH - total tokens 994 across 20 documents. Kept 994 across 20]\n",
870 | " The main protagonist in this story is Thundertooth, an intelligent and resourceful dinosaur who saves the city from an imminent disaster by diverting a meteor with his family's unique abilities. Thundertooth becomes a beloved figure and a symbol of unity between eras in the city where he finds a new home.\n",
871 | "\n",
872 | " ----\n",
873 | "\n",
874 | "[ PROMPT TOKEN COUNT 1746 | RESPONSE TOKEN COUNT 72 ]\n",
875 | "\n",
876 | "3/6: Did they have any children? If so, what were their names?\n",
877 | "[ON RETRIEVER END - FINISH - total tokens 972 across 20 documents. Kept 972 across 20]\n",
878 | " Yes, the source 'Data/Thundertooth Part 2.docx' with relevance 59 mentions that Thundertooth and Seraphina reveled in the joy of parenthood, watching their children grow and flourish in the futuristic landscape they now called home. However, the names of their children are not explicitly mentioned in the provided context.\n",
879 | "\n",
880 | " ----\n",
881 | "\n",
882 | "[ PROMPT TOKEN COUNT 1731 | RESPONSE TOKEN COUNT 80 ]\n",
883 | "\n",
884 | "4/6: Did anything eventful happen?\n",
885 | "[ON RETRIEVER END - FINISH - total tokens 1015 across 20 documents. Kept 1015 across 20]\n",
886 | " Indeed, quite a number of eventful things have occurred in this context. To summarize:\n",
887 | "\n",
888 | "1. Thundertooth, an intelligent dinosaur from prehistoric times, found himself in a futuristic city after passing through a strange portal (Data/Thundertooth Part 1.docx).\n",
889 | "2. The mayor of the city, Grace, recognized Thundertooth's abilities and approached him for help to neutralize an incoming meteor (Data/Thundertooth Part 3.docx).\n",
890 | "3. Thundertooth founded a toy factory that produced magical widgets, which were highly sought after by children across the city (Data/Thundertooth Part 2.docx).\n",
891 | "4. The citizens of the city hailed Thundertooth and his family as heroes for saving them from the meteor crisis (Data/Thundertooth Part 3.docx).\n",
892 | "5. Thundertooth's children, Lumina, Echo, Sapphire, and Ignis, had unique talents that contributed to the family's legacy of unity and creativity (Data/Thundertooth Part 2, 4.docx).\n",
893 | "6. However, Ignis developed erratic behavior due to his thirst for power, causing internal strife within the Thundertooth family (Data/Thundertooth Part 4.docx).\n",
894 | "7. In a final attempt, the Thundertooth siblings managed to envelop Ignis in a blinding burst of light and energy, hoping to break the sinister hold that gripped him (Data/Thundertoth Part 4.docx).\n",
895 | "\n",
896 | " ----\n",
897 | "\n",
898 | "[ PROMPT TOKEN COUNT 1760 | RESPONSE TOKEN COUNT 354 ]\n",
899 | "\n",
900 | "5/6: Who are the main characters?\n",
901 | "[ON RETRIEVER END - FINISH - total tokens 994 across 20 documents. Kept 994 across 20]\n",
902 | " The main characters in this story are Thundertooth, a prehistoric dinosaur with intelligence and resourcefulness, and Mayor Grace, the leader of a futuristic city. Thundertooth's family consists of his children, Lumina, Echo, Sapphire, and Ignis, who each possess unique abilities. The Thundertooth family becomes an integral part of the city, contributing to its fabric through their widget factory and positive impact on the community. Later, the family must face a crisis when Ignis's behavior becomes erratic, leading to internal strife and a battle that shakes the city to its core. Ultimately, the Thundertooth family emerges stronger and more united than ever, becoming symbols of resilience, unity, and redemption for the citizens of the city.\n",
903 | "\n",
904 | " ----\n",
905 | "\n",
906 | "[ PROMPT TOKEN COUNT 1745 | RESPONSE TOKEN COUNT 174 ]\n",
907 | "\n",
908 | "6/6: What do you think happens next in the story?\n",
909 | "[ON RETRIEVER END - FINISH - total tokens 984 across 20 documents. Kept 984 across 20]\n",
910 | " With the meteor successfully diverted and the city saved, the Thundertooth family turns its attention to their wayward brother, Ignis. Realizing the danger he poses, the siblings unite once more, combining their unique talents in a desperate attempt to reach him. In a blinding burst of light and energy, they envelop Ignis, hoping to break the sinister hold that has consumed him.\n",
911 | "\n",
912 | "As the darkness fades, Ignis emerges, shocked and remorseful for his actions. The family, exhausted but relieved, reunites, stronger and more united than ever. Together, they work to mend the wounds inflicted by the conflict, leaving a lasting impact on the city and its inhabitants.\n",
913 | "\n",
914 | "In time, Thundertooth's legacy extends beyond his time-traveling adventure, reminding everyone that sometimes, the most magical things can emerge from the most unexpected places. The tale of the Thundertooth family becomes a saga of redemption and the enduring power of familial bonds, teaching the people that understanding, cooperation, and resilience can overcome even the greatest challenges.\n",
915 | "\n",
916 | " ----\n",
917 | "\n",
918 | "[ PROMPT TOKEN COUNT 1736 | RESPONSE TOKEN COUNT 238 ]\n"
919 | ]
920 | }
921 | ],
922 | "source": [
923 | "# Our Question and Answers for display\n",
924 | "qa_pairs = []\n",
925 | "\n",
926 | "# our Retrieval Call Back Handler - so we can intercept the results of the retriever and adjust how many documents are passed through to the LLM\n",
927 | "retrieval_callback_handler = RetrievalHandler()\n",
928 | "\n",
929 | "for index, question in enumerate(TestQuestions, start=1):\n",
930 | " question = question.strip() # Clean up\n",
931 | "\n",
932 | " print(f\"\\n{index}/{len(TestQuestions)}: {question}\")\n",
933 | "\n",
934 | " rerank_chain = get_rerankchain(retriever=compression_retriever, llm=llm, callbacks=[retrieval_callback_handler])\n",
935 | "\n",
936 | " # Ask the Retriever and then the LLM the question\n",
937 | " response = rerank_chain.invoke(question)\n",
938 | "\n",
939 | " # Keep track of question, answer, prompt tokens, and response tokens\n",
940 | " qa_pairs.append((question.strip(), response.strip(), llmresult_prompt_token_count, llmresult_response_token_count)) # Add to our output array\n",
941 | "\n",
942 | " # Uncomment the following line if you want to test just the first question\n",
943 | " # break "
944 | ]
945 | },
946 | {
947 | "cell_type": "code",
948 | "execution_count": 54,
949 | "metadata": {},
950 | "outputs": [
951 | {
952 | "name": "stdout",
953 | "output_type": "stream",
954 | "text": [
955 | "1/6 Summarise the story for me.\n",
956 | "\n",
957 | "[Prompt Tokens: 1711, Response Tokens: 272]\n",
958 | "\n",
959 | "In a futuristic city of advanced technology and flying cars, Thundertooth, a talking dinosaur from the past, arrives through a time portal. The mayor, Grace, invites him to stay in the city, where he becomes a symbol of unity between the past and the future. Thundertooth and his family, including Lumina, Echo, Sapphire, and Ignis, live in harmony with humans, becoming beloved figures and an integral part of the community. They also establish a toy factory that brings dinosaurs and humans together in shared creativity and innovation.\n",
960 | "\n",
961 | "When a meteor threatens the city, Thundertooth's family uses their unique abilities to divert it from its path, saving the city and its inhabitants. However, Ignis becomes consumed by darkness and goes on a rampage, causing chaos in the city. The other siblings confront Ignis in an epic battle, determined to save their brother and the city. In the end, they manage to reach Ignis, breaking the sinister hold on him and restoring peace to the city.\n",
962 | "\n",
963 | "The Thundertooth family's heroism and resilience become etched in the city's history, reminding everyone of the power of cooperation and understanding in overcoming even the greatest challenges.\n",
964 | "\n",
965 | "--------\n",
966 | "\n",
967 | "2/6 Who was the main protagonist?\n",
968 | "\n",
969 | "[Prompt Tokens: 1746, Response Tokens: 72]\n",
970 | "\n",
971 | "The main protagonist in this story is Thundertooth, an intelligent and resourceful dinosaur who saves the city from an imminent disaster by diverting a meteor with his family's unique abilities. Thundertooth becomes a beloved figure and a symbol of unity between eras in the city where he finds a new home.\n",
972 | "\n",
973 | "--------\n",
974 | "\n",
975 | "3/6 Did they have any children? If so, what were their names?\n",
976 | "\n",
977 | "[Prompt Tokens: 1731, Response Tokens: 80]\n",
978 | "\n",
979 | "Yes, the source 'Data/Thundertooth Part 2.docx' with relevance 59 mentions that Thundertooth and Seraphina reveled in the joy of parenthood, watching their children grow and flourish in the futuristic landscape they now called home. However, the names of their children are not explicitly mentioned in the provided context.\n",
980 | "\n",
981 | "--------\n",
982 | "\n",
983 | "4/6 Did anything eventful happen?\n",
984 | "\n",
985 | "[Prompt Tokens: 1760, Response Tokens: 354]\n",
986 | "\n",
987 | "Indeed, quite a number of eventful things have occurred in this context. To summarize:\n",
988 | "\n",
989 | "1. Thundertooth, an intelligent dinosaur from prehistoric times, found himself in a futuristic city after passing through a strange portal (Data/Thundertooth Part 1.docx).\n",
990 | "2. The mayor of the city, Grace, recognized Thundertooth's abilities and approached him for help to neutralize an incoming meteor (Data/Thundertooth Part 3.docx).\n",
991 | "3. Thundertooth founded a toy factory that produced magical widgets, which were highly sought after by children across the city (Data/Thundertooth Part 2.docx).\n",
992 | "4. The citizens of the city hailed Thundertooth and his family as heroes for saving them from the meteor crisis (Data/Thundertooth Part 3.docx).\n",
993 | "5. Thundertooth's children, Lumina, Echo, Sapphire, and Ignis, had unique talents that contributed to the family's legacy of unity and creativity (Data/Thundertooth Part 2, 4.docx).\n",
994 | "6. However, Ignis developed erratic behavior due to his thirst for power, causing internal strife within the Thundertooth family (Data/Thundertooth Part 4.docx).\n",
995 | "7. In a final attempt, the Thundertooth siblings managed to envelop Ignis in a blinding burst of light and energy, hoping to break the sinister hold that gripped him (Data/Thundertoth Part 4.docx).\n",
996 | "\n",
997 | "--------\n",
998 | "\n",
999 | "5/6 Who are the main characters?\n",
1000 | "\n",
1001 | "[Prompt Tokens: 1745, Response Tokens: 174]\n",
1002 | "\n",
1003 | "The main characters in this story are Thundertooth, a prehistoric dinosaur with intelligence and resourcefulness, and Mayor Grace, the leader of a futuristic city. Thundertooth's family consists of his children, Lumina, Echo, Sapphire, and Ignis, who each possess unique abilities. The Thundertooth family becomes an integral part of the city, contributing to its fabric through their widget factory and positive impact on the community. Later, the family must face a crisis when Ignis's behavior becomes erratic, leading to internal strife and a battle that shakes the city to its core. Ultimately, the Thundertooth family emerges stronger and more united than ever, becoming symbols of resilience, unity, and redemption for the citizens of the city.\n",
1004 | "\n",
1005 | "--------\n",
1006 | "\n",
1007 | "6/6 What do you think happens next in the story?\n",
1008 | "\n",
1009 | "[Prompt Tokens: 1736, Response Tokens: 238]\n",
1010 | "\n",
1011 | "With the meteor successfully diverted and the city saved, the Thundertooth family turns its attention to their wayward brother, Ignis. Realizing the danger he poses, the siblings unite once more, combining their unique talents in a desperate attempt to reach him. In a blinding burst of light and energy, they envelop Ignis, hoping to break the sinister hold that has consumed him.\n",
1012 | "\n",
1013 | "As the darkness fades, Ignis emerges, shocked and remorseful for his actions. The family, exhausted but relieved, reunites, stronger and more united than ever. Together, they work to mend the wounds inflicted by the conflict, leaving a lasting impact on the city and its inhabitants.\n",
1014 | "\n",
1015 | "In time, Thundertooth's legacy extends beyond his time-traveling adventure, reminding everyone that sometimes, the most magical things can emerge from the most unexpected places. The tale of the Thundertooth family becomes a saga of redemption and the enduring power of familial bonds, teaching the people that understanding, cooperation, and resilience can overcome even the greatest challenges.\n",
1016 | "\n",
1017 | "--------\n",
1018 | "\n"
1019 | ]
1020 | }
1021 | ],
1022 | "source": [
1023 | "# Print out the questions and answers\n",
1024 | "\n",
1025 | "for index, (question, answer, prompttokens, responsetokens) in enumerate(qa_pairs, start=1):\n",
1026 | " print(f\"{index}/{len(qa_pairs)} {question}\\n\\n[Prompt Tokens: {prompttokens}, Response Tokens: {responsetokens}]\\n\\n{answer}\\n\\n--------\\n\")"
1027 | ]
1028 | },
1029 | {
1030 | "cell_type": "markdown",
1031 | "metadata": {},
1032 | "source": []
1033 | }
1034 | ],
1035 | "metadata": {
1036 | "kernelspec": {
1037 | "display_name": "LangChainRAGLinux",
1038 | "language": "python",
1039 | "name": "python3"
1040 | },
1041 | "language_info": {
1042 | "codemirror_mode": {
1043 | "name": "ipython",
1044 | "version": 3
1045 | },
1046 | "file_extension": ".py",
1047 | "mimetype": "text/x-python",
1048 | "name": "python",
1049 | "nbconvert_exporter": "python",
1050 | "pygments_lexer": "ipython3",
1051 | "version": "3.10.13"
1052 | }
1053 | },
1054 | "nbformat": 4,
1055 | "nbformat_minor": 2
1056 | }
1057 |
--------------------------------------------------------------------------------
/Data/Thundertooth Part 1.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marklysze/LangChain-RAG-Linux/bdc85ff77b2349f2d9c79df2e6e1ba46ffa972d7/Data/Thundertooth Part 1.docx
--------------------------------------------------------------------------------
/Data/Thundertooth Part 2.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marklysze/LangChain-RAG-Linux/bdc85ff77b2349f2d9c79df2e6e1ba46ffa972d7/Data/Thundertooth Part 2.docx
--------------------------------------------------------------------------------
/Data/Thundertooth Part 3.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marklysze/LangChain-RAG-Linux/bdc85ff77b2349f2d9c79df2e6e1ba46ffa972d7/Data/Thundertooth Part 3.docx
--------------------------------------------------------------------------------
/Data/Thundertooth Part 4.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marklysze/LangChain-RAG-Linux/bdc85ff77b2349f2d9c79df2e6e1ba46ffa972d7/Data/Thundertooth Part 4.docx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RAG with LangChain - Nvidia CUDA + Linux + Word documents + Local LLM (Ollama)
2 |
3 | These notebooks demonstrate the use of LangChain for Retrieval Augmented Generation using Linux and Nvidia's CUDA.
4 |
5 | Note: Using [LangChain v0.1.1](https://blog.langchain.dev/langchain-v0-1-0/).
6 |
7 | Environment:
8 | - Linux (I'm running Ubuntu 22.04)
9 | - Conda environment (I'm using Miniconda)
10 | - CUDA (environment is set up for 12.2)
11 | - Visual Studio Code (to run the Jupyter Notebooks)
12 | - Nvidia RTX 3090
13 | - 64GB RAM (Can be run with less)
14 | - LLMs - Mistral 7B, Llama 2 13B Chat, Orca 2 13B, Yi 34B (Work in progress), Mixtral 8x7B, Neural Chat 7B, Phi-2, SOLAR 10.7B - Quantized versions
15 |
16 | Your Data:
17 | - Add Word documents to the "Data" folder for the RAG to use
18 |
19 | Package versions:
20 | - See the "environment.yml" for the full list of versions in the conda environment (generated using "conda env export").
21 |
22 | Local LLMs:
23 | - Ollama is run locally and you use the "ollama pull" command to pull down the models you want. For example, to pull down Mixtral 8x7B (4-bit quantized):
24 | ```
25 | ollama pull mixtral:8x7b-instruct-v0.1-q4_K_M
26 | ```
27 | - See the Ollama [models page](https://ollama.ai/library) for the list of models. Within each model, use the "Tags" tab to see the different versions available ([example](https://ollama.ai/library/mixtral/tags)).
28 |
29 | Note that [nvtop](https://github.com/Syllo/nvtop) is a useful tool to monitor realtime utilisation of your GPU. Helpful to make sure the models fit within GPU memory (and don't go into your RAM and use your CPU as well).
30 |
31 | # Notebooks
32 |
33 | ### 01-LangChain-RAG
34 | Get started with LangChain and Ollama, being able to use various local LLMs and Word Documents as sources for Retrieval Augmented Generation (RAG). Have it answer a few questions and see what they give you.
35 |
36 | ### 02-LangChain-RAG LangSmith
37 | To see what is happening under the hood, sign up for a LangSmith Access Code and use this notebook to see how it is set up. Same functionality as the previous notebook.
38 |
39 | ### 03-LangChain-RAG Chunk Rerank
40 | Get started with breaking up the document yourself into better chunks and then using Cohere's reranking (free non-commercial API key available) to prioritise the chunks for your questions. I found the self-chunking and reranking improved the LLM responses significantly.
41 |
42 | ### 04-LangChain-RAG Chunk Rerank Max Context
43 | Continuing on from #03, we now want to maximise the amount of context given to the LLM. Previously this was a set number of chunks, now we keep track of the number of tokens per chunk and give the LLM the maximum number of chunks we can fit into a given token limit (which we set).
44 |
45 | We add callbacks (including a hack to work around a bug) to count tokens and to manually choose the right number of chunks to give the LLM after retrieval.
46 |
47 | This maximises the amount of context given to the LLM while keeping within a set context length so we don't exceed the LLM's context window.
48 |
49 | # Installation
50 |
51 | 1. Open a terminal
52 |
53 | 2. Navigate to the directory where you want to clone the repository
54 |
55 | 3. Clone the repository
56 | ```
57 | git clone https://github.com/marklysze/LangChain-RAG-Linux-CUDA
58 | ```
59 |
60 | 4. Navigate to the repository directory
61 | ```
62 | cd LangChain-RAG-Linux-CUDA
63 | ```
64 |
65 | 5. Create the Conda environment
66 | ```
67 | conda env create -f environment.yml
68 | ```
69 |
70 | 6. Activate the Conda environment
71 | ```
72 | conda activate LangChainRAGLinux
73 | ```
74 |
75 | 7. [Install Ollama](https://python.langchain.com/docs/integrations/llms/ollama) ([repository with instructions](https://github.com/jmorganca/ollama)) if you haven't already and pull the models you want to use. See above for sample command to pull models.
76 |
77 | 8. Run Visual Studio Code
78 | ```
79 | code .
80 | ```
81 |
82 | 9. Choose a Jupyter Notebook file to open
83 |
84 | 10. On the top-right you may need to choose the newly created kernel. In the top-right if it says "Select Kernel", click it and choose your Python environment... and then "LangChainRAGLinux".
85 |
86 | 11. Run the Jupyter Notebook
87 |
88 | # Output examples
89 |
90 | Question asked of the model based on the story:
91 | > Summarise the story for me
92 |
93 | ---
94 | **Llama 2:**
95 | ```
96 | Sure! Here is a summary of the story:
97 |
98 | Thundertooth, a time-traveling dinosaur, founded a toy factory in a futuristic city where he and his family produced magical widgets that brought joy to children. When a meteor threatened the city's safety, Thundertooth and his family used their unique abilities to divert it, saving the city from destruction. The citizens hailed them as heroes, and the toy factory became a symbol of resilience and unity.
99 | ```
100 |
101 | ---
102 | **Mistral 7B:**
103 | ```
104 | The story follows Thundertooth, a talking dinosaur who travels through time to the future city of New Eden. There, he meets Mayor Grace and her team of scientists, who help him find food without harming humans. The citizens of New Eden embrace Thundertooth as a symbol of unity between eras, and he becomes a beloved figure in the city.
105 |
106 | One day, a massive meteor threatens to destroy the city. Thundertooth, along with his family Lumina, Echo, Sapphire, and Ignis, work together to divert the meteor's path using their unique talents. Their efforts are successful, and they save the city from destruction. The citizens hail them as heroes, and their legacy is forever etched in the city's history.
107 | ```
108 |
109 | ---
110 | **Mixtral 8X7B:**
111 | ```
112 | Thundertooth, a talking dinosaur from prehistoric times, finds himself in a futuristic city after passing through a strange portal. Initially overwhelmed by the unfamiliar sights and sounds, he struggles to find food without causing harm to the humans. The mayor of the city, Eleanor Grace, extends her hospitality and helps Thundertooth find a sustainable solution for his hunger.
113 |
114 | As the story progresses, a meteor threatens Earth, and Thundertooth and his family step up to save the city. Lumina enhances the city's energy systems, Echo amplifies evacuation orders, Sapphire calms the panicked masses, and Ignis alters the meteor's trajectory. Their combined efforts successfully divert the catastrophic collision, ensuring the safety of the citizens. The Thundertooth family is hailed as heroes, and their legacy becomes a symbol of resilience and unity in the city.
115 | ```
116 |
117 | ---
118 | **Neural Chat 7B:**
119 | ```
120 | In a futuristic city, Thundertooth, a talking dinosaur, becomes an integral part of the community after being transported through time. He and his family establish a toy factory that produces magical widgets loved by both children and adults. When a meteor threatens to destroy their city, Thundertooth's family works together to save it using their unique abilities. Their heroic efforts unite the citizens, cementing the Thundertooth family as beloved heroes in the city's history.
121 | ```
122 |
123 | ---
124 | **Orca 2:**
125 | ```
126 | Key points:
127 | - Thundertooth is a talking dinosaur who travels through time and ends up in a futuristic city
128 | - He meets Mayor Grace and the citizens, who help him find food without harming anyone
129 | - He starts a toy factory with his family that produces magical widgets
130 | - He saves the city from a meteor threat with the help of his family's unique talents
131 |
132 | Summary:
133 | Thundertooth is a dinosaur who can talk and travel through time. He arrives in a futuristic city where he meets Mayor Grace, who welcomes him and helps him find food that does not harm anyone. He starts a toy factory with his family that makes amazing widgets that delight the people. When a meteor threatens to destroy the city, Thundertooth and his family use their abilities to divert it and save the day. They become heroes and symbols of unity in the city.
134 | ```
135 | ---
136 | **Phi-2:**
137 | ```
138 | In this story, Thundertooth, a dinosaur who traveled back in time to the year 2050, discovers a futuristic city where he meets his human counterpart, Seraphina. Together, they have four children with unique abilities inherited from their parents. When a meteor threatens the city, Thundertooth and his family use their talents to protect the citizens and divert the meteor's path. Their heroic efforts save the city, making them heroes in the eyes of its inhabitants. The toy factory becomes a symbol of unity and resilience, showcasing the power of cooperation between dinosaurs and humans.
139 | ```
140 | ---
141 | **SOLAR 10.7B Instruct:**
142 | ```
143 | In a futuristic city, residents discover an incoming meteor threatening their existence. Thundertooth, a unique dinosaur with human-like communication abilities, lives in this city and becomes the central figure in averting the disaster. Recognizing his importance, Mayor Grace enlists Thundertooth and his family to devise a plan to divert or neutralize the meteor.
144 |
145 | Lumina, their eldest child, enhances the city's energy systems to create a protective force field. Echo, with his mimicry abilities, amplifies emergency signals for timely evacuation instructions. Sapphire, who possesses calming and healing powers, helps maintain order during the evacuation by comforting panicked citizens. Ignis, the youngest child, uses his fire-controlling talents to alter the meteor's trajectory and reduce its destructive force.
146 |
147 | Thundertooth leads his family and the city's inhabitants in their combined efforts to deflect the meteor's deadly path. Their coordinated actions successfully divert the catastrophic collision, saving the city from destruction. The citizens celebrate Thundertooth and his family as true heroes, forever etching their legacy into the city's history.
148 |
149 | Thundertooth, initially a stranger to this futuristic world, found a new home in the city after being transported through time. He embraced the advanced technology around him and founded a toy factory producing magical interactive widgets that captivated both children and adults. Thundertooth later married Seraphina, another intelligent dinosaur, and they had four unique children who inherited traits from their parents. Together, they became an integral part of the city's community, leaving a lasting impact through their family toy factory.
150 | ```
151 |
152 | ---
153 | **Yi 34B:**
154 |
155 | I could not get Yi-34B to complete inference. I tried lower quantized models but it still didn't complete. If anyone has Yi-34B working with LangChain and Ollama, help please!
156 |
157 | ---
158 | #### Notes
159 | - Getting the LLM to include citations with the sources is LLM dependent and heavily dependent on the prompt. It's important to get citations so I'll work on getting that for the #5 notebook.
160 |
161 | ---
162 |
163 | #### Interested in LlamaIndex with Linux?
164 | Check out this repository which shows RAG with LlamaIndex: [https://github.com/marklysze/LlamaIndex-RAG-Linux-CUDA](https://github.com/marklysze/LlamaIndex-RAG-Linux-CUDA)
165 |
166 | #### Using Microsoft Windows and interested in LlamaIndex?
167 | Check out the equivalent notebooks in this repository using LlamaIndex for RAG: [https://github.com/marklysze/LlamaIndex-RAG-WSL-CUDA](https://github.com/marklysze/LlamaIndex-RAG-WSL-CUDA)
168 |
169 |
170 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: LangChainRAGLinux
2 | channels:
3 | - pytorch
4 | - nvidia
5 | - conda-forge
6 | - defaults
7 | dependencies:
8 | - _libgcc_mutex=0.1=conda_forge
9 | - _openmp_mutex=4.5=2_gnu
10 | - abseil-cpp=20211102.0=hd4dd3e8_0
11 | - arrow-cpp=11.0.0=h374c478_2
12 | - asttokens=2.4.1=pyhd8ed1ab_0
13 | - async-timeout=4.0.3=py310h06a4308_0
14 | - aws-c-common=0.6.8=h5eee18b_1
15 | - aws-c-event-stream=0.1.6=h6a678d5_6
16 | - aws-checksums=0.1.11=h5eee18b_2
17 | - aws-sdk-cpp=1.8.185=h721c034_1
18 | - blas=1.0=mkl
19 | - boost-cpp=1.82.0=hdb19cb5_2
20 | - bottleneck=1.3.5=py310ha9d4c09_0
21 | - brotli-python=1.0.9=py310h6a678d5_7
22 | - bzip2=1.0.8=h7b6447c_0
23 | - c-ares=1.19.1=h5eee18b_0
24 | - ca-certificates=2023.12.12=h06a4308_0
25 | - certifi=2023.11.17=pyhd8ed1ab_0
26 | - cffi=1.16.0=py310h5eee18b_0
27 | - comm=0.2.1=pyhd8ed1ab_0
28 | - cryptography=41.0.7=py310hdda0065_0
29 | - cuda-cudart=12.1.105=0
30 | - cuda-cupti=12.1.105=0
31 | - cuda-libraries=12.1.0=0
32 | - cuda-nvrtc=12.1.105=0
33 | - cuda-nvtx=12.1.105=0
34 | - cuda-opencl=12.3.101=0
35 | - cuda-runtime=12.1.0=0
36 | - cudatoolkit=11.4.1=h8ab8bb3_9
37 | - datasets=2.12.0=py310h06a4308_0
38 | - debugpy=1.6.7=py310h6a678d5_0
39 | - decorator=5.1.1=pyhd8ed1ab_0
40 | - dill=0.3.6=py310h06a4308_0
41 | - exceptiongroup=1.2.0=pyhd8ed1ab_2
42 | - executing=2.0.1=pyhd8ed1ab_0
43 | - filelock=3.13.1=py310h06a4308_0
44 | - fsspec=2023.10.0=py310h06a4308_0
45 | - gflags=2.2.2=he6710b0_0
46 | - glog=0.5.0=h2531618_0
47 | - gmp=6.2.1=h295c915_3
48 | - gmpy2=2.1.2=py310heeb90bb_0
49 | - grpc-cpp=1.48.2=he1ff14a_1
50 | - huggingface_hub=0.17.3=py310h06a4308_0
51 | - icu=73.1=h6a678d5_0
52 | - importlib_metadata=7.0.1=hd8ed1ab_0
53 | - intel-openmp=2023.1.0=hdb19cb5_46306
54 | - ipykernel=6.28.0=pyhd33586a_0
55 | - ipython=8.20.0=pyh707e725_0
56 | - ipywidgets=8.1.1=pyhd8ed1ab_0
57 | - jedi=0.19.1=pyhd8ed1ab_0
58 | - jinja2=3.1.2=py310h06a4308_0
59 | - jupyter_client=8.6.0=pyhd8ed1ab_0
60 | - jupyter_core=5.7.1=py310hff52083_0
61 | - jupyterlab_widgets=3.0.9=pyhd8ed1ab_0
62 | - krb5=1.20.1=h143b758_1
63 | - ld_impl_linux-64=2.38=h1181459_1
64 | - libboost=1.82.0=h109eef0_2
65 | - libbrotlicommon=1.0.9=h5eee18b_7
66 | - libbrotlidec=1.0.9=h5eee18b_7
67 | - libbrotlienc=1.0.9=h5eee18b_7
68 | - libcublas=12.1.0.26=0
69 | - libcufft=11.0.2.4=0
70 | - libcufile=1.8.1.2=0
71 | - libcurand=10.3.4.107=0
72 | - libcurl=8.5.0=h251f7ec_0
73 | - libcusolver=11.4.4.55=0
74 | - libcusparse=12.0.2.55=0
75 | - libedit=3.1.20230828=h5eee18b_0
76 | - libev=4.33=h7f8727e_1
77 | - libevent=2.1.12=hdbd6064_1
78 | - libfaiss=1.7.4=h13c3c6d_0_cuda11.4
79 | - libffi=3.4.4=h6a678d5_0
80 | - libgcc-ng=13.2.0=h807b86a_3
81 | - libgomp=13.2.0=h807b86a_3
82 | - libnghttp2=1.57.0=h2d74bed_0
83 | - libnpp=12.0.2.50=0
84 | - libnvjitlink=12.1.105=0
85 | - libnvjpeg=12.1.1.14=0
86 | - libprotobuf=3.20.3=he621ea3_0
87 | - libsodium=1.0.18=h36c2ea0_1
88 | - libssh2=1.10.0=hdbd6064_2
89 | - libstdcxx-ng=11.2.0=h1234567_1
90 | - libthrift=0.15.0=h1795dd8_2
91 | - libuuid=1.41.5=h5eee18b_0
92 | - llvm-openmp=14.0.6=h9e868ea_0
93 | - lz4-c=1.9.4=h6a678d5_0
94 | - markupsafe=2.1.3=py310h5eee18b_0
95 | - matplotlib-inline=0.1.6=pyhd8ed1ab_0
96 | - mkl=2023.1.0=h213fc3f_46344
97 | - mkl-service=2.4.0=py310h5eee18b_1
98 | - mkl_fft=1.3.8=py310h5eee18b_0
99 | - mkl_random=1.2.4=py310hdb19cb5_0
100 | - mpc=1.1.0=h10f8cd9_1
101 | - mpfr=4.0.2=hb69a4c5_1
102 | - mpmath=1.3.0=py310h06a4308_0
103 | - multidict=6.0.4=py310h5eee18b_0
104 | - multiprocess=0.70.14=py310h06a4308_0
105 | - ncurses=6.4=h6a678d5_0
106 | - nest-asyncio=1.5.8=pyhd8ed1ab_0
107 | - networkx=3.1=py310h06a4308_0
108 | - numexpr=2.8.7=py310h85018f9_0
109 | - numpy=1.26.3=py310h5f9d8c6_0
110 | - numpy-base=1.26.3=py310hb5e798b_0
111 | - openssl=3.2.0=hd590300_1
112 | - orc=1.7.4=hb3bc3d3_1
113 | - packaging=23.2=pyhd8ed1ab_0
114 | - pandas=2.1.4=py310h1128e8f_0
115 | - parso=0.8.3=pyhd8ed1ab_0
116 | - pexpect=4.8.0=pyh1a96a4e_2
117 | - pickleshare=0.7.5=py_1003
118 | - pip=23.3.1=py310h06a4308_0
119 | - platformdirs=4.1.0=pyhd8ed1ab_0
120 | - prompt-toolkit=3.0.42=pyha770c72_0
121 | - psutil=5.9.7=py310h2372a71_0
122 | - ptyprocess=0.7.0=pyhd3deb0d_0
123 | - pure_eval=0.2.2=pyhd8ed1ab_0
124 | - pyarrow=11.0.0=py310h468efa6_1
125 | - pycparser=2.21=pyhd3eb1b0_0
126 | - pygments=2.17.2=pyhd8ed1ab_0
127 | - pyopenssl=23.2.0=py310h06a4308_0
128 | - pysocks=1.7.1=py310h06a4308_0
129 | - python=3.10.13=h955ad1f_0
130 | - python-dateutil=2.8.2=pyhd8ed1ab_0
131 | - python-tzdata=2023.3=pyhd3eb1b0_0
132 | - python-xxhash=2.0.2=py310h5eee18b_1
133 | - python_abi=3.10=2_cp310
134 | - pytorch=2.1.2=py3.10_cuda12.1_cudnn8.9.2_0
135 | - pytorch-cuda=12.1=ha16c6d3_5
136 | - pytorch-mutex=1.0=cuda
137 | - pytz=2023.3.post1=py310h06a4308_0
138 | - pyyaml=6.0.1=py310h5eee18b_0
139 | - pyzmq=25.1.0=py310h6a678d5_0
140 | - re2=2022.04.01=h295c915_0
141 | - readline=8.2=h5eee18b_0
142 | - requests=2.31.0=py310h06a4308_0
143 | - responses=0.13.3=pyhd3eb1b0_0
144 | - safetensors=0.4.0=py310ha89cbab_0
145 | - setuptools=68.2.2=py310h06a4308_0
146 | - six=1.16.0=pyh6c4a22f_0
147 | - snappy=1.1.10=h6a678d5_1
148 | - sqlite=3.41.2=h5eee18b_0
149 | - stack_data=0.6.2=pyhd8ed1ab_0
150 | - sympy=1.12=py310h06a4308_0
151 | - tbb=2021.8.0=hdb19cb5_0
152 | - tk=8.6.12=h1ccaba5_0
153 | - tokenizers=0.13.3=py310h22610ee_0
154 | - torchtriton=2.1.0=py310
155 | - tornado=6.3.3=py310h2372a71_1
156 | - traitlets=5.14.1=pyhd8ed1ab_0
157 | - transformers=4.32.1=py310h06a4308_0
158 | - typing-extensions=4.7.1=py310h06a4308_0
159 | - typing_extensions=4.7.1=py310h06a4308_0
160 | - tzdata=2023d=h04d1e81_0
161 | - utf8proc=2.6.1=h27cfd23_0
162 | - wcwidth=0.2.13=pyhd8ed1ab_0
163 | - wheel=0.41.2=py310h06a4308_0
164 | - widgetsnbextension=4.0.9=pyhd8ed1ab_0
165 | - xxhash=0.8.0=h7f8727e_3
166 | - xz=5.4.5=h5eee18b_0
167 | - yaml=0.2.5=h7b6447c_0
168 | - zeromq=4.3.4=h9c3ff4c_1
169 | - zipp=3.17.0=pyhd8ed1ab_0
170 | - zlib=1.2.13=h5eee18b_0
171 | - zstd=1.5.5=hc292b87_0
172 | - pip:
173 | - absl-py==2.0.0
174 | - aiohttp==3.9.1
175 | - aiosignal==1.3.1
176 | - annotated-types==0.6.0
177 | - anyio==4.2.0
178 | - astunparse==1.6.3
179 | - attrs==23.2.0
180 | - backoff==2.2.1
181 | - beautifulsoup4==4.12.2
182 | - cachetools==5.3.2
183 | - chardet==5.2.0
184 | - charset-normalizer==3.3.2
185 | - click==8.1.7
186 | - cohere==4.43
187 | - cohere-core==4.0.0
188 | - dataclasses-json==0.6.3
189 | - emoji==2.9.0
190 | - faiss-gpu==1.7.2
191 | - fastavro==1.9.3
192 | - filetype==1.2.0
193 | - flatbuffers==23.5.26
194 | - frozenlist==1.4.1
195 | - gast==0.5.4
196 | - google-auth==2.26.2
197 | - google-auth-oauthlib==1.2.0
198 | - google-pasta==0.2.0
199 | - greenlet==3.0.3
200 | - grpcio==1.60.0
201 | - h5py==3.10.0
202 | - idna==3.6
203 | - importlib-metadata==6.11.0
204 | - joblib==1.3.2
205 | - jsonpatch==1.33
206 | - jsonpath-python==1.0.6
207 | - jsonpointer==2.4
208 | - keras==2.15.0
209 | - langchain==0.1.1
210 | - langchain-community==0.0.13
211 | - langchain-core==0.1.11
212 | - langdetect==1.0.9
213 | - langsmith==0.0.81
214 | - libclang==16.0.6
215 | - lxml==5.1.0
216 | - markdown==3.5.2
217 | - marshmallow==3.20.2
218 | - ml-dtypes==0.2.0
219 | - mypy-extensions==1.0.0
220 | - nltk==3.8.1
221 | - oauthlib==3.2.2
222 | - opt-einsum==3.3.0
223 | - protobuf==4.23.4
224 | - pyasn1==0.5.1
225 | - pyasn1-modules==0.3.0
226 | - pydantic==2.5.3
227 | - pydantic-core==2.14.6
228 | - python-docx==1.1.0
229 | - python-iso639==2024.1.2
230 | - python-magic==0.4.27
231 | - rapidfuzz==3.6.1
232 | - regex==2023.12.25
233 | - requests-oauthlib==1.3.1
234 | - rsa==4.9
235 | - scikit-learn==1.3.2
236 | - scipy==1.11.4
237 | - sniffio==1.3.0
238 | - soupsieve==2.5
239 | - sqlalchemy==2.0.25
240 | - tabulate==0.9.0
241 | - tenacity==8.2.3
242 | - tensorboard==2.15.1
243 | - tensorboard-data-server==0.7.2
244 | - tensorflow==2.15.0.post1
245 | - tensorflow-estimator==2.15.0
246 | - tensorflow-io-gcs-filesystem==0.35.0
247 | - termcolor==2.4.0
248 | - threadpoolctl==3.2.0
249 | - tifffile==2023.12.9
250 | - tqdm==4.66.1
251 | - typing-inspect==0.9.0
252 | - unstructured==0.12.0
253 | - unstructured-client==0.15.2
254 | - urllib3==2.1.0
255 | - werkzeug==3.0.1
256 | - wrapt==1.14.1
257 | - yarl==1.9.4
--------------------------------------------------------------------------------