├── .gitignore ├── 01 - Quickstart ├── 01 - Quickstart.ipynb └── README.md ├── 02 - Integrating Chat History ├── 02 - Adding Chat History.ipynb ├── README.md └── chat_history.db ├── 03 - Implementing Streaming Capabilities ├── 03 - Streaming.ipynb ├── README.md ├── main.py └── static │ └── index.html ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /.env 2 | /.venv 3 | Not published/ -------------------------------------------------------------------------------- /01 - Quickstart/01 - Quickstart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Part 1\n", 8 | "\n", 9 | "#### Dependencies\n", 10 | "We’ll use the following packages:" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4 python-dotenv" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 10, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "True" 31 | ] 32 | }, 33 | "execution_count": 10, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "from dotenv import load_dotenv\n", 40 | "load_dotenv()" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 11, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "import bs4\n", 50 | "from langchain import hub\n", 51 | "from langchain_community.document_loaders import WebBaseLoader\n", 52 | "from langchain_chroma import Chroma\n", 53 | "from langchain_core.output_parsers import StrOutputParser\n", 54 | "from langchain_core.runnables import RunnablePassthrough\n", 55 | "from langchain_openai import OpenAIEmbeddings\n", 56 | "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", 57 | "from langchain_openai import ChatOpenAI" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 12, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "llm = ChatOpenAI(model=\"gpt-3.5-turbo\")\n", 67 | "# Load, chunk and index the contents of the blog.\n", 68 | "loader = WebBaseLoader(\n", 69 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", 70 | " bs_kwargs=dict(\n", 71 | " parse_only=bs4.SoupStrainer(\n", 72 | " class_=(\"post-content\", \"post-title\", \"post-header\")\n", 73 | " )\n", 74 | " ),\n", 75 | ")\n", 76 | "docs = loader.load()\n", 77 | "\n", 78 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", 79 | "splits = text_splitter.split_documents(docs)\n", 80 | "vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n", 81 | "\n", 82 | "# Retrieve and generate using the relevant snippets of the blog.\n", 83 | "retriever = vectorstore.as_retriever()\n", 84 | "prompt = hub.pull(\"rlm/rag-prompt\")\n", 85 | "\n", 86 | "def format_docs(docs):\n", 87 | " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", 88 | "\n", 89 | "\n", 90 | "rag_chain = (\n", 91 | " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", 92 | " | prompt\n", 93 | " | llm\n", 94 | " | StrOutputParser()\n", 95 | ")" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "### Running 
the Chain" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 13, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/plain": [ 113 | "'Task Decomposition is a technique where complex tasks are broken down into smaller and simpler steps to enhance model performance. It involves transforming big tasks into manageable tasks by decomposing them into multiple steps. This process can be done using prompting techniques, task-specific instructions, or with human inputs.'" 114 | ] 115 | }, 116 | "execution_count": 13, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "rag_chain.invoke(\"What is Task Decomposition?\")" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "### Testing the Retriever" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 14, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "[Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", 141 | " Document(page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", 142 | " Document(page_content='Resources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n", 143 | " Document(page_content=\"(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. 
The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\", metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})]" 144 | ] 145 | }, 146 | "execution_count": 14, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "retriever.invoke(\"What is Task Decomposition?\")" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "# cleanup\n", 162 | "vectorstore.delete_collection()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Detailed walkthrough\n", 170 | "Let’s go through the above code step-by-step to really understand what’s going on.\n", 171 | "\n", 172 | "### 1. Indexing: Load" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 15, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "loader = WebBaseLoader(\n", 182 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", 183 | " bs_kwargs=dict(\n", 184 | " parse_only=bs4.SoupStrainer(\n", 185 | " class_=(\"post-content\", \"post-title\", \"post-header\")\n", 186 | " )\n", 187 | " ),\n", 188 | ")\n", 189 | "docs = loader.load()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "len(docs[0].page_content)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "print(docs[0].page_content[:500])" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "print(docs[0].metadata)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "### 2. Indexing: Chunk and Index" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 16, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", 233 | "\n", 234 | "text_splitter = RecursiveCharacterTextSplitter(\n", 235 | " chunk_size=1000, chunk_overlap=200, add_start_index=True\n", 236 | ")\n", 237 | "all_splits = text_splitter.split_documents(docs)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "len(all_splits)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "len(all_splits[0].page_content)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "all_splits[10].metadata" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "### 3. 
Indexing: Store" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 17, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "from langchain_chroma import Chroma\n", 281 | "from langchain_openai import OpenAIEmbeddings\n", 282 | "\n", 283 | "vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "### 4. Retrieval and Generation: Retrieve" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 18, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "retriever = vectorstore.as_retriever(search_type=\"similarity\", \n", 300 | " search_kwargs={\"k\": 6})" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 19, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "retrieved_docs = retriever.invoke(\"What are the approaches to Task Decomposition?\")" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 20, 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "data": { 319 | "text/plain": [ 320 | "6" 321 | ] 322 | }, 323 | "execution_count": 20, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "len(retrieved_docs)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 21, 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "name": "stdout", 339 | "output_type": "stream", 340 | "text": [ 341 | "Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\n", 342 | "Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "print(retrieved_docs[0].page_content)" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "### 5. Retrieval and Generation: Generate" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 22, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "from langchain_openai import ChatOpenAI\n", 364 | "\n", 365 | "llm = ChatOpenAI(model=\"gpt-3.5-turbo\")" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 23, 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "from langchain import hub\n", 375 | "\n", 376 | "prompt = hub.pull(\"rlm/rag-prompt\")" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 24, 382 | "metadata": {}, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/plain": [ 387 | "ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template=\"You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\\nQuestion: {question} \\nContext: {context} \\nAnswer:\"))])" 388 | ] 389 | }, 390 | "execution_count": 24, 391 | "metadata": {}, 392 | "output_type": "execute_result" 393 | } 394 | ], 395 | "source": [ 396 | "prompt" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 25, 402 | "metadata": {}, 403 | "outputs": [ 404 | { 405 | "data": { 406 | "text/plain": [ 407 | "[HumanMessage(content=\"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\\nQuestion: filler question \\nContext: filler context \\nAnswer:\")]" 408 | ] 409 | }, 410 | "execution_count": 25, 411 | "metadata": {}, 412 | "output_type": "execute_result" 413 | } 414 | ], 415 | "source": [ 416 | "example_messages = prompt.invoke(\n", 417 | " {\"context\": \"filler context\", \"question\": \"filler question\"}\n", 418 | ").to_messages()\n", 419 | "example_messages" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 26, 425 | "metadata": {}, 426 | "outputs": [ 427 | { 428 | "name": "stdout", 429 | "output_type": "stream", 430 | "text": [ 431 | "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n", 432 | "Question: filler question \n", 433 | "Context: filler context \n", 434 | "Answer:\n" 435 | ] 436 | } 437 | ], 438 | "source": [ 439 | "print(example_messages[0].content)" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 27, 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [ 448 | "from langchain_core.output_parsers import StrOutputParser\n", 449 | "from langchain_core.runnables import RunnablePassthrough\n", 450 | "\n", 451 | "\n", 452 | "def format_docs(docs):\n", 453 | " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", 454 | "\n", 455 | "\n", 456 | "rag_chain = (\n", 457 | " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", 458 | " | prompt\n", 459 | " | llm\n", 460 | " | StrOutputParser()\n", 461 | ")" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 28, 467 | "metadata": {}, 468 | "outputs": [ 469 | { 470 | "name": "stdout", 471 | "output_type": "stream", 472 | "text": [ 473 | "Task decomposition involves breaking down complex tasks into smaller and simpler steps. Techniques like Chain of Thought and Tree of Thoughts help models decompose hard tasks into manageable components. It can be achieved through simple prompting, task-specific instructions, or human inputs." 474 | ] 475 | } 476 | ], 477 | "source": [ 478 | "for chunk in rag_chain.stream(\"What is Task Decomposition?\"):\n", 479 | " print(chunk, end=\"\", flush=True)" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 29, 485 | "metadata": {}, 486 | "outputs": [ 487 | { 488 | "data": { 489 | "text/plain": [ 490 | "'Task decomposition is a method of breaking down complex tasks into smaller and simpler steps to make them more manageable for agents. 
It can be achieved through techniques like Chain of Thought or Tree of Thoughts, which help in structuring the problem-solving process. Thanks for asking!'" 491 | ] 492 | }, 493 | "execution_count": 29, 494 | "metadata": {}, 495 | "output_type": "execute_result" 496 | } 497 | ], 498 | "source": [ 499 | "from langchain_core.prompts import PromptTemplate\n", 500 | "\n", 501 | "template = \"\"\"Use the following pieces of context to answer the question at the end.\n", 502 | "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n", 503 | "Use three sentences maximum and keep the answer as concise as possible.\n", 504 | "Always say \"thanks for asking!\" at the end of the answer.\n", 505 | "\n", 506 | "{context}\n", 507 | "\n", 508 | "Question: {question}\n", 509 | "\n", 510 | "Helpful Answer:\"\"\"\n", 511 | "custom_rag_prompt = PromptTemplate.from_template(template)\n", 512 | "\n", 513 | "rag_chain = (\n", 514 | " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", 515 | " | custom_rag_prompt\n", 516 | " | llm\n", 517 | " | StrOutputParser()\n", 518 | ")\n", 519 | "\n", 520 | "rag_chain.invoke(\"What is Task Decomposition?\")" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": 30, 526 | "metadata": {}, 527 | "outputs": [ 528 | { 529 | "data": { 530 | "text/plain": [ 531 | "\"La décomposition des tâches consiste à diviser une tâche complexe en étapes plus simples pour faciliter son exécution. Elle peut être réalisée par des techniques de modélisation comme la Chaîne de Pensée ou l'Arbre de Pensée, qui aident à décomposer les problèmes en plusieurs étapes gérables. Ces méthodes permettent de mieux comprendre le processus de réflexion du modèle ou de l'agent autonome. 
Merci d'avoir posé la question !\"" 532 | ] 533 | }, 534 | "execution_count": 30, 535 | "metadata": {}, 536 | "output_type": "execute_result" 537 | } 538 | ], 539 | "source": [ 540 | "from langchain_core.prompts import PromptTemplate\n", 541 | "\n", 542 | "template = \"\"\"Use the following pieces of context to answer the question at the end.\n", 543 | "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n", 544 | "Use three sentences maximum and keep the answer as concise as possible.\n", 545 | "Answer in French.\n", 546 | "Always say \"merci d'avoir posé la question !\" at the end of the answer.\n", 547 | "\n", 548 | "{context}\n", 549 | "\n", 550 | "Question: {question}\n", 551 | "\n", 552 | "Helpful Answer (in French):\"\"\"\n", 553 | "custom_rag_prompt = PromptTemplate.from_template(template)\n", 554 | "\n", 555 | "rag_chain = (\n", 556 | " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", 557 | " | custom_rag_prompt\n", 558 | " | llm\n", 559 | " | StrOutputParser()\n", 560 | ")\n", 561 | "\n", 562 | "rag_chain.invoke(\"What is Task Decomposition?\")" 563 | ] 564 | } 565 | ], 566 | "metadata": { 567 | "kernelspec": { 568 | "display_name": ".venv", 569 | "language": "python", 570 | "name": "python3" 571 | }, 572 | "language_info": { 573 | "codemirror_mode": { 574 | "name": "ipython", 575 | "version": 3 576 | }, 577 | "file_extension": ".py", 578 | "mimetype": "text/x-python", 579 | "name": "python", 580 | "nbconvert_exporter": "python", 581 | "pygments_lexer": "ipython3", 582 | "version": "3.11.7" 583 | } 584 | }, 585 | "nbformat": 4, 586 | "nbformat_minor": 2 587 | } 588 | -------------------------------------------------------------------------------- /01 - Quickstart/README.md: -------------------------------------------------------------------------------- 1 | # Mastering LangChain RAG: Quick Start Guide to LangChain RAG (Part 1) 2 | 3 | Welcome to the repository accompanying the "Mastering LangChain RAG" series. This repository contains the code examples mentioned in the article "Quick Start Guide to LangChain RAG" and will be updated as the series progresses. 4 | 5 | ## Overview 6 | 7 | This repository is part of a six-article series on LangChain's Retrieval-Augmented Generation (RAG) technology. The series aims to equip developers, data scientists, and AI enthusiasts with the knowledge to implement and optimize RAG in their projects. 8 | 9 | ## Series Outline 10 | 11 | 1. **[Quick Start Guide to LangChain RAG](https://medium.com/@eric_vaillancourt/mastering-langchain-rag-a-comprehensive-tutorial-series-part-1-28faf6257fea)**: Basics of setting up LangChain RAG. 12 | 2. **[Integrating Chat History](https://medium.com/@eric_vaillancourt/mastering-langchain-rag-integrating-chat-history-part-2-4c80eae11b43)**: Incorporate chat history into your RAG model. 13 | 3. **[Implementing Streaming Capabilities](https://medium.com/@eric_vaillancourt/mastering-langchain-rag-implementing-streaming-capabilities-part-3-e3f4885ea66a)**: Handle real-time data processing with RAG. 14 | 4. **Returning Sources with Results**: Configure RAG to provide sources along with responses. 15 | 5. **Adding Citations to Your Results**: Include citations in your results for verifiability. 16 | 6. **Putting It All Together**: Build a comprehensive RAG application integrating all components. 
17 | 18 | ## Quick Start Guide to LangChain RAG 19 | 20 | ### Introduction 21 | 22 | LangChain's Retrieval-Augmented Generation (RAG) is a powerful technique that supplements the static knowledge of large language models (LLMs) with dynamic, external data sources, enabling more accurate and contextually relevant responses. 23 | 24 | ### Setup Environment 25 | 26 | Ensure your development environment is prepared with the necessary dependencies: 27 | 28 | ```bash 29 | pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4 python-dotenv 30 | ``` 31 | 32 | ### Environment Variables 33 | 34 | Set the `OPENAI_API_KEY` for the embeddings model. This can be done directly or loaded from a `.env` file: 35 | 36 | ```python 37 | from dotenv import load_dotenv 38 | load_dotenv() 39 | ``` 40 | 41 | Create a `.env` file with the following content: 42 | 43 | ``` 44 | OPENAI_API_KEY = "your-key-here" 45 | ``` 46 | 47 | ### Code Examples 48 | 49 | #### Import Required Modules 50 | 51 | ```python 52 | import bs4 53 | from langchain import hub 54 | from langchain_community.document_loaders import WebBaseLoader 55 | from langchain_chroma import Chroma 56 | from langchain_core.output_parsers import StrOutputParser 57 | from langchain_core.runnables import RunnablePassthrough 58 | from langchain_openai import OpenAIEmbeddings 59 | from langchain_text_splitters import RecursiveCharacterTextSplitter 60 | from langchain_openai import ChatOpenAI 61 | ``` 62 | 63 | #### Initialize Language Model and Load Blog Content 64 | 65 | ```python 66 | llm = ChatOpenAI(model="gpt-3.5-turbo") 67 | 68 | loader = WebBaseLoader( 69 | web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",), 70 | bs_kwargs=dict( 71 | parse_only=bs4.SoupStrainer( 72 | class_=("post-content", "post-title", "post-header") 73 | ) 74 | ), 75 | ) 76 | docs = loader.load() 77 | ``` 78 | 79 | #### Document Splitting and Embedding 80 | 81 | ```python 82 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) 83 | splits = text_splitter.split_documents(docs) 84 | vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings()) 85 | ``` 86 | 87 | #### Setup Retrieval-Augmented Generation Chain 88 | 89 | ```python 90 | retriever = vectorstore.as_retriever() 91 | prompt = hub.pull("rlm/rag-prompt") 92 | 93 | def format_docs(docs): 94 | return "\n\n".join(doc.page_content for doc in docs) 95 | 96 | rag_chain = ( 97 | {"context": retriever | format_docs, "question": RunnablePassthrough()} 98 | | prompt 99 | | llm 100 | | StrOutputParser() 101 | ) 102 | ``` 103 | 104 | ### Running the Chain 105 | 106 | ```python 107 | response = rag_chain.invoke("What is Task Decomposition?") 108 | print(response) 109 | ``` 110 | 111 | ### Testing the Retriever 112 | 113 | ```python 114 | retrieved_docs = retriever.invoke("What is Task Decomposition?") 115 | for doc in retrieved_docs: 116 | print(doc.page_content) 117 | ``` 118 | 119 | ### Cleanup 120 | 121 | ```python 122 | vectorstore.delete_collection() 123 | ``` 124 | 125 | ## Conclusion 126 | 127 | This repository provides the foundational code to get started with LangChain's RAG technology. For a detailed explanation of each step, refer to the accompanying [Medium article](https://medium.com/). Stay tuned for the upcoming articles in the series to further enhance your understanding and application of RAG. 
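As a small preview of Part 3, the chain built above can already stream its answer token by token instead of returning it in one block. A minimal sketch, reusing the `rag_chain` defined in the previous section:

```python
# Stream the answer as it is generated rather than waiting for the full response.
for chunk in rag_chain.stream("What is Task Decomposition?"):
    print(chunk, end="", flush=True)
```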
128 | 129 | ## Support 130 | 131 | If you find this repository helpful, consider supporting my work by [buying me a coffee](https://www.buymeacoffee.com/evaillancourt). 132 | 133 | ## Contact 134 | 135 | For any questions or suggestions, feel free to contact me at [eric@ericvaillancourt.ca](mailto:eric@ericvaillancourt.ca). 136 | 137 | --- 138 | 139 | -------------------------------------------------------------------------------- /02 - Integrating Chat History/02 - Adding Chat History.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Part 2 \n", 8 | "\n", 9 | "#### Dependencies\n", 10 | "We'll use the following packages:" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4 python-dotenv sqlalchemy" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "from dotenv import load_dotenv\n", 29 | "load_dotenv()" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "#### Chain without chat history" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "import bs4\n", 46 | "from langchain import hub\n", 47 | "from langchain_chroma import Chroma\n", 48 | "from langchain_community.document_loaders import WebBaseLoader\n", 49 | "from langchain_core.output_parsers import StrOutputParser\n", 50 | "from langchain_core.runnables import RunnablePassthrough\n", 51 | "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", 52 | "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", 53 | "\n", 54 | "# Load, chunk and index the contents of the blog.\n", 55 | "loader = WebBaseLoader(\n", 56 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", 57 | " bs_kwargs=dict(\n", 58 | " parse_only=bs4.SoupStrainer(\n", 59 | " class_=(\"post-content\", \"post-title\", \"post-header\")\n", 60 | " )\n", 61 | " ),\n", 62 | ")\n", 63 | "docs = loader.load()\n", 64 | "\n", 65 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", 66 | "splits = text_splitter.split_documents(docs)\n", 67 | "vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n", 68 | "\n", 69 | "# Retrieve and generate using the relevant snippets of the blog.\n", 70 | "retriever = vectorstore.as_retriever()\n", 71 | "prompt = hub.pull(\"rlm/rag-prompt\")\n", 72 | "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n", 73 | "\n", 74 | "\n", 75 | "def format_docs(docs):\n", 76 | " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", 77 | "\n", 78 | "\n", 79 | "rag_chain = (\n", 80 | " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", 81 | " | prompt\n", 82 | " | llm\n", 83 | " | StrOutputParser()\n", 84 | ")" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "rag_chain.invoke(\"What is Task Decomposition?\")" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | 
"rag_chain.invoke(\"What was the last question?\")" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "### Contextualizing the question" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "import bs4\n", 119 | "from langchain.chains import create_history_aware_retriever, create_retrieval_chain\n", 120 | "from langchain.chains.combine_documents import create_stuff_documents_chain\n", 121 | "from langchain_chroma import Chroma\n", 122 | "from langchain_community.chat_message_histories import ChatMessageHistory\n", 123 | "from langchain_community.document_loaders import WebBaseLoader\n", 124 | "from langchain_core.chat_history import BaseChatMessageHistory\n", 125 | "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n", 126 | "from langchain_core.runnables.history import RunnableWithMessageHistory\n", 127 | "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", 128 | "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", 129 | "\n", 130 | "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n", 131 | "\n", 132 | "\n", 133 | "### Construct retriever ###\n", 134 | "loader = WebBaseLoader(\n", 135 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", 136 | " bs_kwargs=dict(\n", 137 | " parse_only=bs4.SoupStrainer(\n", 138 | " class_=(\"post-content\", \"post-title\", \"post-header\")\n", 139 | " )\n", 140 | " ),\n", 141 | ")\n", 142 | "docs = loader.load()\n", 143 | "\n", 144 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", 145 | "splits = text_splitter.split_documents(docs)\n", 146 | "vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n", 147 | "retriever = vectorstore.as_retriever()\n", 148 | "\n", 149 | "\n", 150 | "### Contextualize question ###\n", 151 | "contextualize_q_system_prompt = \"\"\"Given a chat history and the latest user question \\\n", 152 | "which might reference context in the chat history, formulate a standalone question \\\n", 153 | "which can be understood without the chat history. Do NOT answer the question, \\\n", 154 | "just reformulate it if needed and otherwise return it as is.\"\"\"\n", 155 | "contextualize_q_prompt = ChatPromptTemplate.from_messages(\n", 156 | " [\n", 157 | " (\"system\", contextualize_q_system_prompt),\n", 158 | " MessagesPlaceholder(\"chat_history\"),\n", 159 | " (\"human\", \"{input}\"),\n", 160 | " ]\n", 161 | ")\n", 162 | "history_aware_retriever = create_history_aware_retriever(\n", 163 | " llm, retriever, contextualize_q_prompt\n", 164 | ")\n", 165 | "\n", 166 | "\n", 167 | "### Answer question ###\n", 168 | "qa_system_prompt = \"\"\"You are an assistant for question-answering tasks. \\\n", 169 | "Use the following pieces of retrieved context to answer the question. \\\n", 170 | "If you don't know the answer, just say that you don't know. 
\\\n", 171 | "Use three sentences maximum and keep the answer concise.\\\n", 172 | "\n", 173 | "{context}\"\"\"\n", 174 | "qa_prompt = ChatPromptTemplate.from_messages(\n", 175 | " [\n", 176 | " (\"system\", qa_system_prompt),\n", 177 | " MessagesPlaceholder(\"chat_history\"),\n", 178 | " (\"human\", \"{input}\"),\n", 179 | " ]\n", 180 | ")\n", 181 | "question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)\n", 182 | "\n", 183 | "rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)\n", 184 | "\n", 185 | "\n", 186 | "### Statefully manage chat history ###\n", 187 | "store = {}\n", 188 | "\n", 189 | "\n", 190 | "def get_session_history(session_id: str) -> BaseChatMessageHistory:\n", 191 | " if session_id not in store:\n", 192 | " store[session_id] = ChatMessageHistory()\n", 193 | " return store[session_id]\n", 194 | "\n", 195 | "\n", 196 | "conversational_rag_chain = RunnableWithMessageHistory(\n", 197 | " rag_chain,\n", 198 | " get_session_history,\n", 199 | " input_messages_key=\"input\",\n", 200 | " history_messages_key=\"chat_history\",\n", 201 | " output_messages_key=\"answer\",\n", 202 | ")" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "conversational_rag_chain.invoke(\n", 212 | " {\"input\": \"What is Task Decomposition?\"},\n", 213 | " config={\"configurable\": {\"session_id\": \"abc123\"}\n", 214 | " }, \n", 215 | ")[\"answer\"]" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "conversational_rag_chain.invoke(\n", 225 | " {\"input\": \"What are common ways of doing it?\"},\n", 226 | " config={\"configurable\": {\"session_id\": \"abc123\"}},\n", 227 | ")[\"answer\"]" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "print(store)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "#### Adding Persistence with SQLAlchemy" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "import bs4\n", 253 | "from sqlalchemy import create_engine, Column, Integer, String, Text, ForeignKey\n", 254 | "from sqlalchemy.orm import sessionmaker, relationship, declarative_base\n", 255 | "from sqlalchemy.exc import SQLAlchemyError\n", 256 | "from langchain.chains import create_history_aware_retriever, create_retrieval_chain\n", 257 | "from langchain.chains.combine_documents import create_stuff_documents_chain\n", 258 | "from langchain_chroma import Chroma\n", 259 | "from langchain_community.chat_message_histories import ChatMessageHistory\n", 260 | "from langchain_community.document_loaders import WebBaseLoader\n", 261 | "from langchain_core.chat_history import BaseChatMessageHistory\n", 262 | "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n", 263 | "from langchain_core.runnables.history import RunnableWithMessageHistory\n", 264 | "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", 265 | "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", 266 | "\n", 267 | "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n", 268 | "\n", 269 | "# Define the SQLite database\n", 270 | "DATABASE_URL = \"sqlite:///chat_history.db\"\n", 271 | "Base = 
declarative_base()\n", 272 | "\n", 273 | "class Session(Base):\n", 274 | " __tablename__ = \"sessions\"\n", 275 | " id = Column(Integer, primary_key=True)\n", 276 | " session_id = Column(String, unique=True, nullable=False)\n", 277 | " messages = relationship(\"Message\", back_populates=\"session\")\n", 278 | "\n", 279 | "class Message(Base):\n", 280 | " __tablename__ = \"messages\"\n", 281 | " id = Column(Integer, primary_key=True)\n", 282 | " session_id = Column(Integer, ForeignKey(\"sessions.id\"), nullable=False)\n", 283 | " role = Column(String, nullable=False)\n", 284 | " content = Column(Text, nullable=False)\n", 285 | " session = relationship(\"Session\", back_populates=\"messages\")\n", 286 | "\n", 287 | "# Create the database and the tables\n", 288 | "engine = create_engine(DATABASE_URL)\n", 289 | "Base.metadata.create_all(engine)\n", 290 | "SessionLocal = sessionmaker(bind=engine)\n", 291 | "\n", 292 | "def get_db():\n", 293 | " db = SessionLocal()\n", 294 | " try:\n", 295 | " yield db\n", 296 | " finally:\n", 297 | " db.close()\n", 298 | "\n", 299 | "# Function to save a single message\n", 300 | "def save_message(session_id: str, role: str, content: str):\n", 301 | " db = next(get_db())\n", 302 | " try:\n", 303 | " session = db.query(Session).filter(Session.session_id == session_id).first()\n", 304 | " if not session:\n", 305 | " session = Session(session_id=session_id)\n", 306 | " db.add(session)\n", 307 | " db.commit()\n", 308 | " db.refresh(session)\n", 309 | "\n", 310 | " db.add(Message(session_id=session.id, role=role, content=content))\n", 311 | " db.commit()\n", 312 | " except SQLAlchemyError:\n", 313 | " db.rollback()\n", 314 | " finally:\n", 315 | " db.close()\n", 316 | "\n", 317 | "# Function to load chat history\n", 318 | "def load_session_history(session_id: str) -> BaseChatMessageHistory:\n", 319 | " db = next(get_db())\n", 320 | " chat_history = ChatMessageHistory()\n", 321 | " try:\n", 322 | " session = db.query(Session).filter(Session.session_id == session_id).first()\n", 323 | " if session:\n", 324 | " for message in session.messages:\n", 325 | " chat_history.add_message({\"role\": message.role, \"content\": message.content})\n", 326 | " except SQLAlchemyError:\n", 327 | " pass\n", 328 | " finally:\n", 329 | " db.close()\n", 330 | "\n", 331 | " return chat_history\n", 332 | "\n", 333 | "# Modify the get_session_history function to use the database\n", 334 | "def get_session_history(session_id: str) -> BaseChatMessageHistory:\n", 335 | " if session_id not in store:\n", 336 | " store[session_id] = load_session_history(session_id)\n", 337 | " return store[session_id]\n", 338 | "\n", 339 | "# Ensure you save the chat history to the database when needed\n", 340 | "def save_all_sessions():\n", 341 | " for session_id, chat_history in store.items():\n", 342 | " for message in chat_history.messages:\n", 343 | " save_message(session_id, message[\"role\"], message[\"content\"])\n", 344 | "\n", 345 | "# Example of saving all sessions before exiting the application\n", 346 | "import atexit\n", 347 | "atexit.register(save_all_sessions)\n", 348 | "\n", 349 | "### Construct retriever ###\n", 350 | "loader = WebBaseLoader(\n", 351 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", 352 | " bs_kwargs=dict(\n", 353 | " parse_only=bs4.SoupStrainer(\n", 354 | " class_=(\"post-content\", \"post-title\", \"post-header\")\n", 355 | " )\n", 356 | " ),\n", 357 | ")\n", 358 | "docs = loader.load()\n", 359 | "\n", 360 | "text_splitter = 
RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", 361 | "splits = text_splitter.split_documents(docs)\n", 362 | "vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n", 363 | "retriever = vectorstore.as_retriever()\n", 364 | "\n", 365 | "### Contextualize question ###\n", 366 | "contextualize_q_system_prompt = \"\"\"Given a chat history and the latest user question \\\n", 367 | "which might reference context in the chat history, formulate a standalone question \\\n", 368 | "which can be understood without the chat history. Do NOT answer the question, \\\n", 369 | "just reformulate it if needed and otherwise return it as is.\"\"\"\n", 370 | "contextualize_q_prompt = ChatPromptTemplate.from_messages(\n", 371 | " [\n", 372 | " (\"system\", contextualize_q_system_prompt),\n", 373 | " MessagesPlaceholder(\"chat_history\"),\n", 374 | " (\"human\", \"{input}\"),\n", 375 | " ]\n", 376 | ")\n", 377 | "history_aware_retriever = create_history_aware_retriever(\n", 378 | " llm, retriever, contextualize_q_prompt\n", 379 | ")\n", 380 | "\n", 381 | "### Answer question ###\n", 382 | "qa_system_prompt = \"\"\"You are an assistant for question-answering tasks. \\\n", 383 | "Use the following pieces of retrieved context to answer the question. \\\n", 384 | "If you don't know the answer, just say that you don't know. \\\n", 385 | "Use three sentences maximum and keep the answer concise.\\\n", 386 | "\n", 387 | "{context}\"\"\"\n", 388 | "qa_prompt = ChatPromptTemplate.from_messages(\n", 389 | " [\n", 390 | " (\"system\", qa_system_prompt),\n", 391 | " MessagesPlaceholder(\"chat_history\"),\n", 392 | " (\"human\", \"{input}\"),\n", 393 | " ]\n", 394 | ")\n", 395 | "question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)\n", 396 | "\n", 397 | "rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)\n", 398 | "\n", 399 | "### Statefully manage chat history ###\n", 400 | "store = {}\n", 401 | "\n", 402 | "conversational_rag_chain = RunnableWithMessageHistory(\n", 403 | " rag_chain,\n", 404 | " get_session_history,\n", 405 | " input_messages_key=\"input\",\n", 406 | " history_messages_key=\"chat_history\",\n", 407 | " output_messages_key=\"answer\",\n", 408 | ")\n", 409 | "\n", 410 | "# Invoke the chain and save the messages after invocation\n", 411 | "def invoke_and_save(session_id, input_text):\n", 412 | " # Save the user question with role \"human\"\n", 413 | " save_message(session_id, \"human\", input_text)\n", 414 | " \n", 415 | " result = conversational_rag_chain.invoke(\n", 416 | " {\"input\": input_text},\n", 417 | " config={\"configurable\": {\"session_id\": session_id}}\n", 418 | " )[\"answer\"]\n", 419 | "\n", 420 | " # Save the AI answer with role \"ai\"\n", 421 | " save_message(session_id, \"ai\", result)\n", 422 | " return result\n", 423 | "\n", 424 | "\n" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "\n", 434 | "# Example usage\n", 435 | "result = invoke_and_save(\"abc123\", \"What are the types of memory?\")\n", 436 | "print(result)\n" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [ 445 | "# Example usage\n", 446 | "result = invoke_and_save(\"abc123\", \"What was my previous question?\")\n", 447 | "print(result)" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | 
"metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "# Example usage\n", 457 | "result = invoke_and_save(\"abc123\", \"Can you list them again?\")\n", 458 | "print(result)" 459 | ] 460 | } 461 | ], 462 | "metadata": { 463 | "kernelspec": { 464 | "display_name": ".venv", 465 | "language": "python", 466 | "name": "python3" 467 | }, 468 | "language_info": { 469 | "codemirror_mode": { 470 | "name": "ipython", 471 | "version": 3 472 | }, 473 | "file_extension": ".py", 474 | "mimetype": "text/x-python", 475 | "name": "python", 476 | "nbconvert_exporter": "python", 477 | "pygments_lexer": "ipython3", 478 | "version": "3.11.7" 479 | } 480 | }, 481 | "nbformat": 4, 482 | "nbformat_minor": 2 483 | } 484 | -------------------------------------------------------------------------------- /02 - Integrating Chat History/README.md: -------------------------------------------------------------------------------- 1 | # Mastering LangChain RAG: Integrating Chat History (Part 2) 2 | 3 | Welcome to the second part of our in-depth series on LangChain's Retrieval-Augmented Generation (RAG) technology. This repository contains the code examples and explanations for integrating chat history into a RAG-based application. By following this tutorial, you will learn how to maintain context in conversations and improve the interaction quality of your Q&A applications. 4 | 5 | ## Overview 6 | 7 | In this tutorial, we focus on incorporating chat history into our RAG model to maintain context and improve the quality of interactions in chat-like conversations. Additionally, we cover how to save chat history to an SQL database using SQLAlchemy, ensuring robust and scalable storage. 8 | 9 | ## Introduction 10 | 11 | In many Q&A applications, facilitating a dynamic, back-and-forth conversation between the user and the system is essential. This requires the application to maintain a "memory" of past interactions, allowing it to reference and integrate previous exchanges into its current processing. 12 | 13 | ## Series Outline 14 | 15 | 1. **[Quick Start Guide to LangChain RAG](https://medium.com/@eric_vaillancourt/mastering-langchain-rag-a-comprehensive-tutorial-series-part-1-28faf6257fea)**: Basics of setting up LangChain RAG. 16 | 2. **[Integrating Chat History](https://medium.com/@eric_vaillancourt/mastering-langchain-rag-integrating-chat-history-part-2-4c80eae11b43)**: Incorporate chat history into your RAG model. 17 | 3. **[Implementing Streaming Capabilities](https://medium.com/@eric_vaillancourt/mastering-langchain-rag-implementing-streaming-capabilities-part-3-e3f4885ea66a)**: Handle real-time data processing with RAG. 18 | 4. **Returning Sources with Results**: Configure RAG to provide sources along with responses. 19 | 5. **Adding Citations to Your Results**: Include citations in your results for verifiability. 20 | 6. **Putting It All Together**: Build a comprehensive RAG application integrating all components. 21 | 22 | 23 | ## Getting Started 24 | 25 | All code examples mentioned in this tutorial can be found in the `02 - Integrating Chat History` folder. To get started, clone this repository and navigate to the relevant folder: 26 | 27 | ```bash 28 | git clone https://github.com/ericvaillancourt/RAG-tutorial.git 29 | cd RAG-tutorial/02 - Integrating Chat History 30 | ``` 31 | 32 | ## Environment Setup 33 | 34 | Ensure your development environment is prepared with the necessary dependencies. 
You can install the required packages using pip: 35 | 36 | ```bash 37 | pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4 python-dotenv sqlalchemy 38 | ``` 39 | 40 | You also need to set the `OPENAI_API_KEY` environment variable for the embeddings model. This can be done directly or loaded from a `.env` file. Create a `.env` file with the following content: 41 | 42 | ``` 43 | OPENAI_API_KEY=your-key-here 44 | ``` 45 | 46 | Load the environment variable in your code: 47 | 48 | ```python 49 | from dotenv import load_dotenv 50 | load_dotenv() 51 | ``` 52 | 53 | ## Code Explanation 54 | 55 | ### Updating the Prompt 56 | 57 | We modify the application’s prompt to include historical messages as input. This change ensures that the system can access prior interactions and use them to understand and respond to new inquiries more effectively. 58 | 59 | ### Contextualizing Questions 60 | 61 | We introduce a sub-chain that utilizes both historical messages and the latest user question. This sub-chain is designed to reformulate a question whenever it references past discussions. 62 | 63 | ### Setting Up the Database 64 | 65 | To persist chat histories, we use SQLAlchemy to set up an SQLite database. We define models for sessions and messages, and create utility functions to save and load chat history. 66 | 67 | ### Creating the History-Aware Retriever 68 | 69 | We create a history-aware retriever that integrates chat history for context-aware processing. 70 | 71 | ### Building the Q&A Chain 72 | 73 | We build a comprehensive Q&A chain that handles inputs and produces outputs that include not just the query and its context, but also a well-integrated response, keeping track of the entire conversation history. 74 | 75 | ### Managing Chat History 76 | 77 | We manage chat history using a dictionary structure and ensure that chat histories are saved and retrieved efficiently. 78 | 79 | ### Saving and Loading Messages 80 | 81 | We define functions to save and load individual messages to and from the database, ensuring persistent storage. 82 | 83 | ## Usage 84 | 85 | Invoke the chain and save the chat history. Here is an example of how to use the modified function to interact with the chain and persist the conversation: 86 | 87 | ```python 88 | result = invoke_and_save("abc123", "What is Task Decomposition?") 89 | print(result) 90 | 91 | result = invoke_and_save("abc123", "What are common ways of doing it?") 92 | print(result) 93 | ``` 94 | 95 | ## Conclusion 96 | 97 | In this tutorial, we explored how to enhance the functionality of Q&A applications by integrating historical interactions into the application logic and ensuring persistent storage with SQLAlchemy. By automating the management of chat history, we improve the application’s ability to engage users in a meaningful dialogue. 98 | 99 | 100 | ## Support 101 | 102 | If you found this tutorial helpful, please consider supporting my work by buying me a coffee or two! 103 | 104 | [](https://www.buymeacoffee.com/evaillancourt) 105 | 106 | Thank you for following along, and I look forward to continuing this journey with you in the next parts of our series. 
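For quick reference, the `invoke_and_save` helper used in the Usage section above comes from the accompanying notebook; a condensed sketch (it assumes the `conversational_rag_chain` and `save_message` objects built there) looks like this:

```python
def invoke_and_save(session_id: str, input_text: str) -> str:
    # Persist the user question, run the history-aware RAG chain, then persist the answer.
    save_message(session_id, "human", input_text)
    result = conversational_rag_chain.invoke(
        {"input": input_text},
        config={"configurable": {"session_id": session_id}},
    )["answer"]
    save_message(session_id, "ai", result)
    return result
```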
107 | 108 | --- 109 | 110 | Eric Vaillancourt 111 | 112 | GitHub: [ericvaillancourt](https://github.com/ericvaillancourt) -------------------------------------------------------------------------------- /02 - Integrating Chat History/chat_history.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericvaillancourt/RAG-tutorial/3a1862ad403face667bb9afa1f1c691fcab55699/02 - Integrating Chat History/chat_history.db -------------------------------------------------------------------------------- /03 - Implementing Streaming Capabilities/03 - Streaming.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4 fastapi" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "True" 21 | ] 22 | }, 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "from dotenv import load_dotenv\n", 30 | "load_dotenv()" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### Chain with sources" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import bs4\n", 47 | "from langchain import hub\n", 48 | "from langchain_chroma import Chroma\n", 49 | "from langchain_community.document_loaders import WebBaseLoader\n", 50 | "from langchain_core.output_parsers import StrOutputParser\n", 51 | "from langchain_core.runnables import RunnableParallel, RunnablePassthrough\n", 52 | "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", 53 | "from langchain_text_splitters import RecursiveCharacterTextSplitter" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "# Load, chunk and index the contents of the blog.\n", 63 | "bs_strainer = bs4.SoupStrainer(class_=(\"post-content\", \"post-title\", \"post-header\"))\n", 64 | "loader = WebBaseLoader(\n", 65 | " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", 66 | " bs_kwargs={\"parse_only\": bs_strainer},\n", 67 | ")\n", 68 | "docs = loader.load()\n", 69 | "\n", 70 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", 71 | "splits = text_splitter.split_documents(docs)\n", 72 | "vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n", 73 | "\n", 74 | "# Retrieve and generate using the relevant snippets of the blog.\n", 75 | "retriever = vectorstore.as_retriever()\n", 76 | "prompt = hub.pull(\"rlm/rag-prompt\")\n", 77 | "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n", 78 | "\n", 79 | "\n", 80 | "def format_docs(docs):\n", 81 | " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", 82 | "\n", 83 | "\n", 84 | "rag_chain_from_docs = (\n", 85 | " RunnablePassthrough.assign(context=(lambda x: format_docs(x[\"context\"])))\n", 86 | " | prompt\n", 87 | " | llm\n", 88 | " | StrOutputParser()\n", 89 | ")\n", 90 | "\n", 91 | "rag_chain_with_source = RunnableParallel(\n", 92 | " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", 93 | 
").assign(answer=rag_chain_from_docs)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 4, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "{'question': 'What is Task Decomposition'}\n", 106 | "{'context': [Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}), Document(page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}), Document(page_content='Resources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}), Document(page_content='Finite context length: The restricted context capacity limits the inclusion of historical information, detailed instructions, API call context, and responses. The design of the system has to work with this limited communication bandwidth, while mechanisms like self-reflection to learn from past mistakes would benefit a lot from long or infinite context windows. Although vector stores and retrieval can provide access to a larger knowledge pool, their representation power is not as powerful as full attention.\\n\\n\\nChallenges in long-term planning and task decomposition: Planning over a lengthy history and effectively exploring the solution space remain challenging. 
LLMs struggle to adjust plans when faced with unexpected errors, making them less robust compared to humans who learn from trial and error.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})]}\n", 107 | "{'answer': ''}\n", 108 | "{'answer': 'Task'}\n", 109 | "{'answer': ' decomposition'}\n", 110 | "{'answer': ' is'}\n", 111 | "{'answer': ' a'}\n", 112 | "{'answer': ' technique'}\n", 113 | "{'answer': ' used'}\n", 114 | "{'answer': ' to'}\n", 115 | "{'answer': ' break'}\n", 116 | "{'answer': ' down'}\n", 117 | "{'answer': ' complex'}\n", 118 | "{'answer': ' tasks'}\n", 119 | "{'answer': ' into'}\n", 120 | "{'answer': ' smaller'}\n", 121 | "{'answer': ' and'}\n", 122 | "{'answer': ' simpler'}\n", 123 | "{'answer': ' steps'}\n", 124 | "{'answer': '.'}\n", 125 | "{'answer': ' This'}\n", 126 | "{'answer': ' approach'}\n", 127 | "{'answer': ' allows'}\n", 128 | "{'answer': ' agents'}\n", 129 | "{'answer': ' to'}\n", 130 | "{'answer': ' plan'}\n", 131 | "{'answer': ' ahead'}\n", 132 | "{'answer': ' and'}\n", 133 | "{'answer': ' tackle'}\n", 134 | "{'answer': ' each'}\n", 135 | "{'answer': ' step'}\n", 136 | "{'answer': ' sequentially'}\n", 137 | "{'answer': '.'}\n", 138 | "{'answer': ' Task'}\n", 139 | "{'answer': ' decomposition'}\n", 140 | "{'answer': ' can'}\n", 141 | "{'answer': ' be'}\n", 142 | "{'answer': ' achieved'}\n", 143 | "{'answer': ' through'}\n", 144 | "{'answer': ' various'}\n", 145 | "{'answer': ' methods'}\n", 146 | "{'answer': ' such'}\n", 147 | "{'answer': ' as'}\n", 148 | "{'answer': ' prompting'}\n", 149 | "{'answer': ' with'}\n", 150 | "{'answer': ' specific'}\n", 151 | "{'answer': ' instructions'}\n", 152 | "{'answer': ' or'}\n", 153 | "{'answer': ' human'}\n", 154 | "{'answer': ' inputs'}\n", 155 | "{'answer': '.'}\n", 156 | "{'answer': ''}\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "for chunk in rag_chain_with_source.stream(\"What is Task Decomposition\"):\n", 162 | " print(chunk)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "#### We can add some logic to compile our stream as it's being returned:" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "output = {}\n", 179 | "curr_key = None\n", 180 | "for chunk in rag_chain_with_source.stream(\"What is Task Decomposition\"):\n", 181 | " for key in chunk:\n", 182 | " if key not in output:\n", 183 | " output[key] = chunk[key]\n", 184 | " else:\n", 185 | " output[key] += chunk[key]\n", 186 | " if key != curr_key:\n", 187 | " print(f\"\\n\\n{key}: {chunk[key]}\", end=\"\", flush=True)\n", 188 | " else:\n", 189 | " print(chunk[key], end=\"\", flush=True)\n", 190 | " curr_key = key\n", 191 | "output" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "### Streaming intermediate steps\n", 199 | "Suppose we want to stream not only the final outputs of the chain, but also some intermediate steps. As an example let's take our Chat history chain. Here we reformulate the user question before passing it to the retriever. This reformulated question is not returned as part of the final output. We could modify our chain to return the new question, but for demonstration purposes we'll leave it as is." 
200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n", 209 | "\n", 210 | "contextualize_q_system_prompt = \"\"\"Given a chat history and the latest user question \\\n", 211 | "which might reference context in the chat history, formulate a standalone question \\\n", 212 | "which can be understood without the chat history. Do NOT answer the question, \\\n", 213 | "just reformulate it if needed and otherwise return it as is.\"\"\"\n", 214 | "contextualize_q_prompt = ChatPromptTemplate.from_messages(\n", 215 | " [\n", 216 | " (\"system\", contextualize_q_system_prompt),\n", 217 | " MessagesPlaceholder(variable_name=\"chat_history\"),\n", 218 | " (\"human\", \"{question}\"),\n", 219 | " ]\n", 220 | ").with_config(tags=[\"contextualize_q_system_prompt\"])\n", 221 | "\n", 222 | "contextualize_q_chain = (contextualize_q_prompt | llm | StrOutputParser()).with_config(\n", 223 | " tags=[\"contextualize_q_chain\"]\n", 224 | ")\n", 225 | "\n", 226 | "qa_system_prompt = \"\"\"You are an assistant for question-answering tasks. \\\n", 227 | "Use the following pieces of retrieved context to answer the question. \\\n", 228 | "If you don't know the answer, just say that you don't know. \\\n", 229 | "Use three sentences maximum and keep the answer concise.\\\n", 230 | "\n", 231 | "{context}\"\"\"\n", 232 | "qa_prompt = ChatPromptTemplate.from_messages(\n", 233 | " [\n", 234 | " (\"system\", qa_system_prompt),\n", 235 | " MessagesPlaceholder(variable_name=\"chat_history\"),\n", 236 | " (\"human\", \"{question}\"),\n", 237 | " ]\n", 238 | ")\n", 239 | "\n", 240 | "\n", 241 | "def contextualized_question(input: dict):\n", 242 | " if input.get(\"chat_history\"):\n", 243 | " return contextualize_q_chain \n", 244 | " else:\n", 245 | " return input[\"question\"]\n", 246 | "\n", 247 | "\n", 248 | "rag_chain = (\n", 249 | " RunnablePassthrough.assign(context=contextualized_question | retriever | format_docs)\n", 250 | " | qa_prompt\n", 251 | " | llm\n", 252 | ")\n" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "You can stream all steps (default) or include/exclude steps by name, tags or metadata. In this case we'll only stream intermediate steps that are part of the contextualize_q_chain and the final output. 
Notice that when defining the contextualize_q_chain we gave it a corresponding tag, which we can now filter on:" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "from langchain_core.messages import HumanMessage\n", 269 | "\n", 270 | "chat_history = []\n", 271 | "\n", 272 | "question = \"What is Task Decomposition?\"\n", 273 | "ai_msg = rag_chain.invoke({\"question\": question, \"chat_history\": chat_history})\n", 274 | "chat_history.extend([HumanMessage(content=question), ai_msg])\n", 275 | "\n", 276 | "second_question = \"What are common ways of doing it?\"\n", 277 | "\n", 278 | "async for chunk in rag_chain.astream_events(\n", 279 | " {\"question\": second_question, \"chat_history\": chat_history},\n", 280 | " include_tags=[\"contextualize_q_system_prompt\"],\n", 281 | " include_names=[\"StrOutputParser\"],\n", 282 | " include_types=[\"on_parser_end\"],\n", 283 | " version=\"v1\",\n", 284 | "):\n", 285 | " print(chunk)\n", 286 | " " 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "If we wanted to get our retrieved docs, we could filter on name \"Retriever\":" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "\n", 303 | "async for chunk in rag_chain.astream_events(\n", 304 | " {\"question\": second_question, \"chat_history\": chat_history},\n", 305 | " include_names=[\"Retriever\"],\n", 306 | " version=\"v1\",):\n", 307 | " print(chunk)\n" 308 | ] 309 | } 310 | ], 311 | "metadata": { 312 | "kernelspec": { 313 | "display_name": ".venv", 314 | "language": "python", 315 | "name": "python3" 316 | }, 317 | "language_info": { 318 | "codemirror_mode": { 319 | "name": "ipython", 320 | "version": 3 321 | }, 322 | "file_extension": ".py", 323 | "mimetype": "text/x-python", 324 | "name": "python", 325 | "nbconvert_exporter": "python", 326 | "pygments_lexer": "ipython3", 327 | "version": "3.11.7" 328 | } 329 | }, 330 | "nbformat": 4, 331 | "nbformat_minor": 2 332 | } 333 | -------------------------------------------------------------------------------- /03 - Implementing Streaming Capabilities/README.md: -------------------------------------------------------------------------------- 1 | # Mastering LangChain RAG: Implementing Streaming Capabilities (Part 3) 2 | 3 | Welcome to the third part of our in-depth series on LangChain's Retrieval-Augmented Generation (RAG) technology. This repository contains the code examples and explanations for implementing streaming capabilities in a RAG-based application. By following this tutorial, you will learn how to handle real-time data processing with RAG, perfect for applications requiring immediate responses. 4 | 5 | ## Overview 6 | 7 | In this tutorial, we focus on implementing streaming with RAG to handle real-time data processing efficiently. This is particularly useful for applications requiring immediate responses. Additionally, we cover how to integrate sources with the responses to add transparency and credibility to the generated outputs. 8 | 9 | ## Introduction 10 | 11 | In many Q&A applications, providing real-time answers while maintaining source transparency is crucial for establishing trust and credibility. This tutorial explores practical approaches for integrating streaming capabilities and source transparency into your applications. 12 | 13 | ## Series Outline 14 | 15 | 1. 
**[Quick Start Guide to LangChain RAG](https://medium.com/@eric_vaillancourt/mastering-langchain-rag-a-comprehensive-tutorial-series-part-1-28faf6257fea)**: Basics of setting up LangChain RAG. 16 | 2. **[Integrating Chat History](https://medium.com/@eric_vaillancourt/mastering-langchain-rag-integrating-chat-history-part-2-4c80eae11b43)**: Incorporate chat history into your RAG model. 17 | 3. **[Implementing Streaming Capabilities](https://medium.com/@eric_vaillancourt/mastering-langchain-rag-implementing-streaming-capabilities-part-3-e3f4885ea66a)**: Handle real-time data processing with RAG. 18 | 4. **Returning Sources with Results**: Configure RAG to provide sources along with responses. 19 | 5. **Adding Citations to Your Results**: Include citations in your results for verifiability. 20 | 6. **Putting It All Together**: Build a comprehensive RAG application integrating all components. 21 | 22 | 23 | ## Getting Started 24 | 25 | All code examples mentioned in this tutorial can be found in the `03 - Implementing Streaming Capabilities` folder. To get started, clone this repository and navigate to the relevant folder: 26 | 27 | ```bash 28 | git clone https://github.com/ericvaillancourt/RAG-tutorial.git 29 | cd RAG-tutorial/03 - Implementing Streaming Capabilities 30 | ``` 31 | 32 | ## Environment Setup 33 | 34 | Ensure your development environment is prepared with the necessary dependencies. You can install the required packages using pip: 35 | 36 | ```bash 37 | pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4 python-dotenv sqlalchemy fastapi 38 | ``` 39 | 40 | You also need to set the `OPENAI_API_KEY` environment variable for the embeddings model. This can be done directly or loaded from a `.env` file. Create a `.env` file with the following content: 41 | 42 | ``` 43 | OPENAI_API_KEY=your-key-here 44 | ``` 45 | 46 | Load the environment variable in your code: 47 | 48 | ```python 49 | from dotenv import load_dotenv 50 | load_dotenv() 51 | ``` 52 | 53 | ## Code Explanation 54 | 55 | ### Importing Libraries and Modules 56 | 57 | We start by importing necessary libraries and modules, including `BeautifulSoup`, `langchain`, and `FastAPI`. 58 | 59 | ### Loading and Processing Documents 60 | 61 | We load and process documents from the web using `WebBaseLoader` and `RecursiveCharacterTextSplitter`, then create embeddings using `OpenAIEmbeddings`. 62 | 63 | ### Setting Up the Retriever and LLM 64 | 65 | We set up a retriever to fetch relevant documents and configure the language model (LLM) for generating responses. 66 | 67 | ### Implementing the Q&A Chain 68 | 69 | We implement the Q&A chain using `RunnableParallel` and `RunnablePassthrough` to handle the context and question processing. 70 | 71 | ### Streaming Data with FastAPI 72 | 73 | We set up a FastAPI application to stream data to the client using Server-Sent Events (SSE), enabling real-time updates. 74 | 75 | ### Creating the HTML Frontend 76 | 77 | We create an HTML frontend that connects to the FastAPI streaming endpoint and displays the streamed data in real-time. 78 | 79 | ## Usage 80 | 81 | Run the FastAPI server to start streaming data: 82 | 83 | ```bash 84 | python main.py 85 | ``` 86 | 87 | Access the frontend by navigating to `http://localhost:8000` and submit your questions to see real-time streamed responses. 88 | 89 | ## Conclusion 90 | 91 | In this tutorial, we explored how to implement streaming capabilities in a RAG-based application using FastAPI and SSE. 
This approach enhances user experience by providing real-time updates and maintaining source transparency. 92 | 93 | 94 | ## Support 95 | 96 | If you found this tutorial helpful, please consider supporting my work by buying me a coffee or two! 97 | 98 | [](https://www.buymeacoffee.com/evaillancourt) 99 | 100 | Thank you for following along, and I look forward to continuing this journey with you in the next parts of our series. 101 | 102 | --- 103 | 104 | Eric Vaillancourt 105 | 106 | GitHub: [ericvaillancourt](https://github.com/ericvaillancourt) -------------------------------------------------------------------------------- /03 - Implementing Streaming Capabilities/main.py: -------------------------------------------------------------------------------- 1 | import json 2 | from fastapi import FastAPI 3 | from fastapi.responses import StreamingResponse, FileResponse 4 | from fastapi.middleware.cors import CORSMiddleware 5 | from langchain_core.messages import AIMessageChunk 6 | #from logging import logging 7 | 8 | from dotenv import load_dotenv 9 | import bs4 10 | from langchain import hub 11 | from langchain_chroma import Chroma 12 | from langchain_community.document_loaders import WebBaseLoader 13 | from langchain_core.output_parsers import StrOutputParser 14 | from langchain_core.runnables import RunnableParallel, RunnablePassthrough 15 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings 16 | from langchain_text_splitters import RecursiveCharacterTextSplitter 17 | 18 | load_dotenv() 19 | 20 | # Load, chunk and index the contents of the blog. 21 | bs_strainer = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header")) 22 | loader = WebBaseLoader( 23 | web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",), 24 | bs_kwargs={"parse_only": bs_strainer}, 25 | ) 26 | docs = loader.load() 27 | 28 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) 29 | splits = text_splitter.split_documents(docs) 30 | vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings()) 31 | 32 | # Retrieve and generate using the relevant snippets of the blog. 33 | retriever = vectorstore.as_retriever().with_config( 34 | tags=["retriever"] 35 | ) 36 | # We need to add streaming=True 37 | llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, streaming=True) 38 | 39 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 40 | 41 | contextualize_q_system_prompt = """Given a chat history and the latest user question \ 42 | which might reference context in the chat history, formulate a standalone question \ 43 | which can be understood without the chat history. Do NOT answer the question, \ 44 | just reformulate it if needed and otherwise return it as is.""" 45 | contextualize_q_prompt = ChatPromptTemplate.from_messages( 46 | [ 47 | ("system", contextualize_q_system_prompt), 48 | MessagesPlaceholder(variable_name="chat_history"), 49 | ("human", "{question}"), 50 | ] 51 | ) 52 | contextualize_q_chain = (contextualize_q_prompt | llm | StrOutputParser()).with_config( 53 | tags=["contextualize_q_chain"] 54 | ) 55 | 56 | qa_system_prompt = """You are an assistant for question-answering tasks. \ 57 | Use the following pieces of retrieved context to answer the question. \ 58 | If you don't know the answer, just say that you don't know. 
\ 59 | Use three sentences maximum and keep the answer concise.\ 60 | 61 | {context}""" 62 | qa_prompt = ChatPromptTemplate.from_messages( 63 | [ 64 | ("system", qa_system_prompt), 65 | MessagesPlaceholder(variable_name="chat_history"), 66 | ("human", "{question}"), 67 | ] 68 | ) 69 | 70 | def contextualized_question(input: dict): 71 | if input.get("chat_history"): 72 | return contextualize_q_chain 73 | else: 74 | return input["question"] 75 | 76 | def format_docs(docs): 77 | return "\n\n".join(doc.page_content for doc in docs) 78 | 79 | rag_chain = ( 80 | RunnablePassthrough.assign(context=contextualize_q_chain | retriever | format_docs) 81 | | qa_prompt 82 | | llm 83 | ).with_config( 84 | tags=["main_chain"] 85 | ) 86 | 87 | app = FastAPI() 88 | 89 | # Allow CORS for all origins (for testing purposes; restrict in production) 90 | app.add_middleware( 91 | CORSMiddleware, 92 | allow_origins=["*"], 93 | allow_credentials=True, 94 | allow_methods=["*"], 95 | allow_headers=["*"], 96 | ) 97 | 98 | @app.get("/") 99 | async def root(): 100 | return FileResponse("static/index.html") 101 | 102 | def serialize_aimessagechunk(chunk): 103 | """ 104 | Custom serializer for AIMessageChunk objects. 105 | Convert the AIMessageChunk object to a serializable format. 106 | """ 107 | if isinstance(chunk, AIMessageChunk): 108 | return chunk.content 109 | else: 110 | raise TypeError( 111 | f"Object of type {type(chunk).__name__} is not correctly formatted for serialization" 112 | ) 113 | 114 | async def generate_chat_events(message): 115 | try: 116 | async for event in rag_chain.astream_events(message, version="v1"): 117 | # Only get the answer 118 | sources_tags = ['seq:step:3', 'main_chain'] 119 | if all(value in event["tags"] for value in sources_tags) and event["event"] == "on_chat_model_stream": 120 | chunk_content = serialize_aimessagechunk(event["data"]["chunk"]) 121 | if len(chunk_content) != 0: 122 | data_dict = {"data": chunk_content} 123 | data_json = json.dumps(data_dict) 124 | yield f"data: {data_json}\n\n" 125 | 126 | # Get the reformulated question 127 | sources_tags = ['seq:step:2', 'main_chain', 'contextualize_q_chain'] 128 | if all(value in event["tags"] for value in sources_tags) and event["event"] == "on_chat_model_stream": 129 | chunk_content = serialize_aimessagechunk(event["data"]["chunk"]) 130 | if len(chunk_content) != 0: 131 | data_dict = {"reformulated": chunk_content} 132 | data_json = json.dumps(data_dict) 133 | yield f"data: {data_json}\n\n" 134 | 135 | # Get the context 136 | sources_tags = ['main_chain', 'retriever'] 137 | if all(value in event["tags"] for value in sources_tags) and event["event"] == "on_retriever_end": 138 | documents = event['data']['output']['documents'] 139 | # Create a new list to contain the formatted documents 140 | formatted_documents = [] 141 | # Iterate over each document in the original list 142 | for doc in documents: 143 | 144 | # Create a new dictionary for each document with the required format 145 | formatted_doc = { 146 | 'page_content': doc.page_content, 147 | 'metadata': { 148 | 'source': doc.metadata['source'], 149 | }, 150 | 'type': 'Document' 151 | } 152 | # Add the formatted document to the final list 153 | formatted_documents.append(formatted_doc) 154 | 155 | # Create the final dictionary with the key "context" 156 | final_output = {'context': formatted_documents} 157 | 158 | # Convert the dictionary to a JSON string 159 | data_json = json.dumps(final_output) 160 | yield f"data: {data_json}\n\n" 161 | if event["event"] == 
"on_chat_model_end": 162 | print("Chat model has completed one response.") 163 | 164 | except Exception as e: 165 | print('error'+ str(e)) 166 | 167 | @app.get("/chat_stream/{message}") 168 | async def chat_stream_events(message: str): 169 | return StreamingResponse(generate_chat_events({"question": message, "chat_history": []}), media_type="text/event-stream") 170 | 171 | if __name__ == "__main__": 172 | import uvicorn 173 | 174 | uvicorn.run(app, host="0.0.0.0", port=8000) 175 | -------------------------------------------------------------------------------- /03 - Implementing Streaming Capabilities/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 |
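
The browser front end in `static/index.html` talks to the `/chat_stream/{message}` endpoint using server-sent events. As a rough sketch of that same contract (not part of the original project), the following Python client assumes the `main.py` server is running on `http://localhost:8000` and that the `requests` package is installed; it simply reads the `data`, `reformulated`, and `context` payloads that `generate_chat_events` emits:

```python
# Hedged sketch: a minimal Python consumer of the SSE endpoint defined in main.py.
# Assumptions: server running locally on port 8000, `requests` installed.
import json
from urllib.parse import quote

import requests


def stream_chat(question: str, base_url: str = "http://localhost:8000") -> None:
    """Read the SSE frames produced by generate_chat_events and print them."""
    # Percent-encode the question so characters like "?" survive in the URL path.
    url = f"{base_url}/chat_stream/{quote(question, safe='')}"
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        for line in response.iter_lines(decode_unicode=True):
            # Each SSE frame from main.py looks like: data: {"data": "..."}
            if not line or not line.startswith("data: "):
                continue
            payload = json.loads(line[len("data: "):])
            if "reformulated" in payload:
                # Tokens of the standalone (reformulated) question.
                print(payload["reformulated"], end="", flush=True)
            elif "context" in payload:
                # Retrieved documents, sent once as a single JSON object.
                sources = {doc["metadata"]["source"] for doc in payload["context"]}
                print(f"\n[{len(payload['context'])} documents retrieved from {sources}]")
            elif "data" in payload:
                # Tokens of the final answer, streamed piece by piece.
                print(payload["data"], end="", flush=True)


if __name__ == "__main__":
    stream_chat("What is Task Decomposition?")
```

Saved as, say, `client.py` (a hypothetical filename) and run while the server is up, this prints the reformulated question token by token, then the sources of the retrieved documents, then the streamed answer, mirroring the order in which `generate_chat_events` yields its events.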