├── .env.example ├── .gitignore ├── .tool-versions ├── LICENSE ├── README.md ├── assets └── demo.gif ├── backend ├── Dockerfile ├── main.py └── requirements.txt ├── docker-compose.yml ├── frontend ├── Dockerfile ├── app.py └── requirements.txt ├── notebooks └── retrieval.ipynb ├── poetry.lock ├── pyproject.toml └── src ├── __init__.py └── utils.py /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="key" 2 | GOOGLE_API_KEY="key" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | # Mac stuff 163 | **.DS_Store 164 | 165 | # Sample data 166 | data/ 167 | **.pdf -------------------------------------------------------------------------------- /.tool-versions: -------------------------------------------------------------------------------- 1 | poetry 1.4.0 2 | python 3.11.2 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Ruan Pretorius 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rag-qa 2 | 3 | [![GitHub stars](https://img.shields.io/github/stars/ruankie/rag-qa)](https://github.com/ruankie/rag-qa/stargazers) 4 | [![GitHub last commit](https://img.shields.io/github/last-commit/ruankie/rag-qa)](https://github.com/ruankie/rag-qa/commits/main) 5 | 6 | ## Description 7 | 8 | RAG-QA is a free, containerised question-answer framework that allows you to ask questions to your documents in an intuitive way. 
9 | 10 | This app uses a method called retrieval augmented generation (RAG) to retrieve information that is relevant to your question from your uploaded document. It then uses a large language model (LLM) to answer the question with the retrieved context. 11 | 12 | The current implementation uses the following components: 13 | 14 | - **LLM:** [Google Gemini Pro](https://ai.google.dev/) 15 | - **Embedding Model:** [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) 16 | - **Vector Database:** [Chroma DB](https://www.trychroma.com/) 17 | - **Frontend:** [Streamlit](https://streamlit.io/) 18 | - **Backend:** [FastAPI](https://fastapi.tiangolo.com/) 19 | 20 | ## Demo 21 | 22 | > This demo shows the app answering a question about Alphabet Inc's Q3 2023 financial results. Note that the app frontend is shown on the left, the logs on the upper right, and the PDF report on the bottom left. 23 | 24 | ![demo](./assets/demo.gif) 25 | 26 | ## Usage 27 | 28 | > Note: The first time you run this, it might take a while to build all the images and download the embedding models. 29 | 30 | 1. You will need an API key from OpenAI or Google. You can create one here: 31 | - [Google](https://ai.google.dev/) - to use models like Gemini (recommended since it's free) 32 | - [OpenAI](https://platform.openai.com/account/api-keys) - to use models like GPT-4 33 | 2. Set up your API keys in a file called `.env` (see `.env.example` for an example) 34 | 35 | 3. Now set up the backend and frontend: 36 | 37 | ```shell 38 | docker compose up 39 | ``` 40 | 41 | 4. Navigate to the frontend in your browser: [http://localhost:8501/](http://localhost:8501/) 42 | 5. Upload a PDF document that you would like to ask a question about 43 | 6. Ask a question in the chat input section and wait for a response 44 | 45 | ## Development 46 | 47 | 1. (Optional) [Download](https://asdf-vm.com/guide/getting-started.html#_2-download-asdf) and [install](https://asdf-vm.com/guide/getting-started.html#_3-install-asdf) [asdf](https://asdf-vm.com/) on your machine to manage the versions of Python and Poetry used in this project. Once done, run `asdf install` to install the versions specified in `.tool-versions`. Alternatively, install them manually as described below: 48 | 2. [Install Poetry](https://python-poetry.org/docs/#installation) on your machine 49 | 3. [Install Python 3](https://www.python.org/downloads/) on your machine 50 | 4. Create a virtual environment and install the dependencies specified in `pyproject.toml` by running `poetry install`. 51 | 5. Set up your API keys in a file called `.env` (see `.env.example` for an example) 52 | 6. When you run your backend and frontend containers locally, use `docker compose up --build` to ensure the latest changes are reflected in the containers. 53 | -------------------------------------------------------------------------------- /assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruankie/rag-qa/ee92b23da227cc0b61578c06eb33dc42a2075b16/assets/demo.gif -------------------------------------------------------------------------------- /backend/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use an official Python runtime as a parent image 2 | FROM python:3.11 3 | 4 | WORKDIR /app 5 | COPY . 
/app 6 | RUN pip install -r requirements.txt 7 | EXPOSE 8000 8 | 9 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] 10 | -------------------------------------------------------------------------------- /backend/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | RAG-QA backend logic. 3 | """ 4 | 5 | from fastapi import FastAPI 6 | from pydantic import BaseModel 7 | from src.utils import get_answer 8 | 9 | app = FastAPI() 10 | 11 | 12 | class Question(BaseModel): 13 | """Question object used for RAG QA.""" 14 | 15 | question: str 16 | pdf: str 17 | 18 | 19 | @app.get("/") 20 | def root(): 21 | """Check status of backend server.""" 22 | return {"status": "Server running."} 23 | 24 | 25 | @app.post("/ask") 26 | def ask_question(question: Question): 27 | """Run the QA RAG cycle on a document and return the answer.""" 28 | ans = get_answer(pdf_string=question.pdf, question=question.question) 29 | return ans 30 | -------------------------------------------------------------------------------- /backend/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi==0.109.0 2 | uvicorn==0.25.0 3 | langchain==0.1.0 4 | langchain-community==0.0.12 5 | chromadb==0.4.22 6 | sentence-transformers==2.2.2 7 | langchain-google-genai==0.0.6 8 | pypdf==3.17.4 -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | services: 3 | 4 | backend: 5 | build: ./backend 6 | ports: 7 | - "8000:8000" 8 | volumes: 9 | - ./backend/:/app 10 | - ./src/:/app/src 11 | networks: 12 | - my-ragnet 13 | env_file: 14 | - .env 15 | 16 | frontend: 17 | build: ./frontend 18 | ports: 19 | - "8501:8501" 20 | volumes: 21 | - ./frontend/:/app 22 | depends_on: 23 | - backend 24 | networks: 25 | - my-ragnet 26 | 27 | networks: 28 | my-ragnet: 29 | external: false -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | ENV PYTHONUNBUFFERED=1 4 | ENV PYTHONIOENCODING=UTF-8 5 | 6 | WORKDIR /app 7 | COPY . /app 8 | RUN pip install -r requirements.txt 9 | EXPOSE 8501 10 | 11 | CMD ["streamlit", "run", "app.py"] 12 | -------------------------------------------------------------------------------- /frontend/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | RAG-QA frontend logic. 
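The frontend renders a Streamlit chat interface: the user uploads a PDF in the sidebar, the file is base64-encoded, and the question together with the encoded PDF is POSTed to the backend's /ask endpoint; the JSON response is then shown in the chat.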
3 | """ 4 | 5 | import base64 6 | import logging 7 | import requests 8 | import streamlit as st 9 | 10 | # Set up logger 11 | logging.basicConfig(level=logging.INFO) 12 | logger = logging.getLogger() 13 | 14 | # Set page title 15 | st.title("💬 PDF QA") 16 | 17 | # Create a file uploader in the sidebar 18 | st.sidebar.title("Upload a PDF file") 19 | uploaded_file = st.sidebar.file_uploader( 20 | label="Choose a file", type="pdf", accept_multiple_files=False 21 | ) 22 | 23 | # Create a chat interface 24 | message = st.chat_message("assistant") 25 | message.write("Ask a question to your page.") 26 | 27 | 28 | # Once file and question received 29 | prompt = st.chat_input("Ask something") 30 | if prompt and uploaded_file is not None: 31 | # Update user with messages 32 | message = st.chat_message("user") 33 | logger.info(f"User message received: {prompt}") 34 | message.write(prompt) 35 | message = st.chat_message("assistant") 36 | message.write("Thinking...") 37 | 38 | # Encode the PDF file as base64 string 39 | logger.info("Encoding PDF file as base64 string") 40 | encoded_pdf = base64.b64encode(uploaded_file.read()).decode("ascii") 41 | json_payload = {"question": prompt, "pdf": encoded_pdf} 42 | 43 | # Send request and display answer to user 44 | logger.info("Sending request to backend") 45 | response = requests.post("http://backend:8000/ask", json=json_payload, timeout=120) 46 | 47 | logger.info("Showing response from backend") 48 | message.write(response.json()) 49 | -------------------------------------------------------------------------------- /frontend/requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit==1.30.0 2 | requests==2.31.0 3 | -------------------------------------------------------------------------------- /notebooks/retrieval.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"markdown","id":"86fc5bb2-017f-434e-8cd6-53ab214a5604","metadata":{"id":"86fc5bb2-017f-434e-8cd6-53ab214a5604"},"source":["# Quickstart\n","\n","[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/use_cases/question_answering/quickstart.ipynb)\n","\n","Adapted from: https://python.langchain.com/docs/use_cases/question_answering/quickstart"]},{"cell_type":"markdown","id":"51ef48de-70b6-4f43-8e0b-ab9b84c9c02a","metadata":{"id":"51ef48de-70b6-4f43-8e0b-ab9b84c9c02a"},"source":["We need to set environment variable `OPENAI_API_KEY`, which can be done directly or loaded from a `.env` file like so:"]},{"cell_type":"code","execution_count":1,"id":"143787ca-d8e6-4dc9-8281-4374f4d71720","metadata":{"id":"143787ca-d8e6-4dc9-8281-4374f4d71720"},"outputs":[{"data":{"text/plain":["True"]},"execution_count":1,"metadata":{},"output_type":"execute_result"}],"source":["import dotenv\n","dotenv.load_dotenv()"]},{"cell_type":"code","execution_count":2,"id":"af0dc499","metadata":{},"outputs":[],"source":["from src.utils import url_qa"]},{"cell_type":"code","execution_count":3,"id":"c949e560","metadata":{},"outputs":[],"source":["ans = url_qa(\n"," url=\"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n"," question=\"What is Task Decomposition?\"\n",")"]},{"cell_type":"code","execution_count":null,"id":"5f5bf2c4","metadata":{},"outputs":[],"source":[]},{"cell_type":"markdown","id":"842cf72d-abbc-468e-a2eb-022470347727","metadata":{"id":"842cf72d-abbc-468e-a2eb-022470347727"},"source":["## Detailed 
walkthrough\n","\n","Let's go through the above code step-by-step to really understand what's going on."]},{"cell_type":"markdown","id":"ba5daed6","metadata":{"id":"ba5daed6"},"source":["## 1. Indexing: Load\n","\n","We need to first load the blog post contents. We can use [DocumentLoaders](/docs/modules/data_connection/document_loaders/) for this, which are objects that load in data from a source and return a list of [Documents](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html). A `Document` is an object with some `page_content` (str) and `metadata` (dict).\n","\n","In this case we'll use the [WebBaseLoader](/docs/integrations/document_loaders/web_base), which uses `urllib` to load HTML form web URLs and `BeautifulSoup` to parse it to text. We can customize the HTML -> text parsing by passing in parameters to the `BeautifulSoup` parser via `bs_kwargs` (see [BeautifulSoup docs](https://beautiful-soup-4.readthedocs.io/en/latest/#beautifulsoup)). In this case only HTML tags with class \"post-content\", \"post-title\", or \"post-header\" are relevant, so we'll remove all others."]},{"cell_type":"code","execution_count":2,"id":"cf4d5c72","metadata":{"id":"cf4d5c72"},"outputs":[],"source":["import bs4\n","from langchain_community.document_loaders import WebBaseLoader\n","\n","# Only keep post title, headers, and content from the full HTML.\n","bs4_strainer = bs4.SoupStrainer(class_=(\"post-title\", \"post-header\", \"post-content\"))\n","loader = WebBaseLoader(\n"," web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n"," bs_kwargs={\"parse_only\": bs4_strainer},\n",")\n","docs = loader.load()"]},{"cell_type":"code","execution_count":3,"id":"207f87a3-effa-4457-b013-6d233bc7a088","metadata":{"id":"207f87a3-effa-4457-b013-6d233bc7a088","outputId":"15f3af0f-0289-4d1c-fa02-b5aa6fa0b1e7"},"outputs":[{"data":{"text/plain":["42824"]},"execution_count":3,"metadata":{},"output_type":"execute_result"}],"source":["len(docs[0].page_content)"]},{"cell_type":"code","execution_count":4,"id":"52469796-5ce4-4c12-bd2a-a903872dac33","metadata":{"id":"52469796-5ce4-4c12-bd2a-a903872dac33","outputId":"6e08cb28-a40c-42a4-e9a6-1bef9bb4f159"},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","\n"," LLM Powered Autonomous Agents\n"," \n","Date: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\n","\n","\n","Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. 
The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\n","Agent System Overview#\n","In\n"]}],"source":["print(docs[0].page_content[:500])"]},{"cell_type":"code","execution_count":null,"id":"29258be4","metadata":{},"outputs":[],"source":[]},{"cell_type":"code","execution_count":8,"id":"b32b7f0f","metadata":{},"outputs":[],"source":["from langchain_community.document_loaders import PyPDFLoader\n","from langchain.text_splitter import RecursiveCharacterTextSplitter\n","\n","loader = PyPDFLoader(\"https://arxiv.org/pdf/2103.15348.pdf\")\n","all_splits = loader.load_and_split(\n"," text_splitter=RecursiveCharacterTextSplitter(\n"," chunk_size=1000, chunk_overlap=200, add_start_index=True\n"," )\n",")"]},{"cell_type":"code","execution_count":9,"id":"8d744e6b","metadata":{},"outputs":[{"data":{"text/plain":["57"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["len(all_splits)"]},{"cell_type":"code","execution_count":null,"id":"635996d1","metadata":{},"outputs":[],"source":[]},{"cell_type":"markdown","id":"ee5c6556-56be-4067-adbc-98b5aa19ef6e","metadata":{"id":"ee5c6556-56be-4067-adbc-98b5aa19ef6e"},"source":["### Go deeper\n","`DocumentLoader`: Object that loads data from a source as list of `Documents`.\n","- [Docs](/docs/modules/data_connection/document_loaders/): Detailed documentation on how to use `DocumentLoaders`.\n","- [Integrations](/docs/integrations/document_loaders/): 160+ integrations to choose from.\n","- [Interface](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.base.BaseLoader.html): API reference  for the base interface."]},{"cell_type":"markdown","id":"fd2cc9a7","metadata":{"id":"fd2cc9a7"},"source":["## 2. Indexing: Split\n","\n","Our loaded document is over 42k characters long. This is too long to fit in the context window of many models. Even for those models that could fit the full post in their context window, models can struggle to find information in very long inputs.\n","\n","To handle this we'll split the `Document` into chunks for embedding and vector storage. This should help us retrieve only the most relevant bits of the blog post at run time.\n","\n","In this case we'll split our documents into chunks of 1000 characters with 200 characters of overlap between chunks. The overlap helps mitigate the possibility of separating a statement from important context related to it. We use the [RecursiveCharacterTextSplitter](/docs/modules/data_connection/document_transformers/recursive_text_splitter), which will recursively split the document using common separators like new lines until each chunk is the appropriate size. 
This is the recommended text splitter for generic text use cases.\n","\n","We set `add_start_index=True` so that the character index at which each split Document starts within the initial Document is preserved as metadata attribute \"start_index\"."]},{"cell_type":"code","execution_count":5,"id":"4b11c01d","metadata":{"id":"4b11c01d"},"outputs":[],"source":["from langchain.text_splitter import RecursiveCharacterTextSplitter\n","\n","text_splitter = RecursiveCharacterTextSplitter(\n"," chunk_size=1000, chunk_overlap=200, add_start_index=True\n",")\n","all_splits = text_splitter.split_documents(docs)"]},{"cell_type":"code","execution_count":6,"id":"3741eb67-9caf-40f2-a001-62f49349bff5","metadata":{"id":"3741eb67-9caf-40f2-a001-62f49349bff5","outputId":"21939afc-3a10-43aa-d415-569b23fe31e3"},"outputs":[{"data":{"text/plain":["66"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["len(all_splits)"]},{"cell_type":"code","execution_count":7,"id":"f868d0e5-5670-4d54-b562-f50265e907f4","metadata":{"id":"f868d0e5-5670-4d54-b562-f50265e907f4","outputId":"805211a6-ace2-450c-b0d0-0218756484c0"},"outputs":[{"data":{"text/plain":["969"]},"execution_count":7,"metadata":{},"output_type":"execute_result"}],"source":["len(all_splits[0].page_content)"]},{"cell_type":"code","execution_count":8,"id":"5c9e5f27-c8e3-4ca7-8a8e-45c5de2901cc","metadata":{"id":"5c9e5f27-c8e3-4ca7-8a8e-45c5de2901cc","outputId":"349a2950-ab87-49a7-dd6d-e7bf4a29e9f5"},"outputs":[{"data":{"text/plain":["{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',\n"," 'start_index': 7056}"]},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":["all_splits[10].metadata"]},{"cell_type":"markdown","id":"0a33bd4d","metadata":{"id":"0a33bd4d"},"source":["### Go deeper\n","\n","`TextSplitter`: Object that splits a list of `Document`s into smaller chunks. Subclass of `DocumentTransformer`s.\n","- Explore `Context-aware splitters`, which keep the location (\"context\") of each split in the original `Document`:\n"," - [Markdown files](/docs/modules/data_connection/document_transformers/markdown_header_metadata)\n"," - [Code (py or js)](/docs/integrations/document_loaders/source_code)\n"," - [Scientific papers](/docs/integrations/document_loaders/grobid)\n","- [Interface](https://api.python.langchain.com/en/latest/text_splitter/langchain.text_splitter.TextSplitter.html): API reference for the base interface.\n","\n","`DocumentTransformer`: Object that performs a transformation on a list of `Document`s.\n","- [Docs](/docs/modules/data_connection/document_transformers/): Detailed documentation on how to use `DocumentTransformers`\n","- [Integrations](/docs/integrations/document_transformers/)\n","- [Interface](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.transformers.BaseDocumentTransformer.html): API reference for the base interface.\n"]},{"cell_type":"markdown","id":"46547031-2352-4321-9970-d6ea27285c2e","metadata":{"id":"46547031-2352-4321-9970-d6ea27285c2e"},"source":["## 3. Indexing: Store\n","\n","Now we need to index our 66 text chunks so that we can search over them at runtime. The most common way to do this is to embed the contents of each document split and insert these embeddings into a vector database (or vector store). When we want to search over our splits, we take a text search query, embed it, and perform some sort of \"similarity\" search to identify the stored splits with the most similar embeddings to our query embedding. 
The simplest similarity measure is cosine similarity — we measure the cosine of the angle between each pair of embeddings (which are high dimensional vectors).\n","\n","We can embed and store all of our document splits in a single command using the [Chroma](/docs/integrations/vectorstores/chroma) vector store and [OpenAIEmbeddings](/docs/integrations/text_embedding/openai) model."]},{"cell_type":"code","execution_count":10,"id":"e9c302c8","metadata":{"id":"e9c302c8"},"outputs":[{"name":"stderr","output_type":"stream","text":["/Users/ruan/Library/Caches/pypoetry/virtualenvs/rag-qa-jxFCGv5h-py3.11/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n"," from .autonotebook import tqdm as notebook_tqdm\n"]}],"source":["from langchain_community.vectorstores import Chroma\n","# from langchain_openai import OpenAIEmbeddings\n","# from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings\n","from langchain_community.embeddings import HuggingFaceEmbeddings\n","\n","embedding_function = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n","vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding_function)"]},{"cell_type":"markdown","id":"dc6f22b0","metadata":{"id":"dc6f22b0"},"source":["### Go deeper\n","`Embeddings`: Wrapper around a text embedding model, used for converting text to embeddings.\n","- [Docs](/docs/modules/data_connection/text_embedding): Detailed documentation on how to use embeddings.\n","- [Integrations](/docs/integrations/text_embedding/): 30+ integrations to choose from.\n","- [Interface](https://api.python.langchain.com/en/latest/embeddings/langchain_core.embeddings.Embeddings.html): API reference for the base interface.\n","\n","`VectorStore`: Wrapper around a vector database, used for storing and querying embeddings.\n","- [Docs](/docs/modules/data_connection/vectorstores/): Detailed documentation on how to use vector stores.\n","- [Integrations](/docs/integrations/vectorstores/): 40+ integrations to choose from.\n","- [Interface](https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStore.html): API reference for the base interface.\n","\n","This completes the **Indexing** portion of the pipeline. At this point we have a query-able vector store containing the chunked contents of our blog post. Given a user question, we should ideally be able to return the snippets of the blog post that answer the question."]},{"cell_type":"markdown","id":"70d64d40-e475-43d9-b64c-925922bb5ef7","metadata":{"id":"70d64d40-e475-43d9-b64c-925922bb5ef7"},"source":["## 4. Retrieval and Generation: Retrieve\n","\n","Now let's write the actual application logic. We want to create a simple application that takes a user question, searches for documents relevant to that question, passes the retrieved documents and initial question to a model, and returns an answer.\n","\n","First we need to define our logic for searching over documents. LangChain defines a [Retriever](/docs/modules/data_connection/retrievers/) interface which wraps an index that can return relevant `Documents` given a string query.\n","\n","The most common type of `Retriever` is the [VectorStoreRetriever](/docs/modules/data_connection/retrievers/vectorstore), which uses the similarity search capabilities of a vector store to facillitate retrieval. 
Any `VectorStore` can easily be turned into a `Retriever` with `VectorStore.as_retriever()`:"]},{"cell_type":"code","execution_count":11,"id":"4414df0d-5d43-46d0-85a9-5f47be0dd099","metadata":{"id":"4414df0d-5d43-46d0-85a9-5f47be0dd099"},"outputs":[],"source":["retriever = vectorstore.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": 6})"]},{"cell_type":"code","execution_count":15,"id":"e2c26b7d","metadata":{"id":"e2c26b7d"},"outputs":[],"source":["# retrieved_docs = retriever.invoke(\"What are the approaches to Task Decomposition?\")\n","retrieved_docs = retriever.invoke(\"What does the abstract at the beginning of the document say?\")"]},{"cell_type":"code","execution_count":16,"id":"8684291d-0f5e-453a-8d3e-ff9feea765d0","metadata":{"id":"8684291d-0f5e-453a-8d3e-ff9feea765d0","outputId":"2f3cd364-94c5-4ee1-c928-08d27690d3b4"},"outputs":[{"data":{"text/plain":["6"]},"execution_count":16,"metadata":{},"output_type":"execute_result"}],"source":["len(retrieved_docs)"]},{"cell_type":"code","execution_count":17,"id":"9a5dc074-816d-409a-b005-ab4eddfd76af","metadata":{"id":"9a5dc074-816d-409a-b005-ab4eddfd76af","outputId":"0c60575a-5dce-4dc6-94d8-11312b04ee13"},"outputs":[{"name":"stdout","output_type":"stream","text":["nition accuracy.\n","As shown in Figure 4 (a), the\n","document contains columns of text\n","written vertically15, a common style\n","in Japanese. Due to scanning noise\n","and archaic printing technology, the\n","columns can be skewed or have vari-\n","able widths, and hence cannot be eas-\n","ily identified via rule-based methods.\n","Within each column, words are sepa-\n","rated by white spaces of variable size,\n","and the vertical positions of objects\n","can be an indicator of their layout\n","type.\n","15A document page consists of eight rows like this. For simplicity we skip the row\n","segmentation discussion and refer readers to the source code when available.\n"]}],"source":["print(retrieved_docs[0].page_content)"]},{"cell_type":"markdown","id":"5d5a113b","metadata":{"id":"5d5a113b"},"source":["### Go deeper\n","Vector stores are commonly used for retrieval, but there are other ways to do retrieval, too.\n","\n","`Retriever`: An object that returns `Document`s given a text query\n","- [Docs](/docs/modules/data_connection/retrievers/): Further documentation on the interface and built-in retrieval techniques. Some of which include:\n"," - `MultiQueryRetriever` [generates variants of the input question](/docs/modules/data_connection/retrievers/MultiQueryRetriever) to improve retrieval hit rate.\n"," - `MultiVectorRetriever` (diagram below) instead generates [variants of the embeddings](/docs/modules/data_connection/retrievers/multi_vector), also in order to improve retrieval hit rate.\n"," - `Max marginal relevance` selects for [relevance and diversity](https://www.cs.cmu.edu/~jgc/publication/The_Use_MMR_Diversity_Based_LTMIR_1998.pdf) among the retrieved documents to avoid passing in duplicate context.\n"," - Documents can be filtered during vector store retrieval using [`metadata` filters](/docs/use_cases/question_answering/document-context-aware-QA).\n","- [Integrations](/docs/integrations/retrievers/): Integrations with retrieval services.\n","- [Interface](https://api.python.langchain.com/en/latest/retrievers/langchain_core.retrievers.BaseRetriever.html): API reference for the base interface."]},{"cell_type":"markdown","id":"415d6824","metadata":{"id":"415d6824"},"source":["## 5. 
Retrieval and Generation: Generate\n","\n","Let's put it all together into a chain that takes a question, retrieves relevant documents, constructs a prompt, passes that to a model, and parses the output.\n","\n","We'll use the gpt-3.5-turbo OpenAI chat model, but any LangChain `LLM` or `ChatModel` could be substituted in."]},{"cell_type":"code","execution_count":18,"id":"d34d998c-9abf-4e01-a4ad-06dadfcf131c","metadata":{"id":"d34d998c-9abf-4e01-a4ad-06dadfcf131c"},"outputs":[],"source":["# from langchain_openai import ChatOpenAI\n","# llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n","\n","from langchain_google_genai import ChatGoogleGenerativeAI\n","\n","llm = ChatGoogleGenerativeAI(model=\"gemini-pro\", temperature=0)"]},{"cell_type":"markdown","id":"bc826723-36fc-45d1-a3ef-df8c2c8471a8","metadata":{"id":"bc826723-36fc-45d1-a3ef-df8c2c8471a8"},"source":["We'll use a prompt for RAG that is checked into the LangChain prompt hub ([here](https://smith.langchain.com/hub/rlm/rag-prompt))."]},{"cell_type":"code","execution_count":19,"id":"bede955b-9aeb-4fd3-964d-8e43f214ce70","metadata":{"id":"bede955b-9aeb-4fd3-964d-8e43f214ce70"},"outputs":[],"source":["# from langchain import hub\n","# prompt = hub.pull(\"rlm/rag-prompt\")\n","\n","from langchain_core.prompts import PromptTemplate\n","\n","template = \"\"\"Use the following pieces of context to answer the question at the end.\n","If you don't know the answer, just say that you don't know, don't try to make up an answer.\n","Keep the answer as concise as possible.\n","\n","{context}\n","\n","Question: {question}\n","\n","Helpful Answer:\"\"\"\n","prompt = PromptTemplate.from_template(template)"]},{"cell_type":"code","execution_count":20,"id":"11c35354-f275-47ec-9f72-ebd5c23731eb","metadata":{"id":"11c35354-f275-47ec-9f72-ebd5c23731eb","outputId":"fae2bead-cc5c-4f56-b335-9cd661fe9420"},"outputs":[{"data":{"text/plain":["[HumanMessage(content=\"Use the following pieces of context to answer the question at the end.\\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\nKeep the answer as concise as possible.\\n\\nfiller context\\n\\nQuestion: filler question\\n\\nHelpful Answer:\")]"]},"execution_count":20,"metadata":{},"output_type":"execute_result"}],"source":["example_messages = prompt.invoke(\n"," {\"context\": \"filler context\", \"question\": \"filler question\"}\n",").to_messages()\n","example_messages"]},{"cell_type":"code","execution_count":21,"id":"2ccc50fa-5fa2-4f80-8685-58ec2255523a","metadata":{"id":"2ccc50fa-5fa2-4f80-8685-58ec2255523a","outputId":"7c0cf250-44e2-48ce-c85b-67f7a67438ec"},"outputs":[{"name":"stdout","output_type":"stream","text":["Use the following pieces of context to answer the question at the end.\n","If you don't know the answer, just say that you don't know, don't try to make up an answer.\n","Keep the answer as concise as possible.\n","\n","filler context\n","\n","Question: filler question\n","\n","Helpful Answer:\n"]}],"source":["print(example_messages[0].content)"]},{"cell_type":"markdown","id":"51f9a210-1eee-4054-99d7-9d9ddf7e3593","metadata":{"id":"51f9a210-1eee-4054-99d7-9d9ddf7e3593"},"source":["We'll use the [LCEL Runnable](/docs/expression_language/) protocol to define the chain, allowing us to\n","- pipe together components and functions in a transparent way\n","- automatically trace our chain in LangSmith\n","- get streaming, async, and batched calling out of the 
box"]},{"cell_type":"code","execution_count":22,"id":"99fa1aec","metadata":{"id":"99fa1aec"},"outputs":[],"source":["from langchain_core.output_parsers import StrOutputParser\n","from langchain_core.runnables import RunnablePassthrough\n","\n","\n","def format_docs(docs):\n"," return \"\\n\\n\".join(doc.page_content for doc in docs)\n","\n","\n","rag_chain = (\n"," {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n"," | prompt\n"," | llm\n"," | StrOutputParser()\n",")"]},{"cell_type":"code","execution_count":23,"id":"8655a152-d7cf-466f-b1bc-fbff9ae2b889","metadata":{"id":"8655a152-d7cf-466f-b1bc-fbff9ae2b889","outputId":"2810e04c-4959-4ce3-eb71-caf987daaf13"},"outputs":[{"name":"stdout","output_type":"stream","text":["LayoutParser provides a comprehensive toolkit for deep learning-based document image analysis. It is easy to install and use, supports high-level customization, and enables easy labeling and training of DL models on unique document image datasets. The LayoutParser community platform facilitates sharing DL models and DIA pipelines, inviting discussion and promoting code reproducibility and reusability. The LayoutParser team is committed to keeping the library updated continuously and bringing the most recent advances in DL-based DIA to a diverse audience of end-users."]}],"source":["for chunk in rag_chain.stream(\"What is the paper conclusion?\"):\n"," print(chunk, end=\"\", flush=True)"]},{"cell_type":"markdown","id":"2c000e5f-2b7f-4eb9-8876-9f4b186b4a08","metadata":{"id":"2c000e5f-2b7f-4eb9-8876-9f4b186b4a08"},"source":[":::tip\n","\n","Check out the [LangSmith trace](https://smith.langchain.com/public/1799e8db-8a6d-4eb2-84d5-46e8d7d5a99b/r)\n","\n",":::"]},{"cell_type":"markdown","id":"f7d52c84","metadata":{"id":"f7d52c84"},"source":["### Go deeper\n","\n","#### Choosing a model\n","`ChatModel`: An LLM-backed chat model. Takes in a sequence of messages and returns a message.\n","- [Docs](/docs/modules/model_io/chat/): Detailed documentation on\n","- [Integrations](/docs/integrations/chat/): 25+ integrations to choose from.\n","- [Interface](https://api.python.langchain.com/en/latest/language_models/langchain_core.language_models.chat_models.BaseChatModel.html): API reference for the base interface.\n","\n","`LLM`: A text-in-text-out LLM. Takes in a string and returns a string.\n","- [Docs](/docs/modules/model_io/llms)\n","- [Integrations](/docs/integrations/llms): 75+ integrations to choose from.\n","- [Interface](https://api.python.langchain.com/en/latest/language_models/langchain_core.language_models.llms.BaseLLM.html): API reference for the base interface.\n","\n","See a guide on RAG with locally-running models [here](/docs/use_cases/question_answering/local_retrieval_qa)."]},{"cell_type":"markdown","id":"fa82f437","metadata":{"id":"fa82f437"},"source":["#### Customizing the prompt\n","\n","As shown above, we can load prompts (e.g., [this RAG prompt](https://smith.langchain.com/hub/rlm/rag-prompt)) from the prompt hub. The prompt can also be easily customized:"]},{"cell_type":"code","execution_count":21,"id":"e4fee704","metadata":{"id":"e4fee704","outputId":"9ce63bf2-bfce-4711-892f-4fdb3cdd322d"},"outputs":[{"data":{"text/plain":["'Task decomposition is the process of breaking down a complex task into smaller, more manageable subtasks. This can be done using a variety of methods, such as chain of thought (CoT), tree of thoughts (ToT), or simply by using task-specific instructions. 
Thanks for asking!'"]},"execution_count":21,"metadata":{},"output_type":"execute_result"}],"source":["from langchain_core.prompts import PromptTemplate\n","\n","template = \"\"\"Use the following pieces of context to answer the question at the end.\n","If you don't know the answer, just say that you don't know, don't try to make up an answer.\n","Keep the answer as concise as possible.\n","\n","{context}\n","\n","Question: {question}\n","\n","Helpful Answer:\"\"\"\n","custom_rag_prompt = PromptTemplate.from_template(template)\n","\n","rag_chain = (\n"," {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n"," | custom_rag_prompt\n"," | llm\n"," | StrOutputParser()\n",")\n","\n","rag_chain.invoke(\"What is Task Decomposition?\")"]},{"cell_type":"markdown","id":"94b952e6-dc4b-415b-9cf3-1ad333e48366","metadata":{"id":"94b952e6-dc4b-415b-9cf3-1ad333e48366"},"source":[":::tip\n","\n","Check out the [LangSmith trace](https://smith.langchain.com/public/da23c4d8-3b33-47fd-84df-a3a582eedf84/r)\n","\n",":::"]},{"cell_type":"markdown","id":"580e18de-132d-4009-ba67-4aaf2c7717a2","metadata":{"id":"580e18de-132d-4009-ba67-4aaf2c7717a2"},"source":["## Next steps\n","\n","That's a lot of content we've covered in a short amount of time. There's plenty of features, integrations, and extensions to explore in each of the above sections. Along from the **Go deeper** sources mentioned above, good next steps include:\n","\n","- [Return sources](/docs/use_cases/question_answering/sources): Learn how to return source documents\n","- [Streaming](/docs/use_cases/question_answering/streaming): Learn how to stream outputs and intermediate steps\n","- [Add chat history](/docs/use_cases/question_answering/chat_history): Learn how to add chat history to your app"]}],"metadata":{"colab":{"provenance":[{"file_id":"https://github.com/langchain-ai/langchain/blob/master/docs/docs/use_cases/question_answering/quickstart.ipynb","timestamp":1705256074169}]},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.2"}},"nbformat":4,"nbformat_minor":5} 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "rag-qa" 3 | version = "0.1.0" 4 | description = "A containerised QA framework to ask questions to your documents." 
5 | authors = ["Ruan Pretorius"] 6 | license = "MIT" 7 | readme = "README.md" 8 | packages = [ 9 | { include = "src" }, 10 | ] 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.11" 14 | python-dotenv = "^1.0.0" 15 | jupyterlab = "^4.0.10" 16 | fastapi = "^0.109.0" 17 | uvicorn = "^0.25.0" 18 | langchain = "^0.1.0" 19 | streamlit = "^1.30.0" 20 | requests = "^2.31.0" 21 | langchain-community = "^0.0.12" 22 | chromadb = "^0.4.22" 23 | bs4 = "^0.0.1" 24 | langchainhub = "^0.1.14" 25 | langchain-openai = "^0.0.2.post1" 26 | sentence-transformers = "^2.2.2" 27 | langchain-google-genai = "^0.0.6" 28 | pypdf = "^3.17.4" 29 | python-multipart = "^0.0.6" 30 | 31 | [tool.poetry.group.dev.dependencies] 32 | pytest = "^7.3.1" 33 | black = "^23.3.0" 34 | pylint = "^2.17.4" 35 | 36 | [build-system] 37 | requires = ["poetry-core"] 38 | build-backend = "poetry.core.masonry.api" 39 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruankie/rag-qa/ee92b23da227cc0b61578c06eb33dc42a2075b16/src/__init__.py -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions used in the RAG app. 3 | """ 4 | 5 | import base64 6 | import logging 7 | from typing import List 8 | 9 | from langchain_core.documents import Document 10 | from langchain_core.vectorstores import VectorStoreRetriever 11 | from langchain_core.language_models.chat_models import BaseChatModel 12 | from langchain_core.runnables.base import Runnable 13 | from langchain_core.output_parsers import StrOutputParser 14 | from langchain_core.runnables import RunnablePassthrough 15 | from langchain_core.prompts import PromptTemplate 16 | from langchain.text_splitter import RecursiveCharacterTextSplitter 17 | from langchain_community.vectorstores import Chroma 18 | from langchain_community.embeddings import HuggingFaceEmbeddings 19 | from langchain_community.document_loaders import PyPDFLoader 20 | from langchain_google_genai import ChatGoogleGenerativeAI 21 | 22 | # Set up logger 23 | logging.basicConfig(level=logging.INFO) 24 | logger = logging.getLogger() 25 | 26 | TEMPLATE = """Use the following pieces of context to answer the question at the end. 27 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 28 | Be helpful in your answer and be sure to reference the following context when possible. 29 | 30 | {context} 31 | 32 | Question: {question} 33 | 34 | Answer:""" 35 | 36 | prompt = PromptTemplate.from_template(TEMPLATE) 37 | llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0) 38 | 39 | 40 | def _save_doc_locally(pdf_string: str, path: str = "./document.pdf") -> str: 41 | """ 42 | Decodes the base64 string representation of the pdf document 43 | and saves it to a local path. 44 | 45 | Args: 46 | pdf_string (str): Base64 string representation of the pdf document. 47 | path (str): Local path to save pdf document to. 48 | 49 | Returns: 50 | str: Local path where pdf document was saved to. 
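Raises:
    TypeError: If the decoded bytes do not start with the %PDF magic number (i.e. the payload is not a valid PDF).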
51 | """ 52 | logger.info("Saving pdf document locally") 53 | # Decode the base64 string to bytes 54 | decoded_bytes = base64.b64decode(pdf_string) 55 | if decoded_bytes[0:4] != b"%PDF": 56 | raise TypeError("Invalid PDF file received.") 57 | 58 | # Save pdf file 59 | with open(path, "wb") as _f: 60 | _f.write(decoded_bytes) 61 | 62 | return path 63 | 64 | 65 | def _load_and_split_doc(pdf_path: str) -> List[Document]: 66 | """ 67 | Loads the content of the pdf document and separates it 68 | into chunks for embedding. 69 | 70 | Args: 71 | pdf_path (str): Local path to pdf document. 72 | 73 | Returns: 74 | List[Document]: Document chunks (splits). 75 | """ 76 | logger.info("Splitting pdf document into chunks") 77 | loader = PyPDFLoader(pdf_path) 78 | splits = loader.load_and_split( 79 | text_splitter=RecursiveCharacterTextSplitter( 80 | chunk_size=500, chunk_overlap=50, add_start_index=True 81 | ) 82 | ) 83 | return splits 84 | 85 | 86 | def _get_embedding_retriever(splits: List[Document]) -> VectorStoreRetriever: 87 | """ 88 | Embeds all the document chunks/splits and returns the 89 | appropriate similarity retriever for the vector database used. 90 | 91 | Args: 92 | splits (List[Document]): Document chunks/splits. 93 | 94 | Returns: 95 | VectorStoreRetriever: Vector store retriever. 96 | """ 97 | logger.info("Embedding document chunks") 98 | embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") 99 | vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_function) 100 | retriever = vectorstore.as_retriever( 101 | search_type="similarity", search_kwargs={"k": 5} 102 | ) 103 | return retriever 104 | 105 | 106 | def _format_docs(docs: List[str]) -> str: 107 | """ 108 | Join all documents into single string, separated 109 | by double newline characters. 110 | """ 111 | return "\n\n".join(doc.page_content for doc in docs) 112 | 113 | 114 | def _get_chain( 115 | retriever: VectorStoreRetriever, prompt: PromptTemplate, llm: BaseChatModel 116 | ) -> Runnable: 117 | """ 118 | Creates a RAG chain using LCEL (LangChain Expression Language). 119 | 120 | Args: 121 | retriever (VectorStoreRetriever): A vector store retriever. 122 | prompt (PromptTemplate): The prompt template that includes the content 123 | retrieved from the vector store and the question. 124 | llm (BaseChatModel): The LLM that will be used to answer the question. 125 | 126 | Returns: 127 | Runnable: A Runnable object that represents the RAG chain. 128 | """ 129 | logger.info("Getting RAG chain") 130 | rag_chain = ( 131 | {"context": retriever | _format_docs, "question": RunnablePassthrough()} 132 | | prompt 133 | | llm 134 | | StrOutputParser() 135 | ) 136 | return rag_chain 137 | 138 | 139 | def get_answer(pdf_string: str, question: str) -> str: 140 | """ 141 | Run the RAG cycle and return the answer to the question. 142 | 143 | Args: 144 | pdf_string (str): The base64 encoded string representation of the PDF document. 145 | question (str): The question you want to ask about the PDF document. 146 | 147 | Returns: 148 | str: The answer to the given question. 
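Example (illustrative sketch; the file name below is hypothetical and a valid GOOGLE_API_KEY must be set):
    >>> import base64
    >>> with open("report.pdf", "rb") as f:
    ...     pdf_b64 = base64.b64encode(f.read()).decode("ascii")
    >>> get_answer(pdf_string=pdf_b64, question="What was the total revenue?")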
149 | """ 150 | logger.info("Initiating RAG QA cycle") 151 | pdf_path = _save_doc_locally(pdf_string, path="./document.pdf") 152 | splits = _load_and_split_doc(pdf_path) 153 | retriever = _get_embedding_retriever(splits) 154 | rag_chain = _get_chain(retriever, prompt, llm) 155 | 156 | logger.info("Invoking RAG chain") 157 | response = rag_chain.invoke(question) 158 | 159 | logger.info("Replying with chain response") 160 | return response 161 | --------------------------------------------------------------------------------
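Usage note: once the stack is running via `docker compose up`, the backend's `/ask` endpoint can also be called directly, without the Streamlit frontend. The sketch below mirrors the payload the frontend builds and the `Question` model in `backend/main.py`; the PDF file name is hypothetical, and the backend is assumed to be reachable on localhost port 8000 as mapped in `docker-compose.yml`.

```python
import base64
import requests

# Base64-encode the PDF, exactly as the Streamlit frontend does before posting it.
with open("document.pdf", "rb") as f:  # hypothetical local file
    encoded_pdf = base64.b64encode(f.read()).decode("ascii")

# The /ask endpoint expects a JSON body matching the Question model: {"question": ..., "pdf": ...}.
payload = {"question": "What is this document about?", "pdf": encoded_pdf}

# The backend container publishes port 8000 (see docker-compose.yml).
response = requests.post("http://localhost:8000/ask", json=payload, timeout=120)
print(response.json())
```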