├── .env.example ├── .gitignore ├── .tool-versions ├── LICENSE ├── README.md ├── assets └── demo.gif ├── backend ├── Dockerfile ├── main.py └── requirements.txt ├── docker-compose.yml ├── frontend ├── Dockerfile ├── app.py └── requirements.txt ├── notebooks └── retrieval.ipynb ├── poetry.lock ├── pyproject.toml └── src ├── __init__.py └── utils.py /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="key" 2 | GOOGLE_API_KEY="key" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | # Mac stuff 163 | **.DS_Store 164 | 165 | # Sample data 166 | data/ 167 | **.pdf -------------------------------------------------------------------------------- /.tool-versions: -------------------------------------------------------------------------------- 1 | poetry 1.4.0 2 | python 3.11.2 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Ruan Pretorius 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rag-qa 2 | 3 | [![GitHub stars](https://img.shields.io/github/stars/ruankie/rag-qa)](https://github.com/ruankie/rag-qa/stargazers) 4 | [![GitHub last commit](https://img.shields.io/github/last-commit/ruankie/rag-qa)](https://github.com/ruankie/rag-qa/commits/main) 5 | 6 | ## Description 7 | 8 | RAG-QA is a free, containerised question-answer framework that allows you to ask questions to your documents in an intuitive way. 
9 | 10 | This app uses a method called retrieval augmented generation (RAG) to retrieve information that is relevant to your question from your uploaded document. It then uses a large language model (LLM) to answer the question with the retrieved context. 11 | 12 | The current implementation uses the following components: 13 | 14 | - **LLM:** [Google Gemini Pro](https://ai.google.dev/) 15 | - **Embedding Model:** [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) 16 | - **Vector Database:** [Chroma DB](https://www.trychroma.com/) 17 | - **Frontend:** [Streamlit](https://streamlit.io/) 18 | - **Backend:** [FastAPI](https://fastapi.tiangolo.com/) 19 | 20 | ## Demo 21 | 22 | > This demo shows the app answering a question about Alphabet Inc's Q3 2023 financial results. Note that the app frontend is shown on the left, the logs on the upper right, and the PDF report on the bottom left. 23 | 24 | ![demo](./assets/demo.gif) 25 | 26 | ## Usage 27 | 28 | > Note: The first time you run this, it might take a while to build all the images and download the embedding models. 29 | 30 | 1. You will need an API key from OpenAI or Google. You can create one here: 31 | - [Google](https://ai.google.dev/) - to use models like Gemini (recommended since it's free) 32 | - [OpenAI](https://platform.openai.com/account/api-keys) - to use models like GPT-4 33 | 2. Set up your API keys in a file called `.env` (see `.env.example` for an example) 34 | 35 | 3. Now set up the backend and frontend: 36 | 37 | ```shell 38 | docker compose up 39 | ``` 40 | 41 | 4. Navigate to the frontend in your browser: [http://localhost:8501/](http://localhost:8501/) 42 | 5. Upload a PDF document that you would like to ask a question about 43 | 6. Ask a question in the chat input section and wait for a response 44 | 45 | ## Development 46 | 47 | 1. (Optional) [Download](https://asdf-vm.com/guide/getting-started.html#_2-download-asdf) and [install](https://asdf-vm.com/guide/getting-started.html#_3-install-asdf) [asdf](https://asdf-vm.com/) on your machine to manage the versions of Python and Poetry used in this project. Once done, run `asdf install` to install the versions specified in `.tool-versions`. Alternatively, install them manually as described below: 48 | 2. [Install Poetry](https://python-poetry.org/docs/#installation) on your machine 49 | 3. [Install Python 3](https://www.python.org/downloads/) on your machine 50 | 4. Create a virtual environment and install the dependencies specified in `pyproject.toml` by running `poetry install`. 51 | 5. Set up your API keys in a file called `.env` (see `.env.example` for an example) 52 | 6. When you run your backend and frontend containers locally, use `docker compose up --build` to ensure the latest changes are reflected in the containers. 53 | -------------------------------------------------------------------------------- /assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruankie/rag-qa/ee92b23da227cc0b61578c06eb33dc42a2075b16/assets/demo.gif -------------------------------------------------------------------------------- /backend/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use an official Python runtime as a parent image 2 | FROM python:3.11 3 | 4 | WORKDIR /app 5 | COPY . 
/app 6 | RUN pip install -r requirements.txt 7 | EXPOSE 8000 8 | 9 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] 10 | -------------------------------------------------------------------------------- /backend/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | RAG-QA backend logic. 3 | """ 4 | 5 | from fastapi import FastAPI 6 | from pydantic import BaseModel 7 | from src.utils import get_answer 8 | 9 | app = FastAPI() 10 | 11 | 12 | class Question(BaseModel): 13 | """Question object used for RAG QA.""" 14 | 15 | question: str 16 | pdf: str 17 | 18 | 19 | @app.get("/") 20 | def root(): 21 | """Check status of backend server.""" 22 | return {"status": "Server running."} 23 | 24 | 25 | @app.post("/ask") 26 | def ask_question(question: Question): 27 | """Run the QA RAG cycle on a document and return the answer.""" 28 | ans = get_answer(pdf_string=question.pdf, question=question.question) 29 | return ans 30 | -------------------------------------------------------------------------------- /backend/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi==0.109.0 2 | uvicorn==0.25.0 3 | langchain==0.1.0 4 | langchain-community==0.0.12 5 | chromadb==0.4.22 6 | sentence-transformers==2.2.2 7 | langchain-google-genai==0.0.6 8 | pypdf==3.17.4 -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | services: 3 | 4 | backend: 5 | build: ./backend 6 | ports: 7 | - "8000:8000" 8 | volumes: 9 | - ./backend/:/app 10 | - ./src/:/app/src 11 | networks: 12 | - my-ragnet 13 | env_file: 14 | - .env 15 | 16 | frontend: 17 | build: ./frontend 18 | ports: 19 | - "8501:8501" 20 | volumes: 21 | - ./frontend/:/app 22 | depends_on: 23 | - backend 24 | networks: 25 | - my-ragnet 26 | 27 | networks: 28 | my-ragnet: 29 | external: false -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | ENV PYTHONUNBUFFERED=1 4 | ENV PYTHONIOENCODING=UTF-8 5 | 6 | WORKDIR /app 7 | COPY . /app 8 | RUN pip install -r requirements.txt 9 | EXPOSE 8501 10 | 11 | CMD ["streamlit", "run", "app.py"] 12 | -------------------------------------------------------------------------------- /frontend/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | RAG-QA frontend logic. 
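The frontend renders a Streamlit chat interface: the user uploads a PDF in the sidebar, the file is base64-encoded, and the question together with the encoded PDF is POSTed to the backend's /ask endpoint; the JSON response is then shown in the chat.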
3 | """ 4 | 5 | import base64 6 | import logging 7 | import requests 8 | import streamlit as st 9 | 10 | # Set up logger 11 | logging.basicConfig(level=logging.INFO) 12 | logger = logging.getLogger() 13 | 14 | # Set page title 15 | st.title("💬 PDF QA") 16 | 17 | # Create a file uploader in the sidebar 18 | st.sidebar.title("Upload a PDF file") 19 | uploaded_file = st.sidebar.file_uploader( 20 | label="Choose a file", type="pdf", accept_multiple_files=False 21 | ) 22 | 23 | # Create a chat interface 24 | message = st.chat_message("assistant") 25 | message.write("Ask a question to your page.") 26 | 27 | 28 | # Once file and question received 29 | prompt = st.chat_input("Ask something") 30 | if prompt and uploaded_file is not None: 31 | # Update user with messages 32 | message = st.chat_message("user") 33 | logger.info(f"User message received: {prompt}") 34 | message.write(prompt) 35 | message = st.chat_message("assistant") 36 | message.write("Thinking...") 37 | 38 | # Encode the PDF file as base64 string 39 | logger.info("Encoding PDF file as base64 string") 40 | encoded_pdf = base64.b64encode(uploaded_file.read()).decode("ascii") 41 | json_payload = {"question": prompt, "pdf": encoded_pdf} 42 | 43 | # Send request and display answer to user 44 | logger.info("Sending request to backend") 45 | response = requests.post("http://backend:8000/ask", json=json_payload, timeout=120) 46 | 47 | logger.info("Showing response from backend") 48 | message.write(response.json()) 49 | -------------------------------------------------------------------------------- /frontend/requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit==1.30.0 2 | requests==2.31.0 3 | -------------------------------------------------------------------------------- /notebooks/retrieval.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"markdown","id":"86fc5bb2-017f-434e-8cd6-53ab214a5604","metadata":{"id":"86fc5bb2-017f-434e-8cd6-53ab214a5604"},"source":["# Quickstart\n","\n","[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/use_cases/question_answering/quickstart.ipynb)\n","\n","Adapted from: https://python.langchain.com/docs/use_cases/question_answering/quickstart"]},{"cell_type":"markdown","id":"51ef48de-70b6-4f43-8e0b-ab9b84c9c02a","metadata":{"id":"51ef48de-70b6-4f43-8e0b-ab9b84c9c02a"},"source":["We need to set environment variable `OPENAI_API_KEY`, which can be done directly or loaded from a `.env` file like so:"]},{"cell_type":"code","execution_count":1,"id":"143787ca-d8e6-4dc9-8281-4374f4d71720","metadata":{"id":"143787ca-d8e6-4dc9-8281-4374f4d71720"},"outputs":[{"data":{"text/plain":["True"]},"execution_count":1,"metadata":{},"output_type":"execute_result"}],"source":["import dotenv\n","dotenv.load_dotenv()"]},{"cell_type":"code","execution_count":2,"id":"af0dc499","metadata":{},"outputs":[],"source":["from src.utils import url_qa"]},{"cell_type":"code","execution_count":3,"id":"c949e560","metadata":{},"outputs":[],"source":["ans = url_qa(\n"," url=\"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n"," question=\"What is Task Decomposition?\"\n",")"]},{"cell_type":"code","execution_count":null,"id":"5f5bf2c4","metadata":{},"outputs":[],"source":[]},{"cell_type":"markdown","id":"842cf72d-abbc-468e-a2eb-022470347727","metadata":{"id":"842cf72d-abbc-468e-a2eb-022470347727"},"source":["## Detailed 
walkthrough\n","\n","Let's go through the above code step-by-step to really understand what's going on."]},{"cell_type":"markdown","id":"ba5daed6","metadata":{"id":"ba5daed6"},"source":["## 1. Indexing: Load\n","\n","We need to first load the blog post contents. We can use [DocumentLoaders](/docs/modules/data_connection/document_loaders/) for this, which are objects that load in data from a source and return a list of [Documents](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html). A `Document` is an object with some `page_content` (str) and `metadata` (dict).\n","\n","In this case we'll use the [WebBaseLoader](/docs/integrations/document_loaders/web_base), which uses `urllib` to load HTML form web URLs and `BeautifulSoup` to parse it to text. We can customize the HTML -> text parsing by passing in parameters to the `BeautifulSoup` parser via `bs_kwargs` (see [BeautifulSoup docs](https://beautiful-soup-4.readthedocs.io/en/latest/#beautifulsoup)). In this case only HTML tags with class \"post-content\", \"post-title\", or \"post-header\" are relevant, so we'll remove all others."]},{"cell_type":"code","execution_count":2,"id":"cf4d5c72","metadata":{"id":"cf4d5c72"},"outputs":[],"source":["import bs4\n","from langchain_community.document_loaders import WebBaseLoader\n","\n","# Only keep post title, headers, and content from the full HTML.\n","bs4_strainer = bs4.SoupStrainer(class_=(\"post-title\", \"post-header\", \"post-content\"))\n","loader = WebBaseLoader(\n"," web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n"," bs_kwargs={\"parse_only\": bs4_strainer},\n",")\n","docs = loader.load()"]},{"cell_type":"code","execution_count":3,"id":"207f87a3-effa-4457-b013-6d233bc7a088","metadata":{"id":"207f87a3-effa-4457-b013-6d233bc7a088","outputId":"15f3af0f-0289-4d1c-fa02-b5aa6fa0b1e7"},"outputs":[{"data":{"text/plain":["42824"]},"execution_count":3,"metadata":{},"output_type":"execute_result"}],"source":["len(docs[0].page_content)"]},{"cell_type":"code","execution_count":4,"id":"52469796-5ce4-4c12-bd2a-a903872dac33","metadata":{"id":"52469796-5ce4-4c12-bd2a-a903872dac33","outputId":"6e08cb28-a40c-42a4-e9a6-1bef9bb4f159"},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","\n"," LLM Powered Autonomous Agents\n"," \n","Date: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\n","\n","\n","Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. 
The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\n","Agent System Overview#\n","In\n"]}],"source":["print(docs[0].page_content[:500])"]},{"cell_type":"code","execution_count":null,"id":"29258be4","metadata":{},"outputs":[],"source":[]},{"cell_type":"code","execution_count":8,"id":"b32b7f0f","metadata":{},"outputs":[],"source":["from langchain_community.document_loaders import PyPDFLoader\n","from langchain.text_splitter import RecursiveCharacterTextSplitter\n","\n","loader = PyPDFLoader(\"https://arxiv.org/pdf/2103.15348.pdf\")\n","all_splits = loader.load_and_split(\n"," text_splitter=RecursiveCharacterTextSplitter(\n"," chunk_size=1000, chunk_overlap=200, add_start_index=True\n"," )\n",")"]},{"cell_type":"code","execution_count":9,"id":"8d744e6b","metadata":{},"outputs":[{"data":{"text/plain":["57"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["len(all_splits)"]},{"cell_type":"code","execution_count":null,"id":"635996d1","metadata":{},"outputs":[],"source":[]},{"cell_type":"markdown","id":"ee5c6556-56be-4067-adbc-98b5aa19ef6e","metadata":{"id":"ee5c6556-56be-4067-adbc-98b5aa19ef6e"},"source":["### Go deeper\n","`DocumentLoader`: Object that loads data from a source as list of `Documents`.\n","- [Docs](/docs/modules/data_connection/document_loaders/): Detailed documentation on how to use `DocumentLoaders`.\n","- [Integrations](/docs/integrations/document_loaders/): 160+ integrations to choose from.\n","- [Interface](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.base.BaseLoader.html): API reference  for the base interface."]},{"cell_type":"markdown","id":"fd2cc9a7","metadata":{"id":"fd2cc9a7"},"source":["## 2. Indexing: Split\n","\n","Our loaded document is over 42k characters long. This is too long to fit in the context window of many models. Even for those models that could fit the full post in their context window, models can struggle to find information in very long inputs.\n","\n","To handle this we'll split the `Document` into chunks for embedding and vector storage. This should help us retrieve only the most relevant bits of the blog post at run time.\n","\n","In this case we'll split our documents into chunks of 1000 characters with 200 characters of overlap between chunks. The overlap helps mitigate the possibility of separating a statement from important context related to it. We use the [RecursiveCharacterTextSplitter](/docs/modules/data_connection/document_transformers/recursive_text_splitter), which will recursively split the document using common separators like new lines until each chunk is the appropriate size. 
This is the recommended text splitter for generic text use cases.\n","\n","We set `add_start_index=True` so that the character index at which each split Document starts within the initial Document is preserved as metadata attribute \"start_index\"."]},{"cell_type":"code","execution_count":5,"id":"4b11c01d","metadata":{"id":"4b11c01d"},"outputs":[],"source":["from langchain.text_splitter import RecursiveCharacterTextSplitter\n","\n","text_splitter = RecursiveCharacterTextSplitter(\n"," chunk_size=1000, chunk_overlap=200, add_start_index=True\n",")\n","all_splits = text_splitter.split_documents(docs)"]},{"cell_type":"code","execution_count":6,"id":"3741eb67-9caf-40f2-a001-62f49349bff5","metadata":{"id":"3741eb67-9caf-40f2-a001-62f49349bff5","outputId":"21939afc-3a10-43aa-d415-569b23fe31e3"},"outputs":[{"data":{"text/plain":["66"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["len(all_splits)"]},{"cell_type":"code","execution_count":7,"id":"f868d0e5-5670-4d54-b562-f50265e907f4","metadata":{"id":"f868d0e5-5670-4d54-b562-f50265e907f4","outputId":"805211a6-ace2-450c-b0d0-0218756484c0"},"outputs":[{"data":{"text/plain":["969"]},"execution_count":7,"metadata":{},"output_type":"execute_result"}],"source":["len(all_splits[0].page_content)"]},{"cell_type":"code","execution_count":8,"id":"5c9e5f27-c8e3-4ca7-8a8e-45c5de2901cc","metadata":{"id":"5c9e5f27-c8e3-4ca7-8a8e-45c5de2901cc","outputId":"349a2950-ab87-49a7-dd6d-e7bf4a29e9f5"},"outputs":[{"data":{"text/plain":["{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',\n"," 'start_index': 7056}"]},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":["all_splits[10].metadata"]},{"cell_type":"markdown","id":"0a33bd4d","metadata":{"id":"0a33bd4d"},"source":["### Go deeper\n","\n","`TextSplitter`: Object that splits a list of `Document`s into smaller chunks. Subclass of `DocumentTransformer`s.\n","- Explore `Context-aware splitters`, which keep the location (\"context\") of each split in the original `Document`:\n"," - [Markdown files](/docs/modules/data_connection/document_transformers/markdown_header_metadata)\n"," - [Code (py or js)](/docs/integrations/document_loaders/source_code)\n"," - [Scientific papers](/docs/integrations/document_loaders/grobid)\n","- [Interface](https://api.python.langchain.com/en/latest/text_splitter/langchain.text_splitter.TextSplitter.html): API reference for the base interface.\n","\n","`DocumentTransformer`: Object that performs a transformation on a list of `Document`s.\n","- [Docs](/docs/modules/data_connection/document_transformers/): Detailed documentation on how to use `DocumentTransformers`\n","- [Integrations](/docs/integrations/document_transformers/)\n","- [Interface](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.transformers.BaseDocumentTransformer.html): API reference for the base interface.\n"]},{"cell_type":"markdown","id":"46547031-2352-4321-9970-d6ea27285c2e","metadata":{"id":"46547031-2352-4321-9970-d6ea27285c2e"},"source":["## 3. Indexing: Store\n","\n","Now we need to index our 66 text chunks so that we can search over them at runtime. The most common way to do this is to embed the contents of each document split and insert these embeddings into a vector database (or vector store). When we want to search over our splits, we take a text search query, embed it, and perform some sort of \"similarity\" search to identify the stored splits with the most similar embeddings to our query embedding. 
The simplest similarity measure is cosine similarity — we measure the cosine of the angle between each pair of embeddings (which are high dimensional vectors).\n","\n","We can embed and store all of our document splits in a single command using the [Chroma](/docs/integrations/vectorstores/chroma) vector store and [OpenAIEmbeddings](/docs/integrations/text_embedding/openai) model."]},{"cell_type":"code","execution_count":10,"id":"e9c302c8","metadata":{"id":"e9c302c8"},"outputs":[{"name":"stderr","output_type":"stream","text":["/Users/ruan/Library/Caches/pypoetry/virtualenvs/rag-qa-jxFCGv5h-py3.11/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n"," from .autonotebook import tqdm as notebook_tqdm\n"]}],"source":["from langchain_community.vectorstores import Chroma\n","# from langchain_openai import OpenAIEmbeddings\n","# from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings\n","from langchain_community.embeddings import HuggingFaceEmbeddings\n","\n","embedding_function = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n","vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding_function)"]},{"cell_type":"markdown","id":"dc6f22b0","metadata":{"id":"dc6f22b0"},"source":["### Go deeper\n","`Embeddings`: Wrapper around a text embedding model, used for converting text to embeddings.\n","- [Docs](/docs/modules/data_connection/text_embedding): Detailed documentation on how to use embeddings.\n","- [Integrations](/docs/integrations/text_embedding/): 30+ integrations to choose from.\n","- [Interface](https://api.python.langchain.com/en/latest/embeddings/langchain_core.embeddings.Embeddings.html): API reference for the base interface.\n","\n","`VectorStore`: Wrapper around a vector database, used for storing and querying embeddings.\n","- [Docs](/docs/modules/data_connection/vectorstores/): Detailed documentation on how to use vector stores.\n","- [Integrations](/docs/integrations/vectorstores/): 40+ integrations to choose from.\n","- [Interface](https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStore.html): API reference for the base interface.\n","\n","This completes the **Indexing** portion of the pipeline. At this point we have a query-able vector store containing the chunked contents of our blog post. Given a user question, we should ideally be able to return the snippets of the blog post that answer the question."]},{"cell_type":"markdown","id":"70d64d40-e475-43d9-b64c-925922bb5ef7","metadata":{"id":"70d64d40-e475-43d9-b64c-925922bb5ef7"},"source":["## 4. Retrieval and Generation: Retrieve\n","\n","Now let's write the actual application logic. We want to create a simple application that takes a user question, searches for documents relevant to that question, passes the retrieved documents and initial question to a model, and returns an answer.\n","\n","First we need to define our logic for searching over documents. LangChain defines a [Retriever](/docs/modules/data_connection/retrievers/) interface which wraps an index that can return relevant `Documents` given a string query.\n","\n","The most common type of `Retriever` is the [VectorStoreRetriever](/docs/modules/data_connection/retrievers/vectorstore), which uses the similarity search capabilities of a vector store to facillitate retrieval. 
Any `VectorStore` can easily be turned into a `Retriever` with `VectorStore.as_retriever()`:"]},{"cell_type":"code","execution_count":11,"id":"4414df0d-5d43-46d0-85a9-5f47be0dd099","metadata":{"id":"4414df0d-5d43-46d0-85a9-5f47be0dd099"},"outputs":[],"source":["retriever = vectorstore.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": 6})"]},{"cell_type":"code","execution_count":15,"id":"e2c26b7d","metadata":{"id":"e2c26b7d"},"outputs":[],"source":["# retrieved_docs = retriever.invoke(\"What are the approaches to Task Decomposition?\")\n","retrieved_docs = retriever.invoke(\"What does the abstract at the beginning of the document say?\")"]},{"cell_type":"code","execution_count":16,"id":"8684291d-0f5e-453a-8d3e-ff9feea765d0","metadata":{"id":"8684291d-0f5e-453a-8d3e-ff9feea765d0","outputId":"2f3cd364-94c5-4ee1-c928-08d27690d3b4"},"outputs":[{"data":{"text/plain":["6"]},"execution_count":16,"metadata":{},"output_type":"execute_result"}],"source":["len(retrieved_docs)"]},{"cell_type":"code","execution_count":17,"id":"9a5dc074-816d-409a-b005-ab4eddfd76af","metadata":{"id":"9a5dc074-816d-409a-b005-ab4eddfd76af","outputId":"0c60575a-5dce-4dc6-94d8-11312b04ee13"},"outputs":[{"name":"stdout","output_type":"stream","text":["nition accuracy.\n","As shown in Figure 4 (a), the\n","document contains columns of text\n","written vertically15, a common style\n","in Japanese. Due to scanning noise\n","and archaic printing technology, the\n","columns can be skewed or have vari-\n","able widths, and hence cannot be eas-\n","ily identified via rule-based methods.\n","Within each column, words are sepa-\n","rated by white spaces of variable size,\n","and the vertical positions of objects\n","can be an indicator of their layout\n","type.\n","15A document page consists of eight rows like this. For simplicity we skip the row\n","segmentation discussion and refer readers to the source code when available.\n"]}],"source":["print(retrieved_docs[0].page_content)"]},{"cell_type":"markdown","id":"5d5a113b","metadata":{"id":"5d5a113b"},"source":["### Go deeper\n","Vector stores are commonly used for retrieval, but there are other ways to do retrieval, too.\n","\n","`Retriever`: An object that returns `Document`s given a text query\n","- [Docs](/docs/modules/data_connection/retrievers/): Further documentation on the interface and built-in retrieval techniques. Some of which include:\n"," - `MultiQueryRetriever` [generates variants of the input question](/docs/modules/data_connection/retrievers/MultiQueryRetriever) to improve retrieval hit rate.\n"," - `MultiVectorRetriever` (diagram below) instead generates [variants of the embeddings](/docs/modules/data_connection/retrievers/multi_vector), also in order to improve retrieval hit rate.\n"," - `Max marginal relevance` selects for [relevance and diversity](https://www.cs.cmu.edu/~jgc/publication/The_Use_MMR_Diversity_Based_LTMIR_1998.pdf) among the retrieved documents to avoid passing in duplicate context.\n"," - Documents can be filtered during vector store retrieval using [`metadata` filters](/docs/use_cases/question_answering/document-context-aware-QA).\n","- [Integrations](/docs/integrations/retrievers/): Integrations with retrieval services.\n","- [Interface](https://api.python.langchain.com/en/latest/retrievers/langchain_core.retrievers.BaseRetriever.html): API reference for the base interface."]},{"cell_type":"markdown","id":"415d6824","metadata":{"id":"415d6824"},"source":["## 5. 
Retrieval and Generation: Generate\n","\n","Let's put it all together into a chain that takes a question, retrieves relevant documents, constructs a prompt, passes that to a model, and parses the output.\n","\n","We'll use the gpt-3.5-turbo OpenAI chat model, but any LangChain `LLM` or `ChatModel` could be substituted in."]},{"cell_type":"code","execution_count":18,"id":"d34d998c-9abf-4e01-a4ad-06dadfcf131c","metadata":{"id":"d34d998c-9abf-4e01-a4ad-06dadfcf131c"},"outputs":[],"source":["# from langchain_openai import ChatOpenAI\n","# llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n","\n","from langchain_google_genai import ChatGoogleGenerativeAI\n","\n","llm = ChatGoogleGenerativeAI(model=\"gemini-pro\", temperature=0)"]},{"cell_type":"markdown","id":"bc826723-36fc-45d1-a3ef-df8c2c8471a8","metadata":{"id":"bc826723-36fc-45d1-a3ef-df8c2c8471a8"},"source":["We'll use a prompt for RAG that is checked into the LangChain prompt hub ([here](https://smith.langchain.com/hub/rlm/rag-prompt))."]},{"cell_type":"code","execution_count":19,"id":"bede955b-9aeb-4fd3-964d-8e43f214ce70","metadata":{"id":"bede955b-9aeb-4fd3-964d-8e43f214ce70"},"outputs":[],"source":["# from langchain import hub\n","# prompt = hub.pull(\"rlm/rag-prompt\")\n","\n","from langchain_core.prompts import PromptTemplate\n","\n","template = \"\"\"Use the following pieces of context to answer the question at the end.\n","If you don't know the answer, just say that you don't know, don't try to make up an answer.\n","Keep the answer as concise as possible.\n","\n","{context}\n","\n","Question: {question}\n","\n","Helpful Answer:\"\"\"\n","prompt = PromptTemplate.from_template(template)"]},{"cell_type":"code","execution_count":20,"id":"11c35354-f275-47ec-9f72-ebd5c23731eb","metadata":{"id":"11c35354-f275-47ec-9f72-ebd5c23731eb","outputId":"fae2bead-cc5c-4f56-b335-9cd661fe9420"},"outputs":[{"data":{"text/plain":["[HumanMessage(content=\"Use the following pieces of context to answer the question at the end.\\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\nKeep the answer as concise as possible.\\n\\nfiller context\\n\\nQuestion: filler question\\n\\nHelpful Answer:\")]"]},"execution_count":20,"metadata":{},"output_type":"execute_result"}],"source":["example_messages = prompt.invoke(\n"," {\"context\": \"filler context\", \"question\": \"filler question\"}\n",").to_messages()\n","example_messages"]},{"cell_type":"code","execution_count":21,"id":"2ccc50fa-5fa2-4f80-8685-58ec2255523a","metadata":{"id":"2ccc50fa-5fa2-4f80-8685-58ec2255523a","outputId":"7c0cf250-44e2-48ce-c85b-67f7a67438ec"},"outputs":[{"name":"stdout","output_type":"stream","text":["Use the following pieces of context to answer the question at the end.\n","If you don't know the answer, just say that you don't know, don't try to make up an answer.\n","Keep the answer as concise as possible.\n","\n","filler context\n","\n","Question: filler question\n","\n","Helpful Answer:\n"]}],"source":["print(example_messages[0].content)"]},{"cell_type":"markdown","id":"51f9a210-1eee-4054-99d7-9d9ddf7e3593","metadata":{"id":"51f9a210-1eee-4054-99d7-9d9ddf7e3593"},"source":["We'll use the [LCEL Runnable](/docs/expression_language/) protocol to define the chain, allowing us to\n","- pipe together components and functions in a transparent way\n","- automatically trace our chain in LangSmith\n","- get streaming, async, and batched calling out of the 
box"]},{"cell_type":"code","execution_count":22,"id":"99fa1aec","metadata":{"id":"99fa1aec"},"outputs":[],"source":["from langchain_core.output_parsers import StrOutputParser\n","from langchain_core.runnables import RunnablePassthrough\n","\n","\n","def format_docs(docs):\n"," return \"\\n\\n\".join(doc.page_content for doc in docs)\n","\n","\n","rag_chain = (\n"," {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n"," | prompt\n"," | llm\n"," | StrOutputParser()\n",")"]},{"cell_type":"code","execution_count":23,"id":"8655a152-d7cf-466f-b1bc-fbff9ae2b889","metadata":{"id":"8655a152-d7cf-466f-b1bc-fbff9ae2b889","outputId":"2810e04c-4959-4ce3-eb71-caf987daaf13"},"outputs":[{"name":"stdout","output_type":"stream","text":["LayoutParser provides a comprehensive toolkit for deep learning-based document image analysis. It is easy to install and use, supports high-level customization, and enables easy labeling and training of DL models on unique document image datasets. The LayoutParser community platform facilitates sharing DL models and DIA pipelines, inviting discussion and promoting code reproducibility and reusability. The LayoutParser team is committed to keeping the library updated continuously and bringing the most recent advances in DL-based DIA to a diverse audience of end-users."]}],"source":["for chunk in rag_chain.stream(\"What is the paper conclusion?\"):\n"," print(chunk, end=\"\", flush=True)"]},{"cell_type":"markdown","id":"2c000e5f-2b7f-4eb9-8876-9f4b186b4a08","metadata":{"id":"2c000e5f-2b7f-4eb9-8876-9f4b186b4a08"},"source":[":::tip\n","\n","Check out the [LangSmith trace](https://smith.langchain.com/public/1799e8db-8a6d-4eb2-84d5-46e8d7d5a99b/r)\n","\n",":::"]},{"cell_type":"markdown","id":"f7d52c84","metadata":{"id":"f7d52c84"},"source":["### Go deeper\n","\n","#### Choosing a model\n","`ChatModel`: An LLM-backed chat model. Takes in a sequence of messages and returns a message.\n","- [Docs](/docs/modules/model_io/chat/): Detailed documentation on\n","- [Integrations](/docs/integrations/chat/): 25+ integrations to choose from.\n","- [Interface](https://api.python.langchain.com/en/latest/language_models/langchain_core.language_models.chat_models.BaseChatModel.html): API reference for the base interface.\n","\n","`LLM`: A text-in-text-out LLM. Takes in a string and returns a string.\n","- [Docs](/docs/modules/model_io/llms)\n","- [Integrations](/docs/integrations/llms): 75+ integrations to choose from.\n","- [Interface](https://api.python.langchain.com/en/latest/language_models/langchain_core.language_models.llms.BaseLLM.html): API reference for the base interface.\n","\n","See a guide on RAG with locally-running models [here](/docs/use_cases/question_answering/local_retrieval_qa)."]},{"cell_type":"markdown","id":"fa82f437","metadata":{"id":"fa82f437"},"source":["#### Customizing the prompt\n","\n","As shown above, we can load prompts (e.g., [this RAG prompt](https://smith.langchain.com/hub/rlm/rag-prompt)) from the prompt hub. The prompt can also be easily customized:"]},{"cell_type":"code","execution_count":21,"id":"e4fee704","metadata":{"id":"e4fee704","outputId":"9ce63bf2-bfce-4711-892f-4fdb3cdd322d"},"outputs":[{"data":{"text/plain":["'Task decomposition is the process of breaking down a complex task into smaller, more manageable subtasks. This can be done using a variety of methods, such as chain of thought (CoT), tree of thoughts (ToT), or simply by using task-specific instructions. 
Thanks for asking!'"]},"execution_count":21,"metadata":{},"output_type":"execute_result"}],"source":["from langchain_core.prompts import PromptTemplate\n","\n","template = \"\"\"Use the following pieces of context to answer the question at the end.\n","If you don't know the answer, just say that you don't know, don't try to make up an answer.\n","Keep the answer as concise as possible.\n","\n","{context}\n","\n","Question: {question}\n","\n","Helpful Answer:\"\"\"\n","custom_rag_prompt = PromptTemplate.from_template(template)\n","\n","rag_chain = (\n"," {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n"," | custom_rag_prompt\n"," | llm\n"," | StrOutputParser()\n",")\n","\n","rag_chain.invoke(\"What is Task Decomposition?\")"]},{"cell_type":"markdown","id":"94b952e6-dc4b-415b-9cf3-1ad333e48366","metadata":{"id":"94b952e6-dc4b-415b-9cf3-1ad333e48366"},"source":[":::tip\n","\n","Check out the [LangSmith trace](https://smith.langchain.com/public/da23c4d8-3b33-47fd-84df-a3a582eedf84/r)\n","\n",":::"]},{"cell_type":"markdown","id":"580e18de-132d-4009-ba67-4aaf2c7717a2","metadata":{"id":"580e18de-132d-4009-ba67-4aaf2c7717a2"},"source":["## Next steps\n","\n","That's a lot of content we've covered in a short amount of time. There's plenty of features, integrations, and extensions to explore in each of the above sections. Along from the **Go deeper** sources mentioned above, good next steps include:\n","\n","- [Return sources](/docs/use_cases/question_answering/sources): Learn how to return source documents\n","- [Streaming](/docs/use_cases/question_answering/streaming): Learn how to stream outputs and intermediate steps\n","- [Add chat history](/docs/use_cases/question_answering/chat_history): Learn how to add chat history to your app"]}],"metadata":{"colab":{"provenance":[{"file_id":"https://github.com/langchain-ai/langchain/blob/master/docs/docs/use_cases/question_answering/quickstart.ipynb","timestamp":1705256074169}]},"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.2"}},"nbformat":4,"nbformat_minor":5} 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "rag-qa" 3 | version = "0.1.0" 4 | description = "A containerised QA framework to ask questions to your documents." 
5 | authors = ["Ruan Pretorius"] 6 | license = "MIT" 7 | readme = "README.md" 8 | packages = [ 9 | { include = "src" }, 10 | ] 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.11" 14 | python-dotenv = "^1.0.0" 15 | jupyterlab = "^4.0.10" 16 | fastapi = "^0.109.0" 17 | uvicorn = "^0.25.0" 18 | langchain = "^0.1.0" 19 | streamlit = "^1.30.0" 20 | requests = "^2.31.0" 21 | langchain-community = "^0.0.12" 22 | chromadb = "^0.4.22" 23 | bs4 = "^0.0.1" 24 | langchainhub = "^0.1.14" 25 | langchain-openai = "^0.0.2.post1" 26 | sentence-transformers = "^2.2.2" 27 | langchain-google-genai = "^0.0.6" 28 | pypdf = "^3.17.4" 29 | python-multipart = "^0.0.6" 30 | 31 | [tool.poetry.group.dev.dependencies] 32 | pytest = "^7.3.1" 33 | black = "^23.3.0" 34 | pylint = "^2.17.4" 35 | 36 | [build-system] 37 | requires = ["poetry-core"] 38 | build-backend = "poetry.core.masonry.api" 39 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruankie/rag-qa/ee92b23da227cc0b61578c06eb33dc42a2075b16/src/__init__.py -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions used in the RAG app. 3 | """ 4 | 5 | import base64 6 | import logging 7 | from typing import List 8 | 9 | from langchain_core.documents import Document 10 | from langchain_core.vectorstores import VectorStoreRetriever 11 | from langchain_core.language_models.chat_models import BaseChatModel 12 | from langchain_core.runnables.base import Runnable 13 | from langchain_core.output_parsers import StrOutputParser 14 | from langchain_core.runnables import RunnablePassthrough 15 | from langchain_core.prompts import PromptTemplate 16 | from langchain.text_splitter import RecursiveCharacterTextSplitter 17 | from langchain_community.vectorstores import Chroma 18 | from langchain_community.embeddings import HuggingFaceEmbeddings 19 | from langchain_community.document_loaders import PyPDFLoader 20 | from langchain_google_genai import ChatGoogleGenerativeAI 21 | 22 | # Set up logger 23 | logging.basicConfig(level=logging.INFO) 24 | logger = logging.getLogger() 25 | 26 | TEMPLATE = """Use the following pieces of context to answer the question at the end. 27 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 28 | Be helpful in your answer and be sure to reference the following context when possible. 29 | 30 | {context} 31 | 32 | Question: {question} 33 | 34 | Answer:""" 35 | 36 | prompt = PromptTemplate.from_template(TEMPLATE) 37 | llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0) 38 | 39 | 40 | def _save_doc_locally(pdf_string: str, path: str = "./document.pdf") -> str: 41 | """ 42 | Decodes the base64 string representation of the pdf document 43 | and saves it to a local path. 44 | 45 | Args: 46 | pdf_string (str): Base64 string representation of the pdf document. 47 | path (str): Local path to save pdf document to. 48 | 49 | Returns: 50 | str: Local path where pdf document was saved to. 
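Raises:
    TypeError: If the decoded bytes do not start with the %PDF magic number (i.e. the payload is not a valid PDF).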
51 | """ 52 | logger.info("Saving pdf document locally") 53 | # Decode the base64 string to bytes 54 | decoded_bytes = base64.b64decode(pdf_string) 55 | if decoded_bytes[0:4] != b"%PDF": 56 | raise TypeError("Invalid PDF file received.") 57 | 58 | # Save pdf file 59 | with open(path, "wb") as _f: 60 | _f.write(decoded_bytes) 61 | 62 | return path 63 | 64 | 65 | def _load_and_split_doc(pdf_path: str) -> List[Document]: 66 | """ 67 | Loads the content of the pdf document and separates it 68 | into chunks for embedding. 69 | 70 | Args: 71 | pdf_path (str): Local path to pdf document. 72 | 73 | Returns: 74 | List[Document]: Document chunks (splits). 75 | """ 76 | logger.info("Splitting pdf document into chunks") 77 | loader = PyPDFLoader(pdf_path) 78 | splits = loader.load_and_split( 79 | text_splitter=RecursiveCharacterTextSplitter( 80 | chunk_size=500, chunk_overlap=50, add_start_index=True 81 | ) 82 | ) 83 | return splits 84 | 85 | 86 | def _get_embedding_retriever(splits: List[Document]) -> VectorStoreRetriever: 87 | """ 88 | Embeds all the document chunks/splits and returns the 89 | appropriate similarity retriever for the vector database used. 90 | 91 | Args: 92 | splits (List[Document]): Document chunks/splits. 93 | 94 | Returns: 95 | VectorStoreRetriever: Vector store retriever. 96 | """ 97 | logger.info("Embedding document chunks") 98 | embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") 99 | vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_function) 100 | retriever = vectorstore.as_retriever( 101 | search_type="similarity", search_kwargs={"k": 5} 102 | ) 103 | return retriever 104 | 105 | 106 | def _format_docs(docs: List[str]) -> str: 107 | """ 108 | Join all documents into single string, separated 109 | by double newline characters. 110 | """ 111 | return "\n\n".join(doc.page_content for doc in docs) 112 | 113 | 114 | def _get_chain( 115 | retriever: VectorStoreRetriever, prompt: PromptTemplate, llm: BaseChatModel 116 | ) -> Runnable: 117 | """ 118 | Creates a RAG chain using LCEL (LangChain Expression Language). 119 | 120 | Args: 121 | retriever (VectorStoreRetriever): A vector store retriever. 122 | prompt (PromptTemplate): The prompt template that includes the content 123 | retrieved from the vector store and the question. 124 | llm (BaseChatModel): The LLM that will be used to answer the question. 125 | 126 | Returns: 127 | Runnable: A Runnable object that represents the RAG chain. 128 | """ 129 | logger.info("Getting RAG chain") 130 | rag_chain = ( 131 | {"context": retriever | _format_docs, "question": RunnablePassthrough()} 132 | | prompt 133 | | llm 134 | | StrOutputParser() 135 | ) 136 | return rag_chain 137 | 138 | 139 | def get_answer(pdf_string: str, question: str) -> str: 140 | """ 141 | Run the RAG cycle and return the answer to the question. 142 | 143 | Args: 144 | pdf_string (str): The base64 encoded string representation of the PDF document. 145 | question (str): The question you want to ask about the PDF document. 146 | 147 | Returns: 148 | str: The answer to the given question. 
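Example (illustrative sketch; the file name below is hypothetical and a valid GOOGLE_API_KEY must be set):
    >>> import base64
    >>> with open("report.pdf", "rb") as f:
    ...     pdf_b64 = base64.b64encode(f.read()).decode("ascii")
    >>> get_answer(pdf_string=pdf_b64, question="What was the total revenue?")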
149 | """ 150 | logger.info("Initiating RAG QA cycle") 151 | pdf_path = _save_doc_locally(pdf_string, path="./document.pdf") 152 | splits = _load_and_split_doc(pdf_path) 153 | retriever = _get_embedding_retriever(splits) 154 | rag_chain = _get_chain(retriever, prompt, llm) 155 | 156 | logger.info("Invoking RAG chain") 157 | response = rag_chain.invoke(question) 158 | 159 | logger.info("Replying with chain response") 160 | return response 161 | --------------------------------------------------------------------------------
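Usage note: once the stack is running via `docker compose up`, the backend's `/ask` endpoint can also be called directly, without the Streamlit frontend. The sketch below mirrors the payload the frontend builds and the `Question` model in `backend/main.py`; the PDF file name is hypothetical, and the backend is assumed to be reachable on localhost port 8000 as mapped in `docker-compose.yml`.

```python
import base64
import requests

# Base64-encode the PDF, exactly as the Streamlit frontend does before posting it.
with open("document.pdf", "rb") as f:  # hypothetical local file
    encoded_pdf = base64.b64encode(f.read()).decode("ascii")

# The /ask endpoint expects a JSON body matching the Question model: {"question": ..., "pdf": ...}.
payload = {"question": "What is this document about?", "pdf": encoded_pdf}

# The backend container publishes port 8000 (see docker-compose.yml).
response = requests.post("http://localhost:8000/ask", json=payload, timeout=120)
print(response.json())
```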