├── pdf_bot ├── __init__.py ├── config.py ├── pull_model.py ├── app.py └── pdf_helper.py ├── start.sh ├── install.sh ├── docker-build.sh ├── .demo-stuff ├── logo.jpeg └── pdf-bot.jpg ├── requirements.txt ├── Dockerfile ├── docker-compose.yml ├── Makefile ├── docker-compose-gpu.yml ├── Makeme.bat ├── .github └── workflows │ └── push-docker-image.yml ├── .gitignore └── README.md /pdf_bot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | streamlit run pdf_bot/app.py -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip install -r requirements.txt -------------------------------------------------------------------------------- /docker-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker build --rm --file Dockerfile -t amithkoujalgi/pdf-bot:1.0.0 . -------------------------------------------------------------------------------- /.demo-stuff/logo.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amithkoujalgi/ollama-pdf-bot/HEAD/.demo-stuff/logo.jpeg -------------------------------------------------------------------------------- /.demo-stuff/pdf-bot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amithkoujalgi/ollama-pdf-bot/HEAD/.demo-stuff/pdf-bot.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.334 2 | streamlit==1.28.1 3 | replicate==0.18.1 4 | pymupdf==1.23.6 5 | huggingface-hub==0.17.3 6 | faiss-cpu==1.7.4 7 | sentence-transformers==2.2.2 8 | requests==2.31.0 9 | streamlit_js_eval==0.1.5 -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8.18 2 | 3 | WORKDIR /app 4 | 5 | COPY ./requirements.txt /app/requirements.txt 6 | RUN pip install -r /app/requirements.txt 7 | 8 | COPY ./pdf_bot /app/pdf_bot 9 | 10 | RUN printf '#!/bin/bash \n\ 11 | python /app/pdf_bot/pull_model.py \n\ 12 | streamlit run /app/pdf_bot/app.py' >> /app/pdf_bot/run.sh 13 | 14 | CMD ["bash", "/app/pdf_bot/run.sh"] -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | ollama: 4 | image: ollama/ollama:latest 5 | ports: 6 | - 11434:11434 7 | volumes: 8 | - ~/ollama:/root/.ollama 9 | networks: 10 | - net 11 | 12 | app: 13 | image: amithkoujalgi/pdf-bot:1.0.0 14 | ports: 15 | - 8501:8501 16 | environment: 17 | - OLLAMA_API_BASE_URL=http://ollama:11434 18 | - MODEL=llama2 19 | networks: 20 | - net 21 | 22 | networks: 23 | net: -------------------------------------------------------------------------------- /pdf_bot/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | base_url = os.environ.get('OLLAMA_API_BASE_URL', "http://192.168.29.223:11434") 
4 | if base_url.endswith('/'): 5 | base_url = base_url.rstrip('/') 6 | 7 | 8 | class Config: 9 | MODEL = os.environ.get('MODEL', "llama2") 10 | EMBEDDING_MODEL_NAME = os.environ.get('EMBEDDING_MODEL_NAME', "all-MiniLM-L6-v2") 11 | OLLAMA_API_BASE_URL = base_url 12 | HUGGING_FACE_EMBEDDINGS_DEVICE_TYPE = os.environ.get('HUGGING_FACE_EMBEDDINGS_DEVICE_TYPE', 13 | "cpu") 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | start: 2 | docker-compose -f ./docker-compose.yml down -v; \ 3 | docker-compose -f ./docker-compose.yml rm -fsv; \ 4 | docker-compose -f ./docker-compose.yml up --remove-orphans; 5 | 6 | start-gpu: 7 | docker-compose -f ./docker-compose-gpu.yml down -v; \ 8 | docker-compose -f ./docker-compose-gpu.yml rm -fsv; \ 9 | docker-compose -f ./docker-compose-gpu.yml up --remove-orphans; 10 | stop: 11 | docker-compose -f ./docker-compose.yml down -v; \ 12 | docker-compose -f ./docker-compose-gpu.yml down -v; \ 13 | docker-compose -f ./docker-compose.yml rm -fsv; \ 14 | docker-compose -f ./docker-compose-gpu.yml rm -fsv; -------------------------------------------------------------------------------- /docker-compose-gpu.yml: -------------------------------------------------------------------------------- 1 | services: 2 | 3 | ollama: 4 | image: ollama/ollama:latest 5 | ports: 6 | - 11434:11434 7 | deploy: 8 | resources: 9 | reservations: 10 | devices: 11 | - driver: nvidia 12 | count: 1 13 | capabilities: [ gpu ] 14 | volumes: 15 | - ~/ollama:/root/.ollama 16 | networks: 17 | - net 18 | 19 | app: 20 | image: amithkoujalgi/pdf-bot:1.0.0 21 | ports: 22 | - 8501:8501 23 | environment: 24 | - OLLAMA_API_BASE_URL=http://ollama:11434 25 | - MODEL=llama3.2 26 | networks: 27 | - net 28 | 29 | networks: 30 | net: -------------------------------------------------------------------------------- /Makeme.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | if "%1" == "start" goto start 3 | if "%1" == "start-gpu" goto start-gpu 4 | if "%1" == "stop" goto stop 5 | echo Invalid argument. Use "start", "start-gpu", or "stop". 6 | goto end 7 | 8 | :start 9 | docker-compose -f ./docker-compose.yml down -v 10 | docker-compose -f ./docker-compose.yml rm -fsv 11 | docker-compose -f ./docker-compose.yml up --remove-orphans 12 | goto end 13 | 14 | :start-gpu 15 | docker-compose -f ./docker-compose-gpu.yml down -v 16 | docker-compose -f ./docker-compose-gpu.yml rm -fsv 17 | docker-compose -f ./docker-compose-gpu.yml up --remove-orphans 18 | goto end 19 | 20 | :stop 21 | docker-compose -f ./docker-compose.yml down -v 22 | docker-compose -f ./docker-compose-gpu.yml down -v 23 | docker-compose -f ./docker-compose.yml rm -fsv 24 | docker-compose -f ./docker-compose-gpu.yml rm -fsv 25 | goto end 26 | 27 | :end -------------------------------------------------------------------------------- /.github/workflows/push-docker-image.yml: -------------------------------------------------------------------------------- 1 | name: CI - Build and Push Docker Image 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.ref }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | push_to_registry: 15 | name: Push Docker image to Docker Hub 16 | runs-on: ubuntu-latest 17 | env: 18 | CONTEXT_SUBDIR: . 
19 | steps: 20 | - name: Check out the repo 21 | uses: actions/checkout@v4 22 | 23 | - name: Log in to Docker Hub 24 | run: docker login -u amithkoujalgi -p ${{ secrets.DOCKERHUB_ACCESS_TOKEN }} 25 | 26 | - name: Build and push Docker image 27 | uses: docker/build-push-action@v5 28 | with: 29 | context: . 30 | file: Dockerfile 31 | push: true 32 | tags: amithkoujalgi/pdf-bot:1.0.0 -------------------------------------------------------------------------------- /pdf_bot/pull_model.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | from config import Config 5 | 6 | model_name = Config.MODEL 7 | ollama_api_base_url = Config.OLLAMA_API_BASE_URL 8 | print(f"Using model: {model_name}") 9 | print(f"Using Ollama base URL: {ollama_api_base_url}") 10 | 11 | 12 | def pull_model(model_name_): 13 | print(f"Pulling model '{model_name_}'...") 14 | url = f"{ollama_api_base_url}/api/pull" 15 | data = json.dumps(dict(name=model_name_)) 16 | headers = {'Content-Type': 'application/json'} 17 | 18 | # Use stream=True to handle streaming response 19 | with requests.post(url, data=data, headers=headers, stream=True) as response: 20 | if response.status_code == 200: 21 | # Process the response content in chunks 22 | for chunk in response.iter_content(chunk_size=1024): 23 | if chunk: 24 | print(chunk.decode('utf-8'), end='') # Replace 'utf-8' with the appropriate encoding 25 | else: 26 | print(f"Error: {response.status_code} - {response.text}") 27 | 28 | 29 | pull_model(model_name_=model_name) 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | 163 | .idea/ 164 | .idea/** 165 | .DS_Store 166 | *.pyc 167 | *.egg-info/** 168 | 169 | 170 | .vscode/ 171 | .npm/ 172 | -------------------------------------------------------------------------------- /pdf_bot/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Optional 4 | 5 | import streamlit as st 6 | 7 | from config import Config 8 | from pdf_helper import PDFHelper, load_embedding_model 9 | 10 | load_embedding_model(model_name=Config.EMBEDDING_MODEL_NAME) 11 | 12 | title = "PDF Bot" 13 | init_msg = "Hello, I'm your PDF assistant. Upload a PDF to get going." 
14 | model_name = Config.MODEL 15 | 16 | ollama_api_base_url = Config.OLLAMA_API_BASE_URL 17 | pdfs_directory = os.path.join(str(Path.home()), 'langchain-store', 'uploads', 'pdfs') 18 | os.makedirs(pdfs_directory, exist_ok=True) 19 | 20 | print(f"Using model: {model_name}") 21 | print(f"Using Ollama base URL: {ollama_api_base_url}") 22 | print(f"Using PDFs upload directory: {pdfs_directory}") 23 | 24 | st.set_page_config(page_title=title) 25 | 26 | 27 | def on_upload_change(): 28 | # clear_chat_history() 29 | print("File changed.") 30 | 31 | st.session_state.messages = [{"role": "assistant", "content": init_msg}] 32 | 33 | 34 | 35 | def set_uploaded_file(_uploaded_file: str): 36 | st.session_state['uploaded_file'] = _uploaded_file 37 | 38 | 39 | def get_uploaded_file() -> Optional[str]: 40 | if 'uploaded_file' in st.session_state: 41 | return st.session_state['uploaded_file'] 42 | return None 43 | 44 | 45 | with st.sidebar: 46 | st.title(title) 47 | st.write('This chatbot accepts a PDF file and lets you ask questions on it.') 48 | uploaded_file = st.file_uploader( 49 | label='Upload a PDF', type=['pdf', 'PDF'], 50 | accept_multiple_files=False, 51 | key='file-uploader', 52 | help=None, 53 | on_change=on_upload_change, 54 | args=None, 55 | kwargs=None, 56 | disabled=False, 57 | label_visibility="visible" 58 | ) 59 | 60 | if uploaded_file is not None: 61 | added = False 62 | my_msg = f"Great! Now, what do you want from `{uploaded_file.name}`?" 63 | for msg in st.session_state.messages: 64 | if msg["content"] == my_msg: 65 | added = True 66 | if not added: 67 | st.session_state.messages.append({"role": "assistant", "content": my_msg}) 68 | bytes_data = uploaded_file.getvalue() 69 | target_file = os.path.join(pdfs_directory, uploaded_file.name) 70 | # print(uploaded_file) 71 | set_uploaded_file(target_file) 72 | with open(target_file, 'wb') as f: 73 | f.write(bytes_data) 74 | 75 | # Store LLM generated responses 76 | if "messages" not in st.session_state.keys(): 77 | st.session_state.messages = [{"role": "assistant", "content": init_msg}] 78 | 79 | # Display or clear chat messages 80 | for message in st.session_state.messages: 81 | with st.chat_message(message["role"]): 82 | st.write(message["content"]) 83 | 84 | 85 | def clear_chat_history(): 86 | from streamlit_js_eval import streamlit_js_eval 87 | streamlit_js_eval(js_expressions="parent.window.location.reload()") 88 | st.session_state.messages = [{"role": "assistant", "content": init_msg}] 89 | 90 | 91 | st.sidebar.button('Reset', on_click=clear_chat_history) 92 | 93 | # User-provided prompt 94 | if prompt := st.chat_input(disabled=False, placeholder="What do you want to know from the uploaded PDF?"): 95 | st.session_state.messages.append({"role": "user", "content": prompt}) 96 | with st.chat_message("user"): 97 | st.write(prompt) 98 | 99 | if st.session_state.messages[-1]["role"] != "assistant": 100 | source_file = get_uploaded_file() 101 | if source_file is None: 102 | with st.chat_message("assistant"): 103 | with st.spinner("Thinking..."): 104 | placeholder = st.empty() 105 | full_response = 'PDF file needs to be uploaded before you can ask questions on it 😟. Please upload a file.' 
106 | placeholder.markdown(full_response) 107 | message = {"role": "assistant", "content": full_response} 108 | st.session_state.messages.append(message) 109 | else: 110 | with st.chat_message("assistant"): 111 | with st.spinner("Thinking..."): 112 | question = dict(st.session_state.messages[-1]).get('content') 113 | pdf_helper = PDFHelper( 114 | ollama_api_base_url=ollama_api_base_url, 115 | model_name=model_name 116 | ) 117 | response = pdf_helper.ask( 118 | pdf_file_path=source_file, 119 | question=question 120 | ) 121 | placeholder = st.empty() 122 | full_response = '' 123 | for item in response: 124 | full_response += item 125 | placeholder.markdown(full_response) 126 | placeholder.markdown(full_response) 127 | message = {"role": "assistant", "content": full_response} 128 | st.session_state.messages.append(message) 129 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### PDF Bot with Ollama 2 | 3 | 4 |

5 | 6 |

7 | 8 | A bot that accepts PDF docs and lets you ask questions about them. 9 | 10 | The LLMs are downloaded and served via [Ollama](https://github.com/jmorganca/ollama). 11 | 12 |

13 | 14 |

15 | 16 |

17 | 18 | 19 | 20 | 21 |

22 | 26 | 27 | 28 | ## Table of Contents 29 | 30 | - [Requirements](#requirements) 31 | - [How to run](#how-to-run) 32 | - [Demo](#demo) 33 | - [Improvements](#improvements) 34 | - [Contributing](#contributing) 35 | - [Credits](#credits) 36 | 37 | ### Requirements 38 | 39 | [![][shield]][site] 40 | 41 | [![][maketool-shield]][maketool-site] 42 | 43 | [site]: https://docs.docker.com/compose/ 44 | 45 | [shield]: https://img.shields.io/badge/Docker_Compose-Installation-blue.svg?style=for-the-badge&labelColor=gray 46 | 47 | [maketool-site]: https://www.gnu.org/software/make/ 48 | 49 | [maketool-shield]: https://img.shields.io/badge/Make-Tool-blue.svg?style=for-the-badge&labelColor=gray 50 | 51 | ### How to run 52 | 53 | #### CPU version 54 | 55 | ```shell 56 | make start 57 | ``` 58 | 59 | #### GPU version 60 | 61 | ```shell 62 | make start-gpu 63 | ``` 64 | 65 | When the server is up and running, access the app at: http://localhost:8501 66 | 67 | Switch to a different model by changing the `MODEL` env variable in [docker-compose.yml](https://github.com/amithkoujalgi/ollama-pdf-bot/blob/main/docker-compose.yml#L18) (an example override is shown near the end of this README). Check out the available models [here](https://ollama.ai/library). 68 | 69 | **Note:** 70 | 71 | - The first start-up takes a while, since the specified model has to be downloaded. 72 | - If your hardware does not have a GPU and you choose to run only on CPU, expect long response times from the bot. 73 | - Only Nvidia GPUs are supported, as mentioned in Ollama's documentation; others such as AMD aren't supported yet. Read how to 74 | use GPUs with the [Ollama container](https://hub.docker.com/r/ollama/ollama) 75 | and [docker-compose](https://docs.docker.com/compose/gpu-support/#:~:text=GPUs%20are%20referenced%20in%20a,capabilities%20.). 76 | - Make sure to have Nvidia drivers set up on your execution environment for the best results. 77 | 78 | Image on DockerHub: https://hub.docker.com/r/amithkoujalgi/pdf-bot 79 | 80 | ### [Demo](https://www.youtube.com/watch?v=jJyFslR-oNQ) 81 | 82 | https://github.com/amithkoujalgi/ollama-pdf-bot/assets/1876165/40dc70e6-9d35-4171-9ae6-d82247dbaa17 83 | 84 | #### Sample PDFs 85 | 86 | [Hl-L2351DW v0522.pdf](https://github.com/amithkoujalgi/ollama-pdf-bot/files/13323209/Hl-L2351DW.v0522.pdf) 87 | 88 | [HL-B2080DW v0522.pdf](https://github.com/amithkoujalgi/ollama-pdf-bot/files/13323208/HL-B2080DW.v0522.pdf) 89 | 90 | 91 | ### Improvements 92 | 93 | - [ ] Expose model params such as `temperature`, `top_k`, `top_p` as configurable env vars 94 | 95 | ### Contributing 96 | 97 | Contributions are most welcome! Whether it's reporting a bug, proposing an enhancement, or helping 98 | with code - any sort of contribution is much appreciated. 99 | 100 | #### Requirements 101 | 102 | ![Python](https://img.shields.io/badge/python-3.8_+-green.svg) 103 | 104 | #### Setup Ollama server for development 105 | 106 | ```shell 107 | docker run -it -v ~/ollama:/root/.ollama -p 11434:11434 ollama/ollama 108 | ``` 109 | 110 | #### Install the libs 111 | 112 | ```shell 113 | pip install -r requirements.txt 114 | ``` 115 | 116 | #### Start the app 117 | 118 | ```shell 119 | streamlit run pdf_bot/app.py 120 | ``` 121 | 122 | ### Credits 123 | 124 | Thanks to the incredible [Ollama](https://github.com/jmorganca/ollama), [Langchain](https://www.langchain.com/) 125 | and [Streamlit](https://streamlit.io/) projects. 126 | 127 | ### Appreciate my work? 128 | 129 |

130 | Buy Me A Coffee 131 | 132 |

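Example referenced from the "How to run" section above — a sketch of the `app` service in `docker-compose.yml` with the `MODEL` env variable switched to another model from the Ollama library (`mistral` here is only an illustration; any model name listed in the library should work):

```yaml
  app:
    image: amithkoujalgi/pdf-bot:1.0.0
    ports:
      - 8501:8501
    environment:
      - OLLAMA_API_BASE_URL=http://ollama:11434
      - MODEL=mistral   # any model name from https://ollama.ai/library
    networks:
      - net
```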
133 | 134 | 135 | -------------------------------------------------------------------------------- /pdf_bot/pdf_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import textwrap 3 | import time 4 | import uuid 5 | from pathlib import Path 6 | 7 | import langchain 8 | from langchain.chains import RetrievalQA 9 | from langchain.chat_models import ChatOllama 10 | from langchain.document_loaders import PyMuPDFLoader 11 | from langchain.embeddings import HuggingFaceEmbeddings 12 | from langchain.text_splitter import RecursiveCharacterTextSplitter 13 | from langchain.vectorstores import FAISS 14 | 15 | from config import Config 16 | 17 | 18 | # This loads the PDF file 19 | def load_pdf_data(file_path): 20 | # Create a PyMuPDFLoader object with file_path 21 | loader = PyMuPDFLoader(file_path=file_path) 22 | 23 | # load the PDF file 24 | docs = loader.load() 25 | 26 | # return the loaded document 27 | return docs 28 | 29 | 30 | # Responsible for splitting the documents into several chunks 31 | def split_docs(documents, chunk_size=1000, chunk_overlap=20): 32 | # Initialize the RecursiveCharacterTextSplitter with 33 | # chunk_size and chunk_overlap 34 | text_splitter = RecursiveCharacterTextSplitter( 35 | chunk_size=chunk_size, 36 | chunk_overlap=chunk_overlap 37 | ) 38 | 39 | # Split the documents into chunks 40 | chunks = text_splitter.split_documents(documents=documents) 41 | 42 | # return the document chunks 43 | return chunks 44 | 45 | 46 | # function for loading the embedding model 47 | def load_embedding_model(model_name, normalize_embedding=True): 48 | print("Loading embedding model...") 49 | start_time = time.time() 50 | hugging_face_embeddings = HuggingFaceEmbeddings( 51 | model_name=model_name, 52 | model_kwargs={'device': Config.HUGGING_FACE_EMBEDDINGS_DEVICE_TYPE}, # here we will run the model with CPU only 53 | encode_kwargs={ 54 | 'normalize_embeddings': normalize_embedding # keep True to compute cosine similarity 55 | } 56 | ) 57 | end_time = time.time() 58 | time_taken = round(end_time - start_time, 2) 59 | print(f"Embedding model load time: {time_taken} seconds.\n") 60 | return hugging_face_embeddings 61 | 62 | 63 | # Function for creating embeddings using FAISS 64 | def create_embeddings(chunks, embedding_model, storing_path="vectorstore"): 65 | print("Creating embeddings...") 66 | e_start_time = time.time() 67 | 68 | # Create the embeddings using FAISS 69 | vectorstore = FAISS.from_documents(chunks, embedding_model) 70 | 71 | e_end_time = time.time() 72 | e_time_taken = round(e_end_time - e_start_time, 2) 73 | print(f"Embeddings creation time: {e_time_taken} seconds.\n") 74 | 75 | print("Writing vectorstore..") 76 | v_start_time = time.time() 77 | 78 | # Save the model in a directory 79 | vectorstore.save_local(storing_path) 80 | 81 | v_end_time = time.time() 82 | v_time_taken = round(v_end_time - v_start_time, 2) 83 | print(f"Vectorstore write time: {v_time_taken} seconds.\n") 84 | 85 | # return the vectorstore 86 | return vectorstore 87 | 88 | 89 | # Create the chain for Question Answering 90 | def load_qa_chain(retriever, llm, prompt): 91 | print("Loading QA chain...") 92 | start_time = time.time() 93 | qa_chain = RetrievalQA.from_chain_type( 94 | llm=llm, 95 | retriever=retriever, # here we are using the vectorstore as a retriever 96 | chain_type="stuff", 97 | return_source_documents=True, # including source documents in output 98 | chain_type_kwargs={'prompt': prompt} # customizing the prompt 99 | ) 100 | end_time = 
time.time() 101 | time_taken = round(end_time - start_time, 2) 102 | print(f"QA chain load time: {time_taken} seconds.\n") 103 | return qa_chain 104 | 105 | 106 | def get_response(query, chain) -> str: 107 | # Get response from chain 108 | response = chain({'query': query}) 109 | res = response['result'] 110 | # Wrap the text for better output in Jupyter Notebook 111 | # wrapped_text = textwrap.fill(res, width=100) 112 | return res 113 | 114 | 115 | class PDFHelper: 116 | 117 | def __init__(self, ollama_api_base_url: str, model_name: str = Config.MODEL, 118 | embedding_model_name: str = Config.EMBEDDING_MODEL_NAME): 119 | self._ollama_api_base_url = ollama_api_base_url 120 | self._model_name = model_name 121 | self._embedding_model_name = embedding_model_name 122 | 123 | def ask(self, pdf_file_path: str, question: str) -> str: 124 | vector_store_directory = os.path.join(str(Path.home()), 'langchain-store', 'vectorstore', 125 | 'pdf-doc-helper-store', str(uuid.uuid4())) 126 | os.makedirs(vector_store_directory, exist_ok=True) 127 | print(f"Using vector store: {vector_store_directory}") 128 | 129 | llm = ChatOllama( 130 | temperature=0, 131 | base_url=self._ollama_api_base_url, 132 | model=self._model_name, 133 | streaming=True, 134 | # seed=2, 135 | top_k=10, 136 | # A higher value (100) will give more diverse answers, while a lower value (10) will be more conservative. 137 | top_p=0.3, 138 | # Higher value (0.95) will lead to more diverse text, while a lower value (0.5) will generate more 139 | # focused text. 140 | num_ctx=3072, # Sets the size of the context window used to generate the next token. 141 | verbose=False 142 | ) 143 | 144 | # Load the Embedding Model 145 | embed = load_embedding_model(model_name=self._embedding_model_name) 146 | 147 | # load and split the documents 148 | docs = load_pdf_data(file_path=pdf_file_path) 149 | documents = split_docs(documents=docs) 150 | 151 | # create vectorstore 152 | vectorstore = create_embeddings(chunks=documents, embedding_model=embed, storing_path=vector_store_directory) 153 | 154 | # convert vectorstore to a retriever 155 | retriever = vectorstore.as_retriever() 156 | 157 | template = """ 158 | ### System: 159 | You are an honest assistant. 160 | You will accept PDF files and you will answer the question asked by the user appropriately. 161 | If you don't know the answer, just say you don't know. Don't try to make up an answer. 162 | 163 | ### Context: 164 | {context} 165 | 166 | ### User: 167 | {question} 168 | 169 | ### Response: 170 | """ 171 | 172 | prompt = langchain.prompts.PromptTemplate.from_template(template) 173 | 174 | # Create the chain 175 | chain = load_qa_chain(retriever, llm, prompt) 176 | 177 | start_time = time.time() 178 | 179 | response = get_response(question, chain) 180 | 181 | end_time = time.time() 182 | 183 | time_taken = round(end_time - start_time, 2) 184 | 185 | print(f"Response time: {time_taken} seconds.\n") 186 | 187 | return response.strip() 188 | --------------------------------------------------------------------------------
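A minimal usage sketch (not part of the repository) showing how `PDFHelper` could be exercised outside the Streamlit app — assuming it is run from the `pdf_bot` directory, an Ollama server is reachable at `Config.OLLAMA_API_BASE_URL`, and the PDF path and question below are placeholders:

```python
# Hypothetical standalone usage of PDFHelper (run from the pdf_bot directory).
# Assumes an Ollama server is already running and the configured model is pulled.
from config import Config
from pdf_helper import PDFHelper

helper = PDFHelper(
    ollama_api_base_url=Config.OLLAMA_API_BASE_URL,  # e.g. http://localhost:11434
    model_name=Config.MODEL,                         # e.g. llama2
)

# 'manual.pdf' and the question are placeholders; point them at any local PDF.
answer = helper.ask(
    pdf_file_path="manual.pdf",
    question="How do I replace the toner cartridge?",
)
print(answer)
```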