├── AWS_Deployment
│   ├── OpenAI_API_Key.txt
│   ├── docker-compose.yml
│   ├── requirements.txt
│   ├── Dockerfile
│   ├── app.py
│   └── README.md
└── .gitignore

/AWS_Deployment/OpenAI_API_Key.txt:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/AWS_Deployment/docker-compose.yml:
--------------------------------------------------------------------------------
version: '3'
services:
  langchain-rag-app:
    image: langchain-rag-app:latest
    build: .
    command: streamlit run app.py --server.port 8501
    ports:
      - "8501:8501"
--------------------------------------------------------------------------------
/AWS_Deployment/requirements.txt:
--------------------------------------------------------------------------------
chromadb
faiss-cpu
openai
langchain-core
langchain-experimental
langchain-community
langchain-openai
streamlit
python-dotenv
PyPDF2
tiktoken
tenacity
pandas
numpy
--------------------------------------------------------------------------------
/AWS_Deployment/Dockerfile:
--------------------------------------------------------------------------------
# base image
FROM python:3.10

# set the working directory for the app
WORKDIR /langchain-rag-app

# copy the requirements file
COPY requirements.txt ./requirements.txt

# install packages
RUN pip3 install -r requirements.txt

# copy the remaining application files
COPY . .

# expose Streamlit's default port
EXPOSE 8501

# command to launch the app
CMD ["streamlit", "run", "app.py"]
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Compiled source #
###################
*.com
*.class
*.dll
*.exe
*.o
*.so
*.env
*.txt

# Packages #
############
# it's better to unpack these files and commit the raw source
# git has its own built-in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip

# Logs and databases #
######################
*.log
*.sql
*.sqlite

# OS generated files #
######################
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
--------------------------------------------------------------------------------
/AWS_Deployment/app.py:
--------------------------------------------------------------------------------
from dotenv import load_dotenv
import os
import openai
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
# from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain_community.embeddings import OpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings
# from langchain.vectorstores import FAISS, Chroma
from langchain_community.vectorstores import FAISS, Chroma
from langchain.chains.question_answering import load_qa_chain
# from langchain.llms import OpenAI
# from langchain_community.llms import OpenAI
from langchain_openai import OpenAI


def upload_files():
    uploaded_files = st.file_uploader("Upload the PDF files", accept_multiple_files=True)
    return uploaded_files


def main():
    filepath = os.getcwd()
    # Read the text file containing the API key (strip whitespace so the key is clean)
    with open(filepath + "/OpenAI_API_Key.txt", "r") as f:
        openai_api_key = f.read().strip()
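    # Optional alternative (a sketch, not wired in here): python-dotenv is already in
    # requirements.txt and load_dotenv is imported above, so the key could instead be
    # stored in a .env file as OPENAI_API_KEY=... and loaded with load_dotenv(), which
    # avoids keeping the secret in a separate plain-text file.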

    # Make the API key available through the environment variable
    os.environ["OPENAI_API_KEY"] = openai_api_key
    openai.api_key = openai_api_key

    # Load a pre-trained OpenAI language model
    llm = OpenAI()

    # Configure the page settings for the Streamlit app
    st.set_page_config(page_title="Chat with PDF")

    # Display the header for the Streamlit app
    st.header("LangChain RAG App")

    # Allow users to upload PDF files
    # pdf = st.file_uploader("Upload your PDF", type="pdf")
    pdfs = upload_files()

    # Check if at least one PDF file has been uploaded
    if pdfs:
        # Read each PDF file and collect the text from all of its pages
        text = ""
        for pdf in pdfs:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                # extract_text() can return None for pages with no extractable text
                text += page.extract_text() or ""

        # Set up the text splitter for splitting the text into chunks
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )
        # Split the extracted text into chunks for efficient processing
        chunks = text_splitter.split_text(text)

        # Create embeddings and build a knowledge base from the chunks
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        knowledge_base = FAISS.from_texts(chunks, embeddings)

        # Allow the user to input a question about the uploaded PDFs
        user_question = st.text_input("Ask a question about your PDF")

        # Check if a user question has been entered
        if user_question:
            # Perform a similarity search on the knowledge base using the user's question
            docs = knowledge_base.similarity_search(user_question)

            # Set up a question-answering chain
            chain = load_qa_chain(llm, chain_type="stuff")

            # Generate a response to the user's question using the question-answering chain
            response = chain.run(input_documents=docs, question=user_question)

            # Display the generated response
            st.write(response)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/AWS_Deployment/README.md:
--------------------------------------------------------------------------------
# Deploying on AWS Cloud
## Logging in to the Amazon EC2 instance
For Amazon Linux 2023 based instances, the login username is `ec2-user`.
For Ubuntu based instances, the login username is `ubuntu`.

Once you've logged into the EC2 instance via a terminal (Mac/Linux) or PuTTY (Windows), follow the instructions below to install Git, Docker and Docker-Compose on the instance.

## Switch to the superuser

`sudo su`

## Install Docker

`yum install docker`

## Get pip3 if not already installed

`yum install python3-pip`

## Install Docker-Compose through pip

`pip3 install --user docker-compose`

## Download the docker-compose binaries

`sudo curl -L https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose`

## Change the permissions of the docker-compose binaries

`sudo chmod +x /usr/local/bin/docker-compose`

## Create a system link to the docker-compose binaries

`ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose`

## Verify the docker-compose version

`docker-compose version`

## Enable the Docker service at boot time

`sudo systemctl enable docker.service`

## Start the Docker service

`sudo systemctl start docker.service`

## Steps for running the application locally

## Install requirements

`pip install -r requirements.txt`

## Run the Streamlit app
The application can be run locally from VS Code or a terminal using the following command:
`streamlit run app.py`

Navigate to the following link in your browser to access the application:
`localhost:8501`

## Docker Commands

`docker build . -t langchain-rag-app:latest`

`docker-compose up`

NOTE: While deploying on an EC2 instance, make sure that you allow inbound traffic on port `8501` in the Security Groups section of EC2.
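
If the AWS CLI is configured on your machine, the same inbound rule can be added with a single command (a sketch; replace the security group ID with the one attached to your instance):

`aws ec2 authorize-security-group-ingress --group-id <security-group-id> --protocol tcp --port 8501 --cidr 0.0.0.0/0`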

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Deploying on Azure Cloud

## Set Up Your Azure Virtual Machine (VM)

Log into the Azure Portal and create a new Virtual Machine:
- For Ubuntu-based instances, use the username `azureuser`.
- Choose a VM size that meets the app's resource requirements.
- Under Inbound port rules, add port 8501 to allow access to your application.

Connect to Your VM:
- On Mac/Linux, use SSH from your terminal:
  `ssh azureuser@<public-ip>`
- On Windows, use an SSH client such as PuTTY with the provided public IP.

## Install Required Tools on the Azure VM

Once connected to the VM, follow these instructions to install Git, Docker, and Docker-Compose.

## Switch to Superuser
`sudo su`

## Update package lists
`sudo apt update`

## Install Docker
`sudo apt install -y docker.io`

## Install pip3 (Python Package Manager)
`sudo apt install -y python3-pip`

## Install Docker-Compose via pip
`pip3 install docker-compose`

## Download Docker-Compose Binaries
Use the following command to download the latest Docker-Compose binaries:

`sudo curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose`

## Set Permissions for Docker-Compose
`sudo chmod +x /usr/local/bin/docker-compose`

## Create a System Link for Docker-Compose
`sudo ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose`

## Verify Docker-Compose Installation
`docker-compose --version`

## Enable Docker to Start on Boot
`sudo systemctl enable docker.service`

## Start the Docker Service
`sudo systemctl start docker.service`

## Clone Your Application's Repository
`git clone <repository-url>`

`cd <repository-directory>`

## Install Python Dependencies
`pip install -r requirements.txt`

## Run the Application Locally for Testing
`streamlit run app.py`

## Use Docker to Containerize and Run the Application
`docker build . -t langchain-rag-app:latest`

`docker-compose up`

**NOTE**:
If the app is unreachable because of the firewall settings on Azure, you need to allow inbound traffic on port 8501:
- In the Azure Portal, go to your VM's Networking settings.
- Under Inbound port rules, add a rule to allow port 8501 for HTTP access to your Streamlit application.
- Open a browser and go to `http://<public-ip>:8501` to access the RAG application.
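
Alternatively, if the Azure CLI is installed, the same port can be opened with a single command (a sketch; substitute your own resource group and VM name):

`az vm open-port --resource-group <resource-group> --name <vm-name> --port 8501`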

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Deploying on Google Cloud

To deploy the Streamlit LangChain RAG app on Google Cloud Platform (GCP) using Google Compute Engine (GCE), follow the instructions below; they call out the GCP-specific adjustments.

## Create a Google Cloud Virtual Machine (VM) Instance
- Go to the Google Cloud Console (https://console.cloud.google.com/).
- Navigate to Compute Engine > VM Instances.
- Click Create Instance.
- Choose your desired Machine Type (e.g., n1-standard-1).
- Choose your Region and Zone.
- For the OS image, select Ubuntu 22.04 LTS (or any preferred Ubuntu version).
- Set up a Firewall rule to allow HTTP and HTTPS traffic.
- Set up SSH keys for secure login (or you can choose to log in with the Google Cloud Console for SSH access).

## SSH into the VM Instance
Once the VM instance is created:
In the Google Cloud Console, navigate to VM Instances, and click SSH next to your VM instance to log in directly via the browser.
Alternatively, if you are using a terminal (Mac/Linux), you can SSH into the instance using the following command:

`gcloud compute ssh <instance-name> --zone <zone>`

## Install Docker, Docker Compose, and Pip
After SSH'ing into your VM, follow these steps:

## Switch to Superuser
`sudo su`

## Update the system (optional but recommended)

`apt-get update`

## Install Docker
`apt-get install docker.io`

## Enable the Docker Service
`sudo systemctl enable --now docker`

## Install pip3 (if not already installed)

`apt-get install python3-pip`

## Install Docker Compose
`pip3 install --user docker-compose`

## Download the latest docker-compose binary

`sudo curl -L https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose`

## Make docker-compose executable

`sudo chmod +x /usr/local/bin/docker-compose`

## Verify the docker-compose version
`docker-compose version`

## Enable Docker to start on boot
`sudo systemctl enable docker.service`

## Start the Docker service
`sudo systemctl start docker.service`

## Clone Your Git Repository (Optional)
If your app code is stored in a Git repository, clone it to your VM instance:

`git clone <repository-url>`

`cd <repository-directory>`

## Install Python Requirements
Install the required dependencies for the Streamlit LangChain app:

`pip install -r requirements.txt`

## Build and Run Docker Containers
a) Build the Docker Image:

`docker build . -t langchain-rag-app:latest`

b) Run Docker Compose:
`docker-compose up`

This builds the required Docker image and starts the application in the foreground (add the `-d` flag to run it in the background).

## Open Port 8501 in the GCP Firewall
Make sure that the necessary ports (e.g., 8501 for Streamlit) are open to the internet by configuring the Firewall rules for your VM instance:

a) Allow Port 8501 in the VM's Firewall:
- In the Google Cloud Console, go to VPC Network > Firewall.
- Click on Create Firewall Rule.
- Give the rule a name (e.g., allow-streamlit).
- Set the Source IP Ranges to 0.0.0.0/0 (this makes it accessible from anywhere).
- Under Protocols and Ports, select Specified protocols and ports and enter tcp:8501.
- Click Create.

b) Verify if the Firewall Rule is Working:
You can check if the port is open by running:

`gcloud compute firewall-rules list`

This will list all the firewall rules for your project, and you can confirm that port 8501 is open.
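
The same rule can also be created from the command line instead of the console (a sketch that mirrors the console steps above):

`gcloud compute firewall-rules create allow-streamlit --allow tcp:8501 --source-ranges 0.0.0.0/0`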

## Access the Application
After the container is running and the firewall rules are configured, navigate to the external IP address of your VM instance:

- In the VM Instances section of the GCP Console, locate your VM and copy the External IP.
- Open your browser and navigate to `http://<external-ip>:8501`.
- You should now be able to access your Streamlit app deployed on the Google Cloud VM.

**Notes**:
- Scaling: To scale your app on GCP, you can create additional VM instances, use load balancing, or deploy on Google Kubernetes Engine (GKE) for container orchestration.
- Storage: If you need persistent storage, consider using Google Cloud Storage or Persistent Disks for saving uploaded files (like PDFs) or other data.
- Billing: Ensure that your GCP account is set up with billing enabled to avoid any service interruptions.

This should get your LangChain RAG app running on Google Cloud using Google Compute Engine.
--------------------------------------------------------------------------------