├── .gitignore ├── README.md ├── app.py ├── favicon.ico ├── pdfs ├── QarmashiFood.pdf └── TaxCard-TaxYear2025.pdf └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Virtual environment 2 | .venv/ 3 | .env 4 | .vscode -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Streamlit Web App Assignment given by Sir Zia](/favicon.ico) [![Typing SVG](https://readme-typing-svg.demolab.com?font=Roboto+Slab&weight=500&size=27&duration=4000&pause=500&color=ffde7b¢er=true&vCenter=true&width=700&height=50&lines=%E2%9C%A8Hey%2C+I'm+Rida+Naz%E2%9C%A8;%E2%9C%A8Full+Stack+Developer+%7C+AI+Engineer%E2%9C%A8;%E2%9C%A8Building+Scalable+Web+Apps+%26+AI+Solutions%E2%9C%A8)](https://git.io/typing-svg) 2 | 3 | # RAGify Query Model 4 | 5 | ## 1. Run these Commands: 6 | 7 | - Create a `uv` virtual environment and activate it: 8 | ```bash 9 | uv venv 10 | .\.venv\Scripts\activate 11 | ``` 12 | 13 | - install the dependencies in the `uv venv` 14 | ```bash 15 | uv pip install -r requirements.txt 16 | ``` 17 | 18 | - Run the Streamlit App 19 | ```bash 20 | streamlit run app.py 21 | # or 22 | uv run streamlit run app.py 23 | ``` 24 | 25 | ## Get Groq API Key 26 | 27 | - `https://console.groq.com/` 28 | 29 | 30 | ## 🔗 Links 31 | [![portfolio](https://img.shields.io/badge/my_portfolio-000?style=for-the-badge&logo=ko-fi&logoColor=white)](https://ridanaz.vercel.app/) 32 | [![linkedin](https://img.shields.io/badge/linkedin-0A66C2?style=for-the-badge&logo=linkedin&logoColor=white)](https://linkedin.com/in/ridanaz67) 33 | [![gmail](https://img.shields.io/badge/gmail-f44336?style=for-the-badge&logo=twitter&logoColor=white)](mailto:rnaz3414@gmail.com) 34 | [![medium](https://img.shields.io/badge/medium-white?style=for-the-badge&logo=twitter&logoColor=black)](https://medium.com/@rnaz3414) 
"""RAGify Query Model — Streamlit RAG app over user-uploaded PDFs.

Pipeline: upload PDFs -> split into chunks -> embed with Google
Generative AI embeddings -> index in FAISS -> answer user queries with a
Groq-hosted Llama model restricted to the retrieved document context.
"""

import os
import shutil

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_groq import ChatGroq

# Load environment variables from a local .env file, if present.
load_dotenv()

# Fail fast with a readable message when a required key is missing.
# The original `os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")`
# raised an opaque `TypeError: str expected, not NoneType` when unset.
groq_api_key = os.getenv("GROQ_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")
if not groq_api_key or not google_api_key:
    st.error("Both GROQ_API_KEY and GOOGLE_API_KEY must be set (see README).")
    st.stop()
os.environ["GOOGLE_API_KEY"] = google_api_key

st.set_page_config(
    page_title="RAG Query Model",
    page_icon="favicon.ico",
    layout="centered",
)

# Custom CSS for styling.
# NOTE(review): the original <style> rules (app.py lines 29-65) were
# stripped by the HTML-unaware export this file was recovered from; the
# placeholder below keeps the call site intact — restore the real rules
# from version control.
st.markdown(
    """
    <style>
    /* original stylesheet unavailable in this copy */
    </style>
    """,
    unsafe_allow_html=True,
)

# Page title.  NOTE(review): the original wrapper tags were stripped by
# the export; only the visible text "RAGify Query Model" is certain —
# confirm the exact markup/classes against version control.
st.markdown("<h1>RAGify Query Model</h1>", unsafe_allow_html=True)

# ------------------------------------------------------------- sidebar
with st.sidebar:
    brand_icon_col, brand_name_col = st.columns([0.2, 0.8])
    with brand_icon_col:
        st.image("favicon.ico", width=25)
    with brand_name_col:
        # NOTE(review): original wrapper tags stripped by the export;
        # only the visible text "Rida Naz" is certain.
        st.markdown("<span>Rida Naz</span>", unsafe_allow_html=True)

    uploaded_files = st.file_uploader(
        "Upload your PDFs", type="pdf", accept_multiple_files=True
    )
    model_name = st.selectbox(
        "Select Model",
        options=[
            "llama-3.3-70b-versatile",
            "llama-3.3-70b-specdec",
            "llama3-70b-8192",
        ],
        index=0,
        # Corrected help text: the Groq model only answers queries;
        # embeddings come from Google Generative AI below.
        help="Select the Groq model used to answer queries "
        "(embeddings are generated with Google Generative AI).",
    )

    if uploaded_files and st.button("Generate Embeddings"):
        with st.spinner("Generating embeddings..."):
            # Drop any index built from a previous upload.
            st.session_state.pop("vectors", None)
            st.session_state.pop("docs", None)

            # Stage the uploads in a scratch directory so the directory
            # loader can pick them up.  basename() guards against path
            # components smuggled inside an uploaded file name.
            temp_dir = "./temp_pdfs"
            os.makedirs(temp_dir, exist_ok=True)
            try:
                for uploaded_file in uploaded_files:
                    dest = os.path.join(
                        temp_dir, os.path.basename(uploaded_file.name)
                    )
                    with open(dest, "wb") as f:
                        f.write(uploaded_file.getbuffer())

                # Load, chunk, embed and index the documents.  Only the
                # artifacts read later (docs, vectors) are persisted in
                # session state; intermediates stay local.
                docs = PyPDFDirectoryLoader(temp_dir).load()
                splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1000, chunk_overlap=200
                )
                chunks = splitter.split_documents(docs)
                embeddings = GoogleGenerativeAIEmbeddings(
                    model="models/embedding-001"
                )
                st.session_state.docs = docs
                st.session_state.vectors = FAISS.from_documents(chunks, embeddings)
            finally:
                # Remove the scratch directory even when loading fails
                # (the original left stale files behind on error).
                shutil.rmtree(temp_dir, ignore_errors=True)

        st.success("Embeddings generated successfully!")

# Chat model used to answer queries over the retrieved context.
llm = ChatGroq(groq_api_key=groq_api_key, model_name=model_name)


def generate_dynamic_prompt(doc_titles):
    """Return a system prompt scoping the assistant to *doc_titles*.

    Parameters
    ----------
    doc_titles : list[str]
        File names of the indexed documents, joined into the prompt.
    """
    return f"""
    You are an AI assistant specializing in the content provided within the following documents: {', '.join(doc_titles)}.
    Answer user queries strictly based on the context from these documents. Provide clear, concise, and accurate responses.
    """


user_query = st.text_input(
    "What would you like to know?", placeholder="Enter your query here..."
)

if user_query:
    if "vectors" not in st.session_state:
        st.warning("Please generate embeddings first.")
    else:
        with st.spinner("Processing your query..."):
            # basename() instead of split('/') so Windows-style paths
            # ('dir\\file.pdf') also reduce to the bare file name.
            doc_titles = [
                os.path.basename(doc.metadata["source"])
                for doc in st.session_state.docs
            ]
            system_prompt = generate_dynamic_prompt(doc_titles)

            # NOTE(review): the export stripped two lines around
            # {context}; they presumably held <context>...</context>
            # wrapper tags — confirm against version control.
            prompt = ChatPromptTemplate.from_template(
                f"""
                {system_prompt}
                <context>
                {{context}}
                </context>
                Questions:{{input}}
                """
            )

            # Stuff retrieved chunks into the prompt, then wire the FAISS
            # retriever and the document chain into one retrieval chain.
            document_chain = create_stuff_documents_chain(llm, prompt)
            retriever = st.session_state.vectors.as_retriever()
            retrieval_chain = create_retrieval_chain(retriever, document_chain)

            response = retrieval_chain.invoke({"input": user_query})
            st.write(response["answer"])

# Reset session state and rerun the script from the top.
if st.button("Reset App"):
    st.session_state.clear()
    st.rerun()
https://raw.githubusercontent.com/RidaNaz/RAGify-Query-Model/2b3d2347169c2dfd85c550d89079ec54b8d1ee6d/pdfs/QarmashiFood.pdf -------------------------------------------------------------------------------- /pdfs/TaxCard-TaxYear2025.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RidaNaz/RAGify-Query-Model/2b3d2347169c2dfd85c550d89079ec54b8d1ee6d/pdfs/TaxCard-TaxYear2025.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | langchain 3 | langchain_community 4 | langchain_google_genai 5 | langchain-groq 6 | groq 7 | pypdf 8 | faiss-cpu 9 | python-dotenv 10 | --------------------------------------------------------------------------------