├── .gitignore
├── README.md
├── app.py
├── favicon.ico
├── pdfs
├── QarmashiFood.pdf
└── TaxCard-TaxYear2025.pdf
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Virtual environment
2 | .venv/
3 | .env
4 | .vscode
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |  [](https://git.io/typing-svg)
2 |
3 | # RAGify Query Model
4 |
5 | ## 1. Run these Commands:
6 |
7 | - Create a `uv` virtual environment and activate it:
8 | ```bash
9 | uv venv
10 | .\.venv\Scripts\activate
11 | ```
12 |
13 | - Install the dependencies into the `uv` virtual environment:
14 | ```bash
15 | uv pip install -r requirements.txt
16 | ```
17 |
18 | - Run the Streamlit App
19 | ```bash
20 | streamlit run app.py
21 | # or
22 | uv run streamlit run app.py
23 | ```
24 |
25 | ## Get Groq API Key
26 |
27 | - Create a free API key at [console.groq.com](https://console.groq.com/)
28 |
29 |
30 | ## 🔗 Links
31 | [Portfolio](https://ridanaz.vercel.app/)
32 | [LinkedIn](https://linkedin.com/in/ridanaz67)
33 | [Email](mailto:rnaz3414@gmail.com)
34 | [Medium](https://medium.com/@rnaz3414)
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
import os
import tempfile

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_groq import ChatGroq
12 |
# Load environment variables from a local .env file (GROQ_API_KEY, GOOGLE_API_KEY).
load_dotenv()

# Groq key is passed to ChatGroq explicitly; the Google key must live in the
# process environment because GoogleGenerativeAIEmbeddings reads it from there.
groq_api_key = os.getenv("GROQ_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key:
    # Fix: the previous `os.environ["GOOGLE_API_KEY"] = os.getenv(...)` raised
    # TypeError at startup when the variable was unset (os.environ values must
    # be str, not None). A missing key is now reported by the embedding client
    # at the point of use instead of crashing the whole app on import.
    os.environ["GOOGLE_API_KEY"] = google_api_key
19 |
# Browser-tab metadata (title + icon) and the centered page layout.
st.set_page_config(page_title="RAG Query Model", page_icon="favicon.ico", layout="centered")
26 |
27 | # Custom CSS for styling
28 | st.markdown(
29 | """
# NOTE(review): the extraction stripped the <style> payload that belongs in
# this string (original source lines 30-64 are missing). Restore the CSS from
# version control before running; as dumped, this injects an empty style block.
30 |
65 | """,
66 | unsafe_allow_html=True,
67 | )
68 |
69 | # Display the title
# NOTE(review): the HTML heading markup wrapping "RAGify Query Model" was also
# stripped here (the call uses unsafe_allow_html=True, so a styled tag was
# presumably present) — confirm against the repository.
70 | st.markdown("
RAGify Query Model
", unsafe_allow_html=True)
71 |
# Sidebar: branding, PDF upload, model selection, and embedding generation.
with st.sidebar:
    col1, col2 = st.columns([0.2, 0.8])
    with col1:
        st.image("favicon.ico", width=25)

    with col2:
        # NOTE(review): the HTML wrapper around this author credit was stripped
        # during extraction (the call uses unsafe_allow_html=True) — restore the
        # original markup from version control.
        st.markdown("Rida Naz", unsafe_allow_html=True)

    uploaded_files = st.file_uploader("Upload your PDFs", type="pdf", accept_multiple_files=True)
    model_name = st.selectbox(
        "Select Model",
        options=["llama-3.3-70b-versatile", "llama-3.3-70b-specdec", "llama3-70b-8192"],
        index=0,
        help="Select the model for generating embeddings and answering queries.",
    )

    if uploaded_files and st.button("Generate Embeddings"):
        with st.spinner("Generating embeddings..."):
            # Drop any previously built index so stale chunks from an earlier
            # upload cannot leak into answers for the new document set.
            st.session_state.pop("vectors", None)
            st.session_state.pop("docs", None)

            # Fix: the original wrote into a hand-managed "./temp_pdfs" folder
            # and only removed it after a successful run, so any exception in
            # loading/embedding leaked the directory (and os.rmdir would then
            # fail on the next run). TemporaryDirectory guarantees cleanup.
            with tempfile.TemporaryDirectory(prefix="pdfs_") as temp_dir:
                for uploaded_file in uploaded_files:
                    # basename() guards against path components smuggled in the
                    # client-supplied filename (e.g. "../x.pdf").
                    safe_name = os.path.basename(uploaded_file.name)
                    with open(os.path.join(temp_dir, safe_name), "wb") as f:
                        f.write(uploaded_file.getbuffer())

                # Load every PDF, split into overlapping chunks, embed the
                # chunks with Google embeddings, and index them in FAISS.
                st.session_state.loader = PyPDFDirectoryLoader(temp_dir)
                st.session_state.docs = st.session_state.loader.load()
                st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
                st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs)
                st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
                st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)

            st.success("Embeddings generated successfully!")
116 |
# Chat model: the Groq-hosted LLM picked in the sidebar selectbox.
llm = ChatGroq(model_name=model_name, groq_api_key=groq_api_key)
119 |
# Dynamic system prompt
def generate_dynamic_prompt(doc_titles):
    """Build a system prompt that scopes the assistant to the uploaded PDFs.

    Args:
        doc_titles: Iterable of document file names to enumerate in the
            prompt (typically the uploaded PDF names).

    Returns:
        A prompt string instructing the model to answer strictly from the
        listed documents.
    """
    return f"""
    You are an AI assistant specializing in the content provided within the following documents: {', '.join(doc_titles)}.
    Answer user queries strictly based on the context from these documents. Provide clear, concise, and accurate responses.
    """
126 |
# User query input
user_query = st.text_input("What would you like to know?", placeholder="Enter your query here...")

# Process user query: retrieve relevant chunks and generate an answer.
if user_query:
    if "vectors" not in st.session_state:
        st.warning("Please generate embeddings first.")
    else:
        with st.spinner("Processing your query..."):
            # Fix: the original used metadata['source'].split('/')[-1], which
            # leaves the full path on Windows ('\\' separators — the README
            # documents Windows usage); os.path.basename handles both. `.get`
            # also guards against 'docs' being absent from session state.
            doc_titles = [os.path.basename(doc.metadata["source"]) for doc in st.session_state.get("docs", [])]
            system_prompt = generate_dynamic_prompt(doc_titles)

            # Prompt template: system instructions, then the retrieved context,
            # then the user's question. Double braces survive the outer f-string
            # so {context}/{input} remain template variables.
            prompt = ChatPromptTemplate.from_template(f"""
            {system_prompt}

            {{context}}

            Questions:{{input}}
            """)

            # Stuff the retrieved documents into the prompt and answer with the LLM.
            document_chain = create_stuff_documents_chain(llm, prompt)
            retriever = st.session_state.vectors.as_retriever()
            retrieval_chain = create_retrieval_chain(retriever, document_chain)

            response = retrieval_chain.invoke({'input': user_query})
            st.write(response['answer'])

# Reset session state so a fresh set of PDFs can be indexed from scratch.
if st.button("Reset App"):
    st.session_state.clear()
    st.rerun()
--------------------------------------------------------------------------------
/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RidaNaz/RAGify-Query-Model/2b3d2347169c2dfd85c550d89079ec54b8d1ee6d/favicon.ico
--------------------------------------------------------------------------------
/pdfs/QarmashiFood.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RidaNaz/RAGify-Query-Model/2b3d2347169c2dfd85c550d89079ec54b8d1ee6d/pdfs/QarmashiFood.pdf
--------------------------------------------------------------------------------
/pdfs/TaxCard-TaxYear2025.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RidaNaz/RAGify-Query-Model/2b3d2347169c2dfd85c550d89079ec54b8d1ee6d/pdfs/TaxCard-TaxYear2025.pdf
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | langchain
3 | langchain-community
4 | langchain-google-genai
5 | langchain-groq
6 | groq
7 | pypdf
8 | faiss-cpu
9 | python-dotenv
10 |
--------------------------------------------------------------------------------