├── .gitignore
├── README.md
├── app.py
├── favicon.ico
├── pdfs
├── QarmashiFood.pdf
└── TaxCard-TaxYear2025.pdf
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Virtual environment
2 | .venv/
3 | .env
4 | .vscode
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |  [](https://git.io/typing-svg)
2 |
3 | # RAGify Query Model
4 |
5 | ## 1. Run these Commands:
6 |
7 | - Create a `uv` virtual environment and activate it:
8 | ```bash
9 | uv venv
10 | .\.venv\Scripts\activate
11 | ```
12 |
13 | - Install the dependencies into the `uv` virtual environment:
14 | ```bash
15 | uv pip install -r requirements.txt
16 | ```
17 |
18 | - Run the Streamlit App
19 | ```bash
20 | streamlit run app.py
21 | # or
22 | uv run streamlit run app.py
23 | ```
24 |
25 | ## Get Groq API Key
26 |
27 | - Create a free API key at [console.groq.com](https://console.groq.com/)
28 |
29 |
30 | ## 🔗 Links
31 | [Portfolio](https://ridanaz.vercel.app/)
32 | [LinkedIn](https://linkedin.com/in/ridanaz67)
33 | [Email](mailto:rnaz3414@gmail.com)
34 | [Medium](https://medium.com/@rnaz3414)
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
import os
import tempfile

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_groq import ChatGroq
12 |
# Load environment variables from a local .env file (GROQ_API_KEY, GOOGLE_API_KEY).
load_dotenv()

# Groq key is passed to ChatGroq explicitly; the Google key must live in the
# process environment because GoogleGenerativeAIEmbeddings reads it from there.
groq_api_key = os.getenv("GROQ_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key:
    # Fix: the previous `os.environ["GOOGLE_API_KEY"] = os.getenv(...)` raised
    # TypeError at startup when the variable was unset (os.environ values must
    # be str, not None). A missing key is now reported by the embedding client
    # at the point of use instead of crashing the whole app on import.
    os.environ["GOOGLE_API_KEY"] = google_api_key
19 |
# Browser-tab metadata (title + icon) and the centered page layout.
st.set_page_config(page_title="RAG Query Model", page_icon="favicon.ico", layout="centered")
26 |
27 | # Custom CSS for styling
28 | st.markdown(
29 | """
# NOTE(review): the extraction stripped the <style> payload that belongs in
# this string (original source lines 30-64 are missing). Restore the CSS from
# version control before running; as dumped, this injects an empty style block.
30 |
65 | """,
66 | unsafe_allow_html=True,
67 | )
68 |
69 | # Display the title
# NOTE(review): the HTML heading markup wrapping "RAGify Query Model" was also
# stripped here (the call uses unsafe_allow_html=True, so a styled tag was
# presumably present) — confirm against the repository.
70 | st.markdown("
RAGify Query Model
", unsafe_allow_html=True)
71 |
# Sidebar: branding, PDF upload, model selection, and embedding generation.
with st.sidebar:
    col1, col2 = st.columns([0.2, 0.8])
    with col1:
        st.image("favicon.ico", width=25)

    with col2:
        # NOTE(review): the HTML wrapper around this author credit was stripped
        # during extraction (the call uses unsafe_allow_html=True) — restore the
        # original markup from version control.
        st.markdown("Rida Naz", unsafe_allow_html=True)

    uploaded_files = st.file_uploader("Upload your PDFs", type="pdf", accept_multiple_files=True)
    model_name = st.selectbox(
        "Select Model",
        options=["llama-3.3-70b-versatile", "llama-3.3-70b-specdec", "llama3-70b-8192"],
        index=0,
        help="Select the model for generating embeddings and answering queries.",
    )

    if uploaded_files and st.button("Generate Embeddings"):
        with st.spinner("Generating embeddings..."):
            # Drop any previously built index so stale chunks from an earlier
            # upload cannot leak into answers for the new document set.
            st.session_state.pop("vectors", None)
            st.session_state.pop("docs", None)

            # Fix: the original wrote into a hand-managed "./temp_pdfs" folder
            # and only removed it after a successful run, so any exception in
            # loading/embedding leaked the directory (and os.rmdir would then
            # fail on the next run). TemporaryDirectory guarantees cleanup.
            with tempfile.TemporaryDirectory(prefix="pdfs_") as temp_dir:
                for uploaded_file in uploaded_files:
                    # basename() guards against path components smuggled in the
                    # client-supplied filename (e.g. "../x.pdf").
                    safe_name = os.path.basename(uploaded_file.name)
                    with open(os.path.join(temp_dir, safe_name), "wb") as f:
                        f.write(uploaded_file.getbuffer())

                # Load every PDF, split into overlapping chunks, embed the
                # chunks with Google embeddings, and index them in FAISS.
                st.session_state.loader = PyPDFDirectoryLoader(temp_dir)
                st.session_state.docs = st.session_state.loader.load()
                st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
                st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs)
                st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
                st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)

            st.success("Embeddings generated successfully!")
116 |
# Chat model: the Groq-hosted LLM picked in the sidebar selectbox.
llm = ChatGroq(model_name=model_name, groq_api_key=groq_api_key)
119 |
# Dynamic system prompt
def generate_dynamic_prompt(doc_titles):
    """Build a system prompt that scopes the assistant to the uploaded PDFs.

    Args:
        doc_titles: Iterable of document file names to enumerate in the
            prompt (typically the uploaded PDF names).

    Returns:
        A prompt string instructing the model to answer strictly from the
        listed documents.
    """
    return f"""
    You are an AI assistant specializing in the content provided within the following documents: {', '.join(doc_titles)}.
    Answer user queries strictly based on the context from these documents. Provide clear, concise, and accurate responses.
    """
126 |
# User query input
user_query = st.text_input("What would you like to know?", placeholder="Enter your query here...")

# Process user query: retrieve relevant chunks and generate an answer.
if user_query:
    if "vectors" not in st.session_state:
        st.warning("Please generate embeddings first.")
    else:
        with st.spinner("Processing your query..."):
            # Fix: the original used metadata['source'].split('/')[-1], which
            # leaves the full path on Windows ('\\' separators — the README
            # documents Windows usage); os.path.basename handles both. `.get`
            # also guards against 'docs' being absent from session state.
            doc_titles = [os.path.basename(doc.metadata["source"]) for doc in st.session_state.get("docs", [])]
            system_prompt = generate_dynamic_prompt(doc_titles)

            # Prompt template: system instructions, then the retrieved context,
            # then the user's question. Double braces survive the outer f-string
            # so {context}/{input} remain template variables.
            prompt = ChatPromptTemplate.from_template(f"""
            {system_prompt}

            {{context}}

            Questions:{{input}}
            """)

            # Stuff the retrieved documents into the prompt and answer with the LLM.
            document_chain = create_stuff_documents_chain(llm, prompt)
            retriever = st.session_state.vectors.as_retriever()
            retrieval_chain = create_retrieval_chain(retriever, document_chain)

            response = retrieval_chain.invoke({'input': user_query})
            st.write(response['answer'])

# Reset session state so a fresh set of PDFs can be indexed from scratch.
if st.button("Reset App"):
    st.session_state.clear()
    st.rerun()
--------------------------------------------------------------------------------
/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RidaNaz/RAGify-Query-Model/2b3d2347169c2dfd85c550d89079ec54b8d1ee6d/favicon.ico
--------------------------------------------------------------------------------
/pdfs/QarmashiFood.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RidaNaz/RAGify-Query-Model/2b3d2347169c2dfd85c550d89079ec54b8d1ee6d/pdfs/QarmashiFood.pdf
--------------------------------------------------------------------------------
/pdfs/TaxCard-TaxYear2025.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RidaNaz/RAGify-Query-Model/2b3d2347169c2dfd85c550d89079ec54b8d1ee6d/pdfs/TaxCard-TaxYear2025.pdf
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | langchain
3 | langchain-community
4 | langchain-google-genai
5 | langchain-groq
6 | groq
7 | pypdf
8 | faiss-cpu
9 | python-dotenv
10 |
--------------------------------------------------------------------------------