├── Doc-Web-ai.PNG
├── LICENSE
├── README.md
├── ai_chat.py
└── requirements.txt
/Doc-Web-ai.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Safiullah-Rahu/Doc-Web-AI-Chat/1ae89549a98805d10c16c7e826d7d13c04c72730/Doc-Web-ai.PNG
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Safiullah Rahu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Doc-Web AI Chat 💬🌐
2 |
3 |
4 | Doc-Web AI Chat is the ultimate app powered by LangChain, OpenAI, and Streamlit that allows you to unlock the power of knowledge by effortlessly interacting with your PDFs. With Doc-Web AI Chat, you can seamlessly navigate through your PDF documents, ask questions directly, and augment your search with SERPAPI to gather additional information from the web. Get ready to get accurate and relevant answers to your burning questions in one convenient place.
5 |
6 | ## Features 📚🔎
7 |
8 | Doc-Web AI Chat offers the following key features:
9 |
10 | - **Interact:** Seamlessly navigate through your PDF documents and ask questions directly.
11 | - **Augmented Search:** Access an extensive web search using SERPAPI to gather additional information.
12 | - **Get Answers:** Leverage advanced AI algorithms to provide accurate and relevant answers to your queries.
13 |
14 | ## Installation
15 |
16 | To run Doc-Web AI Chat, follow these steps:
17 |
18 | 1. Clone this repository to your local machine.
19 | 2. Navigate to the project directory.
20 |
21 | ```bash
22 | git clone https://github.com/Safiullah-Rahu/Doc-Web-AI-Chat.git
23 | cd Doc-Web-AI-Chat
24 | ```
25 | 3. Install the required packages using `pip` with the provided `requirements.txt` file.
26 | ```bash
27 | pip install -r requirements.txt
28 | ```
29 | ## Usage
30 |
31 | To start Doc-Web AI Chat, run the following command:
32 | ```bash
33 | streamlit run ai_chat.py
34 | ```
35 | This command will launch the Doc-Web AI Chat app in your default web browser. You can then start interacting with your PDFs, asking questions, and accessing augmented search capabilities.
36 |
37 |
38 |
39 | ## Feedback and Contributions
40 | If you have any feedback, suggestions, or issues related to Doc-Web AI Chat, please open an issue on the GitHub repository. Contributions are also welcome! If you would like to contribute to Doc-Web AI Chat, please follow the guidelines outlined in the Contribution Guidelines.
41 |
42 | ## License
43 | Doc-Web AI Chat is licensed under the MIT License.
44 |
45 | ---
46 |
47 |
Doc-Web AI Chat is a project powered by LangChain, OpenAI, and Streamlit.
48 |
--------------------------------------------------------------------------------
/ai_chat.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from streamlit_chat import message
3 | from langchain.chat_models import ChatOpenAI
4 | from langchain.chains import ConversationalRetrievalChain
5 | from langchain.prompts.prompt import PromptTemplate
6 | from langchain.callbacks import get_openai_callback
7 | from langchain import LLMChain
8 | from langchain.chains.llm import LLMChain
9 | from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
10 | from langchain.chains.question_answering import load_qa_chain
11 | import os
12 | import pickle
13 | import tempfile
14 | import pandas as pd
15 | import pdfplumber
16 | from langchain.document_loaders.csv_loader import CSVLoader
17 | from langchain.vectorstores import FAISS
18 | from langchain.embeddings.openai import OpenAIEmbeddings
19 | from langchain.document_loaders import PyPDFLoader
20 | from langchain.document_loaders import TextLoader
21 | from langchain.text_splitter import RecursiveCharacterTextSplitter
22 | from langchain.callbacks import get_openai_callback
23 | import sys
24 | import re
25 | from dotenv import load_dotenv
26 | from io import BytesIO
27 | from io import StringIO
28 | import datetime
29 | import json
30 | import openai
31 | import re
32 | from tqdm.auto import tqdm
33 | from typing import List, Union
34 | import zipfile
35 |
36 | from langchain.agents import create_csv_agent
37 | from langchain.chains import RetrievalQA
38 | from langchain.chat_models import ChatOpenAI
39 |
40 | # Langchain imports
41 | from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
42 | from langchain.prompts import BaseChatPromptTemplate, ChatPromptTemplate
43 | from langchain import SerpAPIWrapper, LLMChain
44 | from langchain.schema import AgentAction, AgentFinish, HumanMessage, SystemMessage
45 | # LLM wrapper
46 | from langchain.chat_models import ChatOpenAI
47 | from langchain import OpenAI
48 | # Conversational memory
49 | from langchain.memory import ConversationBufferWindowMemory
50 |
51 |
class Utilities:
    """Streamlit helpers for API-key entry, PDF upload and chatbot setup."""

    @staticmethod
    def load_api_key():
        """
        Returns the OpenAI API key.

        The key is read from the OPENAI_API_KEY environment variable when a
        .env file exists in the working directory and the variable is set;
        otherwise it is taken from a password field in the sidebar (which is
        an empty string until the user types something).
        """
        if os.path.exists(".env") and os.environ.get("OPENAI_API_KEY") is not None:
            user_api_key = os.environ["OPENAI_API_KEY"]
            st.sidebar.success("API key loaded from .env", icon="🚀")
        else:
            user_api_key = st.sidebar.text_input(
                label="#### Enter OpenAI API key 👇", placeholder="Paste your openAI API key, sk-", type="password"
            )
            if user_api_key:
                st.sidebar.success("API keys loaded", icon="🚀")

        return user_api_key

    @staticmethod
    def handle_upload():
        """
        Shows the PDF uploader in the sidebar and previews the extracted text.

        Returns the uploader's value: a list of uploaded files, empty while
        nothing has been uploaded. When the list is empty the user is
        prompted to upload and the chat is flagged for reset.
        """
        uploaded_file = st.sidebar.file_uploader(
            "upload", type=["pdf"], label_visibility="collapsed", accept_multiple_files=True
        )

        # With accept_multiple_files=True the uploader yields a (possibly
        # empty) list rather than None, so test truthiness, not identity.
        if uploaded_file:
            file_container = st.expander("Your PDF file :")
            for pdf_file in uploaded_file:
                with pdfplumber.open(pdf_file) as pdf:
                    # extract_text() may yield None for pages without a text
                    # layer in some pdfplumber versions -- treat as empty.
                    pdf_text = "".join(
                        (page.extract_text() or "") + "\n\n" for page in pdf.pages
                    )
                file_container.write(pdf_text)
        else:
            st.sidebar.info(
                "👆 Upload your PDF file to get started..!"
            )
            st.session_state["reset_chat"] = True

        return uploaded_file

    @staticmethod
    def setup_chatbot(uploaded_file, model, temperature,):
        """
        Builds a Chatbot for the uploaded files.

        Creates an Embedder, asks it for the retriever over the uploaded
        files, wraps that in a Chatbot with the given model and temperature,
        and marks the session as ready.
        """
        embeds = Embedder()
        with st.spinner("Processing..."):
            # Build the retriever over the uploaded documents
            vectors = embeds.getDocEmbeds(uploaded_file, "Docs")

            # Create a Chatbot instance with the specified model and temperature
            chatbot = Chatbot(model, temperature, vectors)
        st.session_state["ready"] = True

        return chatbot
123 |
def count_tokens_agent(agent, query):
    """
    Runs the agent on the query inside an OpenAI callback, writes the total
    number of tokens spent to the page, and returns the agent's result
    """
    with get_openai_callback() as usage:
        answer = agent(query)
        st.write(f'Spent a total of {usage.total_tokens} tokens')

    return answer
133 |
class Layout:
    """Renders the static page pieces: header, missing-key notice, prompt form."""

    def show_header(self):
        """
        Writes the app header to the page
        """
        header_markup = """
            Ask Anything: Your Personal AI Assistant
            """
        st.markdown(header_markup, unsafe_allow_html=True)

    def show_api_key_missing(self):
        """
        Writes the notice shown while no API key has been entered
        """
        notice_markup = """

            """
        st.markdown(notice_markup, unsafe_allow_html=True)

    def prompt_form(self):
        """
        Renders the query form and returns (is_ready, user_input), where
        is_ready is truthy only when the form was submitted with non-empty text
        """
        with st.form(key="my_form", clear_on_submit=True):
            query_text = st.text_area(
                "Query:",
                placeholder="Ask me anything about the document...",
                key="input",
                label_visibility="collapsed",
            )
            sent = st.form_submit_button(label="Send")

        return (sent and query_text), query_text
175 |
176 |
class Sidebar:
    """Sidebar widgets: about text, reset button and model/temperature state."""

    MODEL_OPTIONS = ["gpt-3.5-turbo", "gpt-4"]
    TEMPERATURE_MIN_VALUE = 0.0
    TEMPERATURE_MAX_VALUE = 1.0
    TEMPERATURE_DEFAULT_VALUE = 0.0
    TEMPERATURE_STEP = 0.01

    @staticmethod
    def about():
        """Writes the descriptive paragraphs into an "About" sidebar expander."""
        about = st.sidebar.expander("🧠 About")
        sections = [
            "#### Welcome to our AI Assistant, a cutting-edge solution to help you find the answers you need quickly and easily. Our AI Assistant is designed to provide you with the most relevant information from a variety of sources, including PDFs, CSVs, and web search.",
            "#### With our AI Assistant, you can ask questions on any topic, and our intelligent algorithms will search through our vast database to provide you with the most accurate and up-to-date information available. Whether you need help with a school assignment, are researching a topic for work, or simply want to learn something new, our AI Assistant is the perfect tool for you.",
        ]
        for section in sections:
            about.write(section)

    @staticmethod
    def reset_chat_button():
        """Renders the reset button and makes sure the reset flag exists."""
        if st.button("Reset chat"):
            st.session_state["reset_chat"] = True
        st.session_state.setdefault("reset_chat", False)

    def model_selector(self):
        """Renders a model selectbox and stores the choice in session state."""
        model = st.selectbox(label="Model", options=self.MODEL_OPTIONS)
        st.session_state["model"] = model

    def temperature_slider(self):
        """Renders a temperature slider and stores the value in session state."""
        temperature = st.slider(
            label="Temperature",
            min_value=self.TEMPERATURE_MIN_VALUE,
            max_value=self.TEMPERATURE_MAX_VALUE,
            value=self.TEMPERATURE_DEFAULT_VALUE,
            step=self.TEMPERATURE_STEP,
        )
        st.session_state["temperature"] = temperature

    def csv_agent_button(self, uploaded_file):
        """Only ensures the "show_csv_agent" flag exists; uploaded_file is unused."""
        st.session_state.setdefault("show_csv_agent", False)

    def show_options(self, uploaded_file):
        """
        Renders the "Tools" expander and mirrors the module-level model and
        temperature selections into session state.
        """
        with st.sidebar.expander("🛠️ Tools", expanded=False):

            self.reset_chat_button()
            self.csv_agent_button(uploaded_file)
            # self.model_selector()
            # self.temperature_slider()
            # Assign on every run: setdefault() would freeze the first value
            # and ignore later changes made with the sidebar widgets.
            st.session_state["model"] = model_name
            st.session_state["temperature"] = temperature
227 |
# Module-level label for the uploaded documents; ChatHistory reads it when
# building the assistant's greeting.
original_filename="Docs"
class Embedder:
    """Builds embeddings/retrievers for uploaded documents."""

    def __init__(self):
        self.PATH = "embeddings"
        self.createEmbeddingsDir()

    def createEmbeddingsDir(self):
        """
        Creates the directory used to store the embeddings vectors.

        Safe to call repeatedly: an existing directory is left untouched.
        """
        # exist_ok avoids the check-then-create race of exists() + mkdir()
        os.makedirs(self.PATH, exist_ok=True)

    def storeDocEmbeds(self, file, original_filename="Docs"):
        """
        Stores document embeddings using Langchain and FAISS.

        `file` is written as-is to a temporary file (so it must be
        bytes-like), loaded as a PDF, split into chunks, embedded, and the
        resulting FAISS store is pickled to
        "<PATH>/<original_filename>.pkl".
        """
        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as tmp_file:
            tmp_file.write(file)
            tmp_file_path = tmp_file.name

        try:
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=2000,
                chunk_overlap=50,
                length_function=len,
            )
            loader = PyPDFLoader(file_path=tmp_file_path)
            data = loader.load_and_split(text_splitter)

            embeddings = OpenAIEmbeddings()
            vectors = FAISS.from_documents(data, embeddings)
        finally:
            # Always drop the temp copy, even when loading/embedding fails
            os.remove(tmp_file_path)

        # Save the vectors to a pickle file
        with open(f"{self.PATH}/{original_filename}.pkl", "wb") as f:
            pickle.dump(vectors, f)

    def getDocEmbeds(self, file, original_filename):
        """
        Returns a similarity-search retriever over the uploaded files.

        The files are read with load_docs, split into 500-character chunks
        without overlap, and indexed via create_retriever.
        `original_filename` is accepted for compatibility but not used.
        """
        # RecursiveCharacterTextSplitter is the default and only text splitter
        splitter_type = "RecursiveCharacterTextSplitter"
        loaded_text = load_docs(file)

        # Split the document into chunks
        splits = split_texts(loaded_text, chunk_size=500,
                             overlap=0, split_method=splitter_type)
        embeddings = OpenAIEmbeddings()
        vectors = create_retriever(embeddings, splits, retriever_type="SIMILARITY SEARCH")
        return vectors
291 |
class ChatHistory:
    """Keeps the chat transcript in Streamlit session state."""

    # Default location used by load()/save(); override per instance if needed.
    history_file = "chat_history.json"

    def __init__(self):
        # Share one list object with session state so both views stay in sync
        self.history = st.session_state.get("history", [])
        st.session_state["history"] = self.history

    def default_greeting(self):
        return "Hey! 👋"

    def default_prompt(self, topic):
        return f"Hello ! Ask me anything about {topic} 🤗"

    def initialize_user_history(self):
        st.session_state["user"] = [self.default_greeting()]

    def initialize_assistant_history(self, uploaded_file):
        # The greeting uses the module-level original_filename label;
        # the uploaded_file argument is not consulted.
        st.session_state["assistant"] = [self.default_prompt(original_filename)]

    def initialize(self, uploaded_file):
        """Seeds the user/assistant message lists if they do not exist yet."""
        if "assistant" not in st.session_state:
            self.initialize_assistant_history(original_filename)
        if "user" not in st.session_state:
            self.initialize_user_history()

    def reset(self, uploaded_file):
        """Clears the transcript and restores the default greetings."""
        # Rebind both references to the same fresh list to keep them aliased
        self.history = []
        st.session_state["history"] = self.history

        self.initialize_user_history()
        self.initialize_assistant_history(original_filename)
        st.session_state["reset_chat"] = False

    def append(self, mode, message):
        st.session_state[mode].append(message)

    def generate_messages(self, container):
        """Renders user/assistant message pairs into the given container."""
        if st.session_state["assistant"]:
            with container:
                pairs = zip(st.session_state["user"], st.session_state["assistant"])
                for i, (user_msg, assistant_msg) in enumerate(pairs):
                    message(
                        user_msg,
                        is_user=True,
                        key=f"{i}_user",
                        avatar_style="big-smile",
                    )
                    message(assistant_msg, key=str(i), avatar_style="thumbs")

    def load(self):
        """
        Loads the history from `history_file` (JSON) if the file exists.

        The list is updated in place so any alias of `self.history` sees the
        loaded entries. Two-element JSON arrays are restored as tuples.
        """
        if not os.path.exists(self.history_file):
            return
        with open(self.history_file, "r", encoding="utf-8") as f:
            stored = json.load(f)
        self.history[:] = [
            tuple(turn) if isinstance(turn, list) else turn for turn in stored
        ]

    def save(self):
        """
        Writes the history to `history_file` as JSON.

        JSON is used because history entries are (question, answer) tuples,
        which cannot be joined as plain lines.
        """
        with open(self.history_file, "w", encoding="utf-8") as f:
            json.dump(self.history, f, ensure_ascii=False)
346 |
347 |
348 | from langchain.chains.question_answering import load_qa_chain
class Chatbot:
    """Conversational question answering over a document retriever."""

    def __init__(self, model_name, temperature, vectors):
        self.model_name = model_name
        self.temperature = temperature
        # `vectors` is used directly as the chain's retriever
        self.vectors = vectors

    _template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question.
        Chat History:
        {chat_history}
        Follow-up entry: {question}
        Standalone question:"""
    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

    qa_template = """You are a friendly conversational assistant, designed to answer questions and chat with the user from a contextual file.
        You receive data from a user's files and a question, you must help the user find the information they need.
        Your answers must be user-friendly and respond to the user.
        You will get questions and contextual information.
        question: {question}
        =========
        context: {context}
        ======="""
    QA_PROMPT = PromptTemplate(template=qa_template, input_variables=["question", "context"])

    def conversational_chat(self, query):
        """
        Answers `query` with a ConversationalRetrievalChain and returns the
        answer text.

        The (query, answer) pair is appended to st.session_state["history"],
        which is also what the chain receives as chat history.
        """
        # NOTE(review): this reads the module-level model_name/temperature,
        # not self.model_name/self.temperature -- confirm which is intended
        # before switching, since callers currently rely on the globals.
        llm = ChatOpenAI(model_name=model_name, temperature=temperature)

        retriever = self.vectors

        # Use the class-level prompt: a bare CONDENSE_QUESTION_PROMPT here
        # would resolve to the module-level import, leaving the custom
        # template above unused.
        question_generator = LLMChain(llm=llm, prompt=self.CONDENSE_QUESTION_PROMPT, verbose=True)
        doc_chain = load_qa_chain(
            llm=llm,
            prompt=self.QA_PROMPT,
            verbose=True,
            chain_type="stuff",
        )

        chain = ConversationalRetrievalChain(
            retriever=retriever,
            combine_docs_chain=doc_chain,
            question_generator=question_generator,
            verbose=True,
            return_source_documents=True,
        )

        chain_input = {"question": query, "chat_history": st.session_state["history"]}
        result = chain(chain_input)

        st.session_state["history"].append((query, result["answer"]))
        return result["answer"]
400 |
def count_tokens_chain(chain, query):
    """
    Runs the chain on the query inside an OpenAI callback, writes the number
    of tokens used to the page, and returns the chain's result
    """
    with get_openai_callback() as usage:
        answer = chain.run(query)
        st.write(f'###### Tokens used in this conversation : {usage.total_tokens} tokens')
    return answer
406 | # from langchain.vectorstores import Chroma
407 | # from langchain.document_loaders import UnstructuredPDFLoader
408 | import PyPDF2
@st.cache_data
def load_docs(files):
    """
    Reads the uploaded files and returns their text concatenated in order.

    `.pdf` files are read page by page with PyPDF2, `.txt` files are decoded
    as UTF-8. The extension match is case-insensitive. Any other extension
    triggers a warning and contributes no text.
    """
    st.sidebar.info("`Reading doc ...`")
    parts = []
    for uploaded in files:
        extension = os.path.splitext(uploaded.name)[1].lower()
        if extension == ".pdf":
            reader = PyPDF2.PdfReader(uploaded)
            # Guard against extract_text() yielding None for a page
            parts.extend(page.extract_text() or "" for page in reader.pages)
        elif extension == ".txt":
            parts.append(uploaded.getvalue().decode("utf-8"))
        else:
            st.warning('Please provide txt or pdf.', icon="⚠️")
    return "".join(parts)
428 |
429 |
430 |
431 |
@st.cache_resource
def create_retriever(_embeddings, splits, retriever_type):
    """
    Builds a retriever over the text chunks in `splits`.

    retriever_type:
      - "SIMILARITY SEARCH": FAISS vector store retriever returning 5 chunks.
        If the store cannot be built, an error is shown and None is returned.
      - "SUPPORT VECTOR MACHINES": SVMRetriever over the same chunks.

    Raises ValueError for any other retriever_type.
    """
    if retriever_type == "SIMILARITY SEARCH":
        try:
            vectorstore = FAISS.from_texts(splits, _embeddings)
        except (IndexError, ValueError) as e:
            st.error(f"Error creating vectorstore: {e}")
            return None
        # The result count is configured through search_kwargs; a bare
        # `k=5` keyword is not a retriever setting (see LangChain docs).
        return vectorstore.as_retriever(search_kwargs={"k": 5})

    if retriever_type == "SUPPORT VECTOR MACHINES":
        # Imported here because the module never imports SVMRetriever
        from langchain.retrievers import SVMRetriever

        return SVMRetriever.from_texts(splits, _embeddings)

    raise ValueError(f"Unsupported retriever_type: {retriever_type!r}")
445 |
@st.cache_resource
def split_texts(text, chunk_size, overlap, split_method):
    """
    Splits `text` into chunks with a RecursiveCharacterTextSplitter.

    IN: text, chunk size, overlap, split_method (accepted but not consulted)
    OUT: list of str chunks; the app run is stopped when no chunk is produced
    """
    st.sidebar.info("`Splitting doc ...`")

    chunker = RecursiveCharacterTextSplitter(
        separators=[" ", ",", "\n"],
        chunk_size=chunk_size,
        chunk_overlap=overlap,
    )
    chunks = chunker.split_text(text)

    if chunks:
        return chunks

    st.error("Failed to split document")
    st.stop()
    return chunks
def doc_search(temperature):
    """
    Runs the "Chat with Docs + Web Search" page.

    Shows a PDF uploader, indexes the uploaded files, and answers each
    submitted query with an agent that can use two tools: SerpAPI web search
    and a RetrievalQA chain over the uploaded documents. Questions and
    answers are kept in st.session_state["past"] / ["generated"] and rendered
    newest first.

    Reads the module-level user_serpapi_key, model_name, top_p, freq_penalty
    and output_parser; `temperature` is applied to both LLMs.
    """
    st.sidebar.success("Upload PDF To Chat With!", icon="👇")
    uploaded_file = st.sidebar.file_uploader("Upload PDF file here!", type="pdf", accept_multiple_files=True)

    # SerpAPIWrapper() cannot be constructed without a key
    if not user_serpapi_key:
        st.warning("Enter your Serp API key first!!")
        return
    os.environ["SERPAPI_API_KEY"] = user_serpapi_key

    # The multi-file uploader yields an empty list (never None) until a file
    # is uploaded, so test truthiness.
    if not uploaded_file:
        st.warning("Upload PDF File first!!")
        return

    search = SerpAPIWrapper()
    # Prompt template which can interpolate the history
    template_with_history = """You are SearchGPT, a professional search engine who provides informative answers to users. Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Remember to give detailed, informative answers

Previous conversation history:
{history}

New question: {input}
{agent_scratchpad}"""

    def search_chroma(query):
        """Answers `query` with the two-tool agent and returns its output."""
        # `db` is bound further down, before this function is ever called
        retriever = db
        retrieval_llm = ChatOpenAI(model_name=model_name, temperature=temperature, top_p=top_p, frequency_penalty=freq_penalty)
        llm = ChatOpenAI(model_name=model_name, temperature=temperature)
        docs_qa = RetrievalQA.from_chain_type(llm=retrieval_llm, chain_type="stuff", retriever=retriever)
        expanded_tools = [
            Tool(
                name="Search",
                func=search.run,
                description="useful for when you need to answer questions about current events"
            ),
            Tool(
                name='Knowledge Base',
                func=docs_qa.run,
                description="Useful for general questions about how to do things and for details on interesting topics. Input should be a fully formed question."
            ),
        ]
        prompt_with_history = CustomPromptTemplate(
            template=template_with_history,
            tools=expanded_tools,
            input_variables=["input", "intermediate_steps", "history"]
        )
        llm_chain = LLMChain(llm=llm, prompt=prompt_with_history)
        multi_tool_names = [tool.name for tool in expanded_tools]
        multi_tool_agent = LLMSingleActionAgent(
            llm_chain=llm_chain,
            output_parser=output_parser,
            stop=["\nObservation:"],
            allowed_tools=multi_tool_names
        )
        multi_tool_memory = ConversationBufferWindowMemory(k=0)
        multi_tool_executor = AgentExecutor.from_agent_and_tools(
            agent=multi_tool_agent, tools=expanded_tools, verbose=True, memory=multi_tool_memory
        )
        return multi_tool_executor.run(query)

    def prompt_form():
        """
        Displays the prompt form and returns (is_ready, user_input)
        """
        with st.form(key="my_form", clear_on_submit=True):
            user_input = st.text_area(
                "Query:",
                placeholder="Ask me anything about the document...",
                key="input_",
                label_visibility="collapsed",
            )
            submit_button = st.form_submit_button(label="Send")

        is_ready = submit_button and user_input
        return is_ready, user_input

    embeddings = OpenAIEmbeddings()
    # RecursiveCharacterTextSplitter is the default and only text splitter
    splitter_type = "RecursiveCharacterTextSplitter"
    loaded_text = load_docs(uploaded_file)
    st.write("Documents uploaded and processed.")

    # Split the document into chunks
    splits = split_texts(loaded_text, chunk_size=500,
                         overlap=0, split_method=splitter_type)
    # Display the number of text chunks
    num_chunks = len(splits)
    st.sidebar.write(f"Number of text chunks: {num_chunks}")
    db = create_retriever(embeddings, splits, retriever_type="SIMILARITY SEARCH")
    if db is None:
        # create_retriever has already reported the failure on the page
        return
    st.write("Write your query here:💬")

    if 'generated' not in st.session_state:
        st.session_state['generated'] = ['I am ready to help you sir']

    if 'past' not in st.session_state:
        st.session_state['past'] = ['Hey there!']

    is_ready, user_input = prompt_form()
    if is_ready:
        output = search_chroma(user_input)
        st.session_state.past.append(user_input)
        st.session_state.generated.append(output)

    if st.session_state['generated']:
        # Newest exchange first
        for i in range(len(st.session_state['generated']) - 1, -1, -1):
            message(st.session_state["generated"][i], key=str(i))
            message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
def init():
    """Loads variables from a .env file and applies the Streamlit page configuration."""
    load_dotenv()
    st.set_page_config(layout="wide", page_icon="💬", page_title="AI Chatbot 🤖")
601 |
def main(temperature):
    """
    Runs the "Chat with Docs" page.

    Shows the header, then either the missing-key notice or the upload +
    chat flow, and finally the sidebar "About" section. The OpenAI key is
    read from the module-level user_api_key; the `temperature` argument is
    not consulted here (the chatbot is built from session state).
    """
    # Instantiate the main components
    layout = Layout()
    sidebar = Sidebar()
    utils = Utilities()

    layout.show_header()

    if user_api_key:
        os.environ["OPENAI_API_KEY"] = user_api_key

        uploaded_file = utils.handle_upload()

        if uploaded_file:
            # Chat history must exist before the sidebar tools are drawn
            history = ChatHistory()
            sidebar.show_options(uploaded_file)

            try:
                st.session_state["chatbot"] = utils.setup_chatbot(
                    uploaded_file,
                    st.session_state["model"],
                    st.session_state["temperature"],
                )

                if st.session_state["ready"]:
                    # Responses are rendered above the prompt form
                    response_container = st.container()
                    prompt_container = st.container()

                    with prompt_container:
                        is_ready, user_input = layout.prompt_form()

                        history.initialize(uploaded_file)

                        # Honour a pending reset request
                        if st.session_state["reset_chat"]:
                            history.reset(uploaded_file)

                        if is_ready:
                            # Record the question, ask the bot, record the answer
                            history.append("user", user_input)
                            answer = st.session_state["chatbot"].conversational_chat(user_input)
                            history.append("assistant", answer)

                    history.generate_messages(response_container)

            except Exception as e:
                st.error(f"Error: {str(e)}")
    else:
        layout.show_api_key_missing()

    sidebar.about()
664 |
665 | # Set up a prompt template
class CustomPromptTemplate(BaseChatPromptTemplate):
    """Chat prompt that injects the tool list and the agent scratchpad."""

    # The template to use
    template: str
    # The list of tools available
    tools: List[Tool]

    def format_messages(self, **kwargs) -> List[HumanMessage]:
        """
        Formats the template into a single HumanMessage.

        Pops "intermediate_steps" ((AgentAction, observation) pairs) from
        kwargs and renders them as the "agent_scratchpad" variable; also
        fills "tools" (one "name: description" line per tool) and
        "tool_names" (comma-separated). Remaining kwargs are passed to the
        template unchanged.
        """
        intermediate_steps = kwargs.pop("intermediate_steps")

        # Each step contributes its log, the observation, and a trailing
        # "Thought: " cue for the model to continue from
        kwargs["agent_scratchpad"] = "".join(
            f"{action.log}\nObservation: {observation}\nThought: "
            for action, observation in intermediate_steps
        )

        kwargs["tools"] = "\n".join(f"{tool.name}: {tool.description}" for tool in self.tools)
        kwargs["tool_names"] = ", ".join(tool.name for tool in self.tools)

        formatted = self.template.format(**kwargs)
        return [HumanMessage(content=formatted)]
692 |
class CustomOutputParser(AgentOutputParser):
    """Parses the LLM text into the agent's next action or its final answer."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """
        Returns AgentFinish when the output contains "Final Answer:",
        otherwise the AgentAction described by the "Action:" /
        "Action Input:" lines.

        Raises ValueError when neither form can be found.
        """
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is a dictionary with a single `output` key
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )

        # Parse out the action and action input
        regex = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)

        # Without this check an unparseable reply would fail with an opaque
        # AttributeError on `match.group`
        if match is None:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")

        action = match.group(1).strip()
        action_input = match.group(2)

        # Return the action and action input
        return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
719 |
# Shared parser instance used by the web-search agent in doc_search()
output_parser = CustomOutputParser()

# Chat modes offered in the selectbox
functions = [
    "Chat with Docs",
    "Chat with Docs + Web Search"
]

st.set_page_config(layout="wide", page_icon="💬", page_title="AI Chatbot 🤖")
st.markdown(
    """


Doc-Web AI Chat 💬

AI Chat with Docs and Web!

    """,
    unsafe_allow_html=True,
)

st.subheader("Select any chat type👇")
# Create a selectbox with the chat modes as options
selected_function = st.selectbox("Select a Chat", functions)

# Populate os.environ from .env; without this the "loaded from .env" branches
# below only fire when the variables were exported some other way.
load_dotenv()

if os.path.exists(".env") and os.environ.get("OPENAI_API_KEY") is not None:
    user_api_key = os.environ["OPENAI_API_KEY"]
    st.sidebar.success("API key loaded from .env", icon="🚀")
else:
    user_api_key = st.sidebar.text_input(
        label="#### Enter OpenAI API key 👇", placeholder="Paste your openAI API key, sk-", type="password"
    )
    if user_api_key:
        st.sidebar.success("OpenAI API key loaded", icon="🚀")

MODEL_OPTIONS = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"]
max_tokens = {"gpt-4": 7000, "gpt-4-32k": 31000, "gpt-3.5-turbo": 3000}
TEMPERATURE_MIN_VALUE = 0.0
TEMPERATURE_MAX_VALUE = 1.0
TEMPERATURE_DEFAULT_VALUE = 0.9
TEMPERATURE_STEP = 0.01
model_name = st.sidebar.selectbox(label="Model", options=MODEL_OPTIONS)
top_p = st.sidebar.slider("Top_P", 0.0, 1.0, 1.0, 0.1)
freq_penalty = st.sidebar.slider("Frequency Penalty", 0.0, 2.0, 0.0, 0.1)
temperature = st.sidebar.slider(
    label="Temperature",
    min_value=TEMPERATURE_MIN_VALUE,
    max_value=TEMPERATURE_MAX_VALUE,
    value=TEMPERATURE_DEFAULT_VALUE,
    step=TEMPERATURE_STEP,
)

if selected_function == "Chat with Docs":
    main(temperature)
elif selected_function == "Chat with Docs + Web Search":
    st.markdown(
        """


Enter your Serp API key to start conversation with Docs + Web Search

        """,
        unsafe_allow_html=True,
    )
    if os.path.exists(".env") and os.environ.get("SERPAPI_API_KEY") is not None:
        user_serpapi_key = os.environ["SERPAPI_API_KEY"]
        st.sidebar.success("API key loaded from .env", icon="🚀")
    else:
        user_serpapi_key = st.sidebar.text_input(
            label="#### Enter SERP API key 👇", placeholder="Paste your SERP API key, sk-", type="password"
        )
        if user_serpapi_key:
            st.sidebar.success("Serp API key loaded", icon="🚀")

    # Start the web-search chat only once both keys are available; running
    # it with empty keys fails while constructing the API clients.
    if user_api_key and user_serpapi_key:
        os.environ["OPENAI_API_KEY"] = user_api_key
        doc_search(temperature)
    else:
        st.warning("Enter your OpenAI and Serp API keys to start the chat.")
else:
    st.warning("You haven't selected any AI Chat!!")
795 |
796 |
797 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | streamlit_chat
3 | langchain
4 | pdfplumber
5 | pandas
6 | openai
7 | tqdm
8 | python-dotenv
9 | pypdf
10 | tiktoken
11 | faiss-cpu
12 | google-search-results
13 | PyPDF2
14 |
--------------------------------------------------------------------------------