├── .env
├── README.md
├── app.py
├── mental_health_Document.pdf
├── pic.jpg
└── requirements.txt

--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
GROQ_API_KEY=
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
image

--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
import os

import PyPDF2
import chainlit as cl
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_groq import ChatGroq

# Load environment variables from the .env file
load_dotenv()

# Read the Groq API key used to initialize the conversation chain
groq_api_key = os.environ["GROQ_API_KEY"]

# Initialize the Groq chat model with the API key, model name, and settings
llm_groq = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="mixtral-8x7b-32768",
    temperature=0.2,
)


@cl.on_chat_start
async def on_chat_start():
    files = None  # Variable to store the uploaded files

    # Wait for the user to upload a file
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a PDF file to begin!",
            accept=["application/pdf"],
            max_size_mb=100,  # Optionally limit the file size
            timeout=180,  # Timeout (in seconds) for the user response
        ).send()

    file = files[0]  # Get the first uploaded file
    print(file)  # Print the file object for debugging

    # Send an image with the local file path
    elements = [
        cl.Image(name="image", display="inline", path="pic.jpg")
    ]
    # Inform the user that processing has started
    msg = cl.Message(content=f"Processing `{file.name}`...", elements=elements)
    await msg.send()

    # Read the PDF file; extract_text() can return None for pages with no
    # extractable text, so fall back to an empty string
    pdf = PyPDF2.PdfReader(file.path)
    pdf_text = ""
    for page in pdf.pages:
        pdf_text += page.extract_text() or ""

    # Split the text into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=50)
    texts = text_splitter.split_text(pdf_text)

    # Create metadata for each chunk
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    # Create a Chroma vector store (embeddings are served by a local Ollama instance)
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    docsearch = await cl.make_async(Chroma.from_texts)(
        texts, embeddings, metadatas=metadatas
    )

    # Initialize message history for the conversation
    message_history = ChatMessageHistory()

    # Memory for conversational context
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # Create a chain that uses the Chroma vector store
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm_groq,
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )
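
    # A small tuning sketch (an assumption, not part of the original setup):
    # as_retriever() above uses Chroma's default similarity search; to cap how
    # many chunks get stuffed into each prompt, the retriever could instead be
    # built with search_kwargs, e.g.
    #
    #     retriever = docsearch.as_retriever(search_kwargs={"k": 3})
    #
    # and passed to ConversationalRetrievalChain.from_llm in place of the
    # default one.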

    # Let the user know that the system is ready
    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()

    # Store the chain in the user session
    cl.user_session.set("chain", chain)


@cl.on_message
async def main(message: cl.Message):
    # Retrieve the chain from the user session
    chain = cl.user_session.get("chain")
    # Callback handler that streams intermediate chain steps back asynchronously
    cb = cl.AsyncLangchainCallbackHandler()

    # Call the chain with the user's message content; with ainvoke, callbacks
    # are passed through the config dict rather than as a keyword argument
    res = await chain.ainvoke(message.content, config={"callbacks": [cb]})
    answer = res["answer"]
    source_documents = res["source_documents"]

    text_elements = []  # List to store text elements

    # Process source documents if available
    if source_documents:
        for source_idx, source_doc in enumerate(source_documents):
            source_name = f"source_{source_idx}"
            # Create the text element referenced in the message
            text_elements.append(
                cl.Text(content=source_doc.page_content, name=source_name)
            )
        source_names = [text_el.name for text_el in text_elements]

        # Add source references to the answer
        if source_names:
            answer += f"\nSources: {', '.join(source_names)}"
        else:
            answer += "\nNo sources found"

    # Return the results
    await cl.Message(content=answer, elements=text_elements).send()
--------------------------------------------------------------------------------
/mental_health_Document.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InsightEdge01/GroqPDFFastChatbot/bced389590cec161f46932e562c276ef9d1a2364/mental_health_Document.pdf
--------------------------------------------------------------------------------
/pic.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InsightEdge01/GroqPDFFastChatbot/bced389590cec161f46932e562c276ef9d1a2364/pic.jpg
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
#chainlit==1.0.200
chainlit
langchain
langchain_community
PyPDF2
chromadb
groq
langchain-groq
ollama
python-dotenv
--------------------------------------------------------------------------------
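
Usage note (a minimal sketch, assuming a local Ollama server for the
nomic-embed-text embeddings and a Groq API key filled into .env):

    ollama pull nomic-embed-text
    pip install -r requirements.txt
    chainlit run app.py -w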