import streamlit as st
import pickle
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.callbacks import get_openai_callback
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
import os

# Sidebar contents
with st.sidebar:
    st.title('LLM Chat App')
    st.markdown('''
    ## About
    This app is an LLM-powered chatbot built using:
    - [Streamlit](https://#/)
    - [LangChain](https://#/)
    - [OpenAI](https://#/) LLM model
    ''')
    add_vertical_space(33)
    st.write('Made with Me [Prompt Engineer](https://#)')

# Load OPENAI_API_KEY (and any other settings) from a local .env file.
# NOTE(review): the .env committed to this repository contains what looks
# like a live OpenAI API key — revoke it and purge it from version control.
load_dotenv()


def main():
    """Streamlit entry point: upload a PDF, index it with FAISS embeddings,
    and answer free-text questions about its contents via an OpenAI LLM.
    """
    st.header("Chat with PDF file")

    # Upload a pdf file
    pdf = st.file_uploader("Upload your PDF", type='pdf')

    if pdf is not None:
        pdf_reader = PdfReader(pdf)

        # Cache key for the on-disk vector store. splitext strips whatever
        # extension is present; the original `pdf.name[:-4]` silently
        # assumed a 3-character suffix.
        store_name = os.path.splitext(pdf.name)[0]

        # extract_text() can return None for image-only/scanned pages —
        # guard with `or ""`, otherwise the concatenation raises TypeError.
        text = "".join(
            (page.extract_text() or "") for page in pdf_reader.pages
        )

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        chunks = text_splitter.split_text(text=text)

        # Embedding: reuse a cached vector store when one exists on disk.
        # SECURITY NOTE(review): pickle.load executes arbitrary code from
        # the file — only load .pkl files this app itself produced.
        # FAISS.save_local / FAISS.load_local is the safer persistence API.
        if os.path.exists(f"{store_name}.pkl"):
            with open(f"{store_name}.pkl", "rb") as f:
                VectorStore = pickle.load(f)
            st.write("loaded")
        else:
            # Build the store *before* opening the file for writing, so a
            # failed embedding call (network/auth error) cannot leave an
            # empty, corrupt .pkl behind for the next run to load.
            embeddings = OpenAIEmbeddings()
            VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
            with open(f"{store_name}.pkl", 'wb') as f:
                pickle.dump(VectorStore, f)
            st.write("stored")

        # Accept user question
        query = st.text_input("Ask question about your PDF file:")

        prompt_template = """Please answer the question. 
{context}
question: {question}
"""

        if query:
            # Retrieve the 3 chunks most similar to the question.
            embedding_vector = OpenAIEmbeddings().embed_query(query)
            docs = VectorStore.similarity_search_by_vector(embedding_vector, k=3)

            PROMPT = PromptTemplate(
                template=prompt_template, input_variables=['context', 'question']
            )

            llm = OpenAI(temperature=0)
            chain = load_qa_chain(llm=llm, chain_type="stuff", prompt=PROMPT)

            # get_openai_callback tracks token usage/cost for this call;
            # it is printed to the server console, not the UI.
            with get_openai_callback() as cb:
                response = chain.run(input_documents=docs, question=query)
                print(cb)
            st.write(response)


if __name__ == '__main__':
    main()