Power of Laws 💬

├── README.md
├── db
    ├── c05505f9-768f-4252-a239-185598b4e57e
    │   ├── link_lists.bin
    │   ├── header.bin
    │   └── length.bin
    └── chroma.sqlite3
├── requirements.txt
└── app.py


/README.md:
--------------------------------------------------------------------------------
1 | # RAG-Embedding


--------------------------------------------------------------------------------
/db/c05505f9-768f-4252-a239-185598b4e57e/link_lists.bin:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/db/chroma.sqlite3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Hrishikesh332/RAG-Embedding/HEAD/db/chroma.sqlite3


--------------------------------------------------------------------------------
/db/c05505f9-768f-4252-a239-185598b4e57e/header.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Hrishikesh332/RAG-Embedding/HEAD/db/c05505f9-768f-4252-a239-185598b4e57e/header.bin


--------------------------------------------------------------------------------
/db/c05505f9-768f-4252-a239-185598b4e57e/length.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Hrishikesh332/RAG-Embedding/HEAD/db/c05505f9-768f-4252-a239-185598b4e57e/length.bin


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | PyPDF2
 2 | streamlit
 3 | openai
 4 | streamlit_option_menu
 5 | sentence-transformers
 6 | langchain==0.1.0
 7 | langchain_experimental==0.0.49
 8 | openai==1.7.1
 9 | tabulate==0.9.0
10 | InstructorEmbedding
11 | torch
12 | sentence-transformers
13 | chromadb
14 | pysqlite3-binary
15 | tiktoken
16 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import textwrap
  3 | from PyPDF2 import PdfReader
  4 | import streamlit as st
  5 | import streamlit.components.v1 as components
  6 | from streamlit_option_menu import option_menu
  7 | from openai import OpenAI
  8 | from langchain.text_splitter import CharacterTextSplitter
  9 | from langchain.embeddings.openai import OpenAIEmbeddings
 10 | from langchain import FAISS
 11 | from langchain.chains.question_answering import load_qa_chain
 12 | from langchain.llms import OpenAI
 13 | from langchain.callbacks import get_openai_callback
 14 | from langchain import OpenAI
 15 | from langchain.chains import RetrievalQA
 16 | from langchain_community.llms import OpenAI
 17 | from langchain_community.vectorstores import Chroma
 18 | from langchain.embeddings import HuggingFaceEmbeddings
 19 | __import__('pysqlite3')
 20 | import sys
 21 | sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
 22 | 
 23 | 
 24 | page_element="""
 25 | <style>
 26 | [data-testid="stAppViewContainer"]{
 27 | background-image: url("https://cdn.wallpapersafari.com/88/75/cLUQqJ.jpg");
 28 | background-size: cover;
 29 | }
 30 | [data-testid="stHeader"]{
 31 | background-color: rgba(0,0,0,0);
 32 | }
 33 | [data-testid="stSidebar"]> div:first-child{
 34 | background-image: url("https://mcdn.wallpapersafari.com/medium/89/87/X7GDE5.jpg");
 35 | background-size: cover;
 36 | }
 37 | </style>
 38 | 
 39 | """
 40 | 
 41 | st.markdown(page_element, unsafe_allow_html=True)
 42 | st.markdown("<h1 style='text-align: center; color: white';>Power of Laws 💬</h1>", unsafe_allow_html=True)
 43 | st.markdown("---")
 44 | 
 45 | 
 46 | 
 47 | llm = OpenAI(openai_api_key=st.secrets["LLM_API"])
 48 | 
 49 | 
 50 | def process_text(text):
 51 | 
 52 |     text_splitter = CharacterTextSplitter(
 53 |         separator="\n",
 54 |         chunk_size=1000,
 55 |         chunk_overlap=200,
 56 |         length_function=len
 57 |     )
 58 |     chunks = text_splitter.split_text(text)
 59 | 
 60 |     embeddings = OpenAIEmbeddings(openai_api_key=st.secrets["LLM_API"])
 61 |     knowledgeBase = FAISS.from_texts(chunks, embeddings)
 62 |     
 63 |     return knowledgeBase
 64 | 
 65 | def wrap_text_preserve_newlines(text, width=110):
 66 | 
 67 |     lines = text.split('\n')
 68 |     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
 69 |     # Join the wrapped lines back together using newline characters
 70 |     wrapped_text = '\n'.join(wrapped_lines)
 71 |     return wrapped_text
 72 | 
 73 | def process_llm_response(llm_response):
 74 |     result_output = wrap_text_preserve_newlines(llm_response['result'])
 75 |     print(result_output)
 76 |     return result_output
 77 | 
 78 | 
 79 | flow_option = st.selectbox(
 80 |     'Choose an Option -',
 81 |     ('Power of Laws', 'Upload Another PDF'))
 82 | 
 83 | if flow_option == 'Power of Laws':
 84 |     query = st.text_input('Ask a question to the PDF')
 85 |     submit=st.button("Submit")
 86 | 
 87 |     model_name = "sentence-transformers/all-mpnet-base-v2"
 88 |     instructor_embeddings = HuggingFaceEmbeddings(
 89 |     model_name=model_name,
 90 |     model_kwargs={'device': 'cpu'},
 91 |     encode_kwargs={'normalize_embeddings': False}
 92 |     )
 93 | 
 94 |     embedding = instructor_embeddings
 95 |     persist_directory='db'
 96 |     vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
 97 |     retriever = vectordb.as_retriever(search_kwargs={"k": 3})
 98 |     qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
 99 |     if submit:
100 |         query = f"""
101 |         Do strictly follow the context and for not retrieved data, output as No data. The context is from the Book of Power of Laws - {query}
102 |                 """
103 |         
104 |         llm_response = qa_chain(query)
105 |         result_ipc=process_llm_response(llm_response)
106 |         st.write(result_ipc)
107 | 
108 | 
109 | 
110 | elif flow_option == 'Upload Another PDF':
111 | 
112 |     pdf = st.file_uploader('Upload your PDF Document', type='pdf')
113 | 
114 |     if pdf is not None:
115 |         pdf_reader = PdfReader(pdf)
116 |         text = ""
117 |         for page in pdf_reader.pages:
118 |             text += page.extract_text()
119 |         
120 |         # Create the knowledge base object
121 |         knowledgeBase = process_text(text)
122 |         query = st.text_input('Ask a question to the PDF')
123 |         cancel_button = st.button('Cancel')
124 |         
125 |         if cancel_button:
126 |             st.stop()
127 |         
128 |         if query:
129 |             docs = knowledgeBase.similarity_search(query)
130 |             chain = load_qa_chain(llm, chain_type='stuff')
131 |             
132 |             with get_openai_callback() as cost:
133 |                 response = chain.run(input_documents=docs, question=query)
134 |                 print(cost)
135 |                 
136 |             st.write(response)
137 | 


--------------------------------------------------------------------------------