├── .gitignore ├── __pycache__ ├── pdfgen.cpython-39.pyc └── pdfgenerator.cpython-39.pyc ├── Readme.md ├── main.py ├── pdfgenerator.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | -------------------------------------------------------------------------------- /__pycache__/pdfgen.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshitgautam42/AskYourPDF/HEAD/__pycache__/pdfgen.cpython-39.pyc -------------------------------------------------------------------------------- /__pycache__/pdfgenerator.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshitgautam42/AskYourPDF/HEAD/__pycache__/pdfgenerator.cpython-39.pyc -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | Find the whole article here : [Ask Your PDF](https://akshitgautam.hashnode.dev/how-i-built-a-pdf-chat-application-using-langchain-and-openai) -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | import streamlit as st 3 | from PyPDF2 import PdfReader 4 | from langchain.text_splitter import CharacterTextSplitter 5 | from langchain.embeddings.openai import OpenAIEmbeddings 6 | from langchain.vectorstores import FAISS 7 | #from langchain.chains import QAGenerationChain 8 | from langchain.chains.question_answering import load_qa_chain 9 | from langchain.llms import OpenAI 10 | from langchain.callbacks import get_openai_callback 11 | from gtts import gTTS 12 | from io import BytesIO 13 | from langchain.chat_models import ChatOpenAI 14 | #from pdfgen import generate_pdf 15 | 16 | load_dotenv() 17 | 
def _extract_pdf_text(pdf_reader):
    """Return the text of every page of *pdf_reader*, joined in page order.

    A page whose ``extract_text()`` result is ``None`` or empty contributes
    nothing, instead of breaking the concatenation with a ``TypeError``.
    """
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def main():
    """Run the Streamlit "Ask your PDF" page.

    Flow:
      1. Let the user upload a PDF and extract its text.
      2. Split the text into overlapping chunks and index them in a FAISS
         store built from OpenAI embeddings.
      3. When the "Generate Q&A" button is pressed, retrieve the chunks most
         similar to the fixed prompt and run a "stuff" question-answering
         chain over them with ``ChatOpenAI``.
      4. Print the OpenAI callback object to stdout and write the model's
         response to the page.
    """
    st.set_page_config(page_title="Ask your PDF")
    st.header("Ask your PDF 💬")

    # Code to upload the PDF file
    pdf = st.file_uploader("Upload your PDF", type="pdf")

    # NOTE(review): the markup between the quotes is blank in this copy of the
    # file; the variable name suggests CSS that hides Streamlit chrome --
    # confirm against the repository before relying on it.
    hide_streamlit_style = """

"""
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)

    # Nothing to do until a file has been uploaded.
    if pdf is None:
        return

    # Code to extract the text from PDF file
    text = _extract_pdf_text(PdfReader(pdf))
    if not text.strip():
        # Without text there is nothing to embed or to ask questions about.
        st.warning("No extractable text was found in this PDF.")
        return

    # Split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        st.warning("No extractable text was found in this PDF.")
        return

    # Create embeddings
    embeddings = OpenAIEmbeddings()
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    on_click = st.button("Generate Q&A")
    prompt = (
        "I want you to act as a tutor and the professor has asked you to make "
        "10 questions and answers respectively from the given text. "
        "Please generate 2 arrays of 10 question and 10 answers corresponding "
        "to 10 questions each, wihout numbering and separated by semicolon "
        "respectively."
    )
    if not on_click:
        return

    # The prompt doubles as the retrieval query for the similarity search.
    docs = knowledge_base.similarity_search(prompt)

    llm = ChatOpenAI()
    chain = load_qa_chain(llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=prompt)
        print(cb)

    st.write(response)


if __name__ == '__main__':
    main()

Legal Framework of Wage and Salary Administration

Questions and Answers

  1. What is the concept of need-based minimum wage?

    The concept of need-based minimum wage ensures the minimum human needs of the industrial worker, irrespective of any other considerations.

  2. What is the living wage?

    The living wage is the wage that will maintain the workmen in the highest state of industrial efficiency, which will enable him to provide his family with all the material things which are needed for their health and physical well-being, enough to enable him to qualify to discharge his duties as a citizen.

  3. What are the common elements of wages in all the statutory definitions?

    The common elements of wages in all the statutory definitions include foodgrains or other articles, any travelling concession, and it does not include any bonus, any contribution paid or payable by the employer to any pension fund or provident fund or for the benefit of the workman under any law for the time being in force, and any gratuity payable on the termination of his service.

  4. What is the difference between minimum wage and need-based minimum wage?

    The minimum wage is the wage that is fixed by the authorities concerned, while the need-based minimum wage is the wage that ensures the minimum human needs of the industrial worker, irrespective of any other considerations.

  5. Has the Constitution provided for securing living wage workers?

    The Constitution has not provided for securing living wage workers.

  6. What are the six broad categories of wages?

    The six broad categories of wages include statutory minimum wage, bare subsistence or minimum wage, living wage, fair wage, minimum wage, and need-based minimum wage.

  7. What is the statutory minimum wage?

    The statutory minimum wage is determined according to the provisions of the Minimum Wages Act, 1948.

  8. What is the Fair wage?

    The Fair wage is the wage that is based on a fair return on the capital invested in the business, including a reasonable share of profits in proportion to the capital invested.

  9. What is the Minimum wage?

    The minimum wage is the wage that is fixed by the authorities concerned for any area for houses provided under the subsidized industrial housing scheme for low-income groups.

  10. What is the Fuel, lighting, and other miscellaneous items of expenditure?

    The fuel, lighting, and other miscellaneous items of expenditure should constitute 20% of the total minimum wage.

import json
import os
import re


def extract_question_answer_pairs(text):
    """Parse *text* as JSON and return ``(question, answer)`` tuples.

    Args:
        text: A JSON document that is either a list of objects or a single
            object, each with ``"question"`` and ``"answer"`` keys.

    Returns:
        A list of ``(question, answer)`` tuples in document order.

    Raises:
        json.JSONDecodeError: If *text* is not valid JSON.
        KeyError: If an object lacks ``"question"`` or ``"answer"``.
    """
    data = json.loads(text)
    # A lone object would otherwise be iterated key by key and fail with a
    # TypeError on the string subscript; treat it as a one-element list.
    if isinstance(data, dict):
        data = [data]
    return [(item['question'], item['answer']) for item in data]


def extract_json_objects(text):
    """Return every JSON object embedded in *text*, in order of appearance.

    The text is scanned for ``{``; at each one the JSON decoder tries to parse
    a complete value starting there. Letting the decoder find the end of the
    object (rather than cutting at the first ``}``) keeps nested objects and
    string values containing ``}`` intact. Spans that do not parse are skipped.

    Args:
        text: Arbitrary text, e.g. a chat completion with JSON inside code
            fences.

    Returns:
        A list of decoded objects (dicts); empty if none could be parsed.
    """
    decoder = json.JSONDecoder()
    json_objects = []
    start_index = 0
    while True:
        start_index = text.find('{', start_index)
        if start_index == -1:
            break
        try:
            json_obj, end_index = decoder.raw_decode(text, start_index)
        except json.JSONDecodeError:
            # Not a valid object here; resume the scan just past this brace.
            start_index += 1
            continue
        json_objects.append(json_obj)
        start_index = end_index
    return json_objects


text = (
    "Deep in the heart of an enchanted forest, where sunlight danced through the canopies of ancient trees, a mystical creature known as the Forest Guardian resided. "
    "With emerald-green eyes and shimmering wings, the Guardian possessed the power to protect the delicate balance of nature. "
    "Each day, it would soar above the treetops, overseeing the thriving flora and fauna below."
    "One fateful morning, as the Forest Guardian was perched upon a moss-covered branch, it noticed a disturbance in the otherwise tranquil forest. "
    "A group of lumberjacks had encroached upon its domain, axes poised to fell the majestic trees. "
    "Filled with sadness and determination, the Guardian vowed to defend its home."
    "With a wave of its wings, the Forest Guardian summoned a gust of wind, toppling the lumberjacks' tools and forcing them to retreat. "
    "But the threat remained, for the desire for timber lingered in the hearts of others. "
    "The Guardian knew it must find a way to educate and inspire harmony between humans and nature."
    "Thus, it embarked on a quest to spread its message far and wide. "
    "Through whispered secrets to the wind, the Guardian communicated with animals, who in turn shared its wisdom with humans. "
    "Tales of the Forest Guardian's magic and compassion began to circulate, kindling a newfound respect and reverence for the natural world."
    "As seasons passed, the forest flourished, protected by the watchful eyes and tireless efforts of the Forest Guardian. "
    "It became a sanctuary where creatures big and small, humans and animals, lived in harmony, recognizing the interconnectedness of all life. "
    "And the legend of the Forest Guardian lived on, a symbol of hope and unity in a world that learned to treasure and preserve the gifts of nature."
)

SYSTEM_PROMPT = """You are a smart assistant designed to help teachers come up with reading comprehension questions.
Given a piece of text, you must come up with 10 question and answer pairs that can be used to test a student's reading comprehension abilities.
When coming up with this question/answer pair, you must respond in the following format:
```
{
    "question": "$YOUR_QUESTION_HERE",
    "answer": "$THE_ANSWER_HERE"
}
```

Everything between the ``` must be valid json.
"""


def main():
    """Ask the chat model for question/answer pairs about ``text`` and print them.

    Prints the raw completion, then the question and answer of the first JSON
    object found in it (or a notice when none could be parsed).
    """
    # Imported here so the parsing helpers above can be imported and tested
    # without the third-party SDK installed.
    import openai

    # Read the key from the environment rather than hard-coding it in source.
    openai.api_key = os.environ.get("OPENAI_API_KEY", "")

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"""Please come up with 10 question/answer pairs, in the specified JSON format, for the following text:
----------------
{text}"""},
        ],
    )

    message_content = response["choices"][0]["message"]["content"]
    print(message_content)

    json_objects = extract_json_objects(message_content)
    if not json_objects:
        # Avoid an IndexError when the reply contained no parseable object.
        print("No JSON objects found in the model response.")
        return
    print(json_objects[0]['question'])
    print(json_objects[0]['answer'])


if __name__ == "__main__":
    main()