├── Pipfile ├── Pipfile.lock ├── README.md ├── example.pdf ├── img └── robot.png ├── pyproject.toml ├── requirements.txt ├── script.py ├── sharks.pdf └── streamlit.py /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | 8 | [dev-packages] 9 | 10 | [requires] 11 | python_version = "3.8" 12 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "7f7606f08e0544d8d012ef4d097dabdd6df6843a28793eb6551245d4b2db4242" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.8" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": {}, 19 | "develop": {} 20 | } 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # quizmegpt 2 | 3 | Video demo here: https://twitter.com/kimchoi_jjiggae/status/1652355091817545733 4 | 5 | This project is a fun way to generate short-answer test questions based on a given document and evaluate a student's response to the question by giving feedback. 
It uses various language models and algorithms to perform the following tasks: 6 | 7 | - Load a document from a PDF file using PyPDFLoader 8 | - Split the document into chunks using CharacterTextSplitter 9 | - Extract embeddings from the text using OpenAIEmbeddings 10 | - Create a vector store using Chroma 11 | - Implement a RetrievalQA chain using OpenAI and the vector store as the index 12 | - Generate a question based on a specific document chunk using OpenAI's GPT model 13 | - Evaluate the student's answer to the generated question and give feedback using OpenAI's GPT model 14 | - The project uses the OpenAI API to interact with GPT models and requires an API key to function. The API key is loaded from a .env file in the project directory using load_dotenv() and stored in the api_key variable. 15 | 16 | To use the project, simply provide a PDF file path and run the script. It will generate a short-answer test question based on a specific chunk of the document and prompt the user to provide an answer. It will then evaluate the answer and provide feedback to the user. 17 | 18 | Note that this project is purely for fun and should not be used for any serious educational purposes. 19 | 20 | Enjoy the project! 
21 | 22 | Limitations: Right now it only will parse ~1-2 pages of your document, since OpenAI has restrictions on how much data you can upload to get a response 23 | -------------------------------------------------------------------------------- /example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kimchoi-jjiggae/quizmegpt/c8b16479e5172ef06094e721b97c13d472d2491d/example.pdf -------------------------------------------------------------------------------- /img/robot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kimchoi-jjiggae/quizmegpt/c8b16479e5172ef06094e721b97c13d472d2491d/img/robot.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "quizmegpt" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Michelle "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.8" 10 | 11 | 12 | [build-system] 13 | requires = ["poetry-core"] 14 | build-backend = "poetry.core.masonry.api" 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==22.2.0 2 | ConfigParser==5.3.0 3 | contextlib2==21.6.0 4 | cryptography==40.0.2 5 | Cython==0.29.34 6 | dl==0.1.0 7 | docutils==0.19 8 | HTMLParser==0.0.2 9 | ipython==8.13.1 10 | ipywidgets==8.0.6 11 | Jinja2==3.1.2 12 | jnius==1.1.0 13 | keyring==23.13.1 14 | langchain==0.0.153 15 | lockfile==0.12.2 16 | mock==5.0.2 17 | openai==0.27.2 18 | Pillow==9.4.0 19 | Pillow==9.5.0 20 | protobuf==3.20.3 21 | pyOpenSSL==23.1.1 22 | pyOpenSSL==23.1.1 23 | python-dotenv==1.0.0 24 | railroad==0.5.0 25 | Sphinx==7.0.0 26 | tornado==6.3.1 27 | trove_classifiers==2023.4.29 28 | 
import os

import openai
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator  # noqa: F401  (kept: imported by the original file)
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

load_dotenv()  # Load variables (notably OPENAI_API_KEY) from a .env file.


def generate_questions(doc):
    """Generate one short test question about *doc* via gpt-3.5-turbo.

    Parameters:
        doc: the text (a langchain Document chunk here) to base the
            question on; it is interpolated into the prompt via str().

    Returns:
        str: the question text produced by the model (the prompt asks the
        model not to include the answer).
    """
    prompt = (
        "Generate 1 true false or factual very short answer test question "
        f"based on this text, and do not include the answer: {doc}"
    )

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a teacher. I need you to help write me exam questions.",  # noqa: E501
            },
            {"role": "user", "content": prompt},
        ],
        max_tokens=1200,
        n=1,
        stop=None,
        temperature=0.7,
    )

    # Chat completions nest the generated text under choices[0].message.
    return response["choices"][0]["message"]["content"]


def evaluate_questions(query, result, user_answer):
    """Grade *user_answer* against the retrieval-QA *result* for *query*.

    Parameters:
        query: the question that was posed to the student.
        result: RetrievalQA output dict; result["result"] is treated as
            the reference answer.
        user_answer: the student's free-text answer.

    Returns:
        str: encouraging feedback text from the model.
    """
    prompt = f"give encouraging feedback to this student as if you were quizzing them on this question, telling them if they are correct and how to improve if applicable: {query} 'correct answer': ' {result['result']}', student answer: {user_answer}"  # noqa: E501

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a teacher. I need you to help grade exams.",  # noqa: E501
            },
            {"role": "user", "content": prompt},
        ],
        max_tokens=1200,
        n=1,
        stop=None,
        temperature=0.7,
    )

    return response["choices"][0]["message"]["content"]


if __name__ == "__main__":
    api_key = os.environ.get("OPENAI_API_KEY")
    # Fix: the key was read but never handed to the bare openai.* calls;
    # set it explicitly instead of relying on the library's env fallback.
    openai.api_key = api_key

    # Load and chunk the source document.
    loader = PyPDFLoader("./sharks.pdf")
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    # Build the vector index and expose it through a retriever.
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    db = Chroma.from_documents(texts, embeddings)
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})

    # Chain that answers the generated question from the indexed chunks.
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(openai_api_key=api_key),  # consistency: same key as the embeddings
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # Fix: texts[5] raised IndexError for documents shorter than six
    # chunks; clamp the index to the last available chunk instead.
    query = generate_questions(texts[min(5, len(texts) - 1)])
    print("********************\n\n")
    print("GENERATED QUESTION:", query, "\n\n")
    result = qa({"query": query})
    print("********************\n")
    user_answer = input("Enter your answer: ")
    print("\n********* Generating Feedback ***********\n")
    feedback = evaluate_questions(query, result, user_answer)
    print("FEEDBACK:", feedback)

    print("\n\n\n********* Complete Answer ***********\n")
    print("CORRECT ANSWER:", result["result"])
    print("\n\n\n********* Brb gonna go nap! ***********\n")
import os  # noqa: F401  (kept: imported by the original file)

import openai
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document  # noqa: F401  (kept: imported by the original file)
from langchain.document_loaders import PyPDFLoader  # noqa: F401  (kept: imported by the original file)
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator  # noqa: F401  (kept: imported by the original file)
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter  # noqa: F401  (kept: imported by the original file)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

load_dotenv()  # Load variables from .env file


def generate_questions(text):
    """Generate one short test question about *text* via gpt-3.5-turbo.

    Parameters:
        text: the raw text to base the question on.

    Returns:
        str: the question text produced by the model (the prompt asks the
        model not to include the answer).
    """
    prompt = (
        "Generate 1 true false or factual very short answer test question "
        f"based on this text, and do not include the answer: {text}"
    )

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a teacher. I need you to help write me exam questions.",  # noqa: E501
            },
            {"role": "user", "content": prompt},
        ],
        max_tokens=1200,
        n=1,
        stop=None,
        temperature=0.7,
    )

    # Chat completions nest the generated text under choices[0].message.
    return response["choices"][0]["message"]["content"]


def evaluate_questions(query, result, user_answer):
    """Grade *user_answer* against the retrieval-QA *result* for *query*.

    Parameters:
        query: the question that was posed to the student.
        result: RetrievalQA output dict; result["result"] is treated as
            the reference answer.
        user_answer: the student's free-text answer.

    Returns:
        str: encouraging feedback text from the model.
    """
    prompt = f"give encouraging feedback to this student as if you were quizzing them on this question, telling them if they are correct and how to improve if applicable: {query} 'correct answer': ' {result['result']}', student answer: {user_answer}"  # noqa: E501

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a teacher. I need you to help grade exams.",  # noqa: E501
            },
            {"role": "user", "content": prompt},
        ],
        max_tokens=1200,
        n=1,
        stop=None,
        temperature=0.7,
    )

    return response["choices"][0]["message"]["content"]


class Message:
    """Context manager that renders one chat-style message box.

    Lays out a markdown expander next to the robot avatar; call write()
    inside the ``with`` block to append content.
    """

    # Path to the avatar shown beside every message.
    ai_icon = "./img/robot.png"

    def __init__(self, label: str, expanded: bool = True):
        self.label = label
        self.expanded = expanded

    def __enter__(self):
        message_area, icon_area = st.columns([10, 1])
        icon_area.image(self.ai_icon, caption="QuizMeGPT")
        self.expander = message_area.expander(label=self.label, expanded=self.expanded)
        return self

    def __exit__(self, ex_type, ex_value, trace):
        # Nothing to release; Streamlit owns the widgets.
        pass

    def write(self, content):
        """Render *content* as markdown inside the expander."""
        self.expander.markdown(content)


def quizflow(text, openai_api_key):
    """Run one quiz round: index *text*, ask a question, grade the answer.

    Parameters:
        text: raw text pasted by the user.
        openai_api_key: the key the user entered in the sidebar.
    """
    # Fix: the sidebar key was only passed to the embeddings, while the raw
    # openai.ChatCompletion calls in generate_/evaluate_questions silently
    # depended on an OPENAI_API_KEY environment variable. Set the
    # module-level key so the whole flow uses the key the user typed in.
    if openai_api_key:
        openai.api_key = openai_api_key

    # Split the pasted text into ~1000-character chunks.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
    )
    texts = text_splitter.create_documents([text])

    # Embed the chunks and index them in a local Chroma store.
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    db = Chroma.from_documents(texts, embeddings)

    # Expose the index through a similarity retriever (top-2 chunks).
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})

    # Chain that answers the generated question from the indexed chunks.
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(openai_api_key=openai_api_key),  # consistency: same key as embeddings
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # NOTE: the question is generated from the full pasted text, not from
    # a retrieved chunk.
    query = generate_questions(text)

    with Message(label="Question") as m:
        m.write("### Question")
        m.write(query)

    user_answer = st.text_input("Write your answer here:")
    result = qa({"query": query})

    submit_button = st.button("Submit")
    if submit_button:
        feedback = evaluate_questions(query, result, user_answer)
        provide_feedback(feedback, text, openai_api_key)


def provide_feedback(feedback, text, openai_api_key):
    """Show *feedback* and offer a fresh question for the same *text*."""
    with Message(label="Feedback") as m:
        m.write("### Here's how you did!")
        m.write(feedback)

    # NOTE(review): pressing a button triggers a full Streamlit rerun, so
    # this recursive quizflow() call may not behave like a classic loop —
    # confirm against Streamlit's execution model (st.session_state would
    # be the robust alternative).
    new_question_button = st.button("New Question")
    if new_question_button:
        quizflow(text, openai_api_key)


if __name__ == "__main__":
    st.set_page_config(
        initial_sidebar_state="expanded",
        page_title="QuizMeGPT Streamlit",
        layout="centered",
    )

    with st.sidebar:
        openai_api_key = st.text_input('Your OpenAI API KEY', type="password")

    st.title("QuizMeGPT")
    text = st.text_area("Paste your text here", height=300)
    if text:
        quizflow(text, openai_api_key)
--------------------------------------------------------------------------------