├── Pipfile ├── Pipfile.lock ├── README.md ├── example.pdf ├── img └── robot.png ├── pyproject.toml ├── requirements.txt ├── script.py ├── sharks.pdf └── streamlit.py /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | 8 | [dev-packages] 9 | 10 | [requires] 11 | python_version = "3.8" 12 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "7f7606f08e0544d8d012ef4d097dabdd6df6843a28793eb6551245d4b2db4242" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.8" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": {}, 19 | "develop": {} 20 | } 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # quizmegpt 2 | 3 | Video demo here: https://twitter.com/kimchoi_jjiggae/status/1652355091817545733 4 | 5 | This project is a fun way to generate short-answer test questions based on a given document and evaluate a student's response to the question by giving feedback. 
It uses various language models and algorithms to perform the following tasks: 6 | 7 | - Load a document from a PDF file using PyPDFLoader 8 | - Split the document into chunks using CharacterTextSplitter 9 | - Extract embeddings from the text using OpenAIEmbeddings 10 | - Create a vector store using Chroma 11 | - Implement a RetrievalQA chain using OpenAI and the vector store as the index 12 | - Generate a question based on a specific document chunk using OpenAI's GPT model 13 | - Evaluate the student's answer to the generated question and give feedback using OpenAI's GPT model 14 | - The project uses the OpenAI API to interact with GPT models and requires an API key to function. The API key is loaded from a .env file in the project directory using load_dotenv() and stored in the api_key variable. 15 | 16 | To use the project, simply provide a PDF file path and run the script. It will generate a short-answer test question based on a specific chunk of the document and prompt the user to provide an answer. It will then evaluate the answer and provide feedback to the user. 17 | 18 | Note that this project is purely for fun and should not be used for any serious educational purposes. 19 | 20 | Enjoy the project! 
21 | 22 | Limitations: Right now it only will parse ~1-2 pages of your document, since OpenAI has restrictions on how much data you can upload to get a response 23 | -------------------------------------------------------------------------------- /example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kimchoi-jjiggae/quizmegpt/c8b16479e5172ef06094e721b97c13d472d2491d/example.pdf -------------------------------------------------------------------------------- /img/robot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kimchoi-jjiggae/quizmegpt/c8b16479e5172ef06094e721b97c13d472d2491d/img/robot.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "quizmegpt" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Michelle "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.8" 10 | 11 | 12 | [build-system] 13 | requires = ["poetry-core"] 14 | build-backend = "poetry.core.masonry.api" 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==22.2.0 2 | ConfigParser==5.3.0 3 | contextlib2==21.6.0 4 | cryptography==40.0.2 5 | Cython==0.29.34 6 | dl==0.1.0 7 | docutils==0.19 8 | HTMLParser==0.0.2 9 | ipython==8.13.1 10 | ipywidgets==8.0.6 11 | Jinja2==3.1.2 12 | jnius==1.1.0 13 | keyring==23.13.1 14 | langchain==0.0.153 15 | lockfile==0.12.2 16 | mock==5.0.2 17 | openai==0.27.2 18 | Pillow==9.4.0 19 | Pillow==9.5.0 20 | protobuf==3.20.3 21 | pyOpenSSL==23.1.1 22 | pyOpenSSL==23.1.1 23 | python-dotenv==1.0.0 24 | railroad==0.5.0 25 | Sphinx==7.0.0 26 | tornado==6.3.1 27 | trove_classifiers==2023.4.29 28 | 
import os

import openai
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator  # noqa: F401  (kept: imported by the original file)
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

load_dotenv()  # Load variables (notably OPENAI_API_KEY) from a .env file.


def generate_questions(doc):
    """Generate one short test question about *doc* via gpt-3.5-turbo.

    Parameters:
        doc: the text (a langchain Document chunk here) to base the
            question on; it is interpolated into the prompt via str().

    Returns:
        str: the question text produced by the model (the prompt asks the
        model not to include the answer).
    """
    prompt = (
        "Generate 1 true false or factual very short answer test question "
        f"based on this text, and do not include the answer: {doc}"
    )

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a teacher. I need you to help write me exam questions.",  # noqa: E501
            },
            {"role": "user", "content": prompt},
        ],
        max_tokens=1200,
        n=1,
        stop=None,
        temperature=0.7,
    )

    # Chat completions nest the generated text under choices[0].message.
    return response["choices"][0]["message"]["content"]


def evaluate_questions(query, result, user_answer):
    """Grade *user_answer* against the retrieval-QA *result* for *query*.

    Parameters:
        query: the question that was posed to the student.
        result: RetrievalQA output dict; result["result"] is treated as
            the reference answer.
        user_answer: the student's free-text answer.

    Returns:
        str: encouraging feedback text from the model.
    """
    prompt = f"give encouraging feedback to this student as if you were quizzing them on this question, telling them if they are correct and how to improve if applicable: {query} 'correct answer': ' {result['result']}', student answer: {user_answer}"  # noqa: E501

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a teacher. I need you to help grade exams.",  # noqa: E501
            },
            {"role": "user", "content": prompt},
        ],
        max_tokens=1200,
        n=1,
        stop=None,
        temperature=0.7,
    )

    return response["choices"][0]["message"]["content"]


if __name__ == "__main__":
    api_key = os.environ.get("OPENAI_API_KEY")
    # Fix: the key was read but never handed to the bare openai.* calls;
    # set it explicitly instead of relying on the library's env fallback.
    openai.api_key = api_key

    # Load and chunk the source document.
    loader = PyPDFLoader("./sharks.pdf")
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    # Build the vector index and expose it through a retriever.
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    db = Chroma.from_documents(texts, embeddings)
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})

    # Chain that answers the generated question from the indexed chunks.
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(openai_api_key=api_key),  # consistency: same key as the embeddings
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # Fix: texts[5] raised IndexError for documents shorter than six
    # chunks; clamp the index to the last available chunk instead.
    query = generate_questions(texts[min(5, len(texts) - 1)])
    print("********************\n\n")
    print("GENERATED QUESTION:", query, "\n\n")
    result = qa({"query": query})
    print("********************\n")
    user_answer = input("Enter your answer: ")
    print("\n********* Generating Feedback ***********\n")
    feedback = evaluate_questions(query, result, user_answer)
    print("FEEDBACK:", feedback)

    print("\n\n\n********* Complete Answer ***********\n")
    print("CORRECT ANSWER:", result["result"])
    print("\n\n\n********* Brb gonna go nap! ***********\n")
import os  # noqa: F401  (kept: imported by the original file)

import openai
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document  # noqa: F401  (kept: imported by the original file)
from langchain.document_loaders import PyPDFLoader  # noqa: F401  (kept: imported by the original file)
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator  # noqa: F401  (kept: imported by the original file)
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter  # noqa: F401  (kept: imported by the original file)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

load_dotenv()  # Load variables from .env file


def generate_questions(text):
    """Generate one short test question about *text* via gpt-3.5-turbo.

    Parameters:
        text: the raw text to base the question on.

    Returns:
        str: the question text produced by the model (the prompt asks the
        model not to include the answer).
    """
    prompt = (
        "Generate 1 true false or factual very short answer test question "
        f"based on this text, and do not include the answer: {text}"
    )

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a teacher. I need you to help write me exam questions.",  # noqa: E501
            },
            {"role": "user", "content": prompt},
        ],
        max_tokens=1200,
        n=1,
        stop=None,
        temperature=0.7,
    )

    # Chat completions nest the generated text under choices[0].message.
    return response["choices"][0]["message"]["content"]


def evaluate_questions(query, result, user_answer):
    """Grade *user_answer* against the retrieval-QA *result* for *query*.

    Parameters:
        query: the question that was posed to the student.
        result: RetrievalQA output dict; result["result"] is treated as
            the reference answer.
        user_answer: the student's free-text answer.

    Returns:
        str: encouraging feedback text from the model.
    """
    prompt = f"give encouraging feedback to this student as if you were quizzing them on this question, telling them if they are correct and how to improve if applicable: {query} 'correct answer': ' {result['result']}', student answer: {user_answer}"  # noqa: E501

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a teacher. I need you to help grade exams.",  # noqa: E501
            },
            {"role": "user", "content": prompt},
        ],
        max_tokens=1200,
        n=1,
        stop=None,
        temperature=0.7,
    )

    return response["choices"][0]["message"]["content"]


class Message:
    """Context manager that renders one chat-style message box.

    Lays out a markdown expander next to the robot avatar; call write()
    inside the ``with`` block to append content.
    """

    # Path to the avatar shown beside every message.
    ai_icon = "./img/robot.png"

    def __init__(self, label: str, expanded: bool = True):
        self.label = label
        self.expanded = expanded

    def __enter__(self):
        message_area, icon_area = st.columns([10, 1])
        icon_area.image(self.ai_icon, caption="QuizMeGPT")
        self.expander = message_area.expander(label=self.label, expanded=self.expanded)
        return self

    def __exit__(self, ex_type, ex_value, trace):
        # Nothing to release; Streamlit owns the widgets.
        pass

    def write(self, content):
        """Render *content* as markdown inside the expander."""
        self.expander.markdown(content)


def quizflow(text, openai_api_key):
    """Run one quiz round: index *text*, ask a question, grade the answer.

    Parameters:
        text: raw text pasted by the user.
        openai_api_key: the key the user entered in the sidebar.
    """
    # Fix: the sidebar key was only passed to the embeddings, while the raw
    # openai.ChatCompletion calls in generate_/evaluate_questions silently
    # depended on an OPENAI_API_KEY environment variable. Set the
    # module-level key so the whole flow uses the key the user typed in.
    if openai_api_key:
        openai.api_key = openai_api_key

    # Split the pasted text into ~1000-character chunks.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
    )
    texts = text_splitter.create_documents([text])

    # Embed the chunks and index them in a local Chroma store.
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    db = Chroma.from_documents(texts, embeddings)

    # Expose the index through a similarity retriever (top-2 chunks).
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})

    # Chain that answers the generated question from the indexed chunks.
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(openai_api_key=openai_api_key),  # consistency: same key as embeddings
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # NOTE: the question is generated from the full pasted text, not from
    # a retrieved chunk.
    query = generate_questions(text)

    with Message(label="Question") as m:
        m.write("### Question")
        m.write(query)

    user_answer = st.text_input("Write your answer here:")
    result = qa({"query": query})

    submit_button = st.button("Submit")
    if submit_button:
        feedback = evaluate_questions(query, result, user_answer)
        provide_feedback(feedback, text, openai_api_key)


def provide_feedback(feedback, text, openai_api_key):
    """Show *feedback* and offer a fresh question for the same *text*."""
    with Message(label="Feedback") as m:
        m.write("### Here's how you did!")
        m.write(feedback)

    # NOTE(review): pressing a button triggers a full Streamlit rerun, so
    # this recursive quizflow() call may not behave like a classic loop —
    # confirm against Streamlit's execution model (st.session_state would
    # be the robust alternative).
    new_question_button = st.button("New Question")
    if new_question_button:
        quizflow(text, openai_api_key)


if __name__ == "__main__":
    st.set_page_config(
        initial_sidebar_state="expanded",
        page_title="QuizMeGPT Streamlit",
        layout="centered",
    )

    with st.sidebar:
        openai_api_key = st.text_input('Your OpenAI API KEY', type="password")

    st.title("QuizMeGPT")
    text = st.text_area("Paste your text here", height=300)
    if text:
        quizflow(text, openai_api_key)
--------------------------------------------------------------------------------