├── .github └── workflows │ ├── codeql.yml │ └── dependency-review.yml ├── .gitignore ├── Model_experimentation ├── SQuAD │ └── train-v2.0.json └── model_testing │ └── test.ipynb ├── Model_training ├── AnswerAwareQG │ ├── aaqg-t5_fine_tune.py │ ├── test_aaqg.ipynb │ └── train_checkpoint.py └── KeyPhrase Detection │ ├── keyphrase-detection-T5.ipynb │ ├── keyphrase-detection-T5.py │ ├── keyphrase-detection-test.py │ ├── test_checkpoint.ipynb │ └── train_checkpoint.py ├── README.md ├── Testing ├── data │ └── README.md ├── generate_qa.py └── qna │ └── README.md ├── backend ├── Generator │ ├── __init__.py │ ├── encoding.py │ ├── main.py │ └── mcq.py ├── script.sh ├── server.py ├── service_account_key.json └── test_server.py ├── eduaid_web ├── .env.example ├── .gitignore ├── README.md ├── package-lock.json ├── package.json ├── public │ ├── aossie_logo.png │ ├── aossie_logo64.ico │ ├── aossie_transparent.png │ ├── index.html │ ├── manifest.json │ └── robots.txt ├── src │ ├── App.css │ ├── App.js │ ├── App.test.js │ ├── assets │ │ ├── aossie_logo.png │ │ ├── aossie_logo_transparent.png │ │ ├── arrow.png │ │ ├── cloud.png │ │ ├── gitStar.png │ │ ├── open.png │ │ └── stars.png │ ├── index.css │ ├── index.js │ ├── pages │ │ ├── Home.jsx │ │ ├── Output.jsx │ │ ├── PageNotFound.jsx │ │ ├── Previous.jsx │ │ ├── Question_Type.jsx │ │ └── Text_Input.jsx │ ├── reportWebVitals.js │ └── setupTests.js └── tailwind.config.js ├── extension ├── package-lock.json ├── package.json ├── postcss.config.js ├── public │ ├── aossie_logo16.png │ ├── aossie_logo32.png │ ├── aossie_logo64.png │ ├── background.js │ ├── contentScript.js │ ├── manifest.json │ └── youtubeContentScript.js ├── src │ ├── App.css │ ├── assets │ │ ├── aossie_logo.png │ │ ├── aossie_logo.webp │ │ ├── arrow.png │ │ ├── cloud.png │ │ ├── gitStar.png │ │ ├── open.png │ │ └── stars.png │ ├── index.css │ ├── pages │ │ ├── answer │ │ │ ├── Answer.jsx │ │ │ └── answer.html │ │ ├── home │ │ │ ├── Home.jsx │ │ │ └── home.html │ │ ├── previous │ │ │ ├── Previous.jsx │ │ │ └── previous.html │ │ ├── question │ │ │ ├── Question.jsx │ │ │ ├── SidePanel.jsx │ │ │ ├── question.html │ │ │ └── sidePanel.html │ │ └── text_input │ │ │ ├── TextInput.jsx │ │ │ └── text_input.html │ └── popup │ │ ├── Popup.jsx │ │ └── popup.html ├── tailwind.config.js └── vite.config.js ├── readme-assets └── EduAid-demo.gif └── requirements.txt /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "main" ] 17 | pull_request: 18 | branches: [ "main" ] 19 | schedule: 20 | - cron: '18 8 * * 2' 21 | 22 | jobs: 23 | analyze: 24 | name: Analyze (${{ matrix.language }}) 25 | # Runner size impacts CodeQL analysis time. 
To learn more, please see: 26 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 27 | # - https://gh.io/supported-runners-and-hardware-resources 28 | # - https://gh.io/using-larger-runners (GitHub.com only) 29 | # Consider using larger runners or machines with greater resources for possible analysis time improvements. 30 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 31 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 32 | permissions: 33 | # required for all workflows 34 | security-events: write 35 | 36 | # required to fetch internal or private CodeQL packs 37 | packages: read 38 | 39 | # only required for workflows in private repositories 40 | actions: read 41 | contents: read 42 | 43 | strategy: 44 | fail-fast: false 45 | matrix: 46 | include: 47 | - language: javascript-typescript 48 | build-mode: none 49 | - language: python 50 | build-mode: none 51 | # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' 52 | # Use `c-cpp` to analyze code written in C, C++ or both 53 | # Use 'java-kotlin' to analyze code written in Java, Kotlin or both 54 | # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 55 | # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, 56 | # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. 57 | # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how 58 | # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages 59 | steps: 60 | - name: Checkout repository 61 | uses: actions/checkout@v4 62 | 63 | # Initializes the CodeQL tools for scanning. 64 | - name: Initialize CodeQL 65 | uses: github/codeql-action/init@v3 66 | with: 67 | languages: ${{ matrix.language }} 68 | build-mode: ${{ matrix.build-mode }} 69 | # If you wish to specify custom queries, you can do so here or in a config file. 70 | # By default, queries listed here will override any specified in a config file. 71 | # Prefix the list here with "+" to use these queries and those in the config file. 72 | 73 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 74 | # queries: security-extended,security-and-quality 75 | 76 | # If the analyze step fails for one of the languages you are analyzing with 77 | # "We were unable to automatically build your code", modify the matrix above 78 | # to set the build mode to "manual" for that language. Then modify this step 79 | # to build your code. 80 | # ℹ️ Command-line programs to run using the OS shell. 
81 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 82 | - if: matrix.build-mode == 'manual' 83 | shell: bash 84 | run: | 85 | echo 'If you are using a "manual" build mode for one or more of the' \ 86 | 'languages you are analyzing, replace this with the commands to build' \ 87 | 'your code, for example:' 88 | echo ' make bootstrap' 89 | echo ' make release' 90 | exit 1 91 | 92 | - name: Perform CodeQL Analysis 93 | uses: github/codeql-action/analyze@v3 94 | with: 95 | category: "/language:${{matrix.language}}" 96 | -------------------------------------------------------------------------------- /.github/workflows/dependency-review.yml: -------------------------------------------------------------------------------- 1 | # Dependency Review Action 2 | # 3 | # This Action will scan dependency manifest files that change as part of a Pull Request, 4 | # surfacing known-vulnerable versions of the packages declared or updated in the PR. 5 | # Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable 6 | # packages will be blocked from merging. 7 | # 8 | # Source repository: https://github.com/actions/dependency-review-action 9 | # Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement 10 | name: 'Dependency review' 11 | on: 12 | pull_request: 13 | branches: [ "main" ] 14 | 15 | # If using a dependency submission action in this workflow this permission will need to be set to: 16 | # 17 | # permissions: 18 | # contents: write 19 | # 20 | # https://docs.github.com/en/enterprise-cloud@latest/code-security/supply-chain-security/understanding-your-software-supply-chain/using-the-dependency-submission-api 21 | permissions: 22 | contents: read 23 | # Write permissions for pull-requests are required for using the `comment-summary-in-pr` option, comment out if you aren't using this option 24 | pull-requests: write 25 | 26 | jobs: 27 | dependency-review: 28 | runs-on: ubuntu-latest 29 | steps: 30 | - name: 'Checkout repository' 31 | uses: actions/checkout@v4 32 | - name: 'Dependency Review' 33 | uses: actions/dependency-review-action@v4 34 | # Commonly enabled options, see https://github.com/actions/dependency-review-action#configuration-options for all available options. 
35 | with: 36 | comment-summary-in-pr: always 37 | # fail-on-severity: moderate 38 | # deny-licenses: GPL-1.0-or-later, LGPL-2.0-or-later 39 | # retry-on-snapshot-warnings: true 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Model_experimentation/model_testing/fine_tuned_t5_model 2 | Model_experimentation/model_testing/fine_tuned_t5_tokenizer 3 | Model_training/KeyPhrase\ Detection/fine_tuned_t5_model_kp 4 | Model_training/KeyPhrase\ Detection/fine_tuned_t5_tokenizer_kp 5 | Model_training/KeyPhrase\ Detection/fine_tuned_t5_model_kp_2 6 | Model_training/KeyPhrase\ Detection/fine_tuned_t5_tokenizer_kp_2 7 | Model_training/KeyPhrase\ Detection/fine_tuned_t5_model_kp_3 8 | Model_training/KeyPhrase\ Detection/fine_tuned_t5_tokenizer_kp_3 9 | Model_training/KeyPhrase\ Detection/fine_tuned_t5_model_kp_4 10 | Model_training/KeyPhrase\ Detection/fine_tuned_t5_tokenizer_kp_4 11 | Model_training/KeyPhrase Detection/fine_tuned_model_kp_4/ 12 | Model_training/AnswerAwareQG/fine_tuned_t5_model_aaqg 13 | Model_training/AnswerAwareQG/fine_tuned_t5_model_aaqg_2 14 | Model_training/AnswerAwareQG/fine_tuned_t5_tokenizer_aaqg 15 | Model_training/AnswerAwareQG/fine_tuned_t5_tokenizer_aaqg_2 16 | backend/models 17 | backend/tokenizers 18 | backend/sample_input.py 19 | extension/pdfjs-3.9.179-dist 20 | backend/s2v_old 21 | __pycache__/ 22 | *.pyc 23 | extension/dist 24 | extension/node_modules 25 | backend/credentials.json 26 | backend/token.json 27 | backend/service_account_key.json 28 | venv 29 | backend/Eduaid 30 | .DS_Store -------------------------------------------------------------------------------- /Model_training/AnswerAwareQG/aaqg-t5_fine_tune.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | import torch 3 | from torch.utils.data import Dataset 4 | from torch.utils.data import DataLoader 5 | from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW 6 | from tqdm import tqdm 7 | from time import time 8 | import os 9 | 10 | 11 | class HuggingFaceDataset(Dataset): 12 | def __init__(self,dataset): 13 | self.dataset=dataset 14 | def __getitem__(self,index): 15 | item=self.dataset[index] 16 | # ID=item['id'] 17 | # title=item['title'] 18 | # abstract=item['abstract'] 19 | # keyphrases=item['keyphrases'] 20 | # return ID,title,abstract,keyphrases 21 | return item 22 | def __len__(self): 23 | return len(self.dataset) 24 | 25 | class SQuADDataset(HuggingFaceDataset): 26 | def __init__(self, dataset): 27 | super().__init__(dataset) 28 | self.tokenizer = T5Tokenizer.from_pretrained('t5-base') 29 | 30 | def __getitem__(self, index): 31 | item = self.dataset[index] 32 | 33 | context = item['context'] 34 | answer = item['answers']['text'][0] 35 | 36 | input_text = f"answer: {answer} context: {context}" 37 | target_text = item['question'] 38 | 39 | inputs = self.tokenizer.encode_plus( 40 | input_text, 41 | padding='max_length', 42 | truncation=True, 43 | max_length=512, 44 | return_tensors='pt' 45 | ) 46 | 47 | targets = self.tokenizer.encode_plus( 48 | target_text, 49 | padding='max_length', 50 | truncation=True, 51 | max_length=32, 52 | return_tensors='pt' 53 | ) 54 | 55 | return { 56 | 'input_ids': inputs['input_ids'].squeeze(), 57 | 'attention_mask': inputs['attention_mask'].squeeze(), 58 | 'target_ids': targets['input_ids'].squeeze(), 59 | 'target_attention_mask': 
targets['attention_mask'].squeeze()
60 |         }
61 | 
62 | def main():
63 | 
64 |     dataset=load_dataset('squad')
65 | 
66 |     sample_percentage=1
67 | 
68 |     dataset['train'] = dataset['train'].shuffle()
69 |     num_samples = int(len(dataset['train']) * sample_percentage)
70 |     train_set = dataset['train'].select(range(num_samples))
71 | 
72 | 
73 |     squad_dataset=SQuADDataset(train_set)
74 | 
75 |     model_path='t5-base'
76 |     tokenizer_path='t5-base'
77 | 
78 |     model = T5ForConditionalGeneration.from_pretrained(model_path)
79 |     tokenizer = T5Tokenizer.from_pretrained(tokenizer_path)
80 | 
81 |     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
82 |     model.to(device)
83 | 
84 |     batch_size=32
85 |     lr=1e-4
86 |     epochs=15
87 | 
88 |     dataloader=DataLoader(squad_dataset,batch_size=batch_size, shuffle=True)
89 | 
90 |     optimizer=AdamW(model.parameters(),lr=lr)
91 |     scheduler=torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
92 | 
93 |     checkpoint_interval=1800
94 | 
95 |     start_time=time()
96 |     START_TIME=time()
97 | 
98 |     for epoch in range(epochs):
99 |         model.train()
100 |         total_loss=0
101 | 
102 |         for batch_idx, batch in enumerate(tqdm(dataloader,desc=f'Epoch {epoch}')):
103 |             input_ids = batch['input_ids'].to(device)
104 |             attention_mask = batch['attention_mask'].to(device)
105 |             target_ids = batch['target_ids'].to(device)
106 |             target_attention_mask = batch['target_attention_mask'].to(device)
107 | 
108 |             outputs = model(
109 |                 input_ids=input_ids,
110 |                 attention_mask=attention_mask,
111 |                 labels=target_ids,
112 |                 decoder_attention_mask=target_attention_mask,
113 |                 return_dict=True
114 |             )
115 | 
116 |             loss=outputs.loss
117 |             total_loss+=loss.item()
118 |             optimizer.zero_grad()
119 |             loss.backward()
120 |             optimizer.step()
121 | 
122 |             curr_time=time()
123 |             elapsed_time=curr_time-start_time
124 |             if elapsed_time>=checkpoint_interval:
125 |                 start_time=time()
126 |                 checkpoint={
127 |                     'model_state_dict': model.state_dict(),
128 |                     'optimizer_state_dict': optimizer.state_dict(),
129 |                     'epoch': epoch,
130 |                     'batch_idx': batch_idx,
131 |                     'time': curr_time-START_TIME
132 |                 }
133 |                 try:
134 |                     if os.path.exists('./checkpoint.pt'):
135 |                         os.remove('./checkpoint.pt')
136 |                     torch.save(checkpoint, './checkpoint.pt')
137 |                     print(f'Checkpoint at {(curr_time-START_TIME)/60} minutes saved!')
138 |                 except Exception as e:
139 |                     print("Error while saving checkpoint ",e)
140 | 
141 |         scheduler.step()
142 |         avg_loss = total_loss / len(dataloader)
143 |         print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss}")
144 | 
145 |     model.save_pretrained('./fine_tuned_t5_model_aaqg')
146 |     tokenizer.save_pretrained('./fine_tuned_t5_tokenizer_aaqg')
147 | 
148 | 
149 | if __name__=="__main__":
150 |     main()
151 | 
152 | 
--------------------------------------------------------------------------------
/Model_training/AnswerAwareQG/test_aaqg.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 1,
6 |    "metadata": {},
7 |    "outputs": [
8 |     {
9 |      "name": "stderr",
10 |      "output_type": "stream",
11 |      "text": [
12 |       "/home/prarabdh/miniconda3/envs/eduaid/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets.
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 13 | " from .autonotebook import tqdm as notebook_tqdm\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import torch\n", 19 | "from transformers import T5ForConditionalGeneration, T5Tokenizer" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 4, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "model_path='./fine_tuned_t5_model_aaqg/'\n", 29 | "tokenizer_path='./fine_tuned_t5_tokenizer_aaqg/'" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 5, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "def generate_question(context,answer,model_path, tokenizer_path):\n", 39 | " model = T5ForConditionalGeneration.from_pretrained(model_path)\n", 40 | " tokenizer = T5Tokenizer.from_pretrained(tokenizer_path)\n", 41 | " device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", 42 | " model.to(device)\n", 43 | "\n", 44 | " input_text=f'answer: {answer} context: {context}'\n", 45 | "\n", 46 | " inputs=tokenizer.encode_plus(\n", 47 | " input_text,\n", 48 | " padding='max_length',\n", 49 | " truncation=True,\n", 50 | " max_length=512,\n", 51 | " return_tensors='pt'\n", 52 | " )\n", 53 | "\n", 54 | " input_ids=inputs['input_ids'].to(device)\n", 55 | " attention_mask=inputs['attention_mask'].to(device)\n", 56 | "\n", 57 | " with torch.no_grad():\n", 58 | " output=model.generate(\n", 59 | " input_ids=input_ids,\n", 60 | " attention_mask=attention_mask,\n", 61 | " max_length=32\n", 62 | " )\n", 63 | "\n", 64 | " generated_question = tokenizer.decode(output[0], skip_special_tokens=True)\n", 65 | " return generated_question" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 6, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "context=\"\"\"\n", 75 | "Mitochondria are double-membraned organelles found in most eukaryotic cells. They are often referred to as the \"powerhouses\" of the cell because they generate most of the cell's energy in the form of adenosine triphosphate (ATP). Mitochondria play a crucial role in cellular respiration, which is the process by which cells convert nutrients into usable energy.\n", 76 | "The structure of mitochondria consists of an outer membrane, which surrounds the entire organelle, and an inner membrane that is highly folded to form structures called cristae. The inner membrane encloses the mitochondrial matrix, which contains enzymes and DNA molecules necessary for various metabolic reactions.\n", 77 | "One of the primary functions of mitochondria is to carry out aerobic respiration, a process that uses oxygen to break down glucose and other organic molecules, releasing energy in the form of ATP. 
This process occurs in the inner membrane of the mitochondria, specifically in the electron transport chain and the citric acid cycle.\n", 78 | "\n", 79 | "\"\"\"\n", 80 | "answer= 'adenosine triphosphate'" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 7, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "'What does ATP stand for?'" 92 | ] 93 | }, 94 | "execution_count": 7, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "generate_question(context,answer,model_path,tokenizer_path)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 8, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "context=\"\"\"\n", 110 | "Mitochondria are double-membraned organelles found in most eukaryotic cells. They are often referred to as the \"powerhouses\" of the cell because they generate most of the cell's energy in the form of adenosine triphosphate (ATP). Mitochondria play a crucial role in cellular respiration, which is the process by which cells convert nutrients into usable energy.\n", 111 | "\"\"\"\n", 112 | "answer= 'Mitochondria'" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 9, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/plain": [ 123 | "'What are the \"powerhouses\" of the cell?'" 124 | ] 125 | }, 126 | "execution_count": 9, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "generate_question(context,answer,model_path,tokenizer_path)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 10, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "context=''' \n", 142 | "In mathematics, the Johnson–Lindenstrauss lemma is a result concerning low-distortion embeddings of points from high-dimensional into low-dimensional Euclidean space. The lemma states that a set of points in a high-dimensional space can be embedded into a space of much lower dimension in such a way that distances between the points are nearly preserved. 
The map used for the embedding is at least Lipschitz, and can even be taken to be an orthogonal projection.\n", 143 | "'''\n", 144 | "\n", 145 | "answer1 = 'Johnson Lindenstrauss Lemma'\n", 146 | "\n", 147 | "answer2= 'Lipschitz'" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 11, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "'What is the result concerning low-distortion embeddings of points from high-dimensional into low-dimensional Euclidean space?'" 159 | ] 160 | }, 161 | "execution_count": 11, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "generate_question(context,answer1,model_path,tokenizer_path)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 12, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "'What map is used for the embedding?'" 179 | ] 180 | }, 181 | "execution_count": 12, 182 | "metadata": {}, 183 | "output_type": "execute_result" 184 | } 185 | ], 186 | "source": [ 187 | "generate_question(context,answer2,model_path,tokenizer_path)" 188 | ] 189 | } 190 | ], 191 | "metadata": { 192 | "kernelspec": { 193 | "display_name": "eduaid", 194 | "language": "python", 195 | "name": "python3" 196 | }, 197 | "language_info": { 198 | "codemirror_mode": { 199 | "name": "ipython", 200 | "version": 3 201 | }, 202 | "file_extension": ".py", 203 | "mimetype": "text/x-python", 204 | "name": "python", 205 | "nbconvert_exporter": "python", 206 | "pygments_lexer": "ipython3", 207 | "version": "3.11.4" 208 | }, 209 | "orig_nbformat": 4 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 2 213 | } 214 | -------------------------------------------------------------------------------- /Model_training/AnswerAwareQG/train_checkpoint.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | import torch 3 | from torch.utils.data import Dataset 4 | from torch.utils.data import DataLoader 5 | from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW 6 | from tqdm import tqdm 7 | from time import time 8 | import os 9 | 10 | 11 | class HuggingFaceDataset(Dataset): 12 | def __init__(self,dataset): 13 | self.dataset=dataset 14 | def __getitem__(self,index): 15 | item=self.dataset[index] 16 | # ID=item['id'] 17 | # title=item['title'] 18 | # abstract=item['abstract'] 19 | # keyphrases=item['keyphrases'] 20 | # return ID,title,abstract,keyphrases 21 | return item 22 | def __len__(self): 23 | return len(self.dataset) 24 | 25 | class SQuADDataset(HuggingFaceDataset): 26 | def __init__(self, dataset): 27 | super().__init__(dataset) 28 | self.tokenizer = T5Tokenizer.from_pretrained('t5-base') 29 | 30 | def __getitem__(self, index): 31 | item = self.dataset[index] 32 | 33 | context = item['context'] 34 | answer = item['answers']['text'][0] 35 | 36 | input_text = f"answer: {answer} context: {context}" 37 | target_text = item['question'] 38 | 39 | inputs = self.tokenizer.encode_plus( 40 | input_text, 41 | padding='max_length', 42 | truncation=True, 43 | max_length=512, 44 | return_tensors='pt' 45 | ) 46 | 47 | targets = self.tokenizer.encode_plus( 48 | target_text, 49 | padding='max_length', 50 | truncation=True, 51 | max_length=32, 52 | return_tensors='pt' 53 | ) 54 | 55 | return { 56 | 'input_ids': inputs['input_ids'].squeeze(), 57 | 'attention_mask': inputs['attention_mask'].squeeze(), 58 | 'target_ids': 
targets['input_ids'].squeeze(),
59 |             'target_attention_mask': targets['attention_mask'].squeeze()
60 |         }
61 | 
62 | def main():
63 | 
64 |     dataset=load_dataset('squad')
65 | 
66 |     sample_percentage=1
67 | 
68 |     dataset['train'] = dataset['train'].shuffle()
69 |     num_samples = int(len(dataset['train']) * sample_percentage)
70 |     train_set = dataset['train'].select(range(num_samples))
71 | 
72 | 
73 |     squad_dataset=SQuADDataset(train_set)
74 | 
75 |     model_path='./fine_tuned_t5_model_aaqg'
76 |     tokenizer_path='./fine_tuned_t5_tokenizer_aaqg'
77 | 
78 |     model = T5ForConditionalGeneration.from_pretrained(model_path)
79 |     tokenizer = T5Tokenizer.from_pretrained(tokenizer_path)
80 | 
81 |     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
82 |     model.to(device)
83 | 
84 |     batch_size=32
85 |     lr=1e-4
86 |     epochs=15
87 | 
88 |     dataloader=DataLoader(squad_dataset,batch_size=batch_size, shuffle=True)
89 | 
90 |     optimizer=AdamW(model.parameters(),lr=lr)
91 |     scheduler=torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
92 | 
93 |     checkpoint_interval=1800
94 | 
95 |     start_time=time()
96 |     START_TIME=time()
97 | 
98 |     for epoch in range(epochs):
99 |         model.train()
100 |         total_loss=0
101 | 
102 |         for batch_idx, batch in enumerate(tqdm(dataloader,desc=f'Epoch {epoch}')):
103 |             input_ids = batch['input_ids'].to(device)
104 |             attention_mask = batch['attention_mask'].to(device)
105 |             target_ids = batch['target_ids'].to(device)
106 |             target_attention_mask = batch['target_attention_mask'].to(device)
107 | 
108 |             outputs = model(
109 |                 input_ids=input_ids,
110 |                 attention_mask=attention_mask,
111 |                 labels=target_ids,
112 |                 decoder_attention_mask=target_attention_mask,
113 |                 return_dict=True
114 |             )
115 | 
116 |             loss=outputs.loss
117 |             total_loss+=loss.item()
118 |             optimizer.zero_grad()
119 |             loss.backward()
120 |             optimizer.step()
121 | 
122 |             curr_time=time()
123 |             elapsed_time=curr_time-start_time
124 |             if elapsed_time>=checkpoint_interval:
125 |                 start_time=time()
126 |                 checkpoint={
127 |                     'model_state_dict': model.state_dict(),
128 |                     'optimizer_state_dict': optimizer.state_dict(),
129 |                     'epoch': epoch,
130 |                     'batch_idx': batch_idx,
131 |                     'time': curr_time-START_TIME
132 |                 }
133 |                 try:
134 |                     if os.path.exists('./checkpoint.pt'):
135 |                         os.remove('./checkpoint.pt')
136 |                     torch.save(checkpoint, './checkpoint.pt')
137 |                     print(f'Checkpoint at {(curr_time-START_TIME)/60} minutes saved!')
138 |                 except Exception as e:
139 |                     print("Error while saving checkpoint ",e)
140 | 
141 |         scheduler.step()
142 |         avg_loss = total_loss / len(dataloader)
143 |         print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss}")
144 | 
145 |     model.save_pretrained('./fine_tuned_t5_model_aaqg')
146 |     tokenizer.save_pretrained('./fine_tuned_t5_tokenizer_aaqg')
147 | 
148 | 
149 | if __name__=="__main__":
150 |     main()
151 | 
152 | 
--------------------------------------------------------------------------------
/Model_training/KeyPhrase Detection/keyphrase-detection-T5.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 1,
6 |    "metadata": {
7 |     "tags": []
8 |    },
9 |    "outputs": [],
10 |    "source": [
11 |     "from datasets import load_dataset"
12 |    ]
13 |   },
14 |   {
15 |    "cell_type": "code",
16 |    "execution_count": 2,
17 |    "metadata": {
18 |     "tags": []
19 |    },
20 |    "outputs": [
21 |     {
22 |      "name": "stderr",
23 |      "output_type": "stream",
24 |      "text": [
25 |       "Found cached dataset kp20k
(/home/studio-lab-user/.cache/huggingface/datasets/taln-ls2n___kp20k/raw/0.0.1/19886675d0423305a027d83fee3bdd63042317de8eceff75a0b79fd8d8380375)\n" 26 | ] 27 | }, 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "BuilderConfig(name='raw', version=0.0.1, data_dir=None, data_files=None, description='This part of my dataset covers the raw data')\n" 33 | ] 34 | }, 35 | { 36 | "data": { 37 | "application/vnd.jupyter.widget-view+json": { 38 | "model_id": "bc8dd2799eeb40599372ad285b8f65c4", 39 | "version_major": 2, 40 | "version_minor": 0 41 | }, 42 | "text/plain": [ 43 | " 0%| | 0/3 [00:00\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mtotal_loss\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 247 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "checkpoint_interval=1800 #Checkpoint every 30 minutes\n", 253 | "start_time=time()\n", 254 | "\n", 255 | "for epoch in range(epochs):\n", 256 | " model.train()\n", 257 | " total_loss = 0\n", 258 | " for batch_idx,batch in enumerate(tqdm(dataloader,desc=f'Epoch {epoch}')):\n", 259 | " input_ids = batch['input_ids'].to(device)\n", 260 | " attention_mask = batch['attention_mask'].to(device)\n", 261 | " target_ids = batch['target_ids'].to(device)\n", 262 | " target_attention_mask = batch['target_attention_mask'].to(device)\n", 263 | "\n", 264 | " outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids, decoder_attention_mask=target_attention_mask)\n", 265 | "\n", 266 | " loss = outputs.loss\n", 267 | " total_loss += loss.item()\n", 268 | "\n", 269 | " optimizer.zero_grad()\n", 270 | " loss.backward()\n", 271 | " optimizer.step()\n", 272 | " \n", 273 | " curr_time=time()\n", 274 | " elapsed_time=curr_time-start_time\n", 275 | " start_time=time()\n", 276 | " if elapsed_time >=checkpoint_interval:\n", 277 | " checkpoint = {\n", 278 | " 'model_state_dict': model.state_dict(),\n", 279 | " 'optimizer_state_dict': optimizer.state_dict(),\n", 280 | " 'epoch': epoch,\n", 281 | " 'batch_idx': batch_idx,\n", 282 | " 'time': elapsed_time\n", 283 | " }\n", 284 | " \n", 285 | " torch.save(checkpoint, './checkpoint.pt')\n", 286 | " print(f'Checkpoint at {(elapsed_time)/60} minutes saved!')\n", 287 | " \n", 288 | "\n", 289 | " scheduler.step()\n", 290 | "\n", 291 | " avg_loss = total_loss / len(dataloader)\n", 292 | " print(f\"Epoch {epoch+1}/{epochs} - Loss: {avg_loss}\")\n", 293 | "\n", 294 | "model.save_pretrained(\"./fine_tuned_t5_model_kp\")\n", 295 | "tokenizer.save_pretrained(\"./fine_tuned_t5_tokenizer_kp\")" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [] 304 | } 305 | ], 306 | "metadata": { 307 | "kernelspec": { 308 | "display_name": "default:Python", 309 | "language": "python", 310 | "name": 
"conda-env-default-py" 311 | }, 312 | "language_info": { 313 | "codemirror_mode": { 314 | "name": "ipython", 315 | "version": 3 316 | }, 317 | "file_extension": ".py", 318 | "mimetype": "text/x-python", 319 | "name": "python", 320 | "nbconvert_exporter": "python", 321 | "pygments_lexer": "ipython3", 322 | "version": "3.9.16" 323 | } 324 | }, 325 | "nbformat": 4, 326 | "nbformat_minor": 4 327 | } 328 | -------------------------------------------------------------------------------- /Model_training/KeyPhrase Detection/keyphrase-detection-T5.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | import torch 3 | from torch.utils.data import Dataset 4 | from torch.utils.data import DataLoader 5 | from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW 6 | from tqdm import tqdm 7 | from time import time 8 | 9 | 10 | class HuggingFaceDataset(Dataset): 11 | def __init__(self,dataset): 12 | self.dataset=dataset 13 | def __getitem__(self,index): 14 | item=self.dataset[index] 15 | # ID=item['id'] 16 | # title=item['title'] 17 | # abstract=item['abstract'] 18 | # keyphrases=item['keyphrases'] 19 | # return ID,title,abstract,keyphrases 20 | return item 21 | def __len__(self): 22 | return len(self.dataset) 23 | class kp_data(HuggingFaceDataset): 24 | 25 | def __init__(self,dataset,tokenizer): 26 | super().__init__(dataset) 27 | self.tokenizer=tokenizer 28 | 29 | def __getitem__(self,idx): 30 | item=super().__getitem__(idx) 31 | abstract=item['abstract'] 32 | keyphrases=item['keyphrases'] 33 | 34 | input_text=f'detect keyword: abstract:{abstract}' 35 | 36 | target_text=f'{", ".join(keyphrases)}' 37 | 38 | input_ids=self.tokenizer.encode(input_text, truncation=True, padding='max_length', max_length=512, return_tensors='pt')[0] 39 | target_ids=self.tokenizer.encode(target_text,truncation=True, padding='max_length', max_length=32, return_tensors='pt')[0] 40 | 41 | return {'input_ids': input_ids, 'attention_mask': input_ids.ne(0), 'target_ids':target_ids, 'target_attention_mask': target_ids.ne(0)} 42 | 43 | 44 | def main(): 45 | 46 | dataset = load_dataset("taln-ls2n/kp20k") 47 | sample_percentage=0.25 48 | 49 | dataset['train'] = dataset['train'].shuffle() 50 | num_samples = int(len(dataset['train']) * sample_percentage) 51 | train_set = dataset['train'].select(range(num_samples)) 52 | 53 | model_name='t5-base' 54 | 55 | tokenizer=T5Tokenizer.from_pretrained(model_name) 56 | kp_dataset=kp_data(train_set,tokenizer) 57 | 58 | model=T5ForConditionalGeneration.from_pretrained(model_name) 59 | epochs=5 60 | batch_size=8 61 | learning_rate=2e-5 62 | dataloader=DataLoader(kp_dataset,batch_size=batch_size,shuffle=True) 63 | device=torch.device('cuda' if torch.cuda.is_available() else 'cpu') 64 | 65 | model.to(device) 66 | optimizer=AdamW(model.parameters(),lr=learning_rate) 67 | scheduler=torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1) 68 | 69 | checkpoint_interval=1800 #Checkpoint every 30 minutes 70 | start_time=time() 71 | 72 | for epoch in range(epochs): 73 | model.train() 74 | total_loss = 0 75 | for batch_idx,batch in enumerate(tqdm(dataloader,desc=f'Epoch {epoch}')): 76 | input_ids = batch['input_ids'].to(device) 77 | attention_mask = batch['attention_mask'].to(device) 78 | target_ids = batch['target_ids'].to(device) 79 | target_attention_mask = batch['target_attention_mask'].to(device) 80 | 81 | outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids, 
decoder_attention_mask=target_attention_mask)
82 | 
83 |             loss = outputs.loss
84 |             total_loss += loss.item()
85 | 
86 |             optimizer.zero_grad()
87 |             loss.backward()
88 |             optimizer.step()
89 | 
90 |             curr_time=time()
91 |             elapsed_time=curr_time-start_time
92 |             if elapsed_time >=checkpoint_interval:
93 |                 start_time=time()
94 |                 checkpoint = {
95 |                     'model_state_dict': model.state_dict(),
96 |                     'optimizer_state_dict': optimizer.state_dict(),
97 |                     'epoch': epoch,
98 |                     'batch_idx': batch_idx,
99 |                     'time': elapsed_time
100 |                 }
101 | 
102 |                 torch.save(checkpoint, './checkpoint.pt')
103 |                 print(f'Checkpoint at {(elapsed_time)/60} minutes saved!')
104 | 
105 | 
106 |         scheduler.step()
107 |         avg_loss = total_loss / len(dataloader)
108 |         print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss}")
109 | 
110 |     model.save_pretrained("./fine_tuned_t5_model_kp")
111 |     tokenizer.save_pretrained("./fine_tuned_t5_tokenizer_kp")
112 | 
113 | if __name__=="__main__":
114 |     main()
--------------------------------------------------------------------------------
/Model_training/KeyPhrase Detection/keyphrase-detection-test.py:
--------------------------------------------------------------------------------
1 | from datasets import load_dataset
2 | import torch
3 | from torch.utils.data import Dataset
4 | from torch.utils.data import DataLoader
5 | from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW
6 | from tqdm import tqdm
7 | from time import time
8 | 
9 | 
10 | 
11 | class HuggingFaceDataset(Dataset):
12 |     def __init__(self,dataset):
13 |         self.dataset=dataset
14 |     def __getitem__(self,index):
15 |         item=self.dataset[index]
16 |         # ID=item['id']
17 |         # title=item['title']
18 |         # abstract=item['abstract']
19 |         # keyphrases=item['keyphrases']
20 |         # return ID,title,abstract,keyphrases
21 |         return item
22 |     def __len__(self):
23 |         return len(self.dataset)
24 | class kp_data(HuggingFaceDataset):
25 | 
26 |     def __init__(self,dataset,tokenizer):
27 |         super().__init__(dataset)
28 |         self.tokenizer=tokenizer
29 | 
30 |     def __getitem__(self,idx):
31 |         item=super().__getitem__(idx)
32 |         abstract=item['abstract']
33 |         keyphrases=item['keyphrases']
34 | 
35 |         input_text=f'detect keyword: abstract:{abstract}'
36 | 
37 |         target_text=f'{", ".join(keyphrases)}'
38 | 
39 |         input_ids=self.tokenizer.encode(input_text, truncation=True, padding='max_length', max_length=512, return_tensors='pt')[0]
40 |         target_ids=self.tokenizer.encode(target_text,truncation=True, padding='max_length', max_length=32, return_tensors='pt')[0]
41 | 
42 |         return {'input_ids': input_ids, 'attention_mask': input_ids.ne(0), 'target_ids':target_ids, 'target_attention_mask': target_ids.ne(0)}
43 | 
44 | def parse_list(L:list[str])->list[list[str]]:
45 |     L_new=[]
46 |     for i in L:
47 |         L_new.append(i.split(','))
48 |     return L_new
49 | 
50 | def clean_list(L:list[list[str]]):
51 |     for i in range(len(L)):
52 |         for j in range(len(L[i])):
53 |             L[i][j]=L[i][j].strip().lower()
54 |     return L
55 | 
56 | def recall_precision(y_true:list[list[str]], y_pred:list[list[str]])->tuple[float,float]:
57 |     n=len(y_pred)
58 |     true_pos=0
59 |     prec_denom=0
60 |     rec_denom=0
61 |     for i in range(n):
62 |         preds=set(y_pred[i])
63 |         prec_denom+=len(preds)
64 |         truth=set(y_true[i])
65 |         rec_denom+=len(truth)
66 |         true_pos+= len(preds&truth)
67 |     recall=true_pos/rec_denom
68 |     precision=true_pos/prec_denom
69 |     return recall,precision
70 | 
71 | 
72 | def main():
73 |     dataset=load_dataset('taln-ls2n/kp20k')
74 |     # sample_percentage=0.1
75 |     # dataset['test']=dataset['test'].shuffle()
76 |     # 
num_samples=int(len(dataset['test'])*sample_percentage) 77 | # test_set=dataset['test'].select(range(num_samples)) 78 | test_set=dataset['test'] 79 | tokenizer_path='./fine_tuned_t5_tokenizer_kp' 80 | model_path='./fine_tuned_t5_model_kp' 81 | tokenizer=T5Tokenizer.from_pretrained(tokenizer_path) 82 | kp_test_set=kp_data(test_set,tokenizer) 83 | 84 | device=torch.device('cuda' if torch.cuda.is_available() else 'cpu') 85 | 86 | model=T5ForConditionalGeneration.from_pretrained(model_path) 87 | model.to(device) 88 | batch_size=32 89 | model.eval() 90 | cutoff=10 91 | predicted_keyphrases = [] 92 | target_keyphrases = [] 93 | predicted_keyphrases10=[] 94 | target_keyphrases10= [] 95 | dataloader=DataLoader(kp_test_set,batch_size=batch_size) 96 | with torch.no_grad(): 97 | for batch_idx,batch in enumerate(tqdm(dataloader)): 98 | input_ids=batch['input_ids'].to(device) 99 | attention_mask=batch['attention_mask'].to(device) 100 | target_ids=batch['target_ids'].to(device) 101 | target_attention_mask=batch['target_attention_mask'].to(device) 102 | 103 | outputs=model.generate(input_ids=input_ids,attention_mask=attention_mask,max_length=32) 104 | predicted_phrases = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs] 105 | predicted_keyphrases.extend(predicted_phrases) 106 | # predicted_keyphrases10.extend(predicted_phrases[:cutoff]) 107 | # for i, predicted_phrase in enumerate(predicted_phrases): 108 | # predicted_keyphrases10.append(predicted_phrase) 109 | # if i == cutoff-1: 110 | # break 111 | target_phrases = [tokenizer.decode(target, skip_special_tokens=True) for target in target_ids] 112 | target_keyphrases.extend(target_phrases) 113 | # target_keyphrases10.extend(target_phrases[:cutoff]) 114 | y_true=clean_list(parse_list(target_keyphrases)) 115 | y_pred=clean_list(parse_list(predicted_keyphrases)) 116 | 117 | recall,precision=recall_precision(y_true,y_pred) 118 | 119 | print("Recall: ",recall) 120 | print("Precision: ", precision) 121 | 122 | 123 | 124 | if __name__=="__main__": 125 | main() -------------------------------------------------------------------------------- /Model_training/KeyPhrase Detection/test_checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "c:\\Users\\prara\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 13 | " from .autonotebook import tqdm as notebook_tqdm\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import torch \n", 19 | "from transformers import T5Tokenizer, T5ForConditionalGeneration" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "tokenizer=T5Tokenizer.from_pretrained('./fine_tuned_t5_tokenizer_kp/')\n", 29 | "model=T5ForConditionalGeneration.from_pretrained('./fine_tuned_t5_model_kp/')" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 6, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "def generate_keyphrases(abstract, model,tokenizer):\n", 39 | " device= torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", 40 | " input_text=f'detect keyword: abstract: {abstract}'\n", 41 | " input_ids=tokenizer.encode(input_text, truncation=True,padding='max_length',max_length=512,return_tensors='pt').to(device)\n", 42 | " output=model.generate(input_ids)\n", 43 | " return tokenizer.decode(output[0],skip_special_tokens=True)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "abstract=\"\"\"\n", 53 | "Mitochondria are double-membraned organelles found in most eukaryotic cells. They are often referred to as the \"powerhouses\" of the cell because they generate most of the cell's energy in the form of adenosine triphosphate (ATP). Mitochondria play a crucial role in cellular respiration, which is the process by which cells convert nutrients into usable energy.\n", 54 | "The structure of mitochondria consists of an outer membrane, which surrounds the entire organelle, and an inner membrane that is highly folded to form structures called cristae. The inner membrane encloses the mitochondrial matrix, which contains enzymes and DNA molecules necessary for various metabolic reactions.\n", 55 | "One of the primary functions of mitochondria is to carry out aerobic respiration, a process that uses oxygen to break down glucose and other organic molecules, releasing energy in the form of ATP. This process occurs in the inner membrane of the mitochondria, specifically in the electron transport chain and the citric acid cycle.\n", 56 | "Apart from energy production, mitochondria have other important roles in the cell. They are involved in the regulation of cellular metabolism, calcium signaling, and apoptosis (programmed cell death). Mitochondria also contain their own DNA, known as mitochondrial DNA (mtDNA), which is separate from the nuclear DNA found in the cell's nucleus.\n", 57 | "\n", 58 | "It's worth noting that while mitochondria are present in most eukaryotic cells, certain cell types may have varying numbers of mitochondria depending on their energy requirements. For example, muscle cells and liver cells often contain a higher number of mitochondria due to their high energy demands.\n", 59 | "\n", 60 | "\"\"\"" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 7, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stderr", 70 | "output_type": "stream", 71 | "text": [ 72 | "c:\\Users\\prara\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\transformers\\generation\\utils.py:1346: UserWarning: Using `max_length`'s default (20) to control the generation length. 
This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n", 73 | " warnings.warn(\n" 74 | ] 75 | }, 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "'mitochondria, apoptosis, dna, dna,'" 80 | ] 81 | }, 82 | "execution_count": 7, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "generate_keyphrases(abstract,model,tokenizer)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 3", 102 | "language": "python", 103 | "name": "python3" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 3 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython3", 115 | "version": "3.11.3" 116 | }, 117 | "orig_nbformat": 4 118 | }, 119 | "nbformat": 4, 120 | "nbformat_minor": 2 121 | } 122 | -------------------------------------------------------------------------------- /Model_training/KeyPhrase Detection/train_checkpoint.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | import torch 3 | from torch.utils.data import Dataset 4 | from torch.utils.data import DataLoader 5 | from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW 6 | from tqdm import tqdm 7 | from time import time 8 | 9 | 10 | class HuggingFaceDataset(Dataset): 11 | def __init__(self,dataset): 12 | self.dataset=dataset 13 | def __getitem__(self,index): 14 | item=self.dataset[index] 15 | # ID=item['id'] 16 | # title=item['title'] 17 | # abstract=item['abstract'] 18 | # keyphrases=item['keyphrases'] 19 | # return ID,title,abstract,keyphrases 20 | return item 21 | def __len__(self): 22 | return len(self.dataset) 23 | class kp_data(HuggingFaceDataset): 24 | 25 | def __init__(self,dataset,tokenizer): 26 | super().__init__(dataset) 27 | self.tokenizer=tokenizer 28 | 29 | def __getitem__(self,idx): 30 | item=super().__getitem__(idx) 31 | abstract=item['abstract'] 32 | keyphrases=item['keyphrases'] 33 | 34 | input_text=f'detect keyword: abstract:{abstract}' 35 | 36 | target_text=f'{", ".join(keyphrases)}' 37 | 38 | input_ids=self.tokenizer.encode(input_text, truncation=True, padding='max_length', max_length=512, return_tensors='pt')[0] 39 | target_ids=self.tokenizer.encode(target_text,truncation=True, padding='max_length', max_length=32, return_tensors='pt')[0] 40 | 41 | return {'input_ids': input_ids, 'attention_mask': input_ids.ne(0), 'target_ids':target_ids, 'target_attention_mask': target_ids.ne(0)} 42 | 43 | 44 | def main(): 45 | 46 | dataset=load_dataset("taln-ls2n/kp20k") 47 | sample_percentage=0.25 48 | 49 | dataset['train'] = dataset['train'].shuffle() 50 | num_samples = int(len(dataset['train']) * sample_percentage) 51 | train_set = dataset['train'].select(range(num_samples)) 52 | 53 | checkpoint=torch.load('checkpoint.pt',map_location=torch.device('cpu')) 54 | 55 | model_state_dict=checkpoint['model_state_dict'] 56 | optimizer_state_dict=checkpoint['optimizer_state_dict'] 57 | chk_epoch=checkpoint['epoch'] 58 | batch_idx = checkpoint['batch_idx'] 59 | elapsed_time = checkpoint['time'] 60 | 61 | model_path='./fine_tuned_t5_model_kp' 62 | 
tokenizer_path='./fine_tuned_t5_tokenizer_kp'
63 | 
64 |     tokenizer=T5Tokenizer.from_pretrained(tokenizer_path)
65 |     kp_dataset=kp_data(train_set,tokenizer)
66 | 
67 |     model=T5ForConditionalGeneration.from_pretrained(model_path)
68 | 
69 |     model.load_state_dict(model_state_dict)
70 | 
71 |     epochs=10
72 |     batch_size=32
73 |     learning_rate=2e-5
74 |     dataloader=DataLoader(kp_dataset,batch_size=batch_size,shuffle=True)
75 |     device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
76 | 
77 |     model.to(device)
78 |     optimizer=AdamW(model.parameters(), lr=learning_rate)
79 |     scheduler=torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
80 |     optimizer.load_state_dict(optimizer_state_dict)
81 | 
82 |     checkpoint_interval=1800
83 |     start_time=time()
84 | 
85 |     for epoch in range(chk_epoch,epochs):
86 |         model.train()
87 |         total_loss=0
88 |         for batch_idx,batch in enumerate(tqdm(dataloader,desc=f'Epoch {epoch}')):
89 |             input_ids = batch['input_ids'].to(device)
90 |             attention_mask = batch['attention_mask'].to(device)
91 |             target_ids = batch['target_ids'].to(device)
92 |             target_attention_mask = batch['target_attention_mask'].to(device)
93 | 
94 |             outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids, decoder_attention_mask=target_attention_mask)
95 | 
96 |             loss = outputs.loss
97 |             total_loss += loss.item()
98 | 
99 |             optimizer.zero_grad()
100 |             loss.backward()
101 |             optimizer.step()
102 | 
103 |             curr_time=time()
104 |             elapsed_time=curr_time-start_time
105 |             if elapsed_time >=checkpoint_interval:
106 |                 start_time=time()
107 |                 checkpoint = {
108 |                     'model_state_dict': model.state_dict(),
109 |                     'optimizer_state_dict': optimizer.state_dict(),
110 |                     'epoch': epoch,
111 |                     'batch_idx': batch_idx,
112 |                     'time': elapsed_time
113 |                 }
114 | 
115 |                 torch.save(checkpoint, './checkpoint.pt')
116 |                 print(f'Checkpoint at {(elapsed_time)/60} minutes saved!')
117 | 
118 | 
119 |         scheduler.step()
120 |         avg_loss = total_loss / len(dataloader)
121 |         print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss}")
122 | 
123 |     model.save_pretrained("./fine_tuned_t5_model_kp")
124 |     tokenizer.save_pretrained("./fine_tuned_t5_tokenizer_kp")
125 | 
126 | 
127 | if __name__=="__main__":
128 |     main()
129 | 
130 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # EduAid: AI Quiz Generation 🚀
2 | 
3 | Online learning has taken the front seat in the post-pandemic age. With the advent of sophisticated AI architectures like Transformers, it is only natural that AI would find its way into education. Learning online via platforms like YouTube or MOOCs is often a method of self-learning. The biggest obstacle faced by students in self-learning is the lack of attention span. An online tool that can generate short quizzes from input educational content can be a great resource for both teachers and students. It helps retain important information, frame questions, and quickly revise large chunks of content.
4 | 
5 | EduAid is one such project currently available in the form of a browser extension.
6 | 
7 | ## Installation and Setup
8 | 
9 | ### 1. Clone the Repository
10 | 
11 | ```bash
12 | git clone https://github.com/AOSSIE-Org/EduAid.git
13 | cd EduAid
14 | ```
15 | ### 2. Backend Setup
16 | 
17 | You can choose to set up the backend manually or use an automated shell script.
18 | 
19 | #### Option 1: Manual Setup
20 | 
21 | 1.
**Download the Sense2Vec Model**:
22 |    - Download the Sense2Vec model from [this link](https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz) and extract the contents into the `backend` folder.
23 | 
24 | 2. **Install Python Dependencies**:
25 |    - Navigate to the root repository folder and run the following command to install the required Python dependencies:
26 |      ```bash
27 |      pip install -r requirements.txt
28 |      ```
29 | 
30 | 3. **Run Flask App**:
31 |    - Navigate to the `backend` folder and start the Flask app:
32 |      ```bash
33 |      python server.py
34 |      ```
35 |    - This will activate the backend for the application.
36 | 
37 | #### Option 2: Automated Setup with Shell Script
38 | 
39 | 1. **Run the Setup Script**:
40 |    - Navigate to the `backend` folder and run the following shell script:
41 |      ```bash
42 |      ./script.sh
43 |      ```
44 |    - This script will automatically download and extract the Sense2Vec model, install Python dependencies, and start the Flask app.
45 | 
46 | #### Troubleshooting
47 | 
48 | - If the script fails to run, ensure that you have execution permissions:
49 |   ```bash
50 |   chmod +x script.sh
51 |   ```
52 | 
53 | ### 3. Configure Google APIs
54 | 
55 | #### Google Docs API
56 | 
57 | 1. Navigate to the `backend` folder.
58 | 2. Open the `service_account_key.json` file.
59 | 3. Enter the service account details for the Google Docs API.
60 | 4. Refer to the [Google Docs API documentation](https://developers.google.com/docs/api/reference/rest) for more details.
61 | 
62 | #### Google Forms API
63 | 
64 | 1. Open the `credentials.json` file in the `backend` folder.
65 | 2. Enter the necessary credentials for the Google Forms API.
66 | 3. Refer to the [Google Forms API quickstart guide](https://developers.google.com/forms/api/quickstart/python#set_up_your_environment) for setup instructions.
67 | 
68 | ### 4. Extension Setup
69 | 
70 | #### Install Dependencies
71 | 
72 | Navigate to the `extension` folder and install the required dependencies:
73 | 
74 | ```bash
75 | npm install
76 | ```
77 | #### Build the Project
78 | 
79 | Build the extension:
80 | 
81 | ```bash
82 | npm run build
83 | ```
84 | #### Load the Extension in Chrome
85 | 
86 | 1. Open Chrome and navigate to `chrome://extensions/`.
87 | 2. Enable "Developer mode" (top-right corner).
88 | 3. Click on "Load Unpacked" and select the `dist` folder created in the previous step.
89 | 
90 | #### EduAid Web App
91 | In addition to the browser extension, EduAid also offers a web app that provides the same powerful features for quiz generation. The web app allows you to access EduAid's capabilities directly from your browser without needing to install any extensions. Just start the backend server locally and:
92 | 
93 | 1. Navigate to the Web App Directory:
94 | `cd eduaid_web`
95 | 2. Install Dependencies:
96 | `npm install`
97 | 3. Start the Web App:
98 | `npm run start`
99 | 
100 | ## Features
101 | 
102 | 1. **Dynamic Question Generation**:
103 |    - **Boolean Questions**: Quickly generate engaging true/false questions.
104 |    - **Multiple-Choice Questions (MCQ)**: Create diverse MCQs with up to 4 options for comprehensive quizzes.
105 |    - **Single Correct Answer Questions**: Formulate questions with one clear correct answer.
106 |    - **Customizable Question Count**: Tailor the number of questions to your needs—just select the type, set the number, and hit "Generate" to see your quiz come to life!
107 | 
108 | 2.
**Quiz History at Your Fingertips**: 109 | - **Last 5 Quizzes**: Instantly access and review the last 5 quizzes you've generated. No more losing track—your quiz history is always just a click away! 110 | 111 | 3. **Smart Answer Generator**: 112 | - **Automatic Answers**: Seamlessly generate answers for various question types. Toggle the switch on the Get Started page to enable or disable this feature. 113 | - **MCQ Answer Magic**: For MCQs, provide the options and let the tool generate the perfect answers for you. 114 | 115 | 4. **Wiki-Based Quiz Generation**: 116 | - **Topic-Based Quizzes**: Missing text content for a topic? Toggle the switch in the bottom right corner of the Question Generator page to create a quiz based on the topic using external knowledge sources. 117 | 118 | 5. **Flexible Quiz Input**: 119 | - **File Parsing**: Upload `.txt`, `.docx`, or `.pdf` files to easily extract content for quiz creation. 120 | - **Google Docs Integration**: Use the open shareable link from Google Docs to generate quizzes directly from your documents. 121 | 122 | 6. **Enhanced Quiz Visibility**: 123 | - **SidePanel View**: Enjoy an organized and enhanced view of your generated quizzes right in the SidePanel. 124 | 125 | 7. **Editable Forms**: 126 | - **PDF Forms**: Generate editable PDF forms based on your quizzes. 127 | - **Google Forms**: Create Google Forms for your quizzes, perfect for easy distribution and response collection. 128 | 129 | ## How to contribute 130 | 131 | This is the second year of the project. While some may have their own ideas on how to contribute, for the newcomers to the repository, you may follow the following steps: 132 | 133 | 1. First get to know the organization and the project by visiting the [Official Website](https://github.com/AOSSIE-Org) 134 | 135 | 2. Visit the [Discord Channel](https://discord.com/channels/1022871757289422898/1073262393670504589) for interacting with the community! 
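136 | 
137 | ## Trying the QnA Generation Pipeline from the CLI
138 | 
139 | The repository also ships an end-to-end test script, `Testing/generate_qa.py`, which summarizes a PDF placed in `Testing/data` and then runs the fine-tuned keyphrase-detection and answer-aware question-generation models over the summary. Below is a minimal sketch of an invocation, assuming the fine-tuned model and tokenizer folders under `Model_training` already exist locally (they are git-ignored, so you must train or supply them first), and that `lecture.pdf` is a hypothetical file you have saved in `Testing/data`:
140 | 
141 | ```bash
142 | cd Testing
143 | # -f: PDF name inside ./data (the .pdf extension is optional)
144 | # -n: number of pages to summarize; --start_page: where to begin reading
145 | python generate_qa.py -f lecture -n 5 --start_page 1 --save_as lecture_qa
146 | # The question-answer pairs are printed and also saved to ./qna/lecture_qa.txt
147 | ```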
136 | 137 | 138 | -------------------------------------------------------------------------------- /Testing/data/README.md: -------------------------------------------------------------------------------- 1 | Save the PDFs you wish to read in this directory -------------------------------------------------------------------------------- /Testing/generate_qa.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from transformers import T5ForConditionalGeneration, T5Tokenizer 3 | from transformers import pipeline 4 | import argparse 5 | from transformers import BartForConditionalGeneration, BartTokenizer 6 | import PyPDF2 7 | import os 8 | 9 | 10 | model_A='../Model_training/KeyPhrase Detection/fine_tuned_t5_model_kp/' 11 | 12 | model_A_tokenizer='../Model_training/KeyPhrase Detection/fine_tuned_t5_tokenizer_kp/' 13 | 14 | model_B='../Model_training/AnswerAwareQG/fine_tuned_t5_model_aaqg/' 15 | 16 | model_B_tokenizer='../Model_training/AnswerAwareQG/fine_tuned_t5_tokenizer_aaqg/' 17 | 18 | summarizer_model='facebook/bart-large-cnn' 19 | 20 | DATA_DIR='./data' 21 | 22 | def parse_arguments(): 23 | parser=argparse.ArgumentParser() 24 | 25 | parser.add_argument('--file_name','-f', help='Name of the PDF file which contains the data') 26 | parser.add_argument('--num_pages', '-n', help='Number of pages of the PDF to use', type=int, default=5) 27 | parser.add_argument('--start_page', type=int, help='Where to start reading the PDF from', default=1) 28 | parser.add_argument('--save_dir', '-s', help='Directory where the generated QnA is to be saved', default='./qna') 29 | parser.add_argument('--save_as', help='Name of the file to save as (without extension)') 30 | 31 | args=parser.parse_args() 32 | return args 33 | 34 | def generate_question(context,answer,model_path, tokenizer_path): 35 | model = T5ForConditionalGeneration.from_pretrained(model_path) 36 | tokenizer = T5Tokenizer.from_pretrained(tokenizer_path) 37 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 38 | model.to(device) 39 | 40 | input_text=f'answer: {answer} context: {context}' 41 | 42 | inputs=tokenizer.encode_plus( 43 | input_text, 44 | padding='max_length', 45 | truncation=True, 46 | max_length=512, 47 | return_tensors='pt' 48 | ) 49 | 50 | input_ids=inputs['input_ids'].to(device) 51 | attention_mask=inputs['attention_mask'].to(device) 52 | 53 | with torch.no_grad(): 54 | output=model.generate( 55 | input_ids=input_ids, 56 | attention_mask=attention_mask, 57 | max_length=32 58 | ) 59 | 60 | generated_question = tokenizer.decode(output[0], skip_special_tokens=True) 61 | return generated_question 62 | 63 | def generate_keyphrases(abstract, model_path,tokenizer_path): 64 | device= torch.device('cuda' if torch.cuda.is_available() else 'cpu') 65 | model = T5ForConditionalGeneration.from_pretrained(model_path) 66 | tokenizer = T5Tokenizer.from_pretrained(tokenizer_path) 67 | model.to(device) 68 | # tokenizer.to(device) 69 | input_text=f'detect keyword: abstract: {abstract}' 70 | input_ids=tokenizer.encode(input_text, truncation=True,padding='max_length',max_length=512,return_tensors='pt').to(device) 71 | output=model.generate(input_ids) 72 | keyphrases= tokenizer.decode(output[0],skip_special_tokens=True).split(',') 73 | return [x.strip() for x in keyphrases if x != ''] 74 | 75 | 76 | def summarize_text(input_text,model_name): 77 | # Load the pre-trained model and tokenizer 78 | model = BartForConditionalGeneration.from_pretrained(model_name) 79 | tokenizer = 


--------------------------------------------------------------------------------
/Testing/qna/README.md:
--------------------------------------------------------------------------------
1 | The generated qna text file will be stored here
--------------------------------------------------------------------------------
/backend/Generator/__init__.py:
--------------------------------------------------------------------------------
1 | # Constructor for questgen
2 | from __future__ import absolute_import
3 | from Generator.main import MCQGenerator, BoolQGenerator, ShortQGenerator, AnswerPredictor, GoogleDocsService, FileProcessor, QuestionGenerator
--------------------------------------------------------------------------------
/backend/Generator/encoding.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from transformers import T5ForConditionalGeneration,T5Tokenizer
3 | 
4 | 
5 | def greedy_decoding (inp_ids,attn_mask,model,tokenizer):
6 |     greedy_output = model.generate(input_ids=inp_ids, attention_mask=attn_mask, max_length=256)
7 |     Question = tokenizer.decode(greedy_output[0], skip_special_tokens=True,clean_up_tokenization_spaces=True)
8 |     return Question.strip().capitalize()
9 | 
10 | 
11 | def beam_search_decoding (inp_ids,attn_mask,model,tokenizer,num):
12 |     beam_output = model.generate(input_ids=inp_ids,
13 |                                  attention_mask=attn_mask,
14 |                                  max_length=256,
15 |                                  num_beams=10,
16 |                                  num_return_sequences=num,
17 |                                  no_repeat_ngram_size=2,
18 |                                  early_stopping=True
19 |                                  )
20 |     Questions
= [tokenizer.decode(out, skip_special_tokens=True, clean_up_tokenization_spaces=True) for out in 21 | beam_output] 22 | return [Question.strip().capitalize() for Question in Questions] 23 | 24 | 25 | def topkp_decoding (inp_ids,attn_mask,model,tokenizer): 26 | topkp_output = model.generate(input_ids=inp_ids, 27 | attention_mask=attn_mask, 28 | max_length=256, 29 | do_sample=True, 30 | top_k=40, 31 | top_p=0.80, 32 | num_return_sequences=3, 33 | no_repeat_ngram_size=2, 34 | early_stopping=True 35 | ) 36 | Questions = [tokenizer.decode(out, skip_special_tokens=True,clean_up_tokenization_spaces=True) for out in topkp_output] 37 | return [Question.strip().capitalize() for Question in Questions] 38 | -------------------------------------------------------------------------------- /backend/Generator/mcq.py: -------------------------------------------------------------------------------- 1 | import string 2 | import nltk 3 | import pke 4 | import torch 5 | from nltk.tokenize import sent_tokenize 6 | from flashtext import KeywordProcessor 7 | from nltk.corpus import stopwords 8 | from sense2vec import Sense2Vec 9 | from similarity.normalized_levenshtein import NormalizedLevenshtein 10 | 11 | nltk.download('brown') 12 | nltk.download('stopwords') 13 | nltk.download('popular') 14 | 15 | def is_word_available(word, s2v_model): 16 | word = word.replace(" ", "_") 17 | sense = s2v_model.get_best_sense(word) 18 | if sense is not None: 19 | return True 20 | else: 21 | return False 22 | 23 | def generate_word_variations(word): 24 | letters = 'abcdefghijklmnopqrstuvwxyz ' + string.punctuation 25 | splits = [(word[:i], word[i:]) for i in range(len(word) + 1)] 26 | deletes = [L + R[1:] for L, R in splits if R] 27 | transposes = [L + R[1] + R[0] + R[2:] for L, R in splits if len(R) > 1] 28 | replaces = [L + c + R[1:] for L, R in splits if R for c in letters] 29 | inserts = [L + c + R for L, R in splits for c in letters] 30 | return set(deletes + transposes + replaces + inserts) 31 | 32 | def find_similar_words(word, s2v_model): 33 | output = [] 34 | word_preprocessed = word.translate(word.maketrans("", "", string.punctuation)) 35 | word_preprocessed = word_preprocessed.lower() 36 | 37 | word_variations = generate_word_variations(word_preprocessed) 38 | 39 | word = word.replace(" ", "_") 40 | 41 | sense = s2v_model.get_best_sense(word) 42 | most_similar = s2v_model.most_similar(sense, n=15) 43 | 44 | compare_list = [word_preprocessed] 45 | for each_word in most_similar: 46 | append_word = each_word[0].split("|")[0].replace("_", " ") 47 | append_word = append_word.strip() 48 | append_word_processed = append_word.lower() 49 | append_word_processed = append_word_processed.translate(word.maketrans("", "", string.punctuation)) 50 | if append_word_processed not in compare_list and word_preprocessed not in append_word_processed and append_word_processed not in word_variations: 51 | output.append(append_word.title()) 52 | compare_list.append(append_word_processed) 53 | 54 | out = list(dict.fromkeys(output)) 55 | return out 56 | 57 | def get_answer_choices(answer, s2v_model): 58 | choices = [] 59 | 60 | try: 61 | choices = find_similar_words(answer, s2v_model) 62 | if len(choices) > 0: 63 | print("Generated choices successfully for word:", answer) 64 | return choices, "sense2vec" 65 | except Exception as e: 66 | print(f"Failed to generate choices for word: {answer}. 
Error: {e}") 67 | 68 | return choices, "None" 69 | 70 | def tokenize_into_sentences(text): 71 | sentences = [sent_tokenize(text)] 72 | sentences = [y for x in sentences for y in x] 73 | sentences = [sentence.strip() for sentence in sentences if len(sentence) > 20] 74 | return sentences 75 | 76 | def find_sentences_with_keywords(keywords, sentences): 77 | keyword_processor = KeywordProcessor() 78 | keyword_sentences = {} 79 | for word in keywords: 80 | word = word.strip() 81 | keyword_sentences[word] = [] 82 | keyword_processor.add_keyword(word) 83 | for sentence in sentences: 84 | keywords_found = keyword_processor.extract_keywords(sentence) 85 | for key in keywords_found: 86 | keyword_sentences[key].append(sentence) 87 | 88 | for key in keyword_sentences.keys(): 89 | values = keyword_sentences[key] 90 | values = sorted(values, key=len, reverse=True) 91 | keyword_sentences[key] = values 92 | 93 | delete_keys = [k for k, v in keyword_sentences.items() if len(v) == 0] 94 | for del_key in delete_keys: 95 | del keyword_sentences[del_key] 96 | 97 | return keyword_sentences 98 | 99 | def are_words_distant(words_list, current_word, threshold, normalized_levenshtein): 100 | score_list = [normalized_levenshtein.distance(word.lower(), current_word.lower()) for word in words_list] 101 | return min(score_list) >= threshold 102 | 103 | def filter_useful_phrases(phrase_keys, max_count, normalized_levenshtein): 104 | filtered_phrases = [] 105 | if phrase_keys: 106 | filtered_phrases.append(phrase_keys[0]) 107 | for ph in phrase_keys[1:]: 108 | if are_words_distant(filtered_phrases, ph, 0.7, normalized_levenshtein): 109 | filtered_phrases.append(ph) 110 | if len(filtered_phrases) >= max_count: 111 | break 112 | return filtered_phrases 113 | 114 | def extract_noun_phrases(text): 115 | out = [] 116 | extractor = pke.unsupervised.MultipartiteRank() 117 | extractor.load_document(input=text, language='en') 118 | pos = {'PROPN', 'NOUN'} 119 | stoplist = list(string.punctuation) 120 | stoplist += stopwords.words('english') 121 | extractor.candidate_selection(pos=pos) 122 | try: 123 | extractor.candidate_weighting(alpha=1.1, threshold=0.75, method='average') 124 | except Exception as e: 125 | print(f"Error in candidate weighting: {e}") 126 | return out 127 | 128 | keyphrases = extractor.get_n_best(n=10) 129 | out = [key[0] for key in keyphrases] 130 | return out 131 | 132 | def extract_phrases_from_doc(doc): 133 | phrases = {} 134 | for np in doc.noun_chunks: 135 | phrase = np.text 136 | len_phrase = len(phrase.split()) 137 | if len_phrase > 1: 138 | if phrase not in phrases: 139 | phrases[phrase] = 1 140 | else: 141 | phrases[phrase] += 1 142 | 143 | phrase_keys = list(phrases.keys()) 144 | phrase_keys = sorted(phrase_keys, key=lambda x: len(x), reverse=True) 145 | phrase_keys = phrase_keys[:50] 146 | return phrase_keys 147 | 148 | def identify_keywords(nlp_model, text, max_keywords, s2v_model, fdist, normalized_levenshtein, num_sentences): 149 | doc = nlp_model(text) 150 | max_keywords = int(max_keywords) 151 | 152 | keywords = extract_noun_phrases(text) 153 | keywords = sorted(keywords, key=lambda x: fdist[x]) 154 | keywords = filter_useful_phrases(keywords, max_keywords, normalized_levenshtein) 155 | 156 | phrase_keys = extract_phrases_from_doc(doc) 157 | filtered_phrases = filter_useful_phrases(phrase_keys, max_keywords, normalized_levenshtein) 158 | 159 | total_phrases = keywords + filtered_phrases 160 | 161 | total_phrases_filtered = filter_useful_phrases(total_phrases, min(max_keywords, 2 * 
num_sentences), normalized_levenshtein) 162 | 163 | answers = [] 164 | for answer in total_phrases_filtered: 165 | if answer not in answers and is_word_available(answer, s2v_model): 166 | answers.append(answer) 167 | 168 | answers = answers[:max_keywords] 169 | return answers 170 | 171 | def generate_multiple_choice_questions(keyword_sent_mapping, device, tokenizer, model, sense2vec_model, normalized_levenshtein): 172 | batch_text = [] 173 | answers = keyword_sent_mapping.keys() 174 | for answer in answers: 175 | txt = keyword_sent_mapping[answer] 176 | context = "context: " + txt 177 | text = context + " " + "answer: " + answer + " " 178 | batch_text.append(text) 179 | 180 | encoding = tokenizer.batch_encode_plus(batch_text, pad_to_max_length=True, return_tensors="pt") 181 | 182 | print("Generating questions using the model...") 183 | input_ids, attention_masks = encoding["input_ids"].to(device), encoding["attention_mask"].to(device) 184 | 185 | with torch.no_grad(): 186 | outputs = model.generate(input_ids=input_ids, 187 | attention_mask=attention_masks, 188 | max_length=150) 189 | 190 | generated_questions = [] 191 | for index, answer in enumerate(answers): 192 | out = outputs[index, :] 193 | decoded_question = tokenizer.decode(out, skip_special_tokens=True, clean_up_tokenization_spaces=True) 194 | 195 | question_statement = decoded_question.replace("question:", "").strip() 196 | options, options_algorithm = get_answer_choices(answer, sense2vec_model) 197 | options = filter_useful_phrases(options, 10, normalized_levenshtein) 198 | extra_options = options[3:] 199 | options = options[:3] 200 | 201 | question_data = { 202 | "question_statement": question_statement, 203 | "question_type": "MCQ", 204 | "answer": answer, 205 | "id": index + 1, 206 | "options": options, 207 | "options_algorithm": options_algorithm, 208 | "extra_options": extra_options, 209 | "context": keyword_sent_mapping[answer] 210 | } 211 | 212 | generated_questions.append(question_data) 213 | 214 | return {"questions": generated_questions} 215 | 216 | def generate_normal_questions(keyword_sent_mapping, device, tokenizer, model): 217 | batch_text = [] 218 | answers = keyword_sent_mapping.keys() 219 | 220 | for answer in answers: 221 | txt = keyword_sent_mapping[answer] 222 | context = "context: " + txt 223 | text = context + " " + "answer: " + answer + " " 224 | batch_text.append(text) 225 | 226 | encoding = tokenizer.batch_encode_plus(batch_text, pad_to_max_length=True, return_tensors="pt") 227 | 228 | print("Running model for generation...") 229 | input_ids, attention_masks = encoding["input_ids"].to(device), encoding["attention_mask"].to(device) 230 | 231 | with torch.no_grad(): 232 | outs = model.generate(input_ids=input_ids, 233 | attention_mask=attention_masks, 234 | max_length=150) 235 | 236 | output_array = {"questions": []} 237 | 238 | for index, val in enumerate(answers): 239 | individual_quest = {} 240 | out = outs[index, :] 241 | dec = tokenizer.decode(out, skip_special_tokens=True, clean_up_tokenization_spaces=True) 242 | 243 | Question = dec.replace('question:', '') 244 | Question = Question.strip() 245 | 246 | individual_quest['Question'] = Question 247 | individual_quest['Answer'] = val 248 | individual_quest["id"] = index + 1 249 | individual_quest["context"] = keyword_sent_mapping[val] 250 | 251 | output_array["questions"].append(individual_quest) 252 | 253 | return output_array 254 | -------------------------------------------------------------------------------- /backend/script.sh: 
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | REPO_URL="https://github.com/AOSSIE-Org/EduAid.git"
4 | S2V_URL="https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz"
5 | REPO_DIR="EduAid"
6 | S2V_ARCHIVE="s2v_reddit_2015_md.tar.gz"
7 | S2V_DIR="s2v_old"
8 | 
9 | if [ ! -d "venv" ]; then
10 |     python3 -m venv venv
11 | fi
12 | source venv/bin/activate
13 | 
14 | if [ ! -d "$REPO_DIR" ]; then
15 |     git clone "$REPO_URL"
16 | fi
17 | 
18 | if [ ! -f "$S2V_ARCHIVE" ]; then
19 |     wget "$S2V_URL" -O "$S2V_ARCHIVE"
20 | fi
21 | 
22 | if [ ! -d "$REPO_DIR/$S2V_DIR" ]; then
23 |     mkdir -p "$REPO_DIR/$S2V_DIR"
24 |     tar -xzvf "$S2V_ARCHIVE" -C "$REPO_DIR/$S2V_DIR" --strip-components=1
25 | fi
26 | 
27 | # Deactivate virtual environment after completion
28 | deactivate  # 'deactivate' is a shell function defined by venv; it must not be sourced
--------------------------------------------------------------------------------
/backend/server.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, request, jsonify
2 | from flask_cors import CORS
3 | from pprint import pprint
4 | import nltk
5 | import subprocess
6 | import os
7 | import glob
8 | 
9 | from sklearn.metrics.pairwise import cosine_similarity
10 | from sklearn.feature_extraction.text import TfidfVectorizer
11 | nltk.download("stopwords")
12 | nltk.download('punkt_tab')
13 | from Generator import main
14 | import re
15 | import json
16 | import spacy
17 | from transformers import pipeline
18 | from spacy.lang.en.stop_words import STOP_WORDS
19 | from string import punctuation
20 | from heapq import nlargest
21 | import random
22 | import webbrowser
23 | from apiclient import discovery
24 | from httplib2 import Http
25 | from oauth2client import client, file, tools
26 | from mediawikiapi import MediaWikiAPI
27 | 
28 | app = Flask(__name__)
29 | CORS(app)
30 | print("Starting Flask App...")
31 | 
32 | SERVICE_ACCOUNT_FILE = './service_account_key.json'
33 | SCOPES = ['https://www.googleapis.com/auth/documents.readonly']
34 | 
35 | MCQGen = main.MCQGenerator()
36 | answer = main.AnswerPredictor()
37 | BoolQGen = main.BoolQGenerator()
38 | ShortQGen = main.ShortQGenerator()
39 | qg = main.QuestionGenerator()
40 | docs_service = main.GoogleDocsService(SERVICE_ACCOUNT_FILE, SCOPES)
41 | file_processor = main.FileProcessor()
42 | mediawikiapi = MediaWikiAPI()
43 | qa_model = pipeline("question-answering")
44 | 
45 | 
46 | def process_input_text(input_text, use_mediawiki):
47 |     if use_mediawiki == 1:
48 |         input_text = mediawikiapi.summary(input_text, 8)
49 |     return input_text
50 | 
51 | 
52 | @app.route("/get_mcq", methods=["POST"])
53 | def get_mcq():
54 |     data = request.get_json()
55 |     input_text = data.get("input_text", "")
56 |     use_mediawiki = data.get("use_mediawiki", 0)
57 |     max_questions = data.get("max_questions", 4)
58 |     input_text = process_input_text(input_text, use_mediawiki)
59 |     output = MCQGen.generate_mcq(
60 |         {"input_text": input_text, "max_questions": max_questions}
61 |     )
62 |     questions = output["questions"]
63 |     return jsonify({"output": questions})
64 | 
65 | 
66 | @app.route("/get_boolq", methods=["POST"])
67 | def get_boolq():
68 |     data = request.get_json()
69 |     input_text = data.get("input_text", "")
70 |     use_mediawiki = data.get("use_mediawiki", 0)
71 |     max_questions = data.get("max_questions", 4)
72 |     input_text = process_input_text(input_text, use_mediawiki)
73 |     output = BoolQGen.generate_boolq(
74 |         {"input_text": input_text, "max_questions": max_questions}
75 |     )
76 |     boolean_questions = output["Boolean_Questions"]
77 |     return jsonify({"output": boolean_questions})
78 | 
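# Request sketch for the generator endpoints (/get_mcq and /get_boolq above,
# /get_shortq below). Illustrative only; assumes the server runs locally on
# port 5000:
#
#   curl -X POST http://localhost:5000/get_boolq \
#        -H "Content-Type: application/json" \
#        -d '{"input_text": "Your source text here", "max_questions": 3}'
#
# Each of these endpoints responds with {"output": ...} holding the generated
# questions.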
79 | 
80 | @app.route("/get_shortq", methods=["POST"])
81 | def get_shortq():
82 |     data = request.get_json()
83 |     input_text = data.get("input_text", "")
84 |     use_mediawiki = data.get("use_mediawiki", 0)
85 |     max_questions = data.get("max_questions", 4)
86 |     input_text = process_input_text(input_text, use_mediawiki)
87 |     output = ShortQGen.generate_shortq(
88 |         {"input_text": input_text, "max_questions": max_questions}
89 |     )
90 |     questions = output["questions"]
91 |     return jsonify({"output": questions})
92 | 
93 | 
94 | @app.route("/get_problems", methods=["POST"])
95 | def get_problems():
96 |     data = request.get_json()
97 |     input_text = data.get("input_text", "")
98 |     use_mediawiki = data.get("use_mediawiki", 0)
99 |     max_questions_mcq = data.get("max_questions_mcq", 4)
100 |     max_questions_boolq = data.get("max_questions_boolq", 4)
101 |     max_questions_shortq = data.get("max_questions_shortq", 4)
102 |     input_text = process_input_text(input_text, use_mediawiki)
103 |     output1 = MCQGen.generate_mcq(
104 |         {"input_text": input_text, "max_questions": max_questions_mcq}
105 |     )
106 |     output2 = BoolQGen.generate_boolq(
107 |         {"input_text": input_text, "max_questions": max_questions_boolq}
108 |     )
109 |     output3 = ShortQGen.generate_shortq(
110 |         {"input_text": input_text, "max_questions": max_questions_shortq}
111 |     )
112 |     return jsonify(
113 |         {"output_mcq": output1, "output_boolq": output2, "output_shortq": output3}
114 |     )
115 | 
116 | @app.route("/get_mcq_answer", methods=["POST"])
117 | def get_mcq_answer():
118 |     data = request.get_json()
119 |     input_text = data.get("input_text", "")
120 |     input_questions = data.get("input_question", [])
121 |     input_options = data.get("input_options", [])
122 |     outputs = []
123 | 
124 |     if not input_questions or not input_options or len(input_questions) != len(input_options):
125 |         return jsonify({"output": outputs})  # keep the response key consistent with the success path
126 | 
127 |     for question, options in zip(input_questions, input_options):
128 |         # Generate answer using the QA model
129 |         qa_response = qa_model(question=question, context=input_text)
130 |         generated_answer = qa_response["answer"]
131 | 
132 |         # Calculate similarity between generated answer and each option
133 |         options_with_answer = options + [generated_answer]
134 |         vectorizer = TfidfVectorizer().fit_transform(options_with_answer)
135 |         vectors = vectorizer.toarray()
136 |         generated_answer_vector = vectors[-1].reshape(1, -1)
137 | 
138 |         similarities = cosine_similarity(vectors[:-1], generated_answer_vector).flatten()
139 |         max_similarity_index = similarities.argmax()
140 | 
141 |         # Return the option with the highest similarity
142 |         best_option = options[max_similarity_index]
143 | 
144 |         outputs.append(best_option)
145 | 
146 |     return jsonify({"output": outputs})
147 | 
148 | 
149 | @app.route("/get_shortq_answer", methods=["POST"])
150 | def get_answer():
151 |     data = request.get_json()
152 |     input_text = data.get("input_text", "")
153 |     input_questions = data.get("input_question", [])
154 |     answers = []
155 |     for question in input_questions:
156 |         qa_response = qa_model(question=question, context=input_text)
157 |         answers.append(qa_response["answer"])
158 | 
159 |     return jsonify({"output": answers})
160 | 
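# The answer endpoints pair the source text with a list of questions.
# Illustrative request body for /get_shortq_answer above (and, with the same
# shape, /get_boolean_answer below):
#
#   {"input_text": "<source passage>",
#    "input_question": ["What is ...?", "Why does ...?"]}
#
# The response is {"output": [...]} with one answer per input question.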
161 | 
162 | @app.route("/get_boolean_answer", methods=["POST"])
163 | def get_boolean_answer():
164 |     data = request.get_json()
165 |     input_text = data.get("input_text", "")
166 |     input_questions = data.get("input_question", [])
167 |     output = []
168 | 
169 |     for question in input_questions:
170 |         qa_response = answer.predict_boolean_answer(
171 |             {"input_text": input_text, "input_question": question}
172 |         )
173 |         if qa_response:
174 |             output.append("True")
175 |         else:
176 |             output.append("False")
177 | 
178 |     return jsonify({"output": output})
179 | 
180 | 
181 | @app.route('/get_content', methods=['POST'])
182 | def get_content():
183 |     try:
184 |         data = request.get_json()
185 |         document_url = data.get('document_url')
186 |         if not document_url:
187 |             return jsonify({'error': 'Document URL is required'}), 400
188 | 
189 |         text = docs_service.get_document_content(document_url)
190 |         return jsonify(text)
191 |     except ValueError as e:
192 |         return jsonify({'error': str(e)}), 400
193 |     except Exception as e:
194 |         return jsonify({'error': str(e)}), 500
195 | 
196 | 
197 | @app.route("/generate_gform", methods=["POST"])
198 | def generate_gform():
199 |     data = request.get_json()
200 |     qa_pairs = data.get("qa_pairs", [])  # default to an empty list; the value is iterated below
201 |     question_type = data.get("question_type", "")
202 |     SCOPES = "https://www.googleapis.com/auth/forms.body"
203 |     DISCOVERY_DOC = "https://forms.googleapis.com/$discovery/rest?version=v1"
204 | 
205 |     store = file.Storage("token.json")
206 |     creds = store.get()  # reuse a previously stored token if one exists
207 |     if not creds or creds.invalid:
208 |         flow = client.flow_from_clientsecrets("credentials.json", SCOPES)
209 |         creds = tools.run_flow(flow, store)
210 | 
211 |     form_service = discovery.build(
212 |         "forms",
213 |         "v1",
214 |         http=creds.authorize(Http()),
215 |         discoveryServiceUrl=DISCOVERY_DOC,
216 |         static_discovery=False,
217 |     )
218 |     NEW_FORM = {
219 |         "info": {
220 |             "title": "EduAid form",
221 |         }
222 |     }
223 |     requests_list = []
224 | 
225 |     if question_type == "get_shortq":
226 |         for index, qapair in enumerate(qa_pairs):
227 |             requests = {
228 |                 "createItem": {
229 |                     "item": {
230 |                         "title": qapair["question"],
231 |                         "questionItem": {
232 |                             "question": {
233 |                                 "required": True,
234 |                                 "textQuestion": {},
235 |                             }
236 |                         },
237 |                     },
238 |                     "location": {"index": index},
239 |                 }
240 |             }
241 |             requests_list.append(requests)
242 |     elif question_type == "get_mcq":
243 |         for index, qapair in enumerate(qa_pairs):
244 |             # Extract and filter the options
245 |             options = qapair.get("options", [])
246 |             valid_options = [
247 |                 opt for opt in options if opt
248 |             ]  # Filter out empty or None options
249 | 
250 |             # Ensure the answer is included in the choices
251 |             choices = [qapair["answer"]] + valid_options[
252 |                 :3
253 |             ]  # Include up to the first 3 options
254 | 
255 |             # Randomize the order of the choices
256 |             random.shuffle(choices)
257 | 
258 |             # Prepare the request structure
259 |             choices_list = [{"value": choice} for choice in choices]
260 | 
261 |             requests = {
262 |                 "createItem": {
263 |                     "item": {
264 |                         "title": qapair["question"],
265 |                         "questionItem": {
266 |                             "question": {
267 |                                 "required": True,
268 |                                 "choiceQuestion": {
269 |                                     "type": "RADIO",
270 |                                     "options": choices_list,
271 |                                 },
272 |                             }
273 |                         },
274 |                     },
275 |                     "location": {"index": index},
276 |                 }
277 |             }
278 | 
279 |             requests_list.append(requests)
280 |     elif question_type == "get_boolq":
281 |         for index, qapair in enumerate(qa_pairs):
282 |             choices_list = [
283 |                 {"value": "True"},
284 |                 {"value": "False"},
285 |             ]
286 |             requests = {
287 |                 "createItem": {
288 |                     "item": {
289 |                         "title": qapair["question"],
290 |                         "questionItem": {
291 |                             "question": {
292 |                                 "required": True,
293 |                                 "choiceQuestion": {
294 |                                     "type": "RADIO",
295 |                                     "options": choices_list,
296 |                                 },
297 |                             }
298 |                         },
299 |                     },
300 |                     "location": {"index": index},
301 |                 }
302 |             }
303 | 
304 |             requests_list.append(requests)
305 |     else:
306 |         for index, qapair in enumerate(qa_pairs):
307 |             if "options" in qapair and qapair["options"]:
308 |                 options = qapair["options"]
309 |                 valid_options = [
310 |                     opt for opt in options if opt
311 |                 ]  # Filter out empty or None options
312 |                 choices = [qapair["answer"]] + valid_options[
313 |                     :3
314 |                 ]  # Include up to the first 3 options
315 |                 random.shuffle(choices)
316 |                 choices_list = [{"value": choice} for choice in choices]
317 |                 question_structure = {
318 |                     "choiceQuestion": {
319 |                         "type": "RADIO",
320 |                         "options": choices_list,
321 |                     }
322 |                 }
323 |             elif "answer" in qapair:
324 |                 question_structure = {"textQuestion": {}}
325 |             else:
326 |                 question_structure = {
327 |                     "choiceQuestion": {
328 |                         "type": "RADIO",
329 |                         "options": [
330 |                             {"value": "True"},
331 |                             {"value": "False"},
332 |                         ],
333 |                     }
334 |                 }
335 | 
336 |             requests = {
337 |                 "createItem": {
338 |                     "item": {
339 |                         "title": qapair["question"],
340 |                         "questionItem": {
341 |                             "question": {
342 |                                 "required": True,
343 |                                 **question_structure,
344 |                             }
345 |                         },
346 |                     },
347 |                     "location": {"index": index},
348 |                 }
349 |             }
350 |             requests_list.append(requests)
351 | 
352 |     NEW_QUESTION = {"requests": requests_list}
353 | 
354 |     result = form_service.forms().create(body=NEW_FORM).execute()
355 |     form_service.forms().batchUpdate(
356 |         formId=result["formId"], body=NEW_QUESTION
357 |     ).execute()
358 | 
359 |     edit_url = jsonify(result["responderUri"])
360 |     webbrowser.open_new_tab(
361 |         "https://docs.google.com/forms/d/" + result["formId"] + "/edit"
362 |     )
363 |     return edit_url
364 | 
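# Illustrative payload for /generate_gform above (field names as consumed by
# the route; the values are made up):
#
#   {"question_type": "get_mcq",
#    "qa_pairs": [{"question": "Which gas do plants absorb?",
#                  "answer": "Carbon dioxide",
#                  "options": ["Oxygen", "Nitrogen", "Helium"]}]}
#
# On success the route opens the form's edit page in a browser tab and
# returns the form's responder URI.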
365 | 
366 | @app.route("/get_shortq_hard", methods=["POST"])
367 | def get_shortq_hard():
368 |     data = request.get_json()
369 |     input_text = data.get("input_text", "")
370 |     use_mediawiki = data.get("use_mediawiki", 0)
371 |     input_text = process_input_text(input_text, use_mediawiki)
372 |     num_questions = data.get("max_questions", 4)  # clients send max_questions, not input_question
373 |     output = qg.generate(
374 |         article=input_text, num_questions=num_questions, answer_style="sentences"
375 |     )
376 |     return jsonify({"output": output})
377 | 
378 | 
379 | @app.route("/get_mcq_hard", methods=["POST"])
380 | def get_mcq_hard():
381 |     data = request.get_json()
382 |     input_text = data.get("input_text", "")
383 |     use_mediawiki = data.get("use_mediawiki", 0)
384 |     input_text = process_input_text(input_text, use_mediawiki)
385 |     num_questions = data.get("max_questions", 4)  # clients send max_questions, not input_question
386 |     output = qg.generate(
387 |         article=input_text, num_questions=num_questions, answer_style="multiple_choice"
388 |     )
389 |     return jsonify({"output": output})
390 | 
391 | @app.route('/upload', methods=['POST'])
392 | def upload_file():
393 |     if 'file' not in request.files:
394 |         return jsonify({"error": "No file part"}), 400
395 | 
396 |     file = request.files['file']
397 | 
398 |     if file.filename == '':
399 |         return jsonify({"error": "No selected file"}), 400
400 | 
401 |     content = file_processor.process_file(file)
402 | 
403 |     if content:
404 |         return jsonify({"content": content})
405 |     else:
406 |         return jsonify({"error": "Unsupported file type or error processing file"}), 400
407 | 
408 | @app.route("/", methods=["GET"])
409 | def hello():
410 |     return "The server is working fine"
411 | 
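# Upload sketch for /upload above (multipart form data; assumes a local
# server on port 5000 and a hypothetical notes.pdf):
#
#   import requests
#   with open("notes.pdf", "rb") as fh:
#       r = requests.post("http://localhost:5000/upload", files={"file": fh})
#   print(r.json().get("content"))
#
# Supported inputs are the .txt, .docx, and .pdf formats listed in the
# project README.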
412 | def clean_transcript(file_path):
413 |     """Extracts and cleans transcript from a VTT file."""
414 |     with open(file_path, "r", encoding="utf-8") as vtt_file:  # avoid shadowing the oauth2client 'file' import
415 |         lines = vtt_file.readlines()
416 | 
417 |     transcript_lines = []
418 |     skip_metadata = True  # Skip lines until we reach actual captions
419 | 
420 |     for line in lines:
421 |         line = line.strip()
422 | 
423 |         # Skip metadata lines like "Kind: captions" or "Language: en"
424 |         if line.lower().startswith(("kind:", "language:", "webvtt")):
425 |             continue
426 | 
427 |         # Detect timestamps like "00:01:23.456 --> 00:01:25.789"
428 |         if "-->" in line:
429 |             skip_metadata = False  # Now real captions start
430 |             continue
431 | 
432 |         if not skip_metadata:
433 |             # Remove formatting tags like ... and <00:00:00.000>
434 |             line = re.sub(r"<[^>]+>", "", line)
435 |             transcript_lines.append(line)
436 | 
437 |     return " ".join(transcript_lines).strip()
438 | 
439 | @app.route('/getTranscript', methods=['GET'])
440 | def get_transcript():
441 |     video_id = request.args.get('videoId')
442 |     if not video_id:
443 |         return jsonify({"error": "No video ID provided"}), 400
444 | 
445 |     try:
446 |         subprocess.run(["yt-dlp", "--write-auto-sub", "--sub-lang", "en", "--skip-download",
447 |                         "--sub-format", "vtt", "-o", f"subtitles/{video_id}.vtt", f"https://www.youtube.com/watch?v={video_id}"],
448 |                        check=True, capture_output=True, text=True)
449 |     except subprocess.CalledProcessError as e:
450 |         # yt-dlp exits non-zero when the video or its captions are unavailable
451 |         return jsonify({"error": "Failed to fetch subtitles", "details": e.stderr}), 500
452 | 
453 |     # Find the latest .vtt file in the "subtitles" folder
454 |     subtitle_files = glob.glob("subtitles/*.vtt")
455 |     if not subtitle_files:
456 |         return jsonify({"error": "No subtitles found"}), 404
457 | 
458 |     latest_subtitle = max(subtitle_files, key=os.path.getctime)
459 |     transcript_text = clean_transcript(latest_subtitle)
460 | 
461 |     # Optional: Clean up the file after reading
462 |     os.remove(latest_subtitle)
463 | 
464 |     return jsonify({"transcript": transcript_text})
465 | 
466 | if __name__ == "__main__":
467 |     os.makedirs("subtitles", exist_ok=True)
468 |     app.run()
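# Usage sketch for the /getTranscript endpoint above (requires yt-dlp on
# PATH; the video id is a placeholder):
#
#   import requests
#   r = requests.get("http://localhost:5000/getTranscript",
#                    params={"videoId": "<VIDEO_ID>"})
#   print(r.json().get("transcript", ""))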
--------------------------------------------------------------------------------
/backend/service_account_key.json:
--------------------------------------------------------------------------------
1 | {
2 |     "type": "service_account",
3 |     "project_id": "your-project-id",
4 |     "private_key_id": "your-private-key-id",
5 |     "private_key": "-----BEGIN PRIVATE KEY-----\nYOUR_PRIVATE_KEY_HERE\n-----END PRIVATE KEY-----\n",
6 |     "client_email": "your-client-email@your-project-id.iam.gserviceaccount.com",
7 |     "client_id": "your-client-id",
8 |     "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9 |     "token_uri": "https://oauth2.googleapis.com/token",
10 |     "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11 |     "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/your-client-email%40your-project-id.iam.gserviceaccount.com",
12 |     "universe_domain": "googleapis.com"
13 | }
--------------------------------------------------------------------------------
/backend/test_server.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | 
4 | BASE_URL = 'http://localhost:5000'
5 | 
6 | # Shared input text for all endpoints
7 | input_text = '''
8 | Artificial intelligence (AI) is the simulation of human intelligence processes
9 | by machines, especially computer systems. These processes include learning
10 | (the acquisition of information and rules for using the information), reasoning
11 | (using rules to reach approximate or definite conclusions), and self-correction.
12 | 
13 | AI applications include speech recognition, natural language processing,
14 | machine vision, expert systems, and robotics. Machine learning, a subset of AI,
15 | focuses on the development of algorithms that can learn from and make predictions
16 | or decisions based on data.
17 | 
18 | Deep learning, a technique within machine learning, involves neural networks
19 | with many layers (hence the term "deep"). It has revolutionized AI by enabling
20 | complex pattern recognition and data processing tasks.
21 | 
22 | Ethical considerations in AI include issues of bias in algorithms, privacy concerns
23 | with data collection, and the impact of AI on jobs and society as a whole.
24 | '''
25 | 
26 | def test_get_mcq():
27 |     endpoint = '/get_mcq'
28 |     data = {
29 |         'input_text': input_text,
30 |         'max_questions': 5
31 |     }
32 |     response = make_post_request(endpoint, data)
33 |     print(f'/get_mcq Response: {response}')
34 |     assert 'output' in response
35 | 
36 | def test_get_boolq():
37 |     endpoint = '/get_boolq'
38 |     data = {
39 |         'input_text': input_text,
40 |         'max_questions': 3
41 |     }
42 |     response = make_post_request(endpoint, data)
43 |     print(f'/get_boolq Response: {response}')
44 |     assert 'output' in response
45 | 
46 | def test_get_shortq():
47 |     endpoint = '/get_shortq'
48 |     data = {
49 |         'input_text': input_text,
50 |         'max_questions': 4
51 |     }
52 |     response = make_post_request(endpoint, data)
53 |     print(f'/get_shortq Response: {response}')
54 |     assert 'output' in response
55 | 
56 | def test_get_problems():
57 |     endpoint = '/get_problems'
58 |     data = {
59 |         'input_text': input_text,
60 |         'max_questions_mcq': 3,
61 |         'max_questions_boolq': 2,
62 |         'max_questions_shortq': 4
63 |     }
64 |     response = make_post_request(endpoint, data)
65 |     print(f'/get_problems Response: {response}')
66 |     assert 'output_mcq' in response
67 |     assert 'output_boolq' in response
68 |     assert 'output_shortq' in response
69 | 
70 | def test_root():
71 |     endpoint = '/'
72 |     response = requests.get(f'{BASE_URL}{endpoint}')
73 |     print(f'Root Endpoint Response: {response.text}')
74 |     assert response.status_code == 200
75 | 
76 | def test_get_answer():
77 |     endpoint = '/get_shortq_answer'  # the server exposes /get_shortq_answer, not /get_answer
78 |     data = {
79 |         'input_text': input_text,
80 |         'input_question': [
81 |             "What is artificial intelligence?",
82 |             "What does AI include?",
83 |             "What is deep learning?",
84 |             "What are the ethical considerations in AI?"
85 |         ]
86 |     }
87 |     response = make_post_request(endpoint, data)
88 |     print(f'/get_shortq_answer Response: {response}')
89 |     assert 'output' in response
90 | 
91 | def test_get_boolean_answer():
92 |     endpoint = '/get_boolean_answer'
93 |     data = {
94 |         'input_text': input_text,
95 |         'input_question': [
96 |             "Artificial intelligence is the simulation of human intelligence.",
97 |             "Deep learning does not involve neural networks.",
98 |             "AI applications do not include speech recognition."
99 | ] 100 | } 101 | response = make_post_request(endpoint, data) 102 | print(f'/get_boolean_answer Response: {response}') 103 | assert 'output' in response 104 | 105 | def make_post_request(endpoint, data): 106 | url = f'{BASE_URL}{endpoint}' 107 | headers = {'Content-Type': 'application/json'} 108 | response = requests.post(url, headers=headers, data=json.dumps(data)) 109 | return response.json() 110 | 111 | if __name__ == '__main__': 112 | test_get_mcq() 113 | test_get_boolq() 114 | test_get_shortq() 115 | test_get_problems() 116 | test_root() 117 | test_get_answer() 118 | test_get_boolean_answer() 119 | -------------------------------------------------------------------------------- /eduaid_web/.env.example: -------------------------------------------------------------------------------- 1 | REACT_APP_BASE_URL=http://localhost:5000 2 | -------------------------------------------------------------------------------- /eduaid_web/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | -------------------------------------------------------------------------------- /eduaid_web/README.md: -------------------------------------------------------------------------------- 1 | # Getting Started with Create React App 2 | 3 | This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app). 4 | 5 | ## Available Scripts 6 | 7 | In the project directory, you can run: 8 | 9 | ### `npm start` 10 | 11 | Runs the app in the development mode.\ 12 | Open [http://localhost:3000](http://localhost:3000) to view it in your browser. 13 | 14 | The page will reload when you make changes.\ 15 | You may also see any lint errors in the console. 16 | 17 | Rename the `.env.example` file to `.env` after setting up the project. 18 | 19 | ### `npm test` 20 | 21 | Launches the test runner in the interactive watch mode.\ 22 | See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. 23 | 24 | ### `npm run build` 25 | 26 | Builds the app for production to the `build` folder.\ 27 | It correctly bundles React in production mode and optimizes the build for the best performance. 28 | 29 | The build is minified and the filenames include the hashes.\ 30 | Your app is ready to be deployed! 31 | 32 | See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information. 33 | 34 | ### `npm run eject` 35 | 36 | **Note: this is a one-way operation. Once you `eject`, you can't go back!** 37 | 38 | If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project. 39 | 40 | Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own. 41 | 42 | You don't have to ever use `eject`. 
The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it. 43 | 44 | ## Learn More 45 | 46 | You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started). 47 | 48 | To learn React, check out the [React documentation](https://reactjs.org/). 49 | 50 | ### Code Splitting 51 | 52 | This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting) 53 | 54 | ### Analyzing the Bundle Size 55 | 56 | This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size) 57 | 58 | ### Making a Progressive Web App 59 | 60 | This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app) 61 | 62 | ### Advanced Configuration 63 | 64 | This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration) 65 | 66 | ### Deployment 67 | 68 | This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment) 69 | 70 | ### `npm run build` fails to minify 71 | 72 | This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify) 73 | -------------------------------------------------------------------------------- /eduaid_web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "eduaid_web", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@testing-library/jest-dom": "^5.17.0", 7 | "@testing-library/react": "^13.4.0", 8 | "@testing-library/user-event": "^13.5.0", 9 | "dotenv": "^16.4.7", 10 | "pdf-lib": "^1.17.1", 11 | "react": "^18.3.1", 12 | "react-dom": "^18.3.1", 13 | "react-icons": "^5.2.1", 14 | "react-router-dom": "^6.26.0", 15 | "react-scripts": "5.0.1", 16 | "react-switch": "^7.0.0", 17 | "web-vitals": "^2.1.4" 18 | }, 19 | "scripts": { 20 | "start": "react-scripts start", 21 | "build": "react-scripts build", 22 | "test": "react-scripts test", 23 | "eject": "react-scripts eject" 24 | }, 25 | "eslintConfig": { 26 | "extends": [ 27 | "react-app", 28 | "react-app/jest" 29 | ] 30 | }, 31 | "browserslist": { 32 | "production": [ 33 | ">0.2%", 34 | "not dead", 35 | "not op_mini all" 36 | ], 37 | "development": [ 38 | "last 1 chrome version", 39 | "last 1 firefox version", 40 | "last 1 safari version" 41 | ] 42 | }, 43 | "devDependencies": { 44 | "tailwindcss": "^3.4.9" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /eduaid_web/public/aossie_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AOSSIE-Org/EduAid/1f4ad364024f41289060999ca8ba2fd3ae3741b4/eduaid_web/public/aossie_logo.png -------------------------------------------------------------------------------- /eduaid_web/public/aossie_logo64.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AOSSIE-Org/EduAid/1f4ad364024f41289060999ca8ba2fd3ae3741b4/eduaid_web/public/aossie_logo64.ico -------------------------------------------------------------------------------- /eduaid_web/public/aossie_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AOSSIE-Org/EduAid/1f4ad364024f41289060999ca8ba2fd3ae3741b4/eduaid_web/public/aossie_transparent.png -------------------------------------------------------------------------------- /eduaid_web/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 17 | 18 | 27 | EduAid 28 | 29 | 30 | 31 |
32 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /eduaid_web/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "EduAid", 3 | "name": "Eduaid : AI Quiz Generator", 4 | "icons": [ 5 | { 6 | "src": "aossie_logo64.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "aossie_logo.png", 12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "aossie_logo.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /eduaid_web/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /eduaid_web/src/App.css: -------------------------------------------------------------------------------- 1 | .App { 2 | text-align: center; 3 | } 4 | 5 | .App-logo { 6 | height: 40vmin; 7 | pointer-events: none; 8 | } 9 | 10 | @media (prefers-reduced-motion: no-preference) { 11 | .App-logo { 12 | animation: App-logo-spin infinite 20s linear; 13 | } 14 | } 15 | 16 | .App-header { 17 | background-color: #282c34; 18 | min-height: 100vh; 19 | display: flex; 20 | flex-direction: column; 21 | align-items: center; 22 | justify-content: center; 23 | font-size: calc(10px + 2vmin); 24 | color: white; 25 | } 26 | 27 | .App-link { 28 | color: #61dafb; 29 | } 30 | 31 | @keyframes App-logo-spin { 32 | from { 33 | transform: rotate(0deg); 34 | } 35 | to { 36 | transform: rotate(360deg); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /eduaid_web/src/App.js: -------------------------------------------------------------------------------- 1 | import "./App.css"; 2 | import { BrowserRouter, Routes, Route } from "react-router-dom"; 3 | import Home from "./pages/Home"; 4 | import Question_Type from "./pages/Question_Type"; 5 | import Text_Input from "./pages/Text_Input"; 6 | import Output from "./pages/Output"; 7 | import Previous from "./pages/Previous"; 8 | import NotFound from "./pages/PageNotFound"; 9 | 10 | function App() { 11 | return ( 12 | 13 | 14 | } /> 15 | } /> 16 | } /> 17 | } /> 18 | } /> 19 | } /> 20 | 21 | 22 | ); 23 | } 24 | 25 | export default App; 26 | -------------------------------------------------------------------------------- /eduaid_web/src/App.test.js: -------------------------------------------------------------------------------- 1 | import { render, screen } from '@testing-library/react'; 2 | import App from './App'; 3 | 4 | test('renders learn react link', () => { 5 | render(); 6 | const linkElement = screen.getByText(/learn react/i); 7 | expect(linkElement).toBeInTheDocument(); 8 | }); 9 | -------------------------------------------------------------------------------- /eduaid_web/src/assets/aossie_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AOSSIE-Org/EduAid/1f4ad364024f41289060999ca8ba2fd3ae3741b4/eduaid_web/src/assets/aossie_logo.png -------------------------------------------------------------------------------- 
/eduaid_web/src/assets/aossie_logo_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AOSSIE-Org/EduAid/1f4ad364024f41289060999ca8ba2fd3ae3741b4/eduaid_web/src/assets/aossie_logo_transparent.png -------------------------------------------------------------------------------- /eduaid_web/src/assets/arrow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AOSSIE-Org/EduAid/1f4ad364024f41289060999ca8ba2fd3ae3741b4/eduaid_web/src/assets/arrow.png -------------------------------------------------------------------------------- /eduaid_web/src/assets/cloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AOSSIE-Org/EduAid/1f4ad364024f41289060999ca8ba2fd3ae3741b4/eduaid_web/src/assets/cloud.png -------------------------------------------------------------------------------- /eduaid_web/src/assets/gitStar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AOSSIE-Org/EduAid/1f4ad364024f41289060999ca8ba2fd3ae3741b4/eduaid_web/src/assets/gitStar.png -------------------------------------------------------------------------------- /eduaid_web/src/assets/open.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AOSSIE-Org/EduAid/1f4ad364024f41289060999ca8ba2fd3ae3741b4/eduaid_web/src/assets/open.png -------------------------------------------------------------------------------- /eduaid_web/src/assets/stars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AOSSIE-Org/EduAid/1f4ad364024f41289060999ca8ba2fd3ae3741b4/eduaid_web/src/assets/stars.png -------------------------------------------------------------------------------- /eduaid_web/src/index.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | body { 6 | margin: 0; 7 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 8 | 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', 9 | sans-serif; 10 | -webkit-font-smoothing: antialiased; 11 | -moz-osx-font-smoothing: grayscale; 12 | } 13 | 14 | code { 15 | font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', 16 | monospace; 17 | } 18 | 19 | .border-gradient { 20 | border-width: 2px; 21 | border-style: solid; 22 | border-image: linear-gradient(to right, #ff005c, #7600f2, #00cbe7); 23 | border-image-slice: 1; 24 | border-radius: 2rem; 25 | } -------------------------------------------------------------------------------- /eduaid_web/src/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom/client'; 3 | import './index.css'; 4 | import App from './App'; 5 | import reportWebVitals from './reportWebVitals'; 6 | 7 | const root = ReactDOM.createRoot(document.getElementById('root')); 8 | root.render( 9 | 10 | 11 | 12 | ); 13 | 14 | // If you want to start measuring performance in your app, pass a function 15 | // to log results (for example: reportWebVitals(console.log)) 16 | // or send to an analytics endpoint. 
Learn more: https://bit.ly/CRA-vitals 17 | reportWebVitals(); 18 | -------------------------------------------------------------------------------- /eduaid_web/src/pages/Home.jsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from "react"; 2 | import "../index.css"; 3 | import logo from "../assets/aossie_logo.png"; 4 | import starsImg from "../assets/stars.png"; 5 | import arrow from "../assets/arrow.png"; 6 | import gitStar from "../assets/gitStar.png"; 7 | import { FaGithub } from "react-icons/fa"; 8 | 9 | const Home = () => { 10 | const [stars, setStars] = useState(null); 11 | const [error, setError] = useState(""); 12 | 13 | async function fetchGitHubStars() { 14 | const response = await fetch( 15 | "https://api.github.com/repos/AOSSIE-Org/EduAid" 16 | ); 17 | if (!response.ok) { 18 | throw new Error("Failed to fetch stars"); 19 | } 20 | const data = await response.json(); 21 | return data.stargazers_count; 22 | } 23 | 24 | function isMoreThanOneDayOld(timestamp) { 25 | const oneDay = 24 * 60 * 60 * 1000; // One day in milliseconds 26 | return Date.now() - timestamp > oneDay; 27 | } 28 | 29 | useEffect(() => { 30 | const storedStars = localStorage.getItem("stars"); 31 | const storedTime = localStorage.getItem("fetchTime"); 32 | if ( 33 | storedStars && 34 | storedTime && 35 | !isMoreThanOneDayOld(parseInt(storedTime)) 36 | ) { 37 | setStars(parseInt(storedStars)); 38 | } else { 39 | fetchGitHubStars() 40 | .then((starCount) => { 41 | setStars(starCount); 42 | localStorage.setItem("stars", starCount); 43 | localStorage.setItem("fetchTime", Date.now().toString()); 44 | }) 45 | .catch((err) => { 46 | setError("Failed to fetch stars"); 47 | }); 48 | } 49 | }, []); 50 | 51 | return ( 52 |
53 |
54 |
55 | logo 56 |
57 | 58 | Edu 59 | 60 | 61 | Aid 62 | 63 |
64 |
65 |
A tool that can auto-generate short quizzes
66 |
67 | based on user input{" "} 68 | 69 |
70 |
71 |
72 |
73 |
74 | 75 |
Doc/Audio Input
76 |
77 |
78 |
79 |
80 | 81 |
In-depth questions gen
82 |
83 |
84 |
85 |
86 | 87 |
88 | Dynamic Google Form Integration 89 |
90 |
91 |
92 |
93 |
94 | 102 | 110 |
111 | 117 |
118 | 119 |
120 | {stars !== null ? ( 121 | 122 | {stars} 123 | 124 | 125 | ) : ( 126 | {error} 127 | )} 128 |
129 |
130 |
131 |
132 |
133 |
134 | ); 135 | }; 136 | 137 | export default Home; 138 | -------------------------------------------------------------------------------- /eduaid_web/src/pages/PageNotFound.jsx: -------------------------------------------------------------------------------- 1 | import React, { useEffect, useState } from 'react'; 2 | import { useNavigate } from 'react-router-dom'; 3 | import "../index.css"; 4 | const NotFound = () => { 5 | const router = useNavigate() 6 | const [countdown, setCountdown] = useState(5); 7 | 8 | useEffect(() => { 9 | const timer = setInterval(() => { 10 | setCountdown((prev) => prev - 1); 11 | }, 1000); 12 | 13 | const redirect = setTimeout(() => { 14 | router('/') 15 | }, 5000); 16 | 17 | return () => { 18 | clearInterval(timer); 19 | clearTimeout(redirect); 20 | }; 21 | }, []); 22 | 23 | return ( 24 |
25 |
26 |

404

27 |

Page Not Found

28 |

29 | Oops! The page you're looking for doesn't exist. 30 |

31 |

32 | Redirecting to home page in {countdown} seconds... 33 |

34 |
35 |
36 | ); 37 | }; 38 | 39 | export default NotFound; -------------------------------------------------------------------------------- /eduaid_web/src/pages/Previous.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import "../index.css"; 3 | import logo from "../assets/aossie_logo.png"; 4 | import stars from "../assets/stars.png"; 5 | import { FaArrowRight } from "react-icons/fa"; 6 | 7 | const Previous = () => { 8 | const getQuizzesFromLocalStorage = () => { 9 | const quizzes = localStorage.getItem("last5Quizzes"); 10 | return quizzes ? JSON.parse(quizzes) : []; 11 | }; 12 | 13 | const [quizzes, setQuizzes] = React.useState(getQuizzesFromLocalStorage()); 14 | 15 | const handleQuizClick = (quiz) => { 16 | localStorage.setItem("qaPairs", JSON.stringify(quiz.qaPair)); 17 | window.location.href = "/output"; 18 | }; 19 | 20 | const handleClearQuizzes = () => { 21 | localStorage.removeItem("last5Quizzes"); 22 | setQuizzes([]); 23 | }; 24 | 25 | const handleBack = () => { 26 | window.location.href = "/"; 27 | }; 28 | 29 | return ( 30 |
31 |
32 | 33 |
34 | logo 35 |
36 | 37 | Edu 38 | 39 | 40 | Aid 41 | 42 |
43 |
44 |
45 |
46 |
Quiz Dashboard
47 |
48 | Your{" "} 49 | 50 | Generated Quizzes 51 | {" "} 52 | stars 53 |
54 |
55 |
56 | 57 | Your Quizzes 58 | {" "} 59 |
60 |
61 | {quizzes.length === 0 ? ( 62 |
63 | No quizzes available 64 |
65 | ) : ( 66 |
    67 | {quizzes.map((quiz, index) => ( 68 |
  • handleQuizClick(quiz)} 72 | > 73 |
    74 |
    75 | {quiz.difficulty} - {quiz.numQuestions} Questions 76 |
    77 |
    {quiz.date}
    78 |
    79 | 80 |
  • 81 | ))} 82 |
83 | )} 84 |
85 |
86 |
87 | 93 |
94 |
95 | 101 |
102 |
103 |
104 |
105 | ); 106 | }; 107 | 108 | export default Previous; 109 | -------------------------------------------------------------------------------- /eduaid_web/src/pages/Question_Type.jsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from "react"; 2 | import "../index.css"; 3 | import logo from "../assets/aossie_logo.png"; 4 | 5 | const Question_Type = () => { 6 | const [selectedOption, setSelectedOption] = useState(null); 7 | 8 | const handleOptionClick = (option) => { 9 | setSelectedOption(option); 10 | }; 11 | 12 | const handleSaveToLocalStorage = () => { 13 | if (selectedOption) { 14 | localStorage.setItem("selectedQuestionType", selectedOption); 15 | } 16 | }; 17 | 18 | return ( 19 |
20 |
21 | 22 |
23 | logo 24 |
25 | 26 | Edu 27 | 28 | 29 | Aid 30 | 31 |
32 |
33 |
34 |
35 | What’s on your Mind? 36 |
37 |
38 | Choose one 39 |
40 |
41 |
handleOptionClick("get_shortq")} 43 | className="flex my-3 items-center w-full max-w-lg cursor-pointer rounded-xl gap-6 px-6 py-6 bg-opacity-50 bg-[#202838]" 44 | > 45 |
52 |
53 | Short-Answer Type Questions 54 |
55 |
56 |
handleOptionClick("get_mcq")} 58 | className="flex my-3 items-center w-full max-w-lg cursor-pointer rounded-xl gap-6 px-6 py-6 bg-opacity-50 bg-[#202838]" 59 | > 60 |
67 |
68 | Multiple Choice Questions 69 |
70 |
71 |
handleOptionClick("get_boolq")} 73 | className="flex my-3 items-center w-full max-w-lg cursor-pointer rounded-xl gap-6 px-6 py-6 bg-opacity-50 bg-[#202838]" 74 | > 75 |
82 |
83 | True/False Questions 84 |
85 |
86 |
handleOptionClick("get_problems")} 88 | className="flex my-3 items-center w-full max-w-lg cursor-pointer rounded-xl gap-6 px-6 py-6 bg-opacity-50 bg-[#202838]" 89 | > 90 |
97 |
All Questions
98 |
99 |
100 |
101 | {selectedOption ? ( 102 | 103 | 109 | 110 | ) : ( 111 | 118 | )} 119 |
120 |
121 |
122 | ); 123 | }; 124 | 125 | export default Question_Type; 126 | -------------------------------------------------------------------------------- /eduaid_web/src/pages/Text_Input.jsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useRef } from "react"; 2 | import "../index.css"; 3 | import logo from "../assets/aossie_logo.png"; 4 | import stars from "../assets/stars.png"; 5 | import cloud from "../assets/cloud.png"; 6 | import { FaClipboard } from "react-icons/fa"; 7 | import Switch from "react-switch"; 8 | 9 | const Text_Input = () => { 10 | const [text, setText] = useState(""); 11 | const [difficulty, setDifficulty] = useState("Easy Difficulty"); 12 | const [numQuestions, setNumQuestions] = useState(10); 13 | const [loading, setLoading] = useState(false); 14 | const fileInputRef = useRef(null); 15 | const [fileContent, setFileContent] = useState(""); 16 | const [docUrl, setDocUrl] = useState(""); 17 | const [isToggleOn, setIsToggleOn] = useState(0); 18 | 19 | const toggleSwitch = () => { 20 | setIsToggleOn((isToggleOn + 1) % 2); 21 | }; 22 | 23 | const handleFileUpload = async (event) => { 24 | const file = event.target.files[0]; 25 | if (file) { 26 | const formData = new FormData(); 27 | formData.append("file", file); 28 | 29 | try { 30 | const response = await fetch(`${process.env.REACT_APP_BASE_URL}/upload`, { 31 | method: "POST", 32 | body: formData, 33 | }); 34 | const data = await response.json(); 35 | setText(data.content || data.error); 36 | } catch (error) { 37 | console.error("Error uploading file:", error); 38 | setText("Error uploading file"); 39 | } 40 | } 41 | }; 42 | 43 | const handleClick = (event) => { 44 | event.preventDefault(); // Prevent default behavior 45 | event.stopPropagation(); // Stop event propagation 46 | 47 | // Open file input dialog 48 | fileInputRef.current.click(); 49 | }; 50 | 51 | const handleSaveToLocalStorage = async () => { 52 | setLoading(true); 53 | 54 | // Check if a Google Doc URL is provided 55 | if (docUrl) { 56 | try { 57 | const response = await fetch(`${process.env.REACT_APP_BASE_URL}/get_content`, { 58 | method: "POST", 59 | headers: { 60 | "Content-Type": "application/json", 61 | }, 62 | body: JSON.stringify({ document_url: docUrl }), 63 | }); 64 | 65 | if (response.ok) { 66 | const data = await response.json(); 67 | setDocUrl(""); 68 | setText(data || "Error in retrieving"); 69 | } else { 70 | console.error("Error retrieving Google Doc content"); 71 | setText("Error retrieving Google Doc content"); 72 | } 73 | } catch (error) { 74 | console.error("Error:", error); 75 | setText("Error retrieving Google Doc content"); 76 | } finally { 77 | setLoading(false); 78 | } 79 | } else if (text) { 80 | // Proceed with existing functionality for local storage 81 | localStorage.setItem("textContent", text); 82 | localStorage.setItem("difficulty", difficulty); 83 | localStorage.setItem("numQuestions", numQuestions); 84 | 85 | await sendToBackend( 86 | text, 87 | difficulty, 88 | localStorage.getItem("selectedQuestionType") 89 | ); 90 | } 91 | }; 92 | 93 | const handleDifficultyChange = (e) => { 94 | setDifficulty(e.target.value); 95 | }; 96 | 97 | const incrementQuestions = () => { 98 | setNumQuestions((prev) => prev + 1); 99 | }; 100 | 101 | const decrementQuestions = () => { 102 | setNumQuestions((prev) => (prev > 0 ? 
prev - 1 : 0)); 103 | }; 104 | 105 | const getEndpoint = (difficulty, questionType) => { 106 | if (difficulty !== "Easy Difficulty") { 107 | if (questionType === "get_shortq") { 108 | return "get_shortq_hard"; 109 | } else if (questionType === "get_mcq") { 110 | return "get_mcq_hard"; 111 | } 112 | } 113 | return questionType; 114 | }; 115 | 116 | const sendToBackend = async (data, difficulty, questionType) => { 117 | const endpoint = getEndpoint(difficulty, questionType); 118 | try { 119 | const formData = JSON.stringify({ 120 | input_text: data, 121 | max_questions: numQuestions, 122 | use_mediawiki: isToggleOn, 123 | }); 124 | 125 | const response = await fetch(`${process.env.REACT_APP_BASE_URL}/${endpoint}`, { 126 | method: "POST", 127 | body: formData, 128 | headers: { 129 | "Content-Type": "application/json", 130 | }, 131 | }); 132 | 133 | if (response.ok) { 134 | const responseData = await response.json(); 135 | localStorage.setItem("qaPairs", JSON.stringify(responseData)); 136 | 137 | // Save quiz details to local storage 138 | const quizDetails = { 139 | difficulty, 140 | numQuestions, 141 | date: new Date().toLocaleDateString(), 142 | qaPair: responseData, 143 | }; 144 | 145 | let last5Quizzes = 146 | JSON.parse(localStorage.getItem("last5Quizzes")) || []; 147 | last5Quizzes.push(quizDetails); 148 | if (last5Quizzes.length > 5) { 149 | last5Quizzes.shift(); // Keep only the last 5 quizzes 150 | } 151 | localStorage.setItem("last5Quizzes", JSON.stringify(last5Quizzes)); 152 | 153 | window.location.href = "output"; 154 | } else { 155 | console.error("Backend request failed."); 156 | } 157 | } catch (error) { 158 | console.error("Error:", error); 159 | } finally { 160 | setLoading(false); 161 | } 162 | }; 163 | 164 | return ( 165 |
166 | {loading && ( 167 |
168 |
169 |
170 | )} 171 |
176 | 177 |
178 | logo 179 |
180 | 181 | Edu 182 | 183 | 184 | Aid 185 | 186 |
187 |
188 |
189 |
190 |
Enter the Content
191 |
192 | to Generate{" "} 193 | 194 | Questionaries 195 | {" "} 196 | stars 197 |
198 |
199 | 200 |
201 | 204 |