├── .ipynb_checkpoints └── Code-checkpoint.ipynb ├── Code.ipynb ├── NLP Project.pdf ├── README.md └── input.txt /.ipynb_checkpoints/Code-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 26, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Imports\n", 10 | "import nltk\n", 11 | "import nltk.data\n", 12 | "from nltk.stem.lancaster import LancasterStemmer\n", 13 | "from nltk.stem.wordnet import WordNetLemmatizer\n", 14 | "import re\n", 15 | "import spacy\n", 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 9, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# # Setting stanford environment variables\n", 26 | "# os.environ['STANFORD_PARSER'] = '/home/nishant/Downloads/stanford-parser-full-2015-12-09/jars'\n", 27 | "# os.environ['STANFORD_MODELS'] = '/home/nishant/Downloads/stanford-parser-full-2015-12-09/jars'\n", 28 | "# stanford_dir = '/home/nishant/Downloads/stanford-parser-full-2015-12-09'" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 10, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# Class initializations\n", 38 | "nlp = spacy.load('en_core_web_sm')\n", 39 | "stemmer = LancasterStemmer()\n", 40 | "# parser = stanford.StanfordParser(model_path = '/home/nishant/Downloads/stanford-parser-full-2015-12-09/jars/englishPCFG.ser.gz')\n", 41 | "# parser._classpath = tuple(find_jars_within_path(stanford_dir))" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 11, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# List to hold all input sentences\n", 51 | "sentences = []\n", 52 | "\n", 53 | "# Dictionary to hold sentences corresponding to respective discourse markers\n", 54 | "disc_sentences = {}\n", 55 | "\n", 56 | "# Remaining sentences which do not have discourse markers (To be used later to 
generate other kinds of questions)\n", 57 | "nondisc_sentences = []\n", 58 | "\n", 59 | "# List of auxiliary verbs\n", 60 | "aux_list = ['am', 'are', 'is', 'was', 'were', 'can', 'could', 'does', 'do', 'did', 'has', 'had', 'may', 'might', 'must', 'need',\n", 61 | " 'ought', 'shall', 'should', 'will', 'would']\n", 62 | "\n", 63 | "# List of all discourse markers\n", 64 | "discourse_markers = ['because', 'as a result', 'since', 'when', 'although', 'for example', 'for instance']\n", 65 | "\n", 66 | "# Different question types possible for each discourse marker\n", 67 | "qtype = {'because': ['Why'], 'since': ['When', 'Why'], 'when': ['When'], 'although': ['Yes/No'], 'as a result': ['Why'], \n", 68 | "'for example': ['Give an example where'], 'for instance': ['Give an instance where'], 'to': ['Why']}\n", 69 | "\n", 70 | "# The argument which forms a question\n", 71 | "target_arg = {'because': 1, 'since': 1, 'when': 1, 'although': 1, 'as a result': 2, 'for example': 1, 'for instance': 1, 'to': 1}\n" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 12, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "[('Sachin', 'NNP'), ('Tendulkar', 'NNP'), ('was', 'VBD'), ('awarded', 'VBN'), ('Bharat', 'NNP'), ('Ratna', 'NNP'), ('in', 'IN'), ('2013', 'CD')]\n", 84 | "learn\n", 85 | "I eat apple\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "# Rough Work\n", 91 | "text = 'Sachin Tendulkar was awarded Bharat Ratna in 2013'\n", 92 | "text = nltk.word_tokenize(text)\n", 93 | "tags = nltk.pos_tag(text)\n", 94 | "print(tags)\n", 95 | "# question_part = 'Sanskar think he felt included'\n", 96 | "# question_part = question_part[:question_part.index(tags[0][0]) + len(tags[0][0])]\n", 97 | "# print(question_part)\n", 98 | "# text = nltk.word_tokenize('I eat apple')\n", 99 | "# tags = nltk.pos_tag(text)\n", 100 | "# print(tags)\n", 101 | "print(stemmer.stem('learned'))\n", 102 | "s = \"I eat apple.\"\n", 
103 | "if(s[-1] in ['.', ',']):\n", 104 | " s = s[:-1]\n", 105 | "print(s)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 13, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "# This function is used to tokenize and split into sentences\n", 115 | "def sentensify():\n", 116 | " global sentences\n", 117 | " tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')\n", 118 | " fp = open('input.txt')\n", 119 | " data = fp.read()\n", 120 | " sentences = tokenizer.tokenize(data)\n", 121 | " discourse()\n" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 14, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# Function used to generate the questions from sentences which have already been pre-processed.\n", 131 | "def generate_question(question_part, type):\n", 132 | "\n", 133 | " ''' Tree -> Input tree\n", 134 | " question_part -> Part of input sentence which forms a question\n", 135 | " type-> The type of question (why, where, etc)\n", 136 | " '''\n", 137 | " # Remove full stop and make first letter lower case\n", 138 | " question_part = question_part[0].lower() + question_part[1:]\n", 139 | " if(question_part[-1] == '.' 
or question_part[-1] == ','):\n", 140 | " question_part = question_part[:-1]\n", 141 | " \n", 142 | " # Capitalizing 'i' since 'I' is recognized by parsers appropriately \n", 143 | " for i in range(0, len(question_part)):\n", 144 | " if(question_part[i] == 'i'):\n", 145 | " if((i == 0 and question_part[i + 1] == ' ') or (question_part[i - 1] == ' ' and question_part[i + 1] == ' ')):\n", 146 | " question_part = question_part[:i] + 'I' + question_part[i + 1: ]\n", 147 | " \n", 148 | " question = \"\"\n", 149 | " if(type == 'Give an example where' or type == 'Give an instance where'):\n", 150 | " question = type + \" \" + question_part + '?'\n", 151 | " return question\n", 152 | "\n", 153 | " aux_verb = False\n", 154 | " res = None\n", 155 | " \n", 156 | " # Find out if auxiliary verb already exists\n", 157 | " for i in range(len(aux_list)):\n", 158 | " if(aux_list[i] in question_part.split()):\n", 159 | " aux_verb = True\n", 160 | " pos = i\n", 161 | " break\n", 162 | "\n", 163 | " # If auxiliary verb exists\n", 164 | " if(aux_verb):\n", 165 | " \n", 166 | " # Tokenize the part of the sentence from which the question has to be made\n", 167 | " text = nltk.word_tokenize(question_part)\n", 168 | " tags = nltk.pos_tag(text)\n", 169 | " question_part = \"\"\n", 170 | " fP = False\n", 171 | " \n", 172 | " for word, tag in tags:\n", 173 | " if(word in ['I', 'We', 'we']):\n", 174 | " question_part += 'you' + \" \"\n", 175 | " fP = True\n", 176 | " continue\n", 177 | " question_part += word + \" \"\n", 178 | "\n", 179 | " # Split across the auxiliary verb and prepend it at the start of question part\n", 180 | " question = question_part.split(\" \" + aux_list[pos])\n", 181 | " if(fP):\n", 182 | " question = [\"were \"] + question\n", 183 | " else:\n", 184 | " question = [aux_list[pos] + \" \"] + question\n", 185 | "\n", 186 | " # If Yes/No, no need to introduce question phrase\n", 187 | " if(type == 'Yes/No'):\n", 188 | " question += ['?']\n", 189 | " \n", 190 | " elif(type 
!= \"non_disc\"):\n", 191 | " question = [type + \" \"] + question + [\"?\"]\n", 192 | " \n", 193 | " else:\n", 194 | " question = question + [\"?\"]\n", 195 | " \n", 196 | " question = ''.join(question)\n", 197 | "\n", 198 | " # If auxiliary verb does not exist, it can only be some form of verb 'do'\n", 199 | " else:\n", 200 | " aux = None\n", 201 | " text = nltk.word_tokenize(question_part)\n", 202 | " tags = nltk.pos_tag(text)\n", 203 | " comb = \"\"\n", 204 | "\n", 205 | " '''There can be following combinations of nouns and verbs:\n", 206 | " NN/NNP and VBZ -> Does\n", 207 | " NNS/NNPS(plural) and VBP -> Do\n", 208 | " NN/NNP and VBN -> Did\n", 209 | " NNS/NNPS(plural) and VBN -> Did\n", 210 | " '''\n", 211 | " \n", 212 | " for tag in tags:\n", 213 | " if(comb == \"\"):\n", 214 | " if(tag[1] == 'NN' or tag[1] == 'NNP'):\n", 215 | " comb = 'NN'\n", 216 | "\n", 217 | " elif(tag[1] == 'NNS' or tag[1] == 'NNPS'):\n", 218 | " comb = 'NNS'\n", 219 | "\n", 220 | " elif(tag[1] == 'PRP'):\n", 221 | " if tag[0] in ['He','She','It']:\n", 222 | " comb = 'PRPS'\n", 223 | " else:\n", 224 | " comb = 'PRPP'\n", 225 | " tmp = question_part.split(\" \")\n", 226 | " tmp = tmp[1: ]\n", 227 | " if(tag[0] in ['I', 'we', 'We']):\n", 228 | " question_part = 'you ' + ' '.join(tmp)\n", 229 | " if(res == None):\n", 230 | " res = re.match(r\"VB*\", tag[1])\n", 231 | " if(res):\n", 232 | " # question_part = question_part[:question_part.index(tag[0]) + len(tag[0])]\n", 233 | "\n", 234 | " # Stem the verb\n", 235 | " question_part = question_part.replace(tag[0], stemmer.stem(tag[0]))\n", 236 | " res = re.match(r\"VBN\", tag[1])\n", 237 | " res = re.match(r\"VBD\", tag[1])\n", 238 | "\n", 239 | " if(comb == 'NN'):\n", 240 | " aux = 'does'\n", 241 | " \n", 242 | " elif(comb == 'NNS'):\n", 243 | " aux = 'do'\n", 244 | " \n", 245 | " elif(comb == 'PRPS'):\n", 246 | " aux = 'does'\n", 247 | " \n", 248 | " elif(comb == 'PRPP'):\n", 249 | " aux = 'do'\n", 250 | " \n", 251 | " if(res and res.group() 
in ['VBD', 'VBN']):\n", 252 | " aux = 'did'\n", 253 | "\n", 254 | " if(aux):\n", 255 | " if(type == \"non_disc\" or type == \"Yes/No\"):\n", 256 | " question = aux + \" \" + question_part + \"?\"\n", 257 | "\n", 258 | " else:\n", 259 | " question = type + \" \" + aux + \" \" + question_part + \"?\"\n", 260 | " if(question != \"\"):\n", 261 | " question = question[0].upper() + question[1:]\n", 262 | " return question" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 15, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "# This function is used to get the named entities\n", 272 | "def get_named_entities(sent):\n", 273 | " \n", 274 | " doc = nlp(sent)\n", 275 | " named_entities = [(X.text, X.label_) for X in doc.ents]\n", 276 | " return named_entities" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 16, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "# This function is used to get the required wh word\n", 286 | "def get_wh_word(entity, sent):\n", 287 | " wh_word = \"\"\n", 288 | " if entity[1] in ['TIME', 'DATE']:\n", 289 | " wh_word = 'When'\n", 290 | " elif entity[1] == ['PRODUCT', 'EVENT', 'WORK_OF_ART', 'LAW', 'LANGUAGE']:\n", 291 | " wh_word = 'What'\n", 292 | " elif entity[1] in ['PERSON']:\n", 293 | " wh_word = 'Who'\n", 294 | " elif entity[1] in ['NORP', 'FAC' ,'ORG', 'GPE', 'LOC']:\n", 295 | " index = sent.find(entity[0])\n", 296 | " if index == 0:\n", 297 | " wh_word = \"Who\"\n", 298 | " else:\n", 299 | " wh_word = \"Where\"\n", 300 | " else:\n", 301 | " wh_word = \"Where\"\n", 302 | " return wh_word" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 35, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "# This function generate questions based on NER templates\n", 312 | "def generate_one_word_questions(sent):\n", 313 | " \n", 314 | " named_entities = get_named_entities(sent)\n", 315 | " questions = []\n", 316 | " \n", 
317 | " if not named_entities:\n", 318 | " return questions\n", 319 | " \n", 320 | " for entity in named_entities:\n", 321 | " wh_word = get_wh_word(entity, sent)\n", 322 | " \n", 323 | " if(sent[-1]== '.'):\n", 324 | " sent= sent[:-1]\n", 325 | " \n", 326 | " if sent.find(entity[0]) == 0:\n", 327 | " questions.append(sent.replace(entity[0],wh_word) + '?')\n", 328 | " continue\n", 329 | " \n", 330 | " question= \"\"\n", 331 | " aux_verb= False\n", 332 | " res= None\n", 333 | "\n", 334 | " for i in range(len(aux_list)):\n", 335 | " if(aux_list[i] in sent.split()):\n", 336 | " aux_verb= True\n", 337 | " pos= i\n", 338 | " break\n", 339 | " \n", 340 | " if not aux_verb:\n", 341 | " pos = 9\n", 342 | " \n", 343 | " text = nltk.word_tokenize(sent)\n", 344 | " tags= nltk.pos_tag(text)\n", 345 | " question_part= \"\"\n", 346 | " \n", 347 | " if wh_word == 'When':\n", 348 | " word_list = sent.split(entity[0])[0].split()\n", 349 | " if word_list[-1] in ['in', 'at', 'on']:\n", 350 | " question_part = \" \".join(word_list[:-1])\n", 351 | " else:\n", 352 | " question_part = \" \".join(word_list)\n", 353 | " \n", 354 | " qp_text = nltk.word_tokenize(question_part)\n", 355 | " qp_tags = nltk.pos_tag(qp_text)\n", 356 | " \n", 357 | " question_part = \"\"\n", 358 | " \n", 359 | " for i, grp in enumerate(qp_tags):\n", 360 | " word = grp[0]\n", 361 | " tag = grp[1]\n", 362 | " if(re.match(\"VB*\", tag) and word not in aux_list):\n", 363 | " question_part += WordNetLemmatizer().lemmatize(word,'v') + \" \"\n", 364 | " else:\n", 365 | " question_part += word + \" \"\n", 366 | " \n", 367 | " if question_part[-1] == ' ':\n", 368 | " question_part = question_part[:-1]\n", 369 | " \n", 370 | " else:\n", 371 | " for i, grp in enumerate(tags):\n", 372 | " #Break the sentence after the first non-auxiliary verb\n", 373 | "\n", 374 | " word = grp[0]\n", 375 | " tag = grp[1]\n", 376 | "\n", 377 | " if(re.match(\"VB*\", tag) and word not in aux_list):\n", 378 | " question_part+= word\n", 379 | 
"\n", 380 | " if i 1 and tmp >= 3 and tmp < maxLen):\n", 419 | " maxLen = tmp\n", 420 | " val = j\n", 421 | " \n", 422 | " if(val != -1):\n", 423 | "\n", 424 | " # To initialize a list for every new key\n", 425 | " if(disc_sentences.get(val, 'empty') == 'empty'):\n", 426 | " disc_sentences[val] = []\n", 427 | " \n", 428 | " disc_sentences[val].append(sentences[i])\n", 429 | " temp.append(sentences[i])\n", 430 | "\n", 431 | "\n", 432 | " nondisc_sentences = list(set(sentences) - set(temp))\n", 433 | " \n", 434 | " t = []\n", 435 | " for k, v in disc_sentences.items():\n", 436 | " for val in range(len(v)):\n", 437 | " # Split the sentence on discourse marker and identify the question part\n", 438 | " question_part = disc_sentences[k][val].split(k)[target_arg[k] - 1]\n", 439 | " q = generate_question(question_part, qtype[k][0])\n", 440 | " if(q != \"\"):\n", 441 | " questions.append([disc_sentences[k][val],q])\n", 442 | " \n", 443 | " \n", 444 | " for question_part in nondisc_sentences:\n", 445 | " s = \"non_disc\"\n", 446 | " sentence = question_part\n", 447 | " text = nltk.word_tokenize(question_part)\n", 448 | " if(text[0] == 'Yes'):\n", 449 | " question_part = question_part[5:]\n", 450 | " s = \"Yes/No\"\n", 451 | " \n", 452 | " elif(text[0] == 'No'):\n", 453 | " question_part = question_part[4:]\n", 454 | " s = \"Yes/No\"\n", 455 | " \n", 456 | " q = generate_question(question_part, s)\n", 457 | " if(q != \"\"):\n", 458 | " questions.append([sentence,q])\n", 459 | " l = generate_one_word_questions(question_part)\n", 460 | " questions += [[sentence,i] for i in l]\n", 461 | " print(len(questions))\n", 462 | " \n", 463 | " for pair in questions:\n", 464 | " print(\"S: \",pair[0])\n", 465 | " print(\"Q: \",pair[1])\n", 466 | " print()" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 104, 472 | "metadata": { 473 | "scrolled": false 474 | }, 475 | "outputs": [ 476 | { 477 | "name": "stdout", 478 | "output_type": "stream", 479 | "text": [ 
480 | "92\n", 481 | "0.7608695652173914\n", 482 | "0.8428571428571429\n" 483 | ] 484 | } 485 | ], 486 | "source": [ 487 | "# Syntactic Score and Fluency using Manual Evaluation\n", 488 | "\n", 489 | "syntactic_score = [0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1]\n", 490 | "fluency_score = [0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,1]\n", 491 | "print(len(syntactic_score))\n", 492 | "print(sum(syntactic_score)/len(syntactic_score))\n", 493 | "print(sum(fluency_score)/sum(syntactic_score))" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": 95, 499 | "metadata": { 500 | "scrolled": true 501 | }, 502 | "outputs": [ 503 | { 504 | "name": "stdout", 505 | "output_type": "stream", 506 | "text": [ 507 | "92\n", 508 | "S: I had been playing the drums since school time.\n", 509 | "Q: When were you been playing the drums ?\n", 510 | "\n", 511 | "S: I have been up since four.\n", 512 | "Q: When do you hav been up ?\n", 513 | "\n", 514 | "S: They were angry because their plans had been discovered.\n", 515 | "Q: Why were they angry ?\n", 516 | "\n", 517 | "S: I think he felt included because he was helping as much as we were.\n", 518 | "Q: Why did you think he felt included ?\n", 519 | "\n", 520 | "S: I am studying English because I’d like to immigrate to the U.S.\n", 521 | "Q: Why were you studying English ?\n", 522 | "\n", 523 | "S: Children often cry just because they want some attention.\n", 524 | "Q: Why do children often cry just ?\n", 525 | "\n", 526 | "S: Tom repainted his mailbox because it was looking shabby.\n", 527 | "Q: Why did tom repaint his mailbox ?\n", 528 | "\n", 529 | "S: They slept in the car because they couldn't find a hotel.\n", 530 | "Q: Why do they 
slept in the car ?\n", 531 | "\n", 532 | "S: The accident happened because of the driver's negligence.\n", 533 | "Q: Why did the accident hap ?\n", 534 | "\n", 535 | "S: He gave up traveling abroad because of his sudden illness.\n", 536 | "Q: Why did he gav up traveling abroad ?\n", 537 | "\n", 538 | "S: Harry was late for class yesterday because of his accident.\n", 539 | "Q: Why was harry late for class yesterday ?\n", 540 | "\n", 541 | "S: We had a bad rice crop last year because it rained a lot.\n", 542 | "Q: Why were you a bad rice crop last year ?\n", 543 | "\n", 544 | "S: I can play quite a few musical instruments, for example, the flute, the guitar, and the piano.\n", 545 | "Q: Give an example where I can play quite a few musical instruments, ?\n", 546 | "\n", 547 | "S: Calcium is found in green leafy vegetables, for example, broccoli, kale, arugula, or spinach have over 160 mg. per serving.\n", 548 | "Q: Give an example where calcium is found in green leafy vegetables, ?\n", 549 | "\n", 550 | "S: It is possible to combine Computer Science with other subjects, for example Physics.\n", 551 | "Q: Give an example where it is possible to combine Computer Science with other subjects, ?\n", 552 | "\n", 553 | "S: Fractions can be written with oblique strokes, for example 2/3.\n", 554 | "Q: Give an example where fractions can be written with oblique strokes, ?\n", 555 | "\n", 556 | "S: Vitamin C is found in colorful vegetables, for instance, bell peppers have a lot of vitamin C.\n", 557 | "John is feeling much better now.\n", 558 | "Q: Give an instance where vitamin C is found in colorful vegetables, ?\n", 559 | "\n", 560 | "S: He hurt his hand when he fell.\n", 561 | "Q: When did he hurt his hand ?\n", 562 | "\n", 563 | "S: Every child feels displaced to some degree when a new sibling arrives.\n", 564 | "Q: When does every child feels displac to some degree ?\n", 565 | "\n", 566 | "S: She was angry when you told her about the accident.\n", 567 | "Q: When was she 
angry ?\n", 568 | "\n", 569 | "S: I married her when she was 23.\n", 570 | "Q: When did you marry her ?\n", 571 | "\n", 572 | "S: I shall have been living in Mumbai for five years by May 2019.\n", 573 | "Q: Were you have been living in Mumbai for five years by May 2019 ?\n", 574 | "\n", 575 | "S: I shall have been living in Mumbai for five years by May 2019.\n", 576 | "Q: Where shall I have been?\n", 577 | "\n", 578 | "S: I shall have been living in Mumbai for five years by May 2019.\n", 579 | "Q: When shall I have be live in Mumbai for?\n", 580 | "\n", 581 | "S: I shall have been living in Mumbai for five years by May 2019.\n", 582 | "Q: When shall I have be live in Mumbai for five years by?\n", 583 | "\n", 584 | "S: It is ten o’clock.\n", 585 | "Q: Is it ten o ’ clock ?\n", 586 | "\n", 587 | "S: It is ten o’clock.\n", 588 | "Q: Where is It ten o ’ clock ?\n", 589 | "\n", 590 | "S: Yes, I like coffee.\n", 591 | "Q: Do you lik coffee?\n", 592 | "\n", 593 | "S: He will go to China tomorrow.\n", 594 | "Q: Will he go to China tomorrow ?\n", 595 | "\n", 596 | "S: He will go to China tomorrow.\n", 597 | "Q: Where will He go to?\n", 598 | "\n", 599 | "S: He will go to China tomorrow.\n", 600 | "Q: When will He go to China?\n", 601 | "\n", 602 | "S: He was elected as the Prime Minister of India on 15th August 1947.\n", 603 | "Q: Was he elected as the Prime Minister of India on 15th August 1947 ?\n", 604 | "\n", 605 | "S: He was elected as the Prime Minister of India on 15th August 1947.\n", 606 | "Q: Where was He elected as?\n", 607 | "\n", 608 | "S: He was elected as the Prime Minister of India on 15th August 1947.\n", 609 | "Q: When was He elect as the Prime Minister of India?\n", 610 | "\n", 611 | "S: I was playing tennis.\n", 612 | "Q: Were you playing tennis ?\n", 613 | "\n", 614 | "S: Yes, she is working very hard.\n", 615 | "Q: Is she working very hard ?\n", 616 | "\n", 617 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 
2002 to 2007.\n", 618 | "Q: Was abdul Kalam an aerospace scientist who served as the 11th President of India from 2002 to 2007 ?\n", 619 | "\n", 620 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007.\n", 621 | "Q: Who was an aerospace scientist who served as the 11th President of India from 2002 to 2007?\n", 622 | "\n", 623 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007.\n", 624 | "Q: Where was Abdul Kalam an aerospace scientist who served as?\n", 625 | "\n", 626 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007.\n", 627 | "Q: Where was Abdul Kalam an aerospace scientist who served as?\n", 628 | "\n", 629 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007.\n", 630 | "Q: When was Abdul Kalam an aerospace scientist who serve as the 11th President of India from?\n", 631 | "\n", 632 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007.\n", 633 | "Q: When was Abdul Kalam an aerospace scientist who serve as the 11th President of India from 2002 to?\n", 634 | "\n", 635 | "S: Population refers to the number of individuals in a particular place.\n", 636 | "Q: Does population refers to the number of individuals in a particular place?\n", 637 | "\n", 638 | "S: John was held captive at Castle Black.\n", 639 | "Q: Was john held captive at Castle Black ?\n", 640 | "\n", 641 | "S: John was held captive at Castle Black.\n", 642 | "Q: Who was held captive at Castle Black?\n", 643 | "\n", 644 | "S: John was held captive at Castle Black.\n", 645 | "Q: Where was John held captive?\n", 646 | "\n", 647 | "S: The Taj Mahal is a beautiful monument built in 1631 by an Emperor named Shah Jahan in memory of his wife Mumtaz Mahal.\n", 648 | "Q: Is the Taj Mahal a beautiful monument built in 1631 by an Emperor named Shah 
Jahan in memory of his wife Mumtaz Mahal ?\n", 649 | "\n", 650 | "S: The Taj Mahal is a beautiful monument built in 1631 by an Emperor named Shah Jahan in memory of his wife Mumtaz Mahal.\n", 651 | "Q: When is The Taj Mahal a beautiful monument build?\n", 652 | "\n", 653 | "S: The Taj Mahal is a beautiful monument built in 1631 by an Emperor named Shah Jahan in memory of his wife Mumtaz Mahal.\n", 654 | "Q: Who is The Taj Mahal a beautiful monument built in?\n", 655 | "\n", 656 | "S: The Taj Mahal is a beautiful monument built in 1631 by an Emperor named Shah Jahan in memory of his wife Mumtaz Mahal.\n", 657 | "Q: Where is The Taj Mahal a beautiful monument built in?\n", 658 | "\n", 659 | "S: Gandhi Jayanti is celebrated on 2nd October.\n", 660 | "Q: Is gandhi Jayanti celebrated on 2nd October ?\n", 661 | "\n", 662 | "S: Gandhi Jayanti is celebrated on 2nd October.\n", 663 | "Q: Who is celebrated on 2nd October?\n", 664 | "\n", 665 | "S: Gandhi Jayanti is celebrated on 2nd October.\n", 666 | "Q: When is Gandhi Jayanti celebrate?\n", 667 | "\n", 668 | "S: They have been trying to contact her.\n", 669 | "Q: Do they hav been try to contact her?\n", 670 | "\n", 671 | "S: Sun is the largest member of the Solar System.\n", 672 | "Q: Is sun the largest member of the Solar System ?\n", 673 | "\n", 674 | "S: Sun is the largest member of the Solar System.\n", 675 | "Q: Who is the largest member of the Solar System?\n", 676 | "\n", 677 | "S: Sun is the largest member of the Solar System.\n", 678 | "Q: Where is Sun the largest member of the Solar System ?\n", 679 | "\n", 680 | "S: She is preparing chicken sandwiches for breakfast.\n", 681 | "Q: Is she preparing chicken sandwiches for breakfast ?\n", 682 | "\n", 683 | "S: We were playing tennis at the club.\n", 684 | "Q: Were you playing tennis at the club ?\n", 685 | "\n", 686 | "S: Jawaharlal Nehru was born on 14th November 1889 in Allahabad, Uttar Pradesh.\n", 687 | "Q: Was jawaharlal Nehru born on 14th November 1889 in 
Allahabad , Uttar Pradesh ?\n", 688 | "\n", 689 | "S: Jawaharlal Nehru was born on 14th November 1889 in Allahabad, Uttar Pradesh.\n", 690 | "Q: Who was born on 14th November 1889 in Allahabad, Uttar Pradesh?\n", 691 | "\n", 692 | "S: Jawaharlal Nehru was born on 14th November 1889 in Allahabad, Uttar Pradesh.\n", 693 | "Q: When was Jawaharlal Nehru bear?\n", 694 | "\n", 695 | "S: Jawaharlal Nehru was born on 14th November 1889 in Allahabad, Uttar Pradesh.\n", 696 | "Q: Where was Jawaharlal Nehru born on?\n", 697 | "\n", 698 | "S: Jawaharlal Nehru was born on 14th November 1889 in Allahabad, Uttar Pradesh.\n", 699 | "Q: Where was Jawaharlal Nehru born on?\n", 700 | "\n", 701 | "S: Hindi Diwas was first celebrated in the year 1953.\n", 702 | "Q: Was hindi Diwas first celebrated in the year 1953 ?\n", 703 | "\n", 704 | "S: Hindi Diwas was first celebrated in the year 1953.\n", 705 | "Q: Who was first celebrated in the year 1953?\n", 706 | "\n", 707 | "S: Hindi Diwas was first celebrated in the year 1953.\n", 708 | "Q: When was Hindi Diwas first celebrate?\n", 709 | "\n", 710 | "S: Delhi is the capital of India.\n", 711 | "Q: Is delhi the capital of India ?\n", 712 | "\n", 713 | "S: Delhi is the capital of India.\n", 714 | "Q: Who is the capital of India?\n", 715 | "\n", 716 | "S: Delhi is the capital of India.\n", 717 | "Q: Where is Delhi the capital of India ?\n", 718 | "\n", 719 | "S: Sachin Tendulkar was awarded Bharat Ratna in 2013.\n", 720 | "Q: Was sachin Tendulkar awarded Bharat Ratna in 2013 ?\n", 721 | "\n", 722 | "S: Sachin Tendulkar was awarded Bharat Ratna in 2013.\n", 723 | "Q: Who was awarded Bharat Ratna in 2013?\n", 724 | "\n", 725 | "S: Sachin Tendulkar was awarded Bharat Ratna in 2013.\n", 726 | "Q: Who was Sachin Tendulkar awarded?\n", 727 | "\n", 728 | "S: Sachin Tendulkar was awarded Bharat Ratna in 2013.\n", 729 | "Q: When was Sachin Tendulkar award Bharat Ratna?\n", 730 | "\n", 731 | "S: His name is Peter.\n", 732 | "Q: Is his name Peter ?\n", 
733 | "\n", 734 | "S: His name is Peter.\n", 735 | "Q: Who is His name Peter ?\n", 736 | "\n", 737 | "S: No, I was not playing cricket.\n", 738 | "Q: Were you not playing cricket ?\n", 739 | "\n", 740 | "S: Darjeeling is known for its beautiful tea gardens.\n", 741 | "Q: Is darjeeling known for its beautiful tea gardens ?\n", 742 | "\n", 743 | "S: Mahatma Gandhi was born on 2nd October 1869 in Porbandar, Gujarat.\n", 744 | "Q: Was mahatma Gandhi born on 2nd October 1869 in Porbandar , Gujarat ?\n", 745 | "\n", 746 | "S: Mahatma Gandhi was born on 2nd October 1869 in Porbandar, Gujarat.\n", 747 | "Q: Who was born on 2nd October 1869 in Porbandar, Gujarat?\n", 748 | "\n", 749 | "S: Mahatma Gandhi was born on 2nd October 1869 in Porbandar, Gujarat.\n", 750 | "Q: When was Mahatma Gandhi bear?\n", 751 | "\n", 752 | "S: Mahatma Gandhi was born on 2nd October 1869 in Porbandar, Gujarat.\n", 753 | "Q: Where was Mahatma Gandhi born on?\n", 754 | "\n", 755 | "S: Mahatma Gandhi was born on 2nd October 1869 in Porbandar, Gujarat.\n", 756 | "Q: Where was Mahatma Gandhi born on?\n", 757 | "\n", 758 | "S: The British had introduced a National Flag for British India after the revolt of 1857.\n", 759 | "Q: Had the British introduced a National Flag for British India after the revolt of 1857 ?\n", 760 | "\n", 761 | "S: The British had introduced a National Flag for British India after the revolt of 1857.\n", 762 | "Q: Where had The British introduced a?\n", 763 | "\n", 764 | "S: The British had introduced a National Flag for British India after the revolt of 1857.\n", 765 | "Q: Where had The British introduced a?\n", 766 | "\n", 767 | "S: The British had introduced a National Flag for British India after the revolt of 1857.\n", 768 | "Q: Where had The British introduced a?\n", 769 | "\n", 770 | "S: The British had introduced a National Flag for British India after the revolt of 1857.\n", 771 | "Q: When had The British introduce a National Flag for British India after the revolt 
of?\n", 772 | "\n", 773 | "S: They grow really well in pots.\n", 774 | "Q: Do they grow really well in pots?\n", 775 | "\n", 776 | "S: You usually walk to work.\n", 777 | "Q: Do you usually walk to work?\n", 778 | "\n", 779 | "S: I did go for fishing today.\n", 780 | "Q: Were you go for fishing today ?\n", 781 | "\n", 782 | "S: I did go for fishing today.\n", 783 | "Q: When did I go for fishing?\n", 784 | "\n" 785 | ] 786 | } 787 | ], 788 | "source": [ 789 | "sentensify()" 790 | ] 791 | }, 792 | { 793 | "cell_type": "code", 794 | "execution_count": null, 795 | "metadata": {}, 796 | "outputs": [], 797 | "source": [] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "execution_count": null, 802 | "metadata": {}, 803 | "outputs": [], 804 | "source": [] 805 | } 806 | ], 807 | "metadata": { 808 | "kernelspec": { 809 | "display_name": "Python 3", 810 | "language": "python", 811 | "name": "python3" 812 | }, 813 | "language_info": { 814 | "codemirror_mode": { 815 | "name": "ipython", 816 | "version": 3 817 | }, 818 | "file_extension": ".py", 819 | "mimetype": "text/x-python", 820 | "name": "python", 821 | "nbconvert_exporter": "python", 822 | "pygments_lexer": "ipython3", 823 | "version": "3.6.7" 824 | } 825 | }, 826 | "nbformat": 4, 827 | "nbformat_minor": 2 828 | } 829 | -------------------------------------------------------------------------------- /Code.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Imports\n", 10 | "import nltk\n", 11 | "import nltk.data\n", 12 | "from nltk.stem.lancaster import LancasterStemmer\n", 13 | "from nltk.stem.wordnet import WordNetLemmatizer\n", 14 | "import re\n", 15 | "import spacy\n", 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Class 
initializations\n", 26 | "nlp = spacy.load('en_core_web_sm')\n", 27 | "stemmer = LancasterStemmer()" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# List to hold all input sentences\n", 37 | "sentences = []\n", 38 | "\n", 39 | "# Dictionary to hold sentences corresponding to respective discourse markers\n", 40 | "disc_sentences = {}\n", 41 | "\n", 42 | "# Remaining sentences which do not have discourse markers (To be used later to generate other kinds of questions)\n", 43 | "nondisc_sentences = []\n", 44 | "\n", 45 | "# List of auxiliary verbs\n", 46 | "aux_list = ['am', 'are', 'is', 'was', 'were', 'can', 'could', 'does', 'do', 'did', 'has', 'had', 'may', 'might', 'must',\n", 47 | " 'need', 'ought', 'shall', 'should', 'will', 'would']\n", 48 | "\n", 49 | "# List of all discourse markers\n", 50 | "discourse_markers = ['because', 'as a result', 'since', 'when', 'although', 'for example', 'for instance']\n", 51 | "\n", 52 | "# Different question types possible for each discourse marker\n", 53 | "qtype = {'because': ['Why'], 'since': ['When', 'Why'], 'when': ['When'], 'although': ['Yes/No'], 'as a result': ['Why'], \n", 54 | " 'for example': ['Give an example where'], 'for instance': ['Give an instance where'], 'to': ['Why']}\n", 55 | "\n", 56 | "# The argument which forms a question\n", 57 | "target_arg = {'because': 1, 'since': 1, 'when': 1, 'although': 1, 'as a result': 2, 'for example': 1, 'for instance': 1, \n", 58 | " 'to': 1}" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# This function is used to tokenize and split into sentences\n", 68 | "def sentensify():\n", 69 | " global sentences\n", 70 | " tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')\n", 71 | " fp = open('input.txt')\n", 72 | " data = fp.read()\n", 73 | " sentences = tokenizer.tokenize(data)\n", 74 | " 
discourse()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# Function used to generate the questions from sentences which have already been pre-processed.\n", 84 | "def generate_question(question_part, type):\n", 85 | "\n", 86 | " ''' \n", 87 | " question_part -> Part of input sentence which forms a question\n", 88 | " type-> The type of question (why, where, etc)\n", 89 | " '''\n", 90 | " # Remove full stop and make first letter lower case\n", 91 | " question_part = question_part[0].lower() + question_part[1:]\n", 92 | " if(question_part[-1] == '.' or question_part[-1] == ','):\n", 93 | " question_part = question_part[:-1]\n", 94 | " \n", 95 | " # Capitalizing 'i' since 'I' is recognized by parsers appropriately \n", 96 | " for i in range(0, len(question_part)):\n", 97 | " if(question_part[i] == 'i'):\n", 98 | " if((i == 0 and question_part[i+1] == ' ') or (question_part[i-1] == ' ' and question_part[i+1] == ' ')):\n", 99 | " question_part = question_part[:i] + 'I' + question_part[i + 1: ]\n", 100 | " \n", 101 | " question = \"\"\n", 102 | " if(type == 'Give an example where' or type == 'Give an instance where'):\n", 103 | " question = type + \" \" + question_part + '?'\n", 104 | " return question\n", 105 | "\n", 106 | " aux_verb = False\n", 107 | " res = None\n", 108 | " \n", 109 | " # Find out if auxiliary verb already exists\n", 110 | " for i in range(len(aux_list)):\n", 111 | " if(aux_list[i] in question_part.split()):\n", 112 | " aux_verb = True\n", 113 | " pos = i\n", 114 | " break\n", 115 | "\n", 116 | " # If auxiliary verb exists\n", 117 | " if(aux_verb):\n", 118 | " \n", 119 | " # Tokeninze the part of the sentence from which the question has to be made\n", 120 | " text = nltk.word_tokenize(question_part)\n", 121 | " tags = nltk.pos_tag(text)\n", 122 | " question_part = \"\"\n", 123 | " fP = False\n", 124 | " \n", 125 | " for word, tag in tags:\n", 126 | " if(word 
in ['I', 'We', 'we']):\n", 127 | " question_part += 'you' + \" \"\n", 128 | " fP = True\n", 129 | " continue\n", 130 | " question_part += word + \" \"\n", 131 | "\n", 132 | " # Split across the auxiliary verb and prepend it at the start of question part\n", 133 | " question = question_part.split(\" \" + aux_list[pos])\n", 134 | " if(fP):\n", 135 | " question = [\"were \"] + question\n", 136 | " else:\n", 137 | " question = [aux_list[pos] + \" \"] + question\n", 138 | "\n", 139 | " # If Yes/No, no need to introduce question phrase\n", 140 | " if(type == 'Yes/No'):\n", 141 | " question += ['?']\n", 142 | " \n", 143 | " elif(type != \"non_disc\"):\n", 144 | " question = [type + \" \"] + question + [\"?\"]\n", 145 | " \n", 146 | " else:\n", 147 | " question = question + [\"?\"]\n", 148 | " \n", 149 | " question = ''.join(question)\n", 150 | "\n", 151 | " # If auxilary verb does ot exist, it can only be some form of verb 'do'\n", 152 | " else:\n", 153 | " aux = None\n", 154 | " text = nltk.word_tokenize(question_part)\n", 155 | " tags = nltk.pos_tag(text)\n", 156 | " comb = \"\"\n", 157 | "\n", 158 | " '''There can be following combinations of nouns and verbs:\n", 159 | " NN/NNP and VBZ -> Does\n", 160 | " NNS/NNPS(plural) and VBP -> Do\n", 161 | " NN/NNP and VBN -> Did\n", 162 | " NNS/NNPS(plural) and VBN -> Did\n", 163 | " '''\n", 164 | " \n", 165 | " for tag in tags:\n", 166 | " if(comb == \"\"):\n", 167 | " if(tag[1] == 'NN' or tag[1] == 'NNP'):\n", 168 | " comb = 'NN'\n", 169 | "\n", 170 | " elif(tag[1] == 'NNS' or tag[1] == 'NNPS'):\n", 171 | " comb = 'NNS'\n", 172 | "\n", 173 | " elif(tag[1] == 'PRP'):\n", 174 | " if tag[0] in ['He','She','It']:\n", 175 | " comb = 'PRPS'\n", 176 | " else:\n", 177 | " comb = 'PRPP'\n", 178 | " tmp = question_part.split(\" \")\n", 179 | " tmp = tmp[1: ]\n", 180 | " if(tag[0] in ['I', 'we', 'We']):\n", 181 | " question_part = 'you ' + ' '.join(tmp)\n", 182 | " \n", 183 | " if(res == None):\n", 184 | " res = re.match(r\"VB*\", 
tag[1])\n", 185 | " if(res):\n", 186 | " \n", 187 | " # Stem the verb\n", 188 | " question_part = question_part.replace(tag[0], stemmer.stem(tag[0]))\n", 189 | " res = re.match(r\"VBN\", tag[1])\n", 190 | " res = re.match(r\"VBD\", tag[1])\n", 191 | "\n", 192 | " if(comb == 'NN'):\n", 193 | " aux = 'does'\n", 194 | " \n", 195 | " elif(comb == 'NNS'):\n", 196 | " aux = 'do'\n", 197 | " \n", 198 | " elif(comb == 'PRPS'):\n", 199 | " aux = 'does'\n", 200 | " \n", 201 | " elif(comb == 'PRPP'):\n", 202 | " aux = 'do'\n", 203 | " \n", 204 | " if(res and res.group() in ['VBD', 'VBN']):\n", 205 | " aux = 'did'\n", 206 | "\n", 207 | " if(aux):\n", 208 | " if(type == \"non_disc\" or type == \"Yes/No\"):\n", 209 | " question = aux + \" \" + question_part + \"?\"\n", 210 | "\n", 211 | " else:\n", 212 | " question = type + \" \" + aux + \" \" + question_part + \"?\"\n", 213 | " if(question != \"\"):\n", 214 | " question = question[0].upper() + question[1:]\n", 215 | " return question" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 6, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "# This function is used to get the named entities\n", 225 | "def get_named_entities(sent):\n", 226 | " doc = nlp(sent)\n", 227 | " named_entities = [(X.text, X.label_) for X in doc.ents]\n", 228 | " return named_entities" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 7, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "# This function is used to get the required wh word\n", 238 | "def get_wh_word(entity, sent):\n", 239 | " wh_word = \"\"\n", 240 | " if entity[1] in ['TIME', 'DATE']:\n", 241 | " wh_word = 'When'\n", 242 | " \n", 243 | " elif entity[1] == ['PRODUCT', 'EVENT', 'WORK_OF_ART', 'LAW', 'LANGUAGE']:\n", 244 | " wh_word = 'What'\n", 245 | " \n", 246 | " elif entity[1] in ['PERSON']:\n", 247 | " wh_word = 'Who'\n", 248 | " \n", 249 | " elif entity[1] in ['NORP', 'FAC' ,'ORG', 'GPE', 'LOC']:\n", 
250 | " index = sent.find(entity[0])\n", 251 | " if index == 0:\n", 252 | " wh_word = \"Who\"\n", 253 | " \n", 254 | " else:\n", 255 | " wh_word = \"Where\"\n", 256 | " \n", 257 | " else:\n", 258 | " wh_word = \"Where\"\n", 259 | " return wh_word" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 8, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "# This function generate questions based on NER templates\n", 269 | "def generate_one_word_questions(sent):\n", 270 | " \n", 271 | " named_entities = get_named_entities(sent)\n", 272 | " questions = []\n", 273 | " \n", 274 | " if not named_entities:\n", 275 | " return questions\n", 276 | " \n", 277 | " for entity in named_entities:\n", 278 | " wh_word = get_wh_word(entity, sent)\n", 279 | " \n", 280 | " if(sent[-1] == '.'):\n", 281 | " sent = sent[:-1]\n", 282 | " \n", 283 | " if sent.find(entity[0]) == 0:\n", 284 | " questions.append(sent.replace(entity[0],wh_word) + '?')\n", 285 | " continue\n", 286 | " \n", 287 | " question = \"\"\n", 288 | " aux_verb = False\n", 289 | " res = None\n", 290 | "\n", 291 | " for i in range(len(aux_list)):\n", 292 | " if(aux_list[i] in sent.split()):\n", 293 | " aux_verb = True\n", 294 | " pos = i\n", 295 | " break\n", 296 | " \n", 297 | " if not aux_verb:\n", 298 | " pos = 9\n", 299 | " \n", 300 | " text = nltk.word_tokenize(sent)\n", 301 | " tags = nltk.pos_tag(text)\n", 302 | " question_part = \"\"\n", 303 | " \n", 304 | " if wh_word == 'When':\n", 305 | " word_list = sent.split(entity[0])[0].split()\n", 306 | " if word_list[-1] in ['in', 'at', 'on']:\n", 307 | " question_part = \" \".join(word_list[:-1])\n", 308 | " else:\n", 309 | " question_part = \" \".join(word_list)\n", 310 | " \n", 311 | " qp_text = nltk.word_tokenize(question_part)\n", 312 | " qp_tags = nltk.pos_tag(qp_text)\n", 313 | " \n", 314 | " question_part = \"\"\n", 315 | " \n", 316 | " for i, grp in enumerate(qp_tags):\n", 317 | " word = grp[0]\n", 318 | " tag = grp[1]\n", 
319 | " if(re.match(\"VB*\", tag) and word not in aux_list):\n", 320 | " question_part += WordNetLemmatizer().lemmatize(word,'v') + \" \"\n", 321 | " else:\n", 322 | " question_part += word + \" \"\n", 323 | " \n", 324 | " if question_part[-1] == ' ':\n", 325 | " question_part = question_part[:-1]\n", 326 | " \n", 327 | " else:\n", 328 | " for i, grp in enumerate(tags):\n", 329 | " \n", 330 | " #Break the sentence after the first non-auxiliary verb\n", 331 | " word = grp[0]\n", 332 | " tag = grp[1]\n", 333 | "\n", 334 | " if(re.match(\"VB*\", tag) and word not in aux_list):\n", 335 | " question_part += word\n", 336 | "\n", 337 | " if i 1 and tmp >= 3 and tmp < maxLen):\n", 372 | " maxLen = tmp\n", 373 | " val = j\n", 374 | " \n", 375 | " if(val != -1):\n", 376 | "\n", 377 | " # To initialize a list for every new key\n", 378 | " if(disc_sentences.get(val, 'empty') == 'empty'):\n", 379 | " disc_sentences[val] = []\n", 380 | " \n", 381 | " disc_sentences[val].append(sentences[i])\n", 382 | " temp.append(sentences[i])\n", 383 | "\n", 384 | "\n", 385 | " nondisc_sentences = list(set(sentences) - set(temp))\n", 386 | " \n", 387 | " t = []\n", 388 | " for k, v in disc_sentences.items():\n", 389 | " for val in range(len(v)):\n", 390 | " \n", 391 | " # Split the sentence on discourse marker and identify the question part\n", 392 | " question_part = disc_sentences[k][val].split(k)[target_arg[k] - 1]\n", 393 | " q = generate_question(question_part, qtype[k][0])\n", 394 | " if(q != \"\"):\n", 395 | " questions.append([disc_sentences[k][val],q])\n", 396 | " \n", 397 | " \n", 398 | " for question_part in nondisc_sentences:\n", 399 | " s = \"non_disc\"\n", 400 | " sentence = question_part\n", 401 | " text = nltk.word_tokenize(question_part)\n", 402 | " if(text[0] == 'Yes'):\n", 403 | " question_part = question_part[5:]\n", 404 | " s = \"Yes/No\"\n", 405 | " \n", 406 | " elif(text[0] == 'No'):\n", 407 | " question_part = question_part[4:]\n", 408 | " s = \"Yes/No\"\n", 409 | " 
\n", 410 | " q = generate_question(question_part, s)\n", 411 | " if(q != \"\"):\n", 412 | " questions.append([sentence,q])\n", 413 | " l = generate_one_word_questions(question_part)\n", 414 | " questions += [[sentence,i] for i in l]\n", 415 | " print(len(questions))\n", 416 | " \n", 417 | " for pair in questions:\n", 418 | " print(\"S: \",pair[0])\n", 419 | " print(\"Q: \",pair[1])\n", 420 | " print()" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 10, 426 | "metadata": { 427 | "scrolled": false 428 | }, 429 | "outputs": [ 430 | { 431 | "name": "stdout", 432 | "output_type": "stream", 433 | "text": [ 434 | "92\n", 435 | "0.7608695652173914\n", 436 | "0.8428571428571429\n" 437 | ] 438 | } 439 | ], 440 | "source": [ 441 | "# Syntactic Score and Fluency using Manual Evaluation\n", 442 | "\n", 443 | "syntactic_score = [0,0,1,1,1,1,1,0,1,1,1,0,1,0,1,1,1,1,1,1,1,0,1,1,0,1,0,1,1,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,0,0,\n", 444 | " 1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1]\n", 445 | "fluency_score = [0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,1,1,1,1,1,0,0,0,0,1,0,1,1,1,1,1,0,0,1,1,1,0,0,0,0,1,1,1,1,1,1,0,0,0,\n", 446 | " 1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,1,1]\n", 447 | "print(len(syntactic_score))\n", 448 | "print(sum(syntactic_score)/len(syntactic_score))\n", 449 | "print(sum(fluency_score)/sum(syntactic_score))" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 11, 455 | "metadata": { 456 | "scrolled": true 457 | }, 458 | "outputs": [ 459 | { 460 | "name": "stdout", 461 | "output_type": "stream", 462 | "text": [ 463 | "92\n", 464 | "S: They were angry because their plans had been discovered.\n", 465 | "Q: Why were they angry ?\n", 466 | "\n", 467 | "S: I think he felt included because he was helping as much as we were.\n", 468 | "Q: Why did you think he felt included ?\n", 469 | "\n", 470 | "S: I am studying English because I’d like to 
immigrate to the U.S.\n", 471 | "Q: Why were you studying English ?\n", 472 | "\n", 473 | "S: Children often cry just because they want some attention.\n", 474 | "Q: Why do children often cry just ?\n", 475 | "\n", 476 | "S: Tom repainted his mailbox because it was looking shabby.\n", 477 | "Q: Why did tom repaint his mailbox ?\n", 478 | "\n", 479 | "S: They slept in the car because they couldn't find a hotel.\n", 480 | "Q: Why do they slept in the car ?\n", 481 | "\n", 482 | "S: The accident happened because of the driver's negligence.\n", 483 | "Q: Why did the accident hap ?\n", 484 | "\n", 485 | "S: He gave up traveling abroad because of his sudden illness.\n", 486 | "Q: Why did he gav up traveling abroad ?\n", 487 | "\n", 488 | "S: Harry was late for class yesterday because of his accident.\n", 489 | "Q: Why was harry late for class yesterday ?\n", 490 | "\n", 491 | "S: We had a bad rice crop last year because it rained a lot.\n", 492 | "Q: Why were you a bad rice crop last year ?\n", 493 | "\n", 494 | "S: I can play quite a few musical instruments, for example, the flute, the guitar, and the piano.\n", 495 | "Q: Give an example where I can play quite a few musical instruments, ?\n", 496 | "\n", 497 | "S: Calcium is found in green leafy vegetables, for example, broccoli, kale, arugula, or spinach have over 160 mg. 
per serving.\n", 498 | "Q: Give an example where calcium is found in green leafy vegetables, ?\n", 499 | "\n", 500 | "S: It is possible to combine Computer Science with other subjects, for example Physics.\n", 501 | "Q: Give an example where it is possible to combine Computer Science with other subjects, ?\n", 502 | "\n", 503 | "S: Fractions can be written with oblique strokes, for example 2/3.\n", 504 | "Q: Give an example where fractions can be written with oblique strokes, ?\n", 505 | "\n", 506 | "S: He hurt his hand when he fell.\n", 507 | "Q: When did he hurt his hand ?\n", 508 | "\n", 509 | "S: Every child feels displaced to some degree when a new sibling arrives.\n", 510 | "Q: When does every child feels displac to some degree ?\n", 511 | "\n", 512 | "S: She was angry when you told her about the accident.\n", 513 | "Q: When was she angry ?\n", 514 | "\n", 515 | "S: I married her when she was 23.\n", 516 | "Q: When did you marry her ?\n", 517 | "\n", 518 | "S: Vitamin C is found in colorful vegetables, for instance, bell peppers have a lot of vitamin C.\n", 519 | "John is feeling much better now.\n", 520 | "Q: Give an instance where vitamin C is found in colorful vegetables, ?\n", 521 | "\n", 522 | "S: I had been playing the drums since school time.\n", 523 | "Q: When were you been playing the drums ?\n", 524 | "\n", 525 | "S: I have been up since four.\n", 526 | "Q: When do you hav been up ?\n", 527 | "\n", 528 | "S: He will go to China tomorrow.\n", 529 | "Q: Will he go to China tomorrow ?\n", 530 | "\n", 531 | "S: He will go to China tomorrow.\n", 532 | "Q: Where will He go to?\n", 533 | "\n", 534 | "S: He will go to China tomorrow.\n", 535 | "Q: When will He go to China?\n", 536 | "\n", 537 | "S: Yes, she is working very hard.\n", 538 | "Q: Is she working very hard ?\n", 539 | "\n", 540 | "S: You usually walk to work.\n", 541 | "Q: Do you usually walk to work?\n", 542 | "\n", 543 | "S: The British had introduced a National Flag for British India after the 
revolt of 1857.\n", 544 | "Q: Had the British introduced a National Flag for British India after the revolt of 1857 ?\n", 545 | "\n", 546 | "S: The British had introduced a National Flag for British India after the revolt of 1857.\n", 547 | "Q: Where had The British introduced a?\n", 548 | "\n", 549 | "S: The British had introduced a National Flag for British India after the revolt of 1857.\n", 550 | "Q: Where had The British introduced a?\n", 551 | "\n", 552 | "S: The British had introduced a National Flag for British India after the revolt of 1857.\n", 553 | "Q: Where had The British introduced a?\n", 554 | "\n", 555 | "S: The British had introduced a National Flag for British India after the revolt of 1857.\n", 556 | "Q: When had The British introduce a National Flag for British India after the revolt of?\n", 557 | "\n", 558 | "S: Gandhi Jayanti is celebrated on 2nd October.\n", 559 | "Q: Is gandhi Jayanti celebrated on 2nd October ?\n", 560 | "\n", 561 | "S: Gandhi Jayanti is celebrated on 2nd October.\n", 562 | "Q: Who is celebrated on 2nd October?\n", 563 | "\n", 564 | "S: Gandhi Jayanti is celebrated on 2nd October.\n", 565 | "Q: When is Gandhi Jayanti celebrate?\n", 566 | "\n", 567 | "S: I was playing tennis.\n", 568 | "Q: Were you playing tennis ?\n", 569 | "\n", 570 | "S: No, I was not playing cricket.\n", 571 | "Q: Were you not playing cricket ?\n", 572 | "\n", 573 | "S: Mahatma Gandhi was born on 2nd October 1869 in Porbandar, Gujarat.\n", 574 | "Q: Was mahatma Gandhi born on 2nd October 1869 in Porbandar , Gujarat ?\n", 575 | "\n", 576 | "S: Mahatma Gandhi was born on 2nd October 1869 in Porbandar, Gujarat.\n", 577 | "Q: Who was born on 2nd October 1869 in Porbandar, Gujarat?\n", 578 | "\n", 579 | "S: Mahatma Gandhi was born on 2nd October 1869 in Porbandar, Gujarat.\n", 580 | "Q: When was Mahatma Gandhi bear?\n", 581 | "\n", 582 | "S: Mahatma Gandhi was born on 2nd October 1869 in Porbandar, Gujarat.\n", 583 | "Q: Where was Mahatma Gandhi born on?\n", 
584 | "\n", 585 | "S: Mahatma Gandhi was born on 2nd October 1869 in Porbandar, Gujarat.\n", 586 | "Q: Where was Mahatma Gandhi born on?\n", 587 | "\n", 588 | "S: Sun is the largest member of the Solar System.\n", 589 | "Q: Is sun the largest member of the Solar System ?\n", 590 | "\n", 591 | "S: Sun is the largest member of the Solar System.\n", 592 | "Q: Who is the largest member of the Solar System?\n", 593 | "\n", 594 | "S: Sun is the largest member of the Solar System.\n", 595 | "Q: Where is Sun the largest member of the Solar System ?\n", 596 | "\n", 597 | "S: They grow really well in pots.\n", 598 | "Q: Do they grow really well in pots?\n", 599 | "\n", 600 | "S: John was held captive at Castle Black.\n", 601 | "Q: Was john held captive at Castle Black ?\n", 602 | "\n", 603 | "S: John was held captive at Castle Black.\n", 604 | "Q: Who was held captive at Castle Black?\n", 605 | "\n", 606 | "S: John was held captive at Castle Black.\n", 607 | "Q: Where was John held captive?\n", 608 | "\n", 609 | "S: It is ten o’clock.\n", 610 | "Q: Is it ten o ’ clock ?\n", 611 | "\n", 612 | "S: It is ten o’clock.\n", 613 | "Q: Where is It ten o ’ clock ?\n", 614 | "\n", 615 | "S: She is preparing chicken sandwiches for breakfast.\n", 616 | "Q: Is she preparing chicken sandwiches for breakfast ?\n", 617 | "\n", 618 | "S: He was elected as the Prime Minister of India on 15th August 1947.\n", 619 | "Q: Was he elected as the Prime Minister of India on 15th August 1947 ?\n", 620 | "\n", 621 | "S: He was elected as the Prime Minister of India on 15th August 1947.\n", 622 | "Q: Where was He elected as?\n", 623 | "\n", 624 | "S: He was elected as the Prime Minister of India on 15th August 1947.\n", 625 | "Q: When was He elect as the Prime Minister of India?\n", 626 | "\n", 627 | "S: I shall have been living in Mumbai for five years by May 2019.\n", 628 | "Q: Were you have been living in Mumbai for five years by May 2019 ?\n", 629 | "\n", 630 | "S: I shall have been living in Mumbai 
for five years by May 2019.\n", 631 | "Q: Where shall I have been?\n", 632 | "\n", 633 | "S: I shall have been living in Mumbai for five years by May 2019.\n", 634 | "Q: When shall I have be live in Mumbai for?\n", 635 | "\n", 636 | "S: I shall have been living in Mumbai for five years by May 2019.\n", 637 | "Q: When shall I have be live in Mumbai for five years by?\n", 638 | "\n", 639 | "S: Darjeeling is known for its beautiful tea gardens.\n", 640 | "Q: Is darjeeling known for its beautiful tea gardens ?\n", 641 | "\n", 642 | "S: Hindi Diwas was first celebrated in the year 1953.\n", 643 | "Q: Was hindi Diwas first celebrated in the year 1953 ?\n", 644 | "\n", 645 | "S: Hindi Diwas was first celebrated in the year 1953.\n", 646 | "Q: Who was first celebrated in the year 1953?\n", 647 | "\n", 648 | "S: Hindi Diwas was first celebrated in the year 1953.\n", 649 | "Q: When was Hindi Diwas first celebrate?\n", 650 | "\n", 651 | "S: I did go for fishing today.\n", 652 | "Q: Were you go for fishing today ?\n", 653 | "\n", 654 | "S: I did go for fishing today.\n", 655 | "Q: When did I go for fishing?\n", 656 | "\n", 657 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007.\n", 658 | "Q: Was abdul Kalam an aerospace scientist who served as the 11th President of India from 2002 to 2007 ?\n", 659 | "\n", 660 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007.\n", 661 | "Q: Who was an aerospace scientist who served as the 11th President of India from 2002 to 2007?\n", 662 | "\n", 663 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007.\n", 664 | "Q: Where was Abdul Kalam an aerospace scientist who served as?\n", 665 | "\n", 666 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007.\n", 667 | "Q: Where was Abdul Kalam an aerospace scientist who served as?\n", 668 | 
"\n", 669 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007.\n", 670 | "Q: When was Abdul Kalam an aerospace scientist who serve as the 11th President of India from?\n", 671 | "\n", 672 | "S: Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007.\n", 673 | "Q: When was Abdul Kalam an aerospace scientist who serve as the 11th President of India from 2002 to?\n", 674 | "\n", 675 | "S: Delhi is the capital of India.\n", 676 | "Q: Is delhi the capital of India ?\n", 677 | "\n", 678 | "S: Delhi is the capital of India.\n", 679 | "Q: Who is the capital of India?\n", 680 | "\n", 681 | "S: Delhi is the capital of India.\n", 682 | "Q: Where is Delhi the capital of India ?\n", 683 | "\n", 684 | "S: Yes, I like coffee.\n", 685 | "Q: Do you lik coffee?\n", 686 | "\n", 687 | "S: The Taj Mahal is a beautiful monument built in 1631 by an Emperor named Shah Jahan in memory of his wife Mumtaz Mahal.\n", 688 | "Q: Is the Taj Mahal a beautiful monument built in 1631 by an Emperor named Shah Jahan in memory of his wife Mumtaz Mahal ?\n", 689 | "\n", 690 | "S: The Taj Mahal is a beautiful monument built in 1631 by an Emperor named Shah Jahan in memory of his wife Mumtaz Mahal.\n", 691 | "Q: When is The Taj Mahal a beautiful monument build?\n", 692 | "\n", 693 | "S: The Taj Mahal is a beautiful monument built in 1631 by an Emperor named Shah Jahan in memory of his wife Mumtaz Mahal.\n", 694 | "Q: Who is The Taj Mahal a beautiful monument built in?\n", 695 | "\n", 696 | "S: The Taj Mahal is a beautiful monument built in 1631 by an Emperor named Shah Jahan in memory of his wife Mumtaz Mahal.\n", 697 | "Q: Where is The Taj Mahal a beautiful monument built in?\n", 698 | "\n", 699 | "S: Sachin Tendulkar was awarded Bharat Ratna in 2013.\n", 700 | "Q: Was sachin Tendulkar awarded Bharat Ratna in 2013 ?\n", 701 | "\n", 702 | "S: Sachin Tendulkar was awarded Bharat Ratna in 2013.\n", 703 | "Q: 
Who was awarded Bharat Ratna in 2013?\n", 704 | "\n", 705 | "S: Sachin Tendulkar was awarded Bharat Ratna in 2013.\n", 706 | "Q: Who was Sachin Tendulkar awarded?\n", 707 | "\n", 708 | "S: Sachin Tendulkar was awarded Bharat Ratna in 2013.\n", 709 | "Q: When was Sachin Tendulkar award Bharat Ratna?\n", 710 | "\n", 711 | "S: His name is Peter.\n", 712 | "Q: Is his name Peter ?\n", 713 | "\n", 714 | "S: His name is Peter.\n", 715 | "Q: Who is His name Peter ?\n", 716 | "\n", 717 | "S: Jawaharlal Nehru was born on 14th November 1889 in Allahabad, Uttar Pradesh.\n", 718 | "Q: Was jawaharlal Nehru born on 14th November 1889 in Allahabad , Uttar Pradesh ?\n", 719 | "\n", 720 | "S: Jawaharlal Nehru was born on 14th November 1889 in Allahabad, Uttar Pradesh.\n", 721 | "Q: Who was born on 14th November 1889 in Allahabad, Uttar Pradesh?\n", 722 | "\n", 723 | "S: Jawaharlal Nehru was born on 14th November 1889 in Allahabad, Uttar Pradesh.\n", 724 | "Q: When was Jawaharlal Nehru bear?\n", 725 | "\n", 726 | "S: Jawaharlal Nehru was born on 14th November 1889 in Allahabad, Uttar Pradesh.\n", 727 | "Q: Where was Jawaharlal Nehru born on?\n", 728 | "\n", 729 | "S: Jawaharlal Nehru was born on 14th November 1889 in Allahabad, Uttar Pradesh.\n", 730 | "Q: Where was Jawaharlal Nehru born on?\n", 731 | "\n", 732 | "S: We were playing tennis at the club.\n", 733 | "Q: Were you playing tennis at the club ?\n", 734 | "\n", 735 | "S: Population refers to the number of individuals in a particular place.\n", 736 | "Q: Does population refers to the number of individuals in a particular place?\n", 737 | "\n", 738 | "S: They have been trying to contact her.\n", 739 | "Q: Do they hav been try to contact her?\n", 740 | "\n" 741 | ] 742 | } 743 | ], 744 | "source": [ 745 | "sentensify()" 746 | ] 747 | } 748 | ], 749 | "metadata": { 750 | "kernelspec": { 751 | "display_name": "Python 3", 752 | "language": "python", 753 | "name": "python3" 754 | }, 755 | "language_info": { 756 | "codemirror_mode": 
{ 757 | "name": "ipython", 758 | "version": 3 759 | }, 760 | "file_extension": ".py", 761 | "mimetype": "text/x-python", 762 | "name": "python", 763 | "nbconvert_exporter": "python", 764 | "pygments_lexer": "ipython3", 765 | "version": "3.6.7" 766 | } 767 | }, 768 | "nbformat": 4, 769 | "nbformat_minor": 2 770 | } 771 | -------------------------------------------------------------------------------- /NLP Project.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sanskar-Jain/Automatic-Question-Generator/b64bf27d39c08e561b503fc493c03fc510603462/NLP Project.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Question-Generation 2 | An automated system that takes a text as input and produces questions as output for assessing a reader’s knowledge of the information in the 3 | text. The system applies a combination of manually encoded transformation rules to each sentence in a paragraph. 4 | 5 | ### Libraries Used 6 | - spaCy 7 | - NLTK 8 | - pandas 9 | - re 10 | 11 | ### Part-of-speech tags used in the project 12 | - NNS Noun, plural 13 | - JJ Adjective 14 | - NNP Proper noun, singular 15 | - VBG Verb, gerund or present participle 16 | - VBN Verb, past participle 17 | - VBZ Verb, 3rd person singular present 18 | - VBD Verb, past tense 19 | - IN Preposition or subordinating conjunction 20 | - PRP Personal pronoun 21 | - NN Noun, singular or mass 22 | -------------------------------------------------------------------------------- /input.txt: -------------------------------------------------------------------------------- 1 | They were angry because their plans had been discovered. 2 | No, I was not playing cricket. 3 | I can play quite a few musical instruments, for example, the flute, the guitar, and the piano. 4 | He hurt his hand when he fell. 
5 | I think he felt included because he was helping as much as we were. 6 | Calcium is found in green leafy vegetables, for example, broccoli, kale, arugula, or spinach have over 160 mg. per serving. 7 | Sachin Tendulkar was awarded Bharat Ratna in 2013. 8 | John was held captive at Castle Black. 9 | I am studying English because I’d like to immigrate to the U.S. 10 | It is ten o’clock. 11 | He will go to China tomorrow. 12 | Children often cry just because they want some attention. 13 | Abdul Kalam was an aerospace scientist who served as the 11th President of India from 2002 to 2007. 14 | Vitamin C is found in colorful vegetables, for instance, bell peppers have a lot of vitamin C. 15 | John is feeling much better now. 16 | His name is Peter. 17 | Tom repainted his mailbox because it was looking shabby. 18 | The Taj Mahal is a beautiful monument built in 1631 by an Emperor named Shah Jahan in memory of his wife Mumtaz Mahal. 19 | Delhi is the capital of India. 20 | It is possible to combine Computer Science with other subjects, for example Physics. 21 | Sun is the largest member of the Solar System. 22 | Mahatma Gandhi was born on 2nd October 1869 in Porbandar, Gujarat. 23 | The British had introduced a National Flag for British India after the revolt of 1857. 24 | Hindi Diwas was first celebrated in the year 1953. 25 | Population refers to the number of individuals in a particular place. 26 | Jawaharlal Nehru was born on 14th November 1889 in Allahabad, Uttar Pradesh. 27 | He was elected as the Prime Minister of India on 15th August 1947. 28 | They slept in the car because they couldn't find a hotel. 29 | The accident happened because of the driver's negligence. 30 | Yes, I like coffee. 31 | Darjeeling is known for its beautiful tea gardens. 32 | Gandhi Jayanti is celebrated on 2nd October. 33 | Every child feels displaced to some degree when a new sibling arrives. 34 | He gave up traveling abroad because of his sudden illness. 35 | I was playing tennis. 
36 | We were playing tennis at the club. 37 | I had been playing the drums since school time. 38 | She is preparing chicken sandwiches for breakfast. 39 | They have been trying to contact her. 40 | I shall have been living in Mumbai for five years by May 2019. 41 | Harry was late for class yesterday because of his accident. 42 | I have been up since four. 43 | Fractions can be written with oblique strokes, for example 2/3. 44 | Yes, she is working very hard. 45 | You usually walk to work. 46 | She was angry when you told her about the accident. 47 | I did go for fishing today. 48 | We had a bad rice crop last year because it rained a lot. 49 | They grow really well in pots. 50 | I married her when she was 23. --------------------------------------------------------------------------------