├── AutomaticQuestionGenerator
│   ├── DB
│   │   ├── db.txt
│   │   └── db01.txt
│   ├── aqgFunction.py
│   ├── clause.py
│   ├── identification.py
│   ├── main.py
│   ├── nlpNER.py
│   ├── nonClause.py
│   └── questionValidation.py
├── README.md
└── requirements.txt

/AutomaticQuestionGenerator/DB/db.txt:
--------------------------------------------------------------------------------
Goldie did not care for jewels or gold. She loved walking barefoot in her simple clothes over grassy fields. She loved the feel of the wind in her hair, roses and bird songs, the light in the sky at dusk and dawn, the scent of wood smoke and lilacs. She often walked alone, wishing that her father would join her and learn to love the world as she did.
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/aqgFunction.py:
--------------------------------------------------------------------------------
import spacy
import clause
import nonClause
import identification
import questionValidation
from nlpNER import nerTagger


class AutomaticQuestionGenerator():
    # AQG: parse the input text and generate a list of candidate questions
    def aqgParse(self, sentence):

        # nlp = spacy.load("en")
        nlp = spacy.load('en_core_web_md')

        singleSentences = sentence.split(".")
        questionsList = []
        if len(singleSentences) != 0:
            for i in range(len(singleSentences)):
                segmentSets = singleSentences[i].split(",")

                ner = nerTagger(nlp, singleSentences[i])

                if len(segmentSets) != 0:
                    for j in range(len(segmentSets)):
                        try:
                            questionsList += clause.howmuch_2(segmentSets, j, ner)
                        except Exception:
                            pass

                        if identification.clause_identify(segmentSets[j]) == 1:
                            # The segment is a full clause: apply every
                            # clause-based question rule to it.
                            for rule in (clause.whom_1, clause.whom_2, clause.whom_3,
                                         clause.whose, clause.what_to_do, clause.who,
                                         clause.howmuch_1, clause.howmuch_3):
                                try:
                                    questionsList += rule(segmentSets, j, ner)
                                except Exception:
                                    pass
                        else:
                            s = ""
                            try:
                                s = identification.subjectphrase_search(segmentSets, j)
                            except Exception:
                                pass

                            if len(s) != 0:
                                # A subject phrase from an earlier segment completes
                                # this one, so retry the clause-based rules.
                                segmentSets[j] = s + segmentSets[j]
                                for rule in (clause.whom_1, clause.whom_2, clause.whom_3,
                                             clause.whose, clause.what_to_do, clause.who):
                                    try:
                                        questionsList += rule(segmentSets, j, ner)
                                    except Exception:
                                        pass
                            else:
                                # No clause structure at all: fall back to the
                                # non-clause rules.
                                for rule in (nonClause.what_whom1, nonClause.what_whom2,
                                             nonClause.whose, nonClause.howmany,
                                             nonClause.howmuch_1):
                                    try:
                                        questionsList += rule(segmentSets, j, ner)
                                    except Exception:
                                        pass

        questionsList.append('\n')
        return questionsList
    def DisNormal(self, questions):
        print("\n")
        print("------X------")
        print("Start output:\n")

        count = 0

        for i in range(len(questions)):
            count = count + 1
            print("Q-%02d: %s" % (count, questions[i]))

        print("")
        print("End Output")
        print("-----X-----\n\n")

    # AQG: display the validated questions and write them to a file
    def display(self, questions):
        print("\n")
        print("------X------")
        print("Start output:\n")

        count = 0
        out = ""
        for i in range(len(questions)):
            if len(questions[i]) >= 3:
                if questionValidation.hNvalidation(questions[i]) == 1:
                    # Keep only questions that start with "Wh", "Ho" or "Ha".
                    if ((questions[i][0] == 'W' and questions[i][1] == 'h') or
                            (questions[i][0] == 'H' and questions[i][1] == 'o') or
                            (questions[i][0] == 'H' and questions[i][1] == 'a')):
                        WH = questions[i].split(',')
                        if len(WH) == 1:
                            # Drop the trailing " ? " tokens and re-attach "?".
                            questions[i] = questions[i][:-3] + "?"
                            count = count + 1

                            if count < 10:
                                print("Q-0%d: %s" % (count, questions[i]))
                                out += "Q-0" + str(count) + ": " + questions[i] + "\n"
                            else:
                                print("Q-%d: %s" % (count, questions[i]))
                                out += "Q-" + str(count) + ": " + questions[i] + "\n"

        print("")
        print("End Output")
        print("-----X-----\n\n")

        output = "../DB/output.txt"  # output file path -- adjust as needed
        w = open(output, 'w+', encoding="utf8")
        w.write(out)
        w.close()
        return 0
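

# Example usage (illustrative; the exact questions produced depend on the
# installed NLTK and spaCy models):
#
#   aqg = AutomaticQuestionGenerator()
#   questions = aqg.aqgParse("She gave the book to Tom.")
#   aqg.display(questions)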
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/clause.py:
--------------------------------------------------------------------------------
import nltk
import identification
import nonClause

# NOTE: The POS tags inside the chunk grammars in this file were lost during
# extraction (everything between '<' and '>' was stripped out). The tags used
# below are plausible reconstructions that fit the surviving quantifiers and
# the way each chunk is consumed; the original patterns may have differed.


def whom_1(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<DT>?<JJ>*<VBG>*<NN.*|PRP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                if chunked[j][1][1] == 'PRP':
                    str2 = " to whom "
                else:
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                chunked[j][x][1] == "NN"):
                            break

                    for x1 in range(len(ner)):

                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " to whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " where "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " when "
                            else:
                                str2 = " to what "

                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)
                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")

                    st = str5 + str2 + str4 + str6 + str3
                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)

    return list3
def whom_2(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<DT>?<JJ>*<VBG>*<NN.*|PRP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                if chunked[j][1][1] == 'PRP':
                    str2 = " " + chunked[j][0][0] + " whom "
                else:
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                chunked[j][x][1] == "NN"):
                            break

                    for x1 in range(len(ner)):
                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " " + chunked[j][0][0] + " whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " where "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " when "
                            else:
                                str2 = " " + chunked[j][0][0] + " what "

                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)
                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")

                    st = str5 + str2 + str4 + str6 + str3
                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)

    return list3
def whom_3(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<DT>?<JJ>*<VBG>*<NN.*|PRP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                if chunked[j][1][1] == 'PRP':
                    str2 = " whom "
                else:
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                chunked[j][x][1] == "NN"):
                            break

                    for x1 in range(len(ner)):
                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " what "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " what time "
                            else:
                                str2 = " what "

                strx = nonClause.get_chunk(chunked[j])
                tok = nltk.word_tokenize(strx)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<NN.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                strx = nonClause.get_chunk(chunked1[0])

                str1 += strx

                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)

                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")

                    st = str5 + str2 + str4 + str6 + str3
                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)

    return list3
def whose(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<PRP$>*<NN.*>+<POS>*<JJ>*<NN.*>+<IN>*<NN.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for i in range(len(chunked)):
            if i in list1:
                str1 = ""
                str3 = ""
                str2 = ""
                for k in range(i):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                str1 += " whose "

                for k in range(i + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                if chunked[i][1][1] == 'POS':
                    for k in range(2, len(chunked[i])):
                        str2 += (chunked[i][k][0] + " ")

                if chunked[i][0][1] == 'PRP$':
                    for k in range(1, len(chunked[i])):
                        str2 += (chunked[i][k][0] + " ")

                str2 = str1 + str2 + str3
                str4 = ""

                for l in range(0, len(segment_set)):
                    if l < num:
                        str4 += (segment_set[l] + ",")
                    if l > num:
                        str2 += ("," + segment_set[l])
                str2 = str4 + str2
                str2 += '?'
                str2 = identification.postprocess(str2)
                # str2 = 'Q.' + str2
                list3.append(str2)

    return list3
def what_to_do(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<VB.*>+<TO>+<VB>?<DT>*<JJ>*<NN.*>*}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                # Everything in the chunk before its object noun phrase
                # (e.g. "decided to read ") becomes the "to do" part.
                ls = nonClause.get_chunk(chunked[j])
                tok = nltk.word_tokenize(ls)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<DT>?<JJ>*<VBG>*<NN.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked2 = chunkparser.parse(tag)
                lis = identification.chunk_search(ls, chunked2)
                if len(lis) != 0:
                    x = lis[len(lis) - 1]
                    ls1 = nonClause.get_chunk(chunked2[x])
                    index = ls.find(ls1)
                    str2 = " " + ls[0:index]
                else:
                    str2 = " to do "

                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)
                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")

                    tr = " what "
                    if chunked[j][1][1] == 'PRP':
                        tr = " whom "
                    else:
                        for x in range(len(chunked[j])):
                            if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                    chunked[j][x][1] == "NN"):
                                break

                        for x1 in range(len(ner)):
                            if ner[x1][0] == chunked[j][x][0]:
                                if ner[x1][1] == "PERSON":
                                    tr = " whom "
                                elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                    tr = " where "
                                elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                    tr = " when "
                                else:
                                    tr = " what "

                    st = str5 + tr + str4 + str2 + str6 + str3
                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)

    return list3
def who(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(list1)):
            m = list1[j]
            str1 = ""
            for k in range(m + 1, len(chunked)):
                if k in list1:
                    str1 += nonClause.get_chunk(chunked[k])
                else:
                    str1 += (chunked[k][0] + " ")

            str2 = nonClause.get_chunk(chunked[m])
            tok = nltk.word_tokenize(str2)
            tag = nltk.pos_tag(tok)

            for m11 in range(len(tag)):
                if tag[m11][1] == 'NNP' or tag[m11][1] == 'NNPS' or tag[m11][1] == 'NNS' or tag[m11][1] == 'NN':
                    break
            s11 = ' who '
            for m12 in range(len(ner)):
                if ner[m12][0] == tag[m11][0]:
                    if ner[m12][1] == 'LOC':
                        s11 = ' which place '
                    elif ner[m12][1] == 'ORG':
                        s11 = ' who '
                    elif ner[m12][1] == 'DATE' or ner[m12][1] == 'TIME':
                        s11 = ' what time '
                    else:
                        s11 = ' who '

            gram = r"""chunk:{<JJ>*<NN.*>+}"""
            chunkparser = nltk.RegexpParser(gram)
            chunked1 = chunkparser.parse(tag)

            list2 = identification.chunk_search(str2, chunked1)
            if len(list2) != 0:
                str2 = nonClause.get_chunk(chunked1[list2[0]])
                str2 = s11 + str2
                for k in range(list2[0] + 1, len(chunked1)):
                    if k in list2:
                        str2 += nonClause.get_chunk(chunked1[k])
                    else:
                        str2 += (chunked1[k][0] + " ")
                str2 += (" " + str1)

                tok_1 = nltk.word_tokenize(str2)
                str2 = ""
                for h in range(len(tok_1)):
                    if tok_1[h] == "am":
                        str2 += " is "
                    else:
                        str2 += (tok_1[h] + " ")

                for l in range(num + 1, len(segment_set)):
                    str2 += ("," + segment_set[l])
                str2 += '?'

                str2 = identification.postprocess(str2)
                # str2 = 'Q.' + str2
                list3.append(str2)

    return list3
def howmuch_2(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<\$><CD>*<NN.*>+<IN>?<NN.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(list1)):
            m = list1[j]
            str1 = ""
            for k in range(m + 1, len(chunked)):
                if k in list1:
                    str1 += nonClause.get_chunk(chunked[k])
                else:
                    str1 += (chunked[k][0] + " ")

            str2 = nonClause.get_chunk(chunked[m])
            tok = nltk.word_tokenize(str2)
            tag = nltk.pos_tag(tok)
            gram = r"""chunk:{<JJ>*<NN.*>+}"""
            chunkparser = nltk.RegexpParser(gram)
            chunked1 = chunkparser.parse(tag)
            s11 = ' how much '

            list2 = identification.chunk_search(str2, chunked1)
            if len(list2) != 0:
                str2 = nonClause.get_chunk(chunked1[list2[0]])
                str2 = s11 + str2
                for k in range(list2[0] + 1, len(chunked1)):
                    if k in list2:
                        str2 += nonClause.get_chunk(chunked1[k])
                    else:
                        str2 += (chunked1[k][0] + " ")
                str2 += (" " + str1)

                tok_1 = nltk.word_tokenize(str2)
                str2 = ""
                for h in range(len(tok_1)):
                    if tok_1[h] == "am":
                        str2 += " is "
                    else:
                        str2 += (tok_1[h] + " ")

                for l in range(num + 1, len(segment_set)):
                    str2 += ("," + segment_set[l])
                str2 += '?'

                str2 = identification.postprocess(str2)
                # str2 = 'Q.' + str2
                list3.append(str2)

    return list3
def howmuch_1(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<\$><CD>?<CD>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                str2 = ' ' + chunked[j][0][0] + ' how much '
                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)
                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")

                    st = str5 + str2 + str4 + str6 + str3
                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)

    return list3
def howmuch_3(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<DT>?<NN.*>+<VB.*>?<IN>?<\$><CD>*<CD>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                # Keep the part of the chunk before the dollar amount and ask
                # "how much" about the amount itself.
                strx = nonClause.get_chunk(chunked[j])
                tok = nltk.word_tokenize(strx)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<DT>?<NN.*>+<VB.*>?<IN>?}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                strx = nonClause.get_chunk(chunked1[0])
                str1 += (" " + strx)

                str2 = ' how much '

                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)

                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")
                    st = str5 + str2 + str4 + str6 + str3

                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)
    return list3
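

# Example (illustrative): each rule receives the comma-separated segments of
# one sentence, the index of the segment to transform, and the token/label
# pairs from nlpNER.nerTagger, and returns a list of candidate questions:
#
#   ner = [('She', 'O'), ('gave', 'O'), ('the', 'O'), ('book', 'O'),
#          ('to', 'O'), ('Tom', 'PERSON')]
#   whom_1(["She gave the book to Tom"], 0, ner)
#   # -> questions of the form "To whom did she give the book?"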
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/identification.py:
--------------------------------------------------------------------------------
import nltk

# NOTE: As in clause.py, the POS tags inside the chunk grammars here were
# stripped during extraction; the tags below are plausible reconstructions.


def chunk_search(segment, chunked):
    # Collect the indices of the subtrees that are real chunks; two-element
    # subtrees are kept only if their words actually occur in the segment.
    m = len(chunked)
    list1 = []
    for j in range(m):
        if len(chunked[j]) > 2 or len(chunked[j]) == 1:
            list1.append(j)
        if len(chunked[j]) == 2:
            try:
                str1 = chunked[j][0][0] + " " + chunked[j][1][0]
            except Exception:
                pass
            else:
                if str1 in segment:
                    list1.append(j)
    return list1


def segment_identify(sen):
    segment_set = sen.split(",")
    return segment_set


def clause_identify(segment):
    # A segment counts as a clause if it contains a subject + verb chunk.
    tok = nltk.word_tokenize(segment)
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<VB.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    flag = 0
    for j in range(len(chunked)):
        if len(chunked[j]) > 2:
            flag = 1
        if len(chunked[j]) == 2:
            try:
                str1 = chunked[j][0][0] + " " + chunked[j][1][0]
            except Exception:
                pass
            else:
                if str1 in segment:
                    flag = 1
        if flag == 1:
            break

    return flag
def verbphrase_identify(clause):
    tok = nltk.word_tokenize(clause)
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)
    str1 = ""
    str2 = ""
    str3 = ""
    list1 = chunk_search(clause, chunked)
    if len(list1) != 0:
        m = list1[len(list1) - 1]
        for j in range(len(chunked[m])):
            str1 += chunked[m][j][0]
            str1 += " "

        tok1 = nltk.word_tokenize(str1)
        tag1 = nltk.pos_tag(tok1)
        gram1 = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*}"""
        chunkparser1 = nltk.RegexpParser(gram1)
        chunked1 = chunkparser1.parse(tag1)

        list2 = chunk_search(str1, chunked1)
        if len(list2) != 0:

            m = list2[0]
            for j in range(len(chunked1[m])):
                str2 += (chunked1[m][j][0] + " ")

            tok1 = nltk.word_tokenize(str1)
            tag1 = nltk.pos_tag(tok1)
            gram1 = r"""chunk:{<VB.*>+}"""
            chunkparser1 = nltk.RegexpParser(gram1)
            chunked2 = chunkparser1.parse(tag1)

            list3 = chunk_search(str1, chunked2)
            if len(list3) != 0:

                m = list3[0]
                for j in range(len(chunked2[m])):
                    str3 += (chunked2[m][j][0] + " ")

    X = ""
    str4 = ""
    st = nltk.word_tokenize(str3)
    if len(st) > 1:
        X = st[0]
        s = ""
        for k in range(1, len(st)):
            s += st[k]
            s += " "
        str3 = s
        str4 = X + " " + str2 + str3

    if len(st) == 1:
        # Pick the right auxiliary for a single main verb.
        tag1 = nltk.pos_tag(st)
        if tag1[0][0] != 'are' and tag1[0][0] != 'were' and tag1[0][0] != 'is' and tag1[0][0] != 'am':
            if tag1[0][1] == 'VB' or tag1[0][1] == 'VBP':
                X = 'do'
            if tag1[0][1] == 'VBD' or tag1[0][1] == 'VBN':
                X = 'did'
            if tag1[0][1] == 'VBZ':
                X = 'does'
            str4 = X + " " + str2 + str3
        if (tag1[0][0] == 'are' or tag1[0][0] == 'were' or tag1[0][0] == 'is' or tag1[0][0] == 'am'):
            str4 = tag1[0][0] + " " + str2

    return str4
def subjectphrase_search(segment_set, num):
    # Walk backwards through the earlier segments looking for a usable
    # subject phrase.
    str2 = ""
    for j in range(num - 1, -1, -1):
        str1 = ""
        flag = 0
        tok = nltk.word_tokenize(segment_set[j])
        tag = nltk.pos_tag(tok)
        gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
        chunkparser = nltk.RegexpParser(gram)
        chunked = chunkparser.parse(tag)

        list1 = chunk_search(segment_set[j], chunked)
        if len(list1) != 0:
            m = list1[len(list1) - 1]
            for k in range(len(chunked[m])):
                str1 += chunked[m][k][0]
                str1 += " "

            tok1 = nltk.word_tokenize(str1)
            tag1 = nltk.pos_tag(tok1)
            gram1 = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+}"""
            chunkparser1 = nltk.RegexpParser(gram1)
            chunked1 = chunkparser1.parse(tag1)

            list2 = chunk_search(str1, chunked1)
            if len(list2) != 0:
                m = list2[len(list2) - 1]
                for k in range(len(chunked1[m])):
                    str2 += (chunked1[m][k][0] + " ")
                flag = 1
        if flag == 0:
            tok1 = nltk.word_tokenize(segment_set[j])
            tag1 = nltk.pos_tag(tok1)
            gram1 = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+}"""
            chunkparser1 = nltk.RegexpParser(gram1)
            chunked1 = chunkparser1.parse(tag1)

            list2 = chunk_search(str1, chunked1)
            st = nltk.word_tokenize(segment_set[j])
            if len(chunked1[list2[0]]) == len(st):
                str2 = segment_set[j]
                flag = 1

        if flag == 1:
            break

    return str2
def postprocess(string):
    # Capitalize the question, keep proper nouns capitalized, and flip
    # first/second person pronouns so the question addresses the reader.
    tok = nltk.word_tokenize(string)
    tag = nltk.pos_tag(tok)

    str1 = tok[0].capitalize()
    str1 += " "
    if len(tok) != 0:
        for i in range(1, len(tok)):
            if tag[i][1] == "NNP":
                str1 += tok[i].capitalize()
                str1 += " "
            else:
                str1 += tok[i].lower()
                str1 += " "
    tok = nltk.word_tokenize(str1)
    str1 = ""
    for i in range(len(tok)):
        if tok[i] == "i" or tok[i] == "we":
            str1 += "you"
            str1 += " "
        elif tok[i] == "my" or tok[i] == "our":
            str1 += "your"
            str1 += " "
        elif tok[i] == "your":
            str1 += "my"
            str1 += " "
        elif tok[i] == "you":
            if i - 1 >= 0:
                to = nltk.word_tokenize(tok[i - 1])
                ta = nltk.pos_tag(to)
                if ta[0][1] == 'IN':
                    str1 += "me"
                    str1 += " "
                else:
                    str1 += "i"
                    str1 += " "
            else:
                str1 += "i "

        elif tok[i] == "am":
            str1 += "are"
            str1 += " "
        else:
            str1 += tok[i]
            str1 += " "

    return str1
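

# Example (illustrative): postprocess() capitalizes the question and flips
# first/second person so the question addresses the reader:
#
#   postprocess("she said i am happy?")
#   # -> "She said you are happy ? " (tokens are re-joined with single spaces)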
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/main.py:
--------------------------------------------------------------------------------
import aqgFunction


# Main Function
def main():
    # Create AQG object
    aqg = aqgFunction.AutomaticQuestionGenerator()

    inputTextPath = "../DB/db.txt"  # input file path -- adjust as needed
    readFile = open(inputTextPath, 'r+', encoding="utf8")
    # readFile = open(inputTextPath, 'r+', encoding="utf8", errors='ignore')

    inputText = readFile.read()
    # inputText = '''I am Dipta. I love coding. I build my career with this.'''

    questionList = aqg.aqgParse(inputText)
    aqg.display(questionList)

    # aqg.DisNormal(questionList)

    return 0


# Call Main Function
if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/nlpNER.py:
--------------------------------------------------------------------------------
import spacy


def nerTagger(nlp, text):
    # Pair every token with the label of a matching named entity,
    # or with 'O' when the token is not part of any entity.
    doc = nlp(text)

    finalList = []
    for word in doc:
        found = False
        for ent in doc.ents:
            if ent.text == word.text:
                finalList.append((word.text, ent.label_))
                found = True
        if not found:
            finalList.append((word.text, 'O'))

    return finalList
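

# Example (illustrative; entity labels depend on the loaded model):
#
#   nlp = spacy.load('en_core_web_md')
#   nerTagger(nlp, "Goldie walked alone at dusk.")
#   # -> [('Goldie', 'PERSON'), ('walked', 'O'), ('alone', 'O'),
#   #     ('at', 'O'), ('dusk', 'TIME'), ('.', 'O')]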
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/nonClause.py:
--------------------------------------------------------------------------------
import nltk
import identification

# NOTE: As in clause.py, the POS tags inside the chunk grammars here were
# stripped during extraction; the tags below are plausible reconstructions.


def get_chunk(chunked):
    str1 = ""
    for j in range(len(chunked)):
        str1 += (chunked[j][0] + " ")
    return str1


def what_whom1(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<DT>?<JJ>*<VBG>*<NN.*|PRP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                str2 = " to what "
                if chunked[j][1][1] == 'PRP':
                    str2 = " to whom "
                else:
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                chunked[j][x][1] == "NN"):
                            break

                    for x1 in range(len(ner)):
                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " to whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " where "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " when "
                            else:
                                str2 = " to what "

                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                # str4 = 'Q.' + str4
                s.append(str4)
    return s
def what_whom2(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<DT>?<JJ>*<VBG>*<NN.*|PRP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)
    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                str2 = " " + chunked[j][0][0] + " what "
                if chunked[j][1][1] == 'PRP':
                    str2 = " " + chunked[j][0][0] + " whom "
                else:
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                chunked[j][x][1] == "NN"):
                            break

                    for x1 in range(len(ner)):
                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " " + chunked[j][0][0] + " whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " where "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " when "
                            else:
                                str2 = " " + chunked[j][0][0] + " what "

                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                # str4 = 'Q.' + str4
                s.append(str4)
    return s


def whose(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<PRP$>*<NN.*>+<POS>*<JJ>*<NN.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            str2 = " whose "
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")
                if chunked[j][1][1] == 'POS':
                    for k in range(2, len(chunked[j])):
                        str2 += (chunked[j][k][0] + " ")
                else:
                    for k in range(1, len(chunked[j])):
                        str2 += (chunked[j][k][0] + " ")

                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                # str4 = 'Q.' + str4
                s.append(str4)
    return s
def howmany(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<DT>?<CD>+<JJ>?<VBG>?<NN.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            str2 = " how many "
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                # Replace the number itself with "how many <counted thing>".
                st = get_chunk(chunked[j])
                tok = nltk.word_tokenize(st)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<JJ>?<VBG>?<NN.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(st, chunked1)
                z = ""

                for k in range(len(chunked1)):
                    if k in list2:
                        z += get_chunk(chunked1[k])

                str4 = str1 + str2 + z + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                # str4 = 'Q.' + str4
                s.append(str4)
    return s


def howmuch_1(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<\$><CD>?<CD>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            str2 = " how much "
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                str2 = chunked[j][0][0] + str2
                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                # str4 = 'Q.' + str4
                s.append(str4)
    return s
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/questionValidation.py:
--------------------------------------------------------------------------------
# Question Validation


def hNvalidation(sentence):
    # Reject questions that still contain the literal " h N " fragment
    # left behind by the chunk-assembly step.
    flag = 1
    if ' h N ' in sentence:
        flag = 0
    return flag
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Automatic Question Generator
Automatic Question Generator from Text


Prerequisites
-------------
```
- Python 3.5+
- NLTK
- SpaCy
- NumPy
```
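
### Setup

The generator needs NLTK tokenizer/tagger data and a spaCy model. One way to
install them (assuming the pinned versions in `requirements.txt`):

```
pip install -r requirements.txt
python -m spacy download en_core_web_md
python -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger')"
```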
### Quickstart (main.py)

#### Run with user input:
```python
inputText = '''My best friend and I have been studying in the same
school since kindergarten.'''
```

#### Run from a user text file:
```python
inputText = filePATH

# Like:
# inputText = "E:/EDU/project/input.txt"
```


### Example

#### Input:
```
My best friend and I have been studying in the same school since kindergarten. We have been classmates each year at
school. We share a very close bond and have a special friendship that we cherish and treasure. My friend is my
partner, sitting beside me in class. She is kind and helpful, and if I have any difficulties in understanding any
topic in my studies, or in completing my homework or school project, she helps me. She is brilliant in mathematics
and the sciences, while I am good at English. So we both help each other in whatever way possible. She helps me
without ever belittling me. I greatly appreciate the quality in her. She does not make me feel obliged.
```

#### Output:
```
Q-01: Have you been classmates each year?
Q-02: Have you been at school?
Q-03: Who have been classmates each year at school?
Q-04: Who cherish and treasure?
Q-05: Who helps me?
Q-06: Who is good at English?
Q-07: Who helps me without ever belittling me?
Q-08: Who ever belittling me?
Q-09: Who greatly appreciate the quality in her?
Q-10: Whom she does not make feel obliged?
```

## SETTINGS OF MAIN FUNCTION:

### main.py
```python
# Main Function
def main():
    # Create AQG object
    aqg = aqgFunction.AutomaticQuestionGenerator()

    # Enter input text file PATH
    inputTextPath = "PATH: (Like:- E:/in.txt)"
    readFile = open(inputTextPath, 'r+')
    inputText = readFile.read()

    questionList = aqg.aqgParse(inputText)
    aqg.display(questionList)

    return 0


# Call Main Function
if __name__ == "__main__":
    main()

```
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
nltk==3.4
numpy==1.16.2
textblob==0.15.3
spacy==2.1.6
--------------------------------------------------------------------------------