├── AutomaticQuestionGenerator
│   ├── DB
│   │   ├── db.txt
│   │   └── db01.txt
│   ├── aqgFunction.py
│   ├── clause.py
│   ├── identification.py
│   ├── main.py
│   ├── nlpNER.py
│   ├── nonClause.py
│   └── questionValidation.py
├── README.md
└── requirements.txt

/AutomaticQuestionGenerator/DB/db.txt:
--------------------------------------------------------------------------------
Goldie did not care for jewels or gold. She loved walking barefoot in her simple clothes over grassy fields. She loved the feel of the wind in her hair, roses and bird songs, the light in the sky at dusk and dawn, the scent of wood smoke and lilacs. She often walked alone, wishing that her father would join her and learn to love the world as she did.
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/aqgFunction.py:
--------------------------------------------------------------------------------
import spacy
import clause
import nonClause
import identification
import questionValidation
from nlpNER import nerTagger


class AutomaticQuestionGenerator():
    # AQG: parse the input text and generate a list of candidate questions
    def aqgParse(self, sentence):

        # nlp = spacy.load("en")
        nlp = spacy.load('en_core_web_md')

        singleSentences = sentence.split(".")
        questionsList = []
        if len(singleSentences) != 0:
            for i in range(len(singleSentences)):
                segmentSets = singleSentences[i].split(",")

                ner = nerTagger(nlp, singleSentences[i])

                if len(segmentSets) != 0:
                    for j in range(len(segmentSets)):
                        try:
                            questionsList += clause.howmuch_2(segmentSets, j, ner)
                        except Exception:
                            pass

                        if identification.clause_identify(segmentSets[j]) == 1:
                            # The segment is a full clause: apply every
                            # clause-based question rule to it.
                            for rule in (clause.whom_1, clause.whom_2, clause.whom_3,
                                         clause.whose, clause.what_to_do, clause.who,
                                         clause.howmuch_1, clause.howmuch_3):
                                try:
                                    questionsList += rule(segmentSets, j, ner)
                                except Exception:
                                    pass
                        else:
                            s = ""
                            try:
                                s = identification.subjectphrase_search(segmentSets, j)
                            except Exception:
                                pass

                            if len(s) != 0:
                                # A subject phrase from an earlier segment completes
                                # this one, so retry the clause-based rules.
                                segmentSets[j] = s + segmentSets[j]
                                for rule in (clause.whom_1, clause.whom_2, clause.whom_3,
                                             clause.whose, clause.what_to_do, clause.who):
                                    try:
                                        questionsList += rule(segmentSets, j, ner)
                                    except Exception:
                                        pass
                            else:
                                # No clause structure at all: fall back to the
                                # non-clause rules.
                                for rule in (nonClause.what_whom1, nonClause.what_whom2,
                                             nonClause.whose, nonClause.howmany,
                                             nonClause.howmuch_1):
                                    try:
                                        questionsList += rule(segmentSets, j, ner)
                                    except Exception:
                                        pass

        questionsList.append('\n')
        return questionsList
    def DisNormal(self, questions):
        print("\n")
        print("------X------")
        print("Start output:\n")

        count = 0

        for i in range(len(questions)):
            count = count + 1
            print("Q-%02d: %s" % (count, questions[i]))

        print("")
        print("End Output")
        print("-----X-----\n\n")

    # AQG: display the validated questions and write them to a file
    def display(self, questions):
        print("\n")
        print("------X------")
        print("Start output:\n")

        count = 0
        out = ""
        for i in range(len(questions)):
            if len(questions[i]) >= 3:
                if questionValidation.hNvalidation(questions[i]) == 1:
                    # Keep only questions that start with "Wh", "Ho" or "Ha".
                    if ((questions[i][0] == 'W' and questions[i][1] == 'h') or
                            (questions[i][0] == 'H' and questions[i][1] == 'o') or
                            (questions[i][0] == 'H' and questions[i][1] == 'a')):
                        WH = questions[i].split(',')
                        if len(WH) == 1:
                            # Drop the trailing " ? " tokens and re-attach "?".
                            questions[i] = questions[i][:-3] + "?"
                            count = count + 1

                            if count < 10:
                                print("Q-0%d: %s" % (count, questions[i]))
                                out += "Q-0" + str(count) + ": " + questions[i] + "\n"
                            else:
                                print("Q-%d: %s" % (count, questions[i]))
                                out += "Q-" + str(count) + ": " + questions[i] + "\n"

        print("")
        print("End Output")
        print("-----X-----\n\n")

        output = "../DB/output.txt"  # output file path -- adjust as needed
        w = open(output, 'w+', encoding="utf8")
        w.write(out)
        w.close()
        return 0
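

# Example usage (illustrative; the exact questions produced depend on the
# installed NLTK and spaCy models):
#
#   aqg = AutomaticQuestionGenerator()
#   questions = aqg.aqgParse("She gave the book to Tom.")
#   aqg.display(questions)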
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/clause.py:
--------------------------------------------------------------------------------
import nltk
import identification
import nonClause

# NOTE: The POS tags inside the chunk grammars in this file were lost during
# extraction (everything between '<' and '>' was stripped out). The tags used
# below are plausible reconstructions that fit the surviving quantifiers and
# the way each chunk is consumed; the original patterns may have differed.


def whom_1(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<DT>?<JJ>*<VBG>*<NN.*|PRP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                if chunked[j][1][1] == 'PRP':
                    str2 = " to whom "
                else:
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                chunked[j][x][1] == "NN"):
                            break

                    for x1 in range(len(ner)):

                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " to whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " where "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " when "
                            else:
                                str2 = " to what "

                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)
                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")

                    st = str5 + str2 + str4 + str6 + str3
                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)

    return list3
def whom_2(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<DT>?<JJ>*<VBG>*<NN.*|PRP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                if chunked[j][1][1] == 'PRP':
                    str2 = " " + chunked[j][0][0] + " whom "
                else:
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                chunked[j][x][1] == "NN"):
                            break

                    for x1 in range(len(ner)):
                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " " + chunked[j][0][0] + " whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " where "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " when "
                            else:
                                str2 = " " + chunked[j][0][0] + " what "

                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)
                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")

                    st = str5 + str2 + str4 + str6 + str3
                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)

    return list3
def whom_3(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<DT>?<JJ>*<VBG>*<NN.*|PRP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                if chunked[j][1][1] == 'PRP':
                    str2 = " whom "
                else:
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                chunked[j][x][1] == "NN"):
                            break

                    for x1 in range(len(ner)):
                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " what "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " what time "
                            else:
                                str2 = " what "

                strx = nonClause.get_chunk(chunked[j])
                tok = nltk.word_tokenize(strx)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<NN.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                strx = nonClause.get_chunk(chunked1[0])

                str1 += strx

                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)

                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")

                    st = str5 + str2 + str4 + str6 + str3
                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)

    return list3
def whose(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<PRP$>*<NN.*>+<POS>*<JJ>*<NN.*>+<IN>*<NN.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for i in range(len(chunked)):
            if i in list1:
                str1 = ""
                str3 = ""
                str2 = ""
                for k in range(i):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                str1 += " whose "

                for k in range(i + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                if chunked[i][1][1] == 'POS':
                    for k in range(2, len(chunked[i])):
                        str2 += (chunked[i][k][0] + " ")

                if chunked[i][0][1] == 'PRP$':
                    for k in range(1, len(chunked[i])):
                        str2 += (chunked[i][k][0] + " ")

                str2 = str1 + str2 + str3
                str4 = ""

                for l in range(0, len(segment_set)):
                    if l < num:
                        str4 += (segment_set[l] + ",")
                    if l > num:
                        str2 += ("," + segment_set[l])
                str2 = str4 + str2
                str2 += '?'
                str2 = identification.postprocess(str2)
                # str2 = 'Q.' + str2
                list3.append(str2)

    return list3
def what_to_do(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<VB.*>+<TO>+<VB>?<DT>*<JJ>*<NN.*>*}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                # Everything in the chunk before its object noun phrase
                # (e.g. "decided to read ") becomes the "to do" part.
                ls = nonClause.get_chunk(chunked[j])
                tok = nltk.word_tokenize(ls)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<DT>?<JJ>*<VBG>*<NN.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked2 = chunkparser.parse(tag)
                lis = identification.chunk_search(ls, chunked2)
                if len(lis) != 0:
                    x = lis[len(lis) - 1]
                    ls1 = nonClause.get_chunk(chunked2[x])
                    index = ls.find(ls1)
                    str2 = " " + ls[0:index]
                else:
                    str2 = " to do "

                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)
                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")

                    tr = " what "
                    if chunked[j][1][1] == 'PRP':
                        tr = " whom "
                    else:
                        for x in range(len(chunked[j])):
                            if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                    chunked[j][x][1] == "NN"):
                                break

                        for x1 in range(len(ner)):
                            if ner[x1][0] == chunked[j][x][0]:
                                if ner[x1][1] == "PERSON":
                                    tr = " whom "
                                elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                    tr = " where "
                                elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                    tr = " when "
                                else:
                                    tr = " what "

                    st = str5 + tr + str4 + str2 + str6 + str3
                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)

    return list3
def who(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(list1)):
            m = list1[j]
            str1 = ""
            for k in range(m + 1, len(chunked)):
                if k in list1:
                    str1 += nonClause.get_chunk(chunked[k])
                else:
                    str1 += (chunked[k][0] + " ")

            str2 = nonClause.get_chunk(chunked[m])
            tok = nltk.word_tokenize(str2)
            tag = nltk.pos_tag(tok)

            for m11 in range(len(tag)):
                if tag[m11][1] == 'NNP' or tag[m11][1] == 'NNPS' or tag[m11][1] == 'NNS' or tag[m11][1] == 'NN':
                    break
            s11 = ' who '
            for m12 in range(len(ner)):
                if ner[m12][0] == tag[m11][0]:
                    if ner[m12][1] == 'LOC':
                        s11 = ' which place '
                    elif ner[m12][1] == 'ORG':
                        s11 = ' who '
                    elif ner[m12][1] == 'DATE' or ner[m12][1] == 'TIME':
                        s11 = ' what time '
                    else:
                        s11 = ' who '

            gram = r"""chunk:{<JJ>*<NN.*>+}"""
            chunkparser = nltk.RegexpParser(gram)
            chunked1 = chunkparser.parse(tag)

            list2 = identification.chunk_search(str2, chunked1)
            if len(list2) != 0:
                str2 = nonClause.get_chunk(chunked1[list2[0]])
                str2 = s11 + str2
                for k in range(list2[0] + 1, len(chunked1)):
                    if k in list2:
                        str2 += nonClause.get_chunk(chunked1[k])
                    else:
                        str2 += (chunked1[k][0] + " ")
                str2 += (" " + str1)

                tok_1 = nltk.word_tokenize(str2)
                str2 = ""
                for h in range(len(tok_1)):
                    if tok_1[h] == "am":
                        str2 += " is "
                    else:
                        str2 += (tok_1[h] + " ")

                for l in range(num + 1, len(segment_set)):
                    str2 += ("," + segment_set[l])
                str2 += '?'

                str2 = identification.postprocess(str2)
                # str2 = 'Q.' + str2
                list3.append(str2)

    return list3
def howmuch_2(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<\$><CD>*<NN.*>+<IN>?<NN.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(list1)):
            m = list1[j]
            str1 = ""
            for k in range(m + 1, len(chunked)):
                if k in list1:
                    str1 += nonClause.get_chunk(chunked[k])
                else:
                    str1 += (chunked[k][0] + " ")

            str2 = nonClause.get_chunk(chunked[m])
            tok = nltk.word_tokenize(str2)
            tag = nltk.pos_tag(tok)
            gram = r"""chunk:{<JJ>*<NN.*>+}"""
            chunkparser = nltk.RegexpParser(gram)
            chunked1 = chunkparser.parse(tag)
            s11 = ' how much '

            list2 = identification.chunk_search(str2, chunked1)
            if len(list2) != 0:
                str2 = nonClause.get_chunk(chunked1[list2[0]])
                str2 = s11 + str2
                for k in range(list2[0] + 1, len(chunked1)):
                    if k in list2:
                        str2 += nonClause.get_chunk(chunked1[k])
                    else:
                        str2 += (chunked1[k][0] + " ")
                str2 += (" " + str1)

                tok_1 = nltk.word_tokenize(str2)
                str2 = ""
                for h in range(len(tok_1)):
                    if tok_1[h] == "am":
                        str2 += " is "
                    else:
                        str2 += (tok_1[h] + " ")

                for l in range(num + 1, len(segment_set)):
                    str2 += ("," + segment_set[l])
                str2 += '?'

                str2 = identification.postprocess(str2)
                # str2 = 'Q.' + str2
                list3.append(str2)

    return list3
def howmuch_1(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<\$><CD>?<CD>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                str2 = ' ' + chunked[j][0][0] + ' how much '
                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)
                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")

                    st = str5 + str2 + str4 + str6 + str3
                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)

    return list3
def howmuch_3(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<DT>?<NN.*>+<VB.*>?<IN>?<\$><CD>*<CD>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    list3 = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str2 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += nonClause.get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")

                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += nonClause.get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                # Keep the part of the chunk before the dollar amount and ask
                # "how much" about the amount itself.
                strx = nonClause.get_chunk(chunked[j])
                tok = nltk.word_tokenize(strx)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<DT>?<NN.*>+<VB.*>?<IN>?}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                strx = nonClause.get_chunk(chunked1[0])
                str1 += (" " + strx)

                str2 = ' how much '

                tok = nltk.word_tokenize(str1)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(str1, chunked1)

                if len(list2) != 0:
                    m = list2[len(list2) - 1]

                    str4 = nonClause.get_chunk(chunked1[m])
                    str4 = identification.verbphrase_identify(str4)
                    str5 = ""
                    str6 = ""

                    for k in range(m):
                        if k in list2:
                            str5 += nonClause.get_chunk(chunked1[k])
                        else:
                            str5 += (chunked1[k][0] + " ")

                    for k in range(m + 1, len(chunked1)):
                        if k in list2:
                            str6 += nonClause.get_chunk(chunked1[k])
                        else:
                            str6 += (chunked1[k][0] + " ")
                    st = str5 + str2 + str4 + str6 + str3

                    for l in range(num + 1, len(segment_set)):
                        st += ("," + segment_set[l])
                    st += '?'
                    st = identification.postprocess(st)
                    # st = 'Q.' + st
                    list3.append(st)
    return list3
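

# Example (illustrative): each rule receives the comma-separated segments of
# one sentence, the index of the segment to transform, and the token/label
# pairs from nlpNER.nerTagger, and returns a list of candidate questions:
#
#   ner = [('She', 'O'), ('gave', 'O'), ('the', 'O'), ('book', 'O'),
#          ('to', 'O'), ('Tom', 'PERSON')]
#   whom_1(["She gave the book to Tom"], 0, ner)
#   # -> questions of the form "To whom did she give the book?"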
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/identification.py:
--------------------------------------------------------------------------------
import nltk

# NOTE: As in clause.py, the POS tags inside the chunk grammars here were
# stripped during extraction; the tags below are plausible reconstructions.


def chunk_search(segment, chunked):
    # Collect the indices of the subtrees that are real chunks; two-element
    # subtrees are kept only if their words actually occur in the segment.
    m = len(chunked)
    list1 = []
    for j in range(m):
        if len(chunked[j]) > 2 or len(chunked[j]) == 1:
            list1.append(j)
        if len(chunked[j]) == 2:
            try:
                str1 = chunked[j][0][0] + " " + chunked[j][1][0]
            except Exception:
                pass
            else:
                if str1 in segment:
                    list1.append(j)
    return list1


def segment_identify(sen):
    segment_set = sen.split(",")
    return segment_set


def clause_identify(segment):
    # A segment counts as a clause if it contains a subject + verb chunk.
    tok = nltk.word_tokenize(segment)
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<VB.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    flag = 0
    for j in range(len(chunked)):
        if len(chunked[j]) > 2:
            flag = 1
        if len(chunked[j]) == 2:
            try:
                str1 = chunked[j][0][0] + " " + chunked[j][1][0]
            except Exception:
                pass
            else:
                if str1 in segment:
                    flag = 1
        if flag == 1:
            break

    return flag
def verbphrase_identify(clause):
    tok = nltk.word_tokenize(clause)
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)
    str1 = ""
    str2 = ""
    str3 = ""
    list1 = chunk_search(clause, chunked)
    if len(list1) != 0:
        m = list1[len(list1) - 1]
        for j in range(len(chunked[m])):
            str1 += chunked[m][j][0]
            str1 += " "

        tok1 = nltk.word_tokenize(str1)
        tag1 = nltk.pos_tag(tok1)
        gram1 = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*}"""
        chunkparser1 = nltk.RegexpParser(gram1)
        chunked1 = chunkparser1.parse(tag1)

        list2 = chunk_search(str1, chunked1)
        if len(list2) != 0:

            m = list2[0]
            for j in range(len(chunked1[m])):
                str2 += (chunked1[m][j][0] + " ")

            tok1 = nltk.word_tokenize(str1)
            tag1 = nltk.pos_tag(tok1)
            gram1 = r"""chunk:{<VB.*>+}"""
            chunkparser1 = nltk.RegexpParser(gram1)
            chunked2 = chunkparser1.parse(tag1)

            list3 = chunk_search(str1, chunked2)
            if len(list3) != 0:

                m = list3[0]
                for j in range(len(chunked2[m])):
                    str3 += (chunked2[m][j][0] + " ")

    X = ""
    str4 = ""
    st = nltk.word_tokenize(str3)
    if len(st) > 1:
        X = st[0]
        s = ""
        for k in range(1, len(st)):
            s += st[k]
            s += " "
        str3 = s
        str4 = X + " " + str2 + str3

    if len(st) == 1:
        # Pick the right auxiliary for a single main verb.
        tag1 = nltk.pos_tag(st)
        if tag1[0][0] != 'are' and tag1[0][0] != 'were' and tag1[0][0] != 'is' and tag1[0][0] != 'am':
            if tag1[0][1] == 'VB' or tag1[0][1] == 'VBP':
                X = 'do'
            if tag1[0][1] == 'VBD' or tag1[0][1] == 'VBN':
                X = 'did'
            if tag1[0][1] == 'VBZ':
                X = 'does'
            str4 = X + " " + str2 + str3
        if (tag1[0][0] == 'are' or tag1[0][0] == 'were' or tag1[0][0] == 'is' or tag1[0][0] == 'am'):
            str4 = tag1[0][0] + " " + str2

    return str4
def subjectphrase_search(segment_set, num):
    # Walk backwards through the earlier segments looking for a usable
    # subject phrase.
    str2 = ""
    for j in range(num - 1, -1, -1):
        str1 = ""
        flag = 0
        tok = nltk.word_tokenize(segment_set[j])
        tag = nltk.pos_tag(tok)
        gram = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+<RB>*<VB.*>+}"""
        chunkparser = nltk.RegexpParser(gram)
        chunked = chunkparser.parse(tag)

        list1 = chunk_search(segment_set[j], chunked)
        if len(list1) != 0:
            m = list1[len(list1) - 1]
            for k in range(len(chunked[m])):
                str1 += chunked[m][k][0]
                str1 += " "

            tok1 = nltk.word_tokenize(str1)
            tag1 = nltk.pos_tag(tok1)
            gram1 = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+}"""
            chunkparser1 = nltk.RegexpParser(gram1)
            chunked1 = chunkparser1.parse(tag1)

            list2 = chunk_search(str1, chunked1)
            if len(list2) != 0:
                m = list2[len(list2) - 1]
                for k in range(len(chunked1[m])):
                    str2 += (chunked1[m][k][0] + " ")
                flag = 1
        if flag == 0:
            tok1 = nltk.word_tokenize(segment_set[j])
            tag1 = nltk.pos_tag(tok1)
            gram1 = r"""chunk:{<PRP$>?<DT>?<JJ>*<NN.*|PRP>+}"""
            chunkparser1 = nltk.RegexpParser(gram1)
            chunked1 = chunkparser1.parse(tag1)

            list2 = chunk_search(str1, chunked1)
            st = nltk.word_tokenize(segment_set[j])
            if len(chunked1[list2[0]]) == len(st):
                str2 = segment_set[j]
                flag = 1

        if flag == 1:
            break

    return str2
def postprocess(string):
    # Capitalize the question, keep proper nouns capitalized, and flip
    # first/second person pronouns so the question addresses the reader.
    tok = nltk.word_tokenize(string)
    tag = nltk.pos_tag(tok)

    str1 = tok[0].capitalize()
    str1 += " "
    if len(tok) != 0:
        for i in range(1, len(tok)):
            if tag[i][1] == "NNP":
                str1 += tok[i].capitalize()
                str1 += " "
            else:
                str1 += tok[i].lower()
                str1 += " "
    tok = nltk.word_tokenize(str1)
    str1 = ""
    for i in range(len(tok)):
        if tok[i] == "i" or tok[i] == "we":
            str1 += "you"
            str1 += " "
        elif tok[i] == "my" or tok[i] == "our":
            str1 += "your"
            str1 += " "
        elif tok[i] == "your":
            str1 += "my"
            str1 += " "
        elif tok[i] == "you":
            if i - 1 >= 0:
                to = nltk.word_tokenize(tok[i - 1])
                ta = nltk.pos_tag(to)
                if ta[0][1] == 'IN':
                    str1 += "me"
                    str1 += " "
                else:
                    str1 += "i"
                    str1 += " "
            else:
                str1 += "i "

        elif tok[i] == "am":
            str1 += "are"
            str1 += " "
        else:
            str1 += tok[i]
            str1 += " "

    return str1
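

# Example (illustrative): postprocess() capitalizes the question and flips
# first/second person so the question addresses the reader:
#
#   postprocess("she said i am happy?")
#   # -> "She said you are happy ? " (tokens are re-joined with single spaces)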
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/main.py:
--------------------------------------------------------------------------------
import aqgFunction


# Main Function
def main():
    # Create AQG object
    aqg = aqgFunction.AutomaticQuestionGenerator()

    inputTextPath = "../DB/db.txt"  # input file path -- adjust as needed
    readFile = open(inputTextPath, 'r+', encoding="utf8")
    # readFile = open(inputTextPath, 'r+', encoding="utf8", errors='ignore')

    inputText = readFile.read()
    # inputText = '''I am Dipta. I love coding. I build my career with this.'''

    questionList = aqg.aqgParse(inputText)
    aqg.display(questionList)

    # aqg.DisNormal(questionList)

    return 0


# Call Main Function
if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/nlpNER.py:
--------------------------------------------------------------------------------
import spacy


def nerTagger(nlp, text):
    # Pair every token with the label of a matching named entity,
    # or with 'O' when the token is not part of any entity.
    doc = nlp(text)

    finalList = []
    for word in doc:
        found = False
        for ent in doc.ents:
            if ent.text == word.text:
                finalList.append((word.text, ent.label_))
                found = True
        if not found:
            finalList.append((word.text, 'O'))

    return finalList
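

# Example (illustrative; entity labels depend on the loaded model):
#
#   nlp = spacy.load('en_core_web_md')
#   nerTagger(nlp, "Goldie walked alone at dusk.")
#   # -> [('Goldie', 'PERSON'), ('walked', 'O'), ('alone', 'O'),
#   #     ('at', 'O'), ('dusk', 'TIME'), ('.', 'O')]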
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/nonClause.py:
--------------------------------------------------------------------------------
import nltk
import identification

# NOTE: As in clause.py, the POS tags inside the chunk grammars here were
# stripped during extraction; the tags below are plausible reconstructions.


def get_chunk(chunked):
    str1 = ""
    for j in range(len(chunked)):
        str1 += (chunked[j][0] + " ")
    return str1


def what_whom1(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<DT>?<JJ>*<VBG>*<NN.*|PRP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                str2 = " to what "
                if chunked[j][1][1] == 'PRP':
                    str2 = " to whom "
                else:
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                chunked[j][x][1] == "NN"):
                            break

                    for x1 in range(len(ner)):
                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " to whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " where "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " when "
                            else:
                                str2 = " to what "

                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                # str4 = 'Q.' + str4
                s.append(str4)
    return s
def what_whom2(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<DT>?<JJ>*<VBG>*<NN.*|PRP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)
    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                str2 = " " + chunked[j][0][0] + " what "
                if chunked[j][1][1] == 'PRP':
                    str2 = " " + chunked[j][0][0] + " whom "
                else:
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or chunked[j][x][1] == "NNS" or
                                chunked[j][x][1] == "NN"):
                            break

                    for x1 in range(len(ner)):
                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " " + chunked[j][0][0] + " whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " where "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " when "
                            else:
                                str2 = " " + chunked[j][0][0] + " what "

                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                # str4 = 'Q.' + str4
                s.append(str4)
    return s


def whose(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<PRP$>*<NN.*>+<POS>*<JJ>*<NN.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            str2 = " whose "
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")
                if chunked[j][1][1] == 'POS':
                    for k in range(2, len(chunked[j])):
                        str2 += (chunked[j][k][0] + " ")
                else:
                    for k in range(1, len(chunked[j])):
                        str2 += (chunked[j][k][0] + " ")

                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                # str4 = 'Q.' + str4
                s.append(str4)
    return s
def howmany(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<DT>?<CD>+<JJ>?<VBG>?<NN.*>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            str2 = " how many "
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                # Replace the number itself with "how many <counted thing>".
                st = get_chunk(chunked[j])
                tok = nltk.word_tokenize(st)
                tag = nltk.pos_tag(tok)
                gram = r"""chunk:{<JJ>?<VBG>?<NN.*>+}"""
                chunkparser = nltk.RegexpParser(gram)
                chunked1 = chunkparser.parse(tag)

                list2 = identification.chunk_search(st, chunked1)
                z = ""

                for k in range(len(chunked1)):
                    if k in list2:
                        z += get_chunk(chunked1[k])

                str4 = str1 + str2 + z + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                # str4 = 'Q.' + str4
                s.append(str4)
    return s


def howmuch_1(segment_set, num, ner):
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<\$><CD>?<CD>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)

    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []

    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            str2 = " how much "
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")

                str2 = chunked[j][0][0] + str2
                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                # str4 = 'Q.' + str4
                s.append(str4)
    return s
--------------------------------------------------------------------------------
/AutomaticQuestionGenerator/questionValidation.py:
--------------------------------------------------------------------------------
# Question Validation


def hNvalidation(sentence):
    # Reject questions that still contain the literal " h N " fragment
    # left behind by the chunk-assembly step.
    flag = 1
    if ' h N ' in sentence:
        flag = 0
    return flag
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Automatic Question Generator
Automatic Question Generator from Text


Prerequisites
-------------
```
- Python 3.5+
- NLTK
- SpaCy
- NumPy
```
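
### Setup

The generator needs NLTK tokenizer/tagger data and a spaCy model. One way to
install them (assuming the pinned versions in `requirements.txt`):

```
pip install -r requirements.txt
python -m spacy download en_core_web_md
python -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger')"
```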
### Quickstart (main.py)

#### Run with user input:
```python
inputText = '''My best friend and I have been studying in the same
school since kindergarten.'''
```

#### Run from a user text file:
```python
inputText = filePATH

# Like:
# inputText = "E:/EDU/project/input.txt"
```


### Example

#### Input:
```
My best friend and I have been studying in the same school since kindergarten. We have been classmates each year at
school. We share a very close bond and have a special friendship that we cherish and treasure. My friend is my
partner, sitting beside me in class. She is kind and helpful, and if I have any difficulties in understanding any
topic in my studies, or in completing my homework or school project, she helps me. She is brilliant in mathematics
and the sciences, while I am good at English. So we both help each other in whatever way possible. She helps me
without ever belittling me. I greatly appreciate the quality in her. She does not make me feel obliged.
```

#### Output:
```
Q-01: Have you been classmates each year?
Q-02: Have you been at school?
Q-03: Who have been classmates each year at school?
Q-04: Who cherish and treasure?
Q-05: Who helps me?
Q-06: Who is good at English?
Q-07: Who helps me without ever belittling me?
Q-08: Who ever belittling me?
Q-09: Who greatly appreciate the quality in her?
Q-10: Whom she does not make feel obliged?
```

## SETTINGS OF MAIN FUNCTION:

### main.py
```python
# Main Function
def main():
    # Create AQG object
    aqg = aqgFunction.AutomaticQuestionGenerator()

    # Enter input text file PATH
    inputTextPath = "PATH: (Like:- E:/in.txt)"
    readFile = open(inputTextPath, 'r+')
    inputText = readFile.read()

    questionList = aqg.aqgParse(inputText)
    aqg.display(questionList)

    return 0


# Call Main Function
if __name__ == "__main__":
    main()

```
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
nltk==3.4
numpy==1.16.2
textblob==0.15.3
spacy==2.1.6
--------------------------------------------------------------------------------