├── LICENSE
├── README.md
├── __init__.py
├── pymln.py
├── semantic
├── .DS_Store
├── MLN
│ ├── .DS_Store
│ ├── __init__.py
│ ├── __pycache__
│ │ └── __init__.cpython-36.pyc
│ └── src
│ │ ├── Argument.py
│ │ ├── Clust.py
│ │ ├── MLN.py
│ │ └── Part.py
├── Parse.py
├── __init__.py
├── __pycache__
│ ├── Agenda.cpython-36.pyc
│ ├── Argument.cpython-36.pyc
│ ├── Clust.cpython-36.pyc
│ ├── Executor.cpython-36.pyc
│ ├── MLN.cpython-36.pyc
│ ├── Parse.cpython-36.pyc
│ ├── Part.cpython-36.pyc
│ ├── Scorer.cpython-36.pyc
│ └── __init__.cpython-36.pyc
└── src
│ ├── Agenda.py
│ ├── Executor.py
│ ├── Scorer.py
│ └── SearchOp.py
├── syntax
├── .DS_Store
├── Nodes
│ ├── Article.py
│ ├── Sentence.py
│ ├── Token.py
│ ├── TreeNode.py
│ └── __pycache__
│ │ ├── Article.cpython-36.pyc
│ │ ├── Sentence.cpython-36.pyc
│ │ ├── Token.cpython-36.pyc
│ │ └── TreeNode.cpython-36.pyc
├── Relations
│ ├── .DS_Store
│ ├── __init__.py
│ ├── __pycache__
│ │ └── __init__.cpython-36.pyc
│ └── src
│ │ ├── ArgType.py
│ │ ├── Path.py
│ │ └── RelType.py
├── StanfordParseReader.py
├── __init__.py
└── __pycache__
│ ├── Article.cpython-36.pyc
│ ├── Path.cpython-36.pyc
│ ├── RelType.cpython-36.pyc
│ ├── Sentence.cpython-36.pyc
│ ├── StanfordParseReader.cpython-36.pyc
│ ├── Token.cpython-36.pyc
│ ├── TreeNode.cpython-36.pyc
│ └── __init__.cpython-36.pyc
└── utils
├── Utils.py
├── __init__.py
└── __pycache__
├── Utils.cpython-36.pyc
└── __init__.cpython-36.pyc
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Gallup Government, Inc.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pymln
2 | Python implementation of unsupervised semantic parsing and Markov logic network knowledge base induction. This work is funded through DARPA’s ASKE program as part of Gallup's MULTIVAC project.
3 |
4 | ## This is a work in progress.
5 |
6 | This software is derived from the USP (Beta Version) Software by the University of Washington, available here: http://alchemy.cs.washington.edu/usp/
7 |
8 |
9 |
10 | All of the documentation and software included in the USP (Beta Version) Software is copyrighted by Hoifung Poon and Pedro Domingos.
11 |
12 |
13 | Copyright [2009-11] Hoifung Poon and Pedro Domingos. All rights reserved.
14 |
15 |
16 | Contact: Hoifung Poon (hoifung.poon@gmail.com).
17 |
18 |
19 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
20 |
21 |
22 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
23 |
24 |
25 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
26 |
27 |
28 | 3. All advertising materials mentioning features or use of this software must display the following acknowledgment: "This product includes software developed by Hoifung Poon and Pedro Domingos in the Department of Computer Science and Engineering at the University of Washington".
29 |
30 |
31 | 4. Your publications acknowledge the use or contribution made by the Software to your research using the following citation(s):
32 |
33 | Hoifung Poon and Pedro Domingos (2009). "Unsupervised Semantic Parsing", in Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP), 2009. http://alchemy.cs.washington.edu/usp.
34 |
35 |
36 | 5. Neither the name of the University of Washington nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
37 |
38 |
39 | THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 |
41 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | #
4 | # Python implementation of Unsupervised Semantic Parsing system, from:
5 | #
6 | # Hoifung Poon and Pedro Domingos (2009). "Unsupervised Semantic Parsing",
7 | # in Proceedings of the Conference on Empirical Methods in Natural Language
8 | # Processing (EMNLP), 2009. http://alchemy.cs.washington.edu/usp.
9 | #
10 |
11 |
--------------------------------------------------------------------------------
/pymln.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | #
4 | # Python implementation of Unsupervised Semantic Parsing system, from:
5 | #
6 | # Hoifung Poon and Pedro Domingos (2009). "Unsupervised Semantic Parsing",
7 | # in Proceedings of the Conference on Empirical Methods in Natural Language
8 | # Processing (EMNLP), 2009. http://alchemy.cs.washington.edu/usp.
9 | #
10 |
11 | import argparse
12 | import os
13 |
14 | from semantic import Parse
15 | from semantic.MLN import MLN
16 |
def read_input_files(DIR):
    '''Collect the names of the ".dep" files found directly inside DIR.

    The result is a set of bare file names exactly as os.listdir() reports
    them; they are not joined with DIR.
    '''
    return {entry for entry in os.listdir(DIR) if entry.endswith(".dep")}
25 |
26 |
def run(params):
    '''Parse the input files in the data directory and save the model.

    params is a dict providing 'data_dir', 'results_dir', 'priorNumParam'
    and 'priorNumConj'. Relative directories are resolved against the
    current working directory. Always returns None.
    '''
    def _anchored(path):
        # Absolute paths pass through; relative ones hang off the cwd.
        if os.path.isabs(path):
            return path
        return os.path.join(os.getcwd(), path)

    source_dir = _anchored(params['data_dir'])
    output_dir = _anchored(params['results_dir'])

    # The priors are looked up (a missing key still raises KeyError) but
    # nothing in this function consumes them.
    priorNumParam = params['priorNumParam']
    priorNumConj = params['priorNumConj']

    usp_parser = Parse.Parse()

    # Parse the collected files into the MLN knowledge base.
    usp_parser.parse(read_input_files(source_dir))

    # Save knowledge base files to disk.
    MLN.printModel(output_dir)

    return None
53 |
54 |
if __name__ == '__main__':
    # Command-line entry point: read the options, fall back to the defaults
    # for anything not given, then hand the merged settings to run().
    prs = argparse.ArgumentParser(description='Parse scientific articles into'
                                  ' Markov Logic Network knowledge base. \n'
                                  'Usage: python pymln.py [-d dataDir] '
                                  '[-r resultDir] [-p priorNumParam] [-c '
                                  'priorNumConj]')
    prs.add_argument('-d', '--data_dir',
                     help='Directory of source files. If not specified, '
                     'defaults to the current working directory.')
    prs.add_argument('-r', '--results_dir',
                     help='Directory to save results files. If not specified,'
                     ' defaults to the current working directory.')
    # type=int keeps user-supplied priors the same type as the integer
    # defaults below; without it argparse would deliver them as strings.
    prs.add_argument('-p', '--priorNumParam', type=int,
                     help='Prior on parameter number. If not specified,'
                     ' defaults to 5.')
    prs.add_argument('-c', '--priorNumConj', type=int,
                     help='Prior on number of conjunctive parts assigned to '
                     'same cluster. If not specified, defaults to 10.')

    args = vars(prs.parse_args())

    # Default argument values
    params = {'priorNumParam': 5, 'priorNumConj': 10, 'data_dir': os.getcwd(),
              'results_dir': os.getcwd()}

    # If specified in call, override defaults (argparse leaves options that
    # were not given as None).
    for par in params:
        if args[par] is not None:
            params[par] = args[par]

    run(params)
86 |
87 |
88 |
89 |
90 |
91 |
92 |
--------------------------------------------------------------------------------
/semantic/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/.DS_Store
--------------------------------------------------------------------------------
/semantic/MLN/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/MLN/.DS_Store
--------------------------------------------------------------------------------
/semantic/MLN/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from syntax.Relations import RelType
3 |
4 |
class MLN(object):
    '''Markov Logic Network container; the constructor sets up no state.'''

    def __init__(self):
        pass
8 |
9 |
class Argument(object):
    '''Holds an argument node together with its path and its argument part.'''

    def __init__(self, argNode, path, argPart):
        self._argNode, self._path, self._argPart = argNode, path, argPart

    def getPath(self):
        '''Return the path supplied at construction.'''
        return self._path

    def getPart(self):
        '''Return the argument part supplied at construction.'''
        return self._argPart

    def getNode(self):
        '''Return the argument node supplied at construction.'''
        return self._argNode
26 |
27 |
28 | #
29 | # Part class
30 | #
31 |
class Part(object):
    '''Wraps a relation-tree root and tracks its cluster, arguments and parent.

    Besides the per-instance state, the class keeps four dictionaries that
    are shared by every Part and are updated by the set/unset/change
    methods below.
    '''
    # dictionary mapping {str: Part}
    rootNodeId_part = {}
    # dictionary mapping {int: set(str)}
    clustIdx_partRootNodeIds = {}
    # dictionary mapping {(int, int): set((str, str))}
    pairClustIdxs_pairPartRootNodeIds = {}
    # dictionary mapping {int: set((int, int))}
    clustIdx_pairClustIdxs = {}

    def __init__(self, relTreeRoot):
        '''Create an unclustered, parentless part for relTreeRoot.'''
        self._isDebug = False

        self._relTreeRoot = relTreeRoot
        self._relTypeIdx = RelType.getRelType(relTreeRoot)
        self._clustIdx = -1
        # Remember next index because _args should be an ordered dict
        self._nxtArgIdx = 0

        self._parPart = None
        self._parArgIdx = -1

        # Dictionary mapping {int: Argument}
        self._args = {}
        # Dictionary mapping {int: int}
        self._argIdx_argClustIdx = {}
        # Dictionary mapping {int: set(int)}
        self._argClustIdx_argIdxs = {}

    def _forgetArgIdx(self, argIdx, argClustIdx):
        '''Drop argIdx from argClustIdx's index set; delete the set if empty.'''
        members = self._argClustIdx_argIdxs[argClustIdx]
        members.remove(argIdx)

        if not members:
            del self._argClustIdx_argIdxs[argClustIdx]

    def addArgument(self, arg):
        '''Store arg under a fresh argument index and return that index.'''
        argIdx = self._nxtArgIdx
        # Advance the counter so the next argument cannot overwrite this one.
        self._nxtArgIdx += 1
        self._args[argIdx] = arg

        return argIdx

    def changeClust(self, newClustIdx, newRelTypeIdx, clust_only=False):
        '''Move this part to cluster newClustIdx with type newRelTypeIdx.

        With clust_only=True the Clust objects are not notified of the
        unset/set; the shared indexes are still rewritten.
        '''
        oldClustIdx = self.getClustIdx()
        rootID = self.getRelTreeRoot().getId()
        Part.clustIdx_partRootNodeIds[oldClustIdx].remove(rootID)

        if clust_only:
            self._relTypeIdx = newRelTypeIdx
        else:
            ocl = Clust.getClust(oldClustIdx)
            ocl.onPartUnsetClust(self)
            self.setRelTypeIdx(newRelTypeIdx)

        self.setClust(newClustIdx, clust_only=clust_only)

        parent = self.getParPart()

        if parent is None:
            # Root part: move one root count from the old cluster to the new.
            Clust.clustIdx_rootCnt[newClustIdx] = (
                Clust.clustIdx_rootCnt.get(newClustIdx, 0) + 1)

            if oldClustIdx in Clust.clustIdx_rootCnt:
                Clust.clustIdx_rootCnt[oldClustIdx] -= 1
                # Mirror Clust.decRootCnt, which drops exhausted entries.
                if Clust.clustIdx_rootCnt[oldClustIdx] == 0:
                    del Clust.clustIdx_rootCnt[oldClustIdx]
        else:
            parent_clust_id = parent.getClustIdx()
            paci = parent.getArgClust(self.getParArgIdx())
            pcl = Clust.getClust(parent_clust_id)
            pac = pcl._argClusts[paci]

            # Child-cluster counts inside the parent's argument cluster.
            pac._chdClustIdx_cnt[oldClustIdx] -= 1
            pac._chdClustIdx_cnt[newClustIdx] = (
                pac._chdClustIdx_cnt.get(newClustIdx, 0) + 1)

            # Parent-argument counts keyed by child cluster.
            pa = (parent_clust_id, paci)
            Clust.clustIdx_parArgs[oldClustIdx][pa] -= 1
            newParArgs = Clust.clustIdx_parArgs.setdefault(newClustIdx, {})
            newParArgs[pa] = newParArgs.get(pa, 0) + 1

            opci = (parent_clust_id, oldClustIdx)
            npci = (parent_clust_id, newClustIdx)
            ptnid = (parent.getRelTreeRoot().getId(), rootID)

            Part.pairClustIdxs_pairPartRootNodeIds[opci].remove(ptnid)

            if len(Part.pairClustIdxs_pairPartRootNodeIds[opci]) == 0:
                # NOTE(review): setParent only registers the pair under the
                # parent's cluster, so the first removal may raise KeyError;
                # confirm which side owns this index.
                Part.clustIdx_pairClustIdxs[oldClustIdx].remove(opci)
                Part.clustIdx_pairClustIdxs[parent_clust_id].remove(opci)

            # {x} rather than set(x): the tuples are stored whole.
            Part.pairClustIdxs_pairPartRootNodeIds.setdefault(
                npci, set()).add(ptnid)
            Part.clustIdx_pairClustIdxs.setdefault(
                parent_clust_id, set()).add(npci)
            Part.clustIdx_pairClustIdxs.setdefault(
                newClustIdx, set()).add(npci)

        return None

    def changeClustRemap(self, newClustIdx, argClustIdx_newArgClustIdx,
                         clust_only=False):
        '''Change cluster, then remap every argument's argument cluster.

        argClustIdx_newArgClustIdx maps each old argument-cluster index to
        the index it takes in the new cluster.
        '''
        if not clust_only:
            # Fetch the old cluster before changeClust() rewrites _clustIdx.
            ocl = Clust.getClust(self.getClustIdx())

        self.changeClust(newClustIdx, self.getRelTypeIdx(),
                         clust_only=clust_only)

        argIdx_newArgClustIdx = {}

        for ai, arg in self._args.items():
            oaci = self._argIdx_argClustIdx.pop(ai)
            self._forgetArgIdx(ai, oaci)
            argIdx_newArgClustIdx[ai] = argClustIdx_newArgClustIdx[oaci]

            if not clust_only:
                ocl.onPartUnsetArg(self, arg, oaci)

        for ai in self._args:
            self.setArgClust(ai, argIdx_newArgClustIdx[ai],
                             clust_only=clust_only)

        return None

    def getArgument(self, argIdx):
        '''Return the Argument stored under argIdx (KeyError if absent).'''
        return self._args[argIdx]

    def getArguments(self):
        '''Return the {argIdx: Argument} dictionary itself (not a copy).'''
        return self._args

    def getArgClust(self, argIdx):
        '''Return the argument-cluster index of argIdx, or None if unset.'''
        return self._argIdx_argClustIdx.get(argIdx)

    def getParArgIdx(self):
        '''Return the argument index this part occupies in its parent.'''
        return self._parArgIdx

    def getClustIdx(self):
        '''Return this part's cluster index (-1 until one is set).'''
        return self._clustIdx

    @staticmethod
    def getClustPartRootNodeIds():
        '''Return the shared {clustIdx: set(rootNodeId)} dictionary.'''
        return Part.clustIdx_partRootNodeIds

    @staticmethod
    def getPairPartRootNodeIds(parClustIdx=None, chdClustIdx=None):
        '''Return the pair index, or one entry when both indexes are given.'''
        if parClustIdx is None or chdClustIdx is None:
            return Part.pairClustIdxs_pairPartRootNodeIds

        return Part.pairClustIdxs_pairPartRootNodeIds[(parClustIdx,
                                                       chdClustIdx)]

    def getParPart(self):
        '''Return the parent Part, or None when no parent is set.'''
        return self._parPart

    @staticmethod
    def getPartByRootNodeId(rnId):
        '''Return the Part registered under root node id rnId, or None.'''
        return Part.rootNodeId_part.get(rnId)

    @staticmethod
    def getPartRootNodeIds(clustIdx):
        '''Return the root node ids recorded for clustIdx, or None.'''
        return Part.clustIdx_partRootNodeIds.get(clustIdx)

    def getRelTreeRoot(self):
        '''Return the relation-tree root wrapped by this part.'''
        return self._relTreeRoot

    def getRelTypeIdx(self):
        '''Return this part's relation-type index.'''
        return self._relTypeIdx

    def removeArgument(self, argIdx, clust_only=False):
        '''Delete argument argIdx and its argument-cluster bookkeeping.'''
        arg = self.getArgument(argIdx)

        oldArgClustIdx = self._argIdx_argClustIdx.pop(argIdx)
        self._forgetArgIdx(argIdx, oldArgClustIdx)

        if not clust_only:
            cl = Clust.getClust(self.getClustIdx())
            cl.onPartUnsetArg(self, arg, oldArgClustIdx)

        del self._args[argIdx]

        return None

    def setArgClust(self, argIdx, argClustIdx, clust_only=False):
        '''Assign argument argIdx to argument cluster argClustIdx.

        Nothing happens when the argument is already in that cluster. With
        clust_only=True the owning Clust is not notified.
        '''
        oldArgClustIdx = -1

        if argIdx in self._argIdx_argClustIdx:
            oldArgClustIdx = self.getArgClust(argIdx)

        if oldArgClustIdx != argClustIdx:
            self._argIdx_argClustIdx[argIdx] = argClustIdx
            self._argClustIdx_argIdxs.setdefault(argClustIdx,
                                                 set()).add(argIdx)

            arg = self.getArgument(argIdx)

            if not clust_only:
                cl = Clust.getClust(self.getClustIdx())

            if oldArgClustIdx < 0:
                if not clust_only:
                    cl.onPartSetArg(self, arg, argClustIdx)
            else:
                self._forgetArgIdx(argIdx, oldArgClustIdx)

                if not clust_only:
                    cl.onPartSetArg(self, arg, argClustIdx, oldArgClustIdx)

        return None

    def setClust(self, clustIdx, clust_only=False):
        '''Record clustIdx as this part's cluster and index its root id.'''
        self._clustIdx = clustIdx
        rootID = self.getRelTreeRoot().getId()

        # {rootID}, not set(rootID): the id is stored whole, not per character.
        Part.clustIdx_partRootNodeIds.setdefault(clustIdx, set()).add(rootID)

        if not clust_only:
            cl = Clust.getClust(clustIdx)
            cl.onPartSetClust(self)

        return None

    def setParent(self, parPart, parArgIdx):
        '''
        Attach this part to parPart at argument index parArgIdx.
        Unset previous parent if it exists
        '''
        if self.getParPart() is not None:
            self.unsetParent()

        self._parPart = parPart
        self._parArgIdx = parArgIdx
        clustIdx = self.getClustIdx()
        parClustID = parPart.getClustIdx()

        assert parClustID >= 0 and clustIdx >= 0

        pcci = (parClustID, clustIdx)
        Part.clustIdx_pairClustIdxs.setdefault(parClustID, set()).add(pcci)

        pids = (parPart.getRelTreeRoot().getId(),
                self.getRelTreeRoot().getId())
        Part.pairClustIdxs_pairPartRootNodeIds.setdefault(pcci,
                                                          set()).add(pids)

        arg = parPart.getArgument(parArgIdx)
        dep = arg._path.getDep()

        if parClustID != clustIdx and dep.startswith('conj_'):
            # Conjunction counts are keyed with the smaller index first.
            pci = pcci if parClustID < clustIdx else (clustIdx, parClustID)
            Clust.pairClustIdx_conjCnt[pci] = (
                Clust.pairClustIdx_conjCnt.get(pci, 0) + 1)

        return None

    def setRelTypeIdx(self, newRelTypeIdx):
        '''Store newRelTypeIdx and report it to the current cluster.'''
        self._relTypeIdx = newRelTypeIdx
        cl = Clust.getClust(self._clustIdx)
        cl.onPartSetRelTypeIdx(newRelTypeIdx)

        return None

    def unsetArgClust(self, argIdx, clust_only=False):
        '''Remove argIdx from its argument cluster; the argument is kept.'''
        oldArgClustIdx = self._argIdx_argClustIdx.pop(argIdx)
        arg = self.getArgument(argIdx)
        self._forgetArgIdx(argIdx, oldArgClustIdx)

        if not clust_only:
            cl = Clust.getClust(self.getClustIdx())
            cl.onPartUnsetArg(self, arg, oldArgClustIdx)

        return None

    def unsetParent(self):
        '''
        Remove parent-child cluster index information
        Remove parent-child relationship index information
        NEEDS ADDITIONAL FACTORING - where does Cluster come from?
        '''
        parent = self.getParPart()
        clustIdx = self.getClustIdx()

        if parent is not None:
            parClustID = parent.getClustIdx()

            pcci = (parClustID, clustIdx)
            Part.clustIdx_pairClustIdxs[parClustID].remove(pcci)

            pids = (parent.getRelTreeRoot().getId(),
                    self.getRelTreeRoot().getId())
            Part.pairClustIdxs_pairPartRootNodeIds[pcci].remove(pids)

            arg = parent.getArgument(self.getParArgIdx())
            dep = arg._path.getDep()

            if parClustID != clustIdx and dep.startswith('conj_'):
                pci = pcci if parClustID < clustIdx else (clustIdx,
                                                          parClustID)

                if pci in Clust.pairClustIdx_conjCnt:
                    Clust.pairClustIdx_conjCnt[pci] -= 1
                    if Clust.pairClustIdx_conjCnt[pci] == 0:
                        del Clust.pairClustIdx_conjCnt[pci]

        return None

    def unsetRelTypeIdx(self):
        '''Tell the current cluster that this part's relation type is unset.'''
        old_type = self._relTypeIdx
        cl = Clust.getClust(self._clustIdx)
        cl.onPartUnsetRelTypeIdx(old_type)

        return None
389 |
390 |
391 | #
392 | # Clust
393 | #
394 |
class Clust(object):
    '''A cluster of parts, plus class-wide registries shared by all clusters.

    Instances keep per-cluster counts (relation types, argument clusters);
    the class attributes below hold statistics and look-up tables that span
    every cluster.
    '''
    whereasClustIdx = -1
    nxtClustIdx = 1
    ttlRootCnt = 0

    # Dictionary mapping {(int, int): int}
    pairClustIdx_conjCnt = {}
    # Dictionary mapping {int: {(int, int): int}}
    clustIdx_parArgs = {}
    # Dictionary mapping {int: int}
    clustIdx_rootCnt = {}
    # Dictionary mapping {str: int}
    argComb_cnt = {}
    # Dictionary mapping {int: set(str)}
    clustIdx_argCombs = {}
    # Dictionary mapping {int: Clust}
    clusts = {}
    # Dictionary mapping {int: set(int)}
    relTypeIdx_clustIdx = {}

    def __init__(self):
        '''Create an empty cluster that is not yet registered (index -1).'''
        self._isDebug = False
        self._isStop = False
        self._clustIdx = -1
        self._ttlCnt = 0
        self._nxtArgClustIdx = 0
        self._type = ''

        # Dictionary mapping {int: int}
        self._relTypeIdx_cnt = {}
        # Dictionary mapping {int: set(int)}
        self._argTypeIdx_argClustIdxs = {}
        # Dictionary mapping {int: ArgClust}
        self._argClusts = {}

    def getId(self):
        '''Return this cluster's index.'''
        return self._clustIdx

    def getType(self):
        '''Return the type string set when the cluster was created.'''
        return self._type

    def isStop(self):
        '''Return True when the cluster was flagged as a stop cluster.'''
        return self._isStop

    def incRootCnt(self):
        '''Count one more root part for this cluster and overall.'''
        Clust.ttlRootCnt += 1
        cid = self.getId()
        Clust.clustIdx_rootCnt[cid] = Clust.clustIdx_rootCnt.get(cid, 0) + 1

        return None

    def decRootCnt(self):
        '''Count one root part fewer; drop the entry when it reaches zero.'''
        Clust.ttlRootCnt -= 1
        cid = self.getId()
        Clust.clustIdx_rootCnt[cid] -= 1

        if Clust.clustIdx_rootCnt[cid] == 0:
            del Clust.clustIdx_rootCnt[cid]

        return None

    def onPartUnsetRelTypeIdx(self, oldRelTypeIdx):
        '''Decrement the count kept for relation type oldRelTypeIdx.'''
        self._relTypeIdx_cnt[oldRelTypeIdx] -= 1
        return None

    def onPartSetRelTypeIdx(self, newRelTypeIdx):
        '''Increment the count kept for relation type newRelTypeIdx.'''
        self._relTypeIdx_cnt[newRelTypeIdx] = (
            self._relTypeIdx_cnt.get(newRelTypeIdx, 0) + 1)

        return None

    def onPartSetClust(self, part):
        '''Register part as a member: bump totals and its rel-type count.'''
        self._ttlCnt += 1
        self.onPartSetRelTypeIdx(part.getRelTypeIdx())

        return None

    def onPartUnsetClust(self, part):
        '''Unregister part: lower totals and its rel-type count.'''
        self._ttlCnt -= 1
        self.onPartUnsetRelTypeIdx(part.getRelTypeIdx())

        return None

    def createArgClust(self, argTypeIdx):
        '''Create an ArgClust for a not-yet-seen argTypeIdx; return its index.'''
        assert argTypeIdx not in self._argTypeIdx_argClustIdxs
        argClustIdx = self._nxtArgClustIdx
        self._nxtArgClustIdx += 1
        self._argClusts[argClustIdx] = ArgClust()
        self._argTypeIdx_argClustIdxs[argTypeIdx] = {argClustIdx}

        return argClustIdx

    def removeArgClust(self, argClustIdx):
        '''Delete argument cluster argClustIdx and every reference to it.'''
        del self._argClusts[argClustIdx]

        # Collect first, delete afterwards: a dict must not change size
        # while it is being iterated.
        emptied = []

        for ati, members in self._argTypeIdx_argClustIdxs.items():
            # Only some argument types reference this cluster, hence discard.
            members.discard(argClustIdx)

            if not members:
                emptied.append(ati)

        for ati in emptied:
            del self._argTypeIdx_argClustIdxs[ati]

        return None

    def getArgClustIdxs(self, argTypeIdx):
        '''Return the argument-cluster indexes for argTypeIdx, or None.'''
        return self._argTypeIdx_argClustIdxs.get(argTypeIdx)

    @staticmethod
    def getClustsWithRelType(relTypeIdx):
        '''Return the cluster indexes created for relTypeIdx, or None.'''
        return Clust.relTypeIdx_clustIdx.get(relTypeIdx)

    @staticmethod
    def createClust(relTypeIdx):
        '''Create and register a cluster for relTypeIdx; return its index.'''
        cl = Clust()
        cl._clustIdx = Clust.nxtClustIdx
        Clust.nxtClustIdx += 1

        rt = RelType.getRelType(relTypeIdx)
        cl._type = rt.getType()
        rts = rt.toString()

        if rts in ['(V:be)', '(N:%)', '(V:say)', '($:$)']:
            cl._isStop = True

        # Remember the first cluster created for "whereas".
        if Clust.whereasClustIdx == -1 and rts == '(IN:whereas)':
            Clust.whereasClustIdx = cl._clustIdx

        Clust.clusts[cl._clustIdx] = cl
        Clust.relTypeIdx_clustIdx.setdefault(relTypeIdx,
                                             set()).add(cl._clustIdx)

        return cl._clustIdx

    @staticmethod
    def removeClust(clust):
        '''Remove clust from the registry of clusters.'''
        del Clust.clusts[clust._clustIdx]
        return None

    @staticmethod
    def getClust(idx):
        '''Return the registered cluster with index idx (KeyError if none).'''
        return Clust.clusts[idx]

    @staticmethod
    def addArgComb(clustIdx, chdClustIdxs, chdClustIdx2=None):
        '''Count one occurrence of a cluster / child-clusters combination.

        chdClustIdxs is an iterable of child cluster indexes, or a single
        index when chdClustIdx2 supplies the second one.
        '''
        if chdClustIdx2 is not None:
            chdClustIdxs = [chdClustIdxs, chdClustIdx2]
        else:
            chdClustIdxs = list(chdClustIdxs)

        ac = Clust.genArgCombStr(clustIdx, chdClustIdxs)

        # The combination is indexed under the parent and under each child.
        for idx in [clustIdx] + chdClustIdxs:
            Clust.clustIdx_argCombs.setdefault(idx, set()).add(ac)

        Clust.argComb_cnt[ac] = Clust.argComb_cnt.get(ac, 0) + 1

        return None

    @staticmethod
    def genArgCombStr(clustIdx, clustIdxs):
        '''Return the indexes joined with ":" with clustIdx leading.'''
        return ':'.join(str(x) for x in [clustIdx] + list(clustIdxs))

    def onPartSetArg(self, part, arg, argClustIdx, oldArgClustIdx=-1):
        '''Update statistics after part put arg into argClustIdx.

        When oldArgClustIdx is non-negative the argument is also unset from
        that previous argument cluster.
        '''
        argTypeIdx = arg.getPath().getArgType()
        chdClustIdx = arg.getPart().getClustIdx()
        ac = self._argClusts[argClustIdx]

        ac._argTypeIdx_cnt[argTypeIdx] = (
            ac._argTypeIdx_cnt.get(argTypeIdx, 0) + 1)
        # Child clusters have their own counter; onPartUnsetArg decrements
        # the same one.
        ac._chdClustIdx_cnt[chdClustIdx] = (
            ac._chdClustIdx_cnt.get(chdClustIdx, 0) + 1)
        ac._ttlArgCnt += 1

        parArgs = Clust.clustIdx_parArgs.setdefault(chdClustIdx, {})
        cl_ac = (self.getId(), argClustIdx)
        parArgs[cl_ac] = parArgs.get(cl_ac, 0) + 1

        # The part now has newArgNum arguments in this cluster instead of
        # newArgNum - 1; shift one unit between the two buckets.
        newArgNum = len(part._argClustIdx_argIdxs[argClustIdx])
        ac._argNum_cnt[newArgNum] = ac._argNum_cnt.get(newArgNum, 0) + 1

        if newArgNum > 1:
            if ac._argNum_cnt[newArgNum-1] == 1:
                del ac._argNum_cnt[newArgNum-1]
            else:
                ac._argNum_cnt[newArgNum-1] -= 1

        ac._partRootTreeNodeIds.add(part.getRelTreeRoot().getId())

        if oldArgClustIdx >= 0:
            self.onPartUnsetArg(part, arg, oldArgClustIdx)

        return None

    def onPartUnsetArg(self, part, arg, argClustIdx):
        '''Update statistics after part removed arg from argClustIdx.'''
        argTypeIdx = arg.getPath().getArgType()
        chdClustIdx = arg.getPart().getClustIdx()
        ac = self._argClusts[argClustIdx]

        if ac._argTypeIdx_cnt[argTypeIdx] == 1:
            del ac._argTypeIdx_cnt[argTypeIdx]
        else:
            ac._argTypeIdx_cnt[argTypeIdx] -= 1

        if ac._chdClustIdx_cnt[chdClustIdx] == 1:
            del ac._chdClustIdx_cnt[chdClustIdx]
        else:
            ac._chdClustIdx_cnt[chdClustIdx] -= 1

        ac._ttlArgCnt -= 1
        cl_ac = (self.getId(), argClustIdx)

        if Clust.clustIdx_parArgs[chdClustIdx][cl_ac] == 1:
            del Clust.clustIdx_parArgs[chdClustIdx][cl_ac]
        else:
            Clust.clustIdx_parArgs[chdClustIdx][cl_ac] -= 1

        if len(Clust.clustIdx_parArgs[chdClustIdx]) == 0:
            del Clust.clustIdx_parArgs[chdClustIdx]

        # onPartSetArg adds the id idempotently, so a part with several
        # arguments here may already have had it removed.
        ac._partRootTreeNodeIds.discard(part.getRelTreeRoot().getId())

        if ac._ttlArgCnt == 0:
            self.removeArgClust(argClustIdx)
            assert argClustIdx not in part._argClustIdx_argIdxs
        else:
            # Number of arguments the part still has in this cluster.
            oldArgNum = 0

            if argClustIdx in part._argClustIdx_argIdxs:
                oldArgNum = len(part._argClustIdx_argIdxs[argClustIdx])

            if oldArgNum > 0:
                ac._argNum_cnt[oldArgNum] = (
                    ac._argNum_cnt.get(oldArgNum, 0) + 1)

            if ac._argNum_cnt[oldArgNum+1] == 1:
                del ac._argNum_cnt[oldArgNum+1]
            else:
                ac._argNum_cnt[oldArgNum+1] -= 1

        return None

    @staticmethod
    def removePartAndUpdateStat(nid_part):
        '''Detach every part in nid_part ({nodeId: Part}) from the statistics.'''
        for p in nid_part.values():
            cl = Clust.getClust(p.getClustIdx())

            if p.getParPart() is None:
                cl.decRootCnt()

        for p in nid_part.values():
            # removeArgument() deletes from p._args, so walk a snapshot.
            for ai, a in list(p._args.items()):
                p.removeArgument(ai)
                cp = a._argPart
                cp.unsetParent()

            p.unsetRelTypeIdx()

        for p in nid_part.values():
            pclust = p.getClustIdx()
            rootIds = Part.clustIdx_partRootNodeIds[pclust]
            rootIds.remove(p.getRelTreeRoot().getId())

            if len(rootIds) == 0:
                del Part.clustIdx_partRootNodeIds[pclust]

        return None

    @staticmethod
    def updatePartStat(nid_part):
        '''Register every part in nid_part ({nodeId: Part}) with its cluster.'''
        for p in nid_part.values():
            cl = Clust.getClust(p.getClustIdx())
            cl.onPartSetClust(p)

            if p.getParPart() is None:
                cl.incRootCnt()

            for ai, arg in p._args.items():
                aci = p._argIdx_argClustIdx[ai]
                cl.onPartSetArg(p, arg, aci)

        return None

    def toString(self):
        '''Return "[relType:count,<TAB>...]" for this cluster's rel types.'''
        rts = ['{}:{}'.format(RelType.getRelType(rti).toString(), cnt)
               for rti, cnt in self._relTypeIdx_cnt.items()]

        return '[' + ',\t'.join(rts) + ']'
746 |
747 |
748 | '''
749 | End Clust class definitions
750 | '''
751 |
class ArgClust(object):
    '''Per-argument-cluster counters maintained by Clust.'''

    def __init__(self):
        # Dictionary mapping {int: int}
        self._argTypeIdx_cnt = {}
        # Dictionary mapping {int: int}
        self._chdClustIdx_cnt = {}
        # Dictionary mapping {int: int}
        self._argNum_cnt = {}
        self._ttlArgCnt = 0
        self._partRootTreeNodeIds = set()

    def toString(self):
        '''Return "argType:count" pairs separated by single spaces.'''
        # NOTE(review): ArgType is not imported in the visible part of this
        # module; confirm it is in scope before relying on this method.
        return ' '.join('{}:{}'.format(ArgType.getArgType(k), v)
                        for k, v in self._argTypeIdx_cnt.items())
771 |
772 |
773 |
774 |
775 |
776 |
777 |
778 |
--------------------------------------------------------------------------------
/semantic/MLN/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/MLN/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/semantic/MLN/src/Argument.py:
--------------------------------------------------------------------------------
1 |
2 |
class Argument(object):
    """Bundle of a tree node, a path and the part attached through them."""

    def __init__(self, argNode, path, argPart):
        # Stored as given; the accessors below hand the same objects back.
        self._argPart = argPart
        self._path = path
        self._argNode = argNode

    def getPath(self):
        """Return the path passed to the constructor."""
        return self._path

    def getPart(self):
        """Return the part passed to the constructor."""
        return self._argPart

    def getNode(self):
        """Return the node passed to the constructor."""
        return self._argNode
19 |
20 |
21 |
--------------------------------------------------------------------------------
/semantic/MLN/src/Clust.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | #
4 | # Clust
5 | #
6 |
class Clust(object):
    """A cluster of parts plus the registries shared by all clusters.

    Class attributes are bookkeeping keyed by cluster index and shared by
    every instance; instance attributes are the counters of one cluster.
    Functions that take no ``self`` are static: ``Clust.name(...)``.
    """

    whereasClustIdx = -1
    nxtClustIdx = 1
    ttlRootCnt = 0

    # Dictionary mapping {(int, int): int}
    pairClustIdx_conjCnt = {}
    # Dictionary mapping {int: {(int, int): int}}
    clustIdx_parArgs = {}
    # Dictionary mapping {int: int}
    clustIdx_rootCnt = {}
    # Dictionary mapping {str: int}
    argComb_cnt = {}
    # Dictionary mapping {int: set(str)}
    clustIdx_argCombs = {}
    # Dictionary mapping {int: Clust}
    clusts = {}
    # Dictionary mapping {int: set(int)}
    relTypeIdx_clustIdx = {}

    def __init__(self):
        self._isDebug = False
        self._isStop = False
        self._clustIdx = -1
        self._ttlCnt = 0
        self._nxtArgClustIdx = 0
        self._type = ''

        # Dictionary mapping {int: int}
        self._relTypeIdx_cnt = {}
        # Dictionary mapping {int: set(int)}
        self._argTypeIdx_argClustIdxs = {}
        # Dictionary mapping {int: ArgClust}
        self._argClusts = {}

    # ------------------------------------------------------------------
    # Counter helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _incCnt(counts, key):
        """Add one to counts[key], starting from zero for a new key."""
        counts[key] = counts.get(key, 0) + 1

    @staticmethod
    def _decCnt(counts, key):
        """Subtract one from counts[key]; drop the key when it was 1.

        Raises KeyError when the key is absent.
        """
        if counts[key] == 1:
            del counts[key]
        else:
            counts[key] -= 1

    # ------------------------------------------------------------------
    # Simple accessors
    # ------------------------------------------------------------------

    def getId(self):
        """Return this cluster's index."""
        return self._clustIdx

    def getType(self):
        """Return the type string copied from the relation type at creation."""
        return self._type

    def isStop(self):
        """Return True when the cluster was created for a stop relation."""
        return self._isStop

    def getArgClustIdxs(self, argTypeIdx):
        """Return the set of argument-cluster indexes for a type, or None."""
        return self._argTypeIdx_argClustIdxs.get(argTypeIdx)

    # ------------------------------------------------------------------
    # Registry (static)
    # ------------------------------------------------------------------

    @staticmethod
    def getClustsWithRelType(relTypeIdx):
        """Return the set of cluster indexes for a relation type, or None."""
        return Clust.relTypeIdx_clustIdx.get(relTypeIdx)

    @staticmethod
    def createClust(relTypeIdx):
        """Create and register a cluster for ``relTypeIdx``; return its index."""
        cl = Clust()
        cl._clustIdx = Clust.nxtClustIdx
        Clust.nxtClustIdx += 1

        rt = RelType.getRelType(relTypeIdx)
        cl._type = rt.getType()
        rts = rt.toString()

        if rts in ('(V:be)', '(N:%)', '(V:say)', '($:$)'):
            cl._isStop = True

        # Only the first 'whereas' cluster is remembered.
        if Clust.whereasClustIdx == -1 and rts == '(IN:whereas)':
            Clust.whereasClustIdx = cl._clustIdx

        Clust.clusts[cl._clustIdx] = cl
        # set(<int>) raises TypeError; add the index to a (new) set instead.
        Clust.relTypeIdx_clustIdx.setdefault(relTypeIdx, set()).add(cl._clustIdx)

        return cl._clustIdx

    @staticmethod
    def removeClust(clust):
        """Remove ``clust`` from the cluster registry."""
        del Clust.clusts[clust._clustIdx]
        return None

    @staticmethod
    def getClust(idx):
        """Return the cluster registered under ``idx`` (KeyError if none)."""
        return Clust.clusts[idx]

    @staticmethod
    def genArgCombStr(clustIdx, clustIdxs):
        """Return the indexes joined by ':' with ``clustIdx`` first."""
        return ':'.join([str(x) for x in [clustIdx] + clustIdxs])

    @staticmethod
    def addArgComb(clustIdx, chdClustIdxs, chdClustIdx2=None):
        """Count one occurrence of an argument combination.

        ``chdClustIdxs`` is a list of child cluster indexes, or a single
        index when ``chdClustIdx2`` supplies the second one.
        """
        if chdClustIdx2 is not None:
            chdClustIdxs = [chdClustIdxs, chdClustIdx2]

        ac = Clust.genArgCombStr(clustIdx, chdClustIdxs)

        # The combination is indexed under the parent and every child.
        Clust.clustIdx_argCombs.setdefault(clustIdx, set()).add(ac)
        for idx in chdClustIdxs:
            Clust.clustIdx_argCombs.setdefault(idx, set()).add(ac)

        Clust._incCnt(Clust.argComb_cnt, ac)

        return None

    # ------------------------------------------------------------------
    # Root / relation-type / membership counters
    # ------------------------------------------------------------------

    def incRootCnt(self):
        """Count one more root part for this cluster and overall."""
        Clust.ttlRootCnt += 1
        Clust._incCnt(Clust.clustIdx_rootCnt, self.getId())
        return None

    def decRootCnt(self):
        """Count one root part fewer; drop the entry when it reaches zero."""
        Clust.ttlRootCnt -= 1
        Clust._decCnt(Clust.clustIdx_rootCnt, self.getId())
        return None

    def onPartUnsetRelTypeIdx(self, oldRelTypeIdx):
        """Decrement the count of ``oldRelTypeIdx`` (the entry is kept)."""
        self._relTypeIdx_cnt[oldRelTypeIdx] -= 1
        return None

    def onPartSetRelTypeIdx(self, newRelTypeIdx):
        """Increment the count of ``newRelTypeIdx``."""
        Clust._incCnt(self._relTypeIdx_cnt, newRelTypeIdx)
        return None

    def onPartSetClust(self, part):
        """Account for ``part`` joining this cluster."""
        self._ttlCnt += 1
        self.onPartSetRelTypeIdx(part.getRelTypeIdx())
        return None

    def onPartUnsetClust(self, part):
        """Account for ``part`` leaving this cluster."""
        self._ttlCnt -= 1
        self.onPartUnsetRelTypeIdx(part.getRelTypeIdx())
        return None

    # ------------------------------------------------------------------
    # Argument clusters
    # ------------------------------------------------------------------

    def createArgClust(self, argTypeIdx):
        """Create the first argument cluster for a type; return its index."""
        assert argTypeIdx not in self._argTypeIdx_argClustIdxs
        argClustIdx = self._nxtArgClustIdx
        self._nxtArgClustIdx += 1
        self._argClusts[argClustIdx] = ArgClust()
        self._argTypeIdx_argClustIdxs[argTypeIdx] = {argClustIdx}

        return argClustIdx

    def removeArgClust(self, argClustIdx):
        """Delete an argument cluster and every reference to its index."""
        del self._argClusts[argClustIdx]

        # Collect emptied keys first: deleting from the dict while
        # iterating it raises RuntimeError.  discard() is used because not
        # every argument type refers to this argument cluster.
        emptied = []
        for ati, acis in self._argTypeIdx_argClustIdxs.items():
            acis.discard(argClustIdx)
            if not acis:
                emptied.append(ati)

        for ati in emptied:
            del self._argTypeIdx_argClustIdxs[ati]

        return None

    def onPartSetArg(self, part, arg, argClustIdx, oldArgClustIdx=-1):
        """Account for ``arg`` of ``part`` being put in ``argClustIdx``.

        When ``oldArgClustIdx`` is non-negative the argument is also
        removed from that previous argument cluster.
        """
        argTypeIdx = arg._path.getArgType()
        chdClustIdx = arg._argPart.getClustIdx()
        ac = self._argClusts[argClustIdx]

        Clust._incCnt(ac._argTypeIdx_cnt, argTypeIdx)
        # The child cluster has its own counter; onPartUnsetArg decrements
        # _chdClustIdx_cnt, so it must be incremented here as well.
        Clust._incCnt(ac._chdClustIdx_cnt, chdClustIdx)
        ac._ttlArgCnt += 1

        parArgs = Clust.clustIdx_parArgs.setdefault(chdClustIdx, {})
        Clust._incCnt(parArgs, (self.getId(), argClustIdx))

        # The part moves from "newArgNum - 1" to "newArgNum" arguments in
        # this argument cluster.
        newArgNum = len(part._argClustIdx_argIdxs[argClustIdx])
        Clust._incCnt(ac._argNum_cnt, newArgNum)
        if newArgNum > 1:
            Clust._decCnt(ac._argNum_cnt, newArgNum - 1)

        ac._partRootTreeNodeIds.add(part.getRelTreeRoot().getId())

        if oldArgClustIdx >= 0:
            self.onPartUnsetArg(part, arg, oldArgClustIdx)

        return None

    def onPartUnsetArg(self, part, arg, argClustIdx):
        """Account for ``arg`` of ``part`` leaving ``argClustIdx``."""
        argTypeIdx = arg.getPath().getArgType()
        chdClustIdx = arg.getPart().getClustIdx()
        ac = self._argClusts[argClustIdx]

        Clust._decCnt(ac._argTypeIdx_cnt, argTypeIdx)
        Clust._decCnt(ac._chdClustIdx_cnt, chdClustIdx)
        # The total lives in _ttlArgCnt; ArgClust has no _ttlCnt attribute.
        ac._ttlArgCnt -= 1

        parArgs = Clust.clustIdx_parArgs[chdClustIdx]
        Clust._decCnt(parArgs, (self.getId(), argClustIdx))
        if len(parArgs) == 0:
            del Clust.clustIdx_parArgs[chdClustIdx]

        # A part with several arguments in this argument cluster is stored
        # once in the set, so a second removal must not raise.
        ac._partRootTreeNodeIds.discard(part.getRelTreeRoot().getId())

        if ac._ttlArgCnt == 0:
            self.removeArgClust(argClustIdx)
            assert argClustIdx not in part._argClustIdx_argIdxs
        else:
            # Number of arguments the part still has in this cluster; the
            # stored value is a set of argument indexes, hence len().
            oldArgNum = len(part._argClustIdx_argIdxs.get(argClustIdx, ()))

            if oldArgNum > 0:
                Clust._incCnt(ac._argNum_cnt, oldArgNum)

            Clust._decCnt(ac._argNum_cnt, oldArgNum + 1)

        return None

    # ------------------------------------------------------------------
    # Bulk statistics updates (static)
    # ------------------------------------------------------------------

    @staticmethod
    def removePartAndUpdateStat(nid_part):
        """Detach every part in ``nid_part`` and undo its statistics.

        nid_part: dict mapping a tree-node id to the Part rooted there.
        """
        # Parts without a parent were counted as roots of their cluster.
        for p in nid_part.values():
            cl = Clust.getClust(p.getClustIdx())

            if p.getParPart() is None:
                cl.decRootCnt()

        for p in nid_part.values():
            # Iterate over a snapshot: removeArgument() deletes from
            # p._args, and mutating a dict during iteration raises.
            for ai, a in list(p._args.items()):
                # Unlink the child first; Part.unsetParent() looks the
                # argument up on its parent, so it must still exist there.
                a._argPart.unsetParent()
                p.removeArgument(ai)

            # Part exposes unsetRelTypeIdx(); it has no unsetRelType().
            p.unsetRelTypeIdx()

        for p in nid_part.values():
            pclust = p.getClustIdx()
            rootIds = Part.clustIdx_partRootNodeIds[pclust]
            rootIds.remove(p.getRelTreeRoot().getId())

            if len(rootIds) == 0:
                del Part.clustIdx_partRootNodeIds[pclust]

        return None

    @staticmethod
    def updatePartStat(nid_part):
        """Register every part in ``nid_part`` with its cluster's counters.

        nid_part: dict mapping a tree-node id to the Part rooted there.
        """
        for p in nid_part.values():
            cl = Clust.getClust(p.getClustIdx())
            cl.onPartSetClust(p)

            if p.getParPart() is None:
                cl.incRootCnt()

            # _args maps argument index -> Argument; the argument cluster
            # of each one is recorded in the part's _argIdx_argClustIdx.
            for ai, arg in p._args.items():
                aci = p._argIdx_argClustIdx[ai]
                cl.onPartSetArg(p, arg, aci)

        return None

    def toString(self):
        """Return the relation-type counts as '[<relType>:<count>,\\t...]'."""
        rts = ['{}:{}'.format(RelType.getRelType(rti).toString(), cnt)
               for rti, cnt in self._relTypeIdx_cnt.items()]

        return '[' + ',\t'.join(rts) + ']'
358 |
359 |
360 | '''
361 | End Clust class definitions
362 | '''
363 |
class ArgClust(object):
    """Counters kept for one argument cluster of a Clust."""

    def __init__(self):
        # Dictionary mapping {int: int} (argument type index -> count)
        self._argTypeIdx_cnt = {}
        # Dictionary mapping {int: int} (child cluster index -> count)
        self._chdClustIdx_cnt = {}
        # Dictionary mapping {int: int} (argument number -> count)
        self._argNum_cnt = {}
        self._ttlArgCnt = 0
        self._partRootTreeNodeIds = set()

    def toString(self):
        """Return '<argType>:<count>' pairs separated by single spaces."""
        # Format the count bound by the loop (v); the old code used an
        # undefined name and raised NameError on the first entry.
        return ' '.join('{}:{}'.format(ArgType.getArgType(k), v)
                        for k, v in self._argTypeIdx_cnt.items())
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
--------------------------------------------------------------------------------
/semantic/MLN/src/MLN.py:
--------------------------------------------------------------------------------
1 |
class MLN(object):
    """Empty placeholder class; instances carry no state."""

    def __init__(self):
        # Nothing to initialise yet.
        pass
5 |
6 |
--------------------------------------------------------------------------------
/semantic/MLN/src/Part.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | #
4 | # Part class
5 | #
6 |
7 | from semantic import Argument, Clust
8 | from syntax.Relations import RelType
9 |
class Part(object):
    """One relation-tree root with its arguments and cluster assignment.

    Class attributes are indexes shared by all parts.  Calls made with
    ``clust_only=True`` update only these Part-level indexes and skip the
    Clust statistics.
    """

    # dictionary mapping {str: Part}
    rootNodeId_part = {}
    # dictionary mapping {int: set(str)}
    clustIdx_partRootNodeIds = {}
    # dictionary mapping {(int, int): set((str, str))}
    pairClustIdxs_pairPartRootNodeIds = {}
    # dictionary mapping {int: set((int, int))}
    clustIdx_pairClustIdxs = {}

    def __init__(self, relTreeRoot):
        self._isDebug = False

        self._relTreeRoot = relTreeRoot
        # NOTE(review): stored as an index, but what RelType.getRelType
        # returns for a tree node is not visible here - confirm.
        self._relTypeIdx = RelType.getRelType(relTreeRoot)
        self._clustIdx = -1
        self._nxtArgIdx = 0  # last argument index handed out by addArgument

        self._parPart = None
        self._parArgIdx = -1

        # Dictionary mapping {int: Argument}
        self._args = {}
        # Dictionary mapping {int: int}
        self._argIdx_argClustIdx = {}
        # Dictionary mapping {int: set(int)}
        self._argClustIdx_argIdxs = {}

        # Register the part so getPartByRootNodeId() can find it; nothing
        # else in this class ever writes to rootNodeId_part.
        Part.rootNodeId_part[relTreeRoot.getId()] = self

        return None

    # ------------------------------------------------------------------
    # Accessors
    # ------------------------------------------------------------------

    def getArgument(self, argIdx):
        """Return the argument stored under ``argIdx`` (KeyError if none)."""
        return self._args[argIdx]

    def getArguments(self):
        """Return the {argument index: Argument} dictionary itself."""
        return self._args

    def getArgClust(self, argIdx):
        """Return the argument cluster index of ``argIdx``, or None."""
        return self._argIdx_argClustIdx.get(argIdx)

    def getParArgIdx(self):
        """Return the index this part has among its parent's arguments."""
        return self._parArgIdx

    def getClustIdx(self):
        """Return the index of the cluster this part belongs to."""
        return self._clustIdx

    def getParPart(self):
        """Return the parent part, or None."""
        return self._parPart

    def getRelTreeRoot(self):
        """Return the tree node this part is rooted at."""
        return self._relTreeRoot

    def getRelTypeIdx(self):
        """Return the stored relation type index."""
        return self._relTypeIdx

    @staticmethod
    def getClustPartRootNodeIds():
        """Return the shared {cluster index: set(root node id)} index."""
        return Part.clustIdx_partRootNodeIds

    @staticmethod
    def getPairPartRootNodeIds(parClustIdx=None, chdClustIdx=None):
        """Return the pair index, or one entry when both indexes are given."""
        if parClustIdx is None or chdClustIdx is None:
            return Part.pairClustIdxs_pairPartRootNodeIds

        return Part.pairClustIdxs_pairPartRootNodeIds[(parClustIdx,
                                                       chdClustIdx)]

    @staticmethod
    def getPartByRootNodeId(rnId):
        """Return the part registered for a root node id, or None."""
        return Part.rootNodeId_part.get(rnId)

    @staticmethod
    def getPartRootNodeIds(clustIdx):
        """Return the root node ids of the parts in a cluster, or None."""
        return Part.clustIdx_partRootNodeIds.get(clustIdx)

    # ------------------------------------------------------------------
    # Arguments
    # ------------------------------------------------------------------

    def addArgument(self, arg):
        """Store ``arg`` under a fresh index and return that index.

        Indexes start at 1.  The counter is advanced on every call so that
        a second argument no longer overwrites the first one.
        """
        self._nxtArgIdx += 1
        argIdx = self._nxtArgIdx
        self._args[argIdx] = arg

        return argIdx

    def _dropArgClust(self, argIdx):
        """Forget the argument cluster of ``argIdx`` and return its index."""
        oldArgClustIdx = self._argIdx_argClustIdx.pop(argIdx)
        self._dropFromArgClustSet(oldArgClustIdx, argIdx)

        return oldArgClustIdx

    def _dropFromArgClustSet(self, argClustIdx, argIdx):
        """Remove ``argIdx`` from an argument cluster's set of indexes."""
        argIdxs = self._argClustIdx_argIdxs[argClustIdx]
        argIdxs.remove(argIdx)

        # _argClustIdx_argIdxs is a dict: entries are deleted with ``del``
        # (dict has no remove() method).
        if len(argIdxs) == 0:
            del self._argClustIdx_argIdxs[argClustIdx]

    def removeArgument(self, argIdx, clust_only=False):
        """Remove an argument and its argument-cluster bookkeeping."""
        arg = self.getArgument(argIdx)
        oldArgClustIdx = self._dropArgClust(argIdx)

        if not clust_only:
            cl = Clust.getClust(self.getClustIdx())
            cl.onPartUnsetArg(self, arg, oldArgClustIdx)

        del self._args[argIdx]

        return None

    def setArgClust(self, argIdx, argClustIdx, clust_only=False):
        """Assign argument ``argIdx`` to argument cluster ``argClustIdx``.

        Does nothing when the argument is already in that cluster.
        """
        oldArgClustIdx = self._argIdx_argClustIdx.get(argIdx, -1)
        if oldArgClustIdx == argClustIdx:
            return None

        self._argIdx_argClustIdx[argIdx] = argClustIdx
        # set(<int>) raises TypeError; add the index to a (new) set.
        self._argClustIdx_argIdxs.setdefault(argClustIdx, set()).add(argIdx)

        arg = self.getArgument(argIdx)

        if oldArgClustIdx >= 0:
            self._dropFromArgClustSet(oldArgClustIdx, argIdx)

        if not clust_only:
            cl = Clust.getClust(self.getClustIdx())
            # oldArgClustIdx is -1 for a first assignment, which is the
            # value onPartSetArg treats as "no previous cluster".
            cl.onPartSetArg(self, arg, argClustIdx, oldArgClustIdx)

        return None

    def unsetArgClust(self, argIdx, clust_only=False):
        """Drop the argument-cluster assignment of ``argIdx``."""
        oldArgClustIdx = self._argIdx_argClustIdx.pop(argIdx)
        arg = self.getArgument(argIdx)
        self._dropFromArgClustSet(oldArgClustIdx, argIdx)

        if not clust_only:
            cl = Clust.getClust(self.getClustIdx())
            cl.onPartUnsetArg(self, arg, oldArgClustIdx)

        return None

    # ------------------------------------------------------------------
    # Cluster membership
    # ------------------------------------------------------------------

    def setClust(self, clustIdx, clust_only=False):
        """Put this part in cluster ``clustIdx``."""
        self._clustIdx = clustIdx
        rootID = self.getRelTreeRoot().getId()

        # set(<str>) would build a set of single characters; the id is
        # stored as one element.
        Part.clustIdx_partRootNodeIds.setdefault(clustIdx, set()).add(rootID)

        if not clust_only:
            cl = Clust.getClust(clustIdx)
            cl.onPartSetClust(self)

        return None

    def changeClust(self, newClustIdx, newRelTypeIdx, clust_only=False):
        """Move this part to ``newClustIdx`` with relation type ``newRelTypeIdx``."""
        oldClustIdx = self.getClustIdx()
        rootID = self.getRelTreeRoot().getId()
        Part.clustIdx_partRootNodeIds[oldClustIdx].remove(rootID)

        if not clust_only:
            # Must run before _relTypeIdx changes: it reads the old type.
            Clust.getClust(oldClustIdx).onPartUnsetClust(self)

        # Assigned directly: setRelTypeIdx() would add the new type to the
        # old cluster's counts, and setClust() below already counts it in
        # the new cluster.
        self._relTypeIdx = newRelTypeIdx
        self.setClust(newClustIdx, clust_only=clust_only)

        parent = self.getParPart()

        if parent is None:
            if not clust_only:
                # Move one root from the old cluster to the new one.
                rootCnt = Clust.clustIdx_rootCnt
                rootCnt[newClustIdx] = rootCnt.get(newClustIdx, 0) + 1
                rootCnt[oldClustIdx] -= 1
                if rootCnt[oldClustIdx] == 0:
                    del rootCnt[oldClustIdx]
            return None

        parent_clust_id = parent.getClustIdx()

        if not clust_only:
            # Cluster statistics are skipped for clust_only callers, as in
            # the other methods of this class.
            paci = parent.getArgClust(self.getParArgIdx())
            pac = Clust.getClust(parent_clust_id)._argClusts[paci]
            pac._chdClustIdx_cnt[oldClustIdx] -= 1
            pac._chdClustIdx_cnt[newClustIdx] = (
                pac._chdClustIdx_cnt.get(newClustIdx, 0) + 1)

            pa = (parent_clust_id, paci)
            Clust.clustIdx_parArgs[oldClustIdx][pa] -= 1
            newParArgs = Clust.clustIdx_parArgs.setdefault(newClustIdx, {})
            newParArgs[pa] = newParArgs.get(pa, 0) + 1

        opci = (parent_clust_id, oldClustIdx)
        npci = (parent_clust_id, newClustIdx)
        ptnid = (parent.getRelTreeRoot().getId(), rootID)

        oldPairs = Part.pairClustIdxs_pairPartRootNodeIds[opci]
        oldPairs.remove(ptnid)

        if len(oldPairs) == 0:
            # discard(): both indexes may be the same cluster, or one of
            # them may never have listed the pair.
            for idx in (oldClustIdx, parent_clust_id):
                Part.clustIdx_pairClustIdxs.get(idx, set()).discard(opci)

        # The tuples are stored as single elements ({x}, not set(x)), and
        # the parent's entry stays a set instead of being overwritten.
        Part.pairClustIdxs_pairPartRootNodeIds.setdefault(npci, set()).add(ptnid)
        Part.clustIdx_pairClustIdxs.setdefault(parent_clust_id, set()).add(npci)
        Part.clustIdx_pairClustIdxs.setdefault(newClustIdx, set()).add(npci)

        return None

    def changeClustRemap(self, newClustIdx, argClustIdx_newArgClustIdx, clust_only=False):
        """Move to ``newClustIdx`` and remap every argument cluster.

        argClustIdx_newArgClustIdx: dict mapping each current argument
        cluster index to its index in the new cluster.
        """
        ocl = None
        if not clust_only:
            ocl = Clust.getClust(self.getClustIdx())

        self.changeClust(newClustIdx, self.getRelTypeIdx(), clust_only=clust_only)

        argIdx_newArgClustIdx = {}

        for ai, arg in self._args.items():
            oaci = self._dropArgClust(ai)
            argIdx_newArgClustIdx[ai] = argClustIdx_newArgClustIdx[oaci]

            if ocl is not None:
                ocl.onPartUnsetArg(self, arg, oaci)

        for ai in self._args:
            self.setArgClust(ai, argIdx_newArgClustIdx[ai], clust_only=clust_only)

        return None

    def setRelTypeIdx(self, newRelTypeIdx):
        """Store a new relation type and count it in the current cluster."""
        self._relTypeIdx = newRelTypeIdx
        cl = Clust.getClust(self._clustIdx)
        cl.onPartSetRelTypeIdx(newRelTypeIdx)

        return None

    def unsetRelTypeIdx(self):
        """Uncount the current relation type in the current cluster."""
        old_type = self._relTypeIdx
        cl = Clust.getClust(self._clustIdx)
        cl.onPartUnsetRelTypeIdx(old_type)

        return None

    # ------------------------------------------------------------------
    # Parent link
    # ------------------------------------------------------------------

    @staticmethod
    def _conjPairKey(parClustID, clustIdx):
        """Return the cluster pair ordered smallest index first."""
        if parClustID < clustIdx:
            return (parClustID, clustIdx)
        return (clustIdx, parClustID)

    def setParent(self, parPart, parArgIdx):
        """Attach this part to ``parPart`` as its argument ``parArgIdx``.

        Any previous parent is unset first.
        """
        if self.getParPart() is not None:
            self.unsetParent()

        self._parPart = parPart
        self._parArgIdx = parArgIdx
        clustIdx = self.getClustIdx()
        parClustID = parPart.getClustIdx()

        assert parClustID >= 0 and clustIdx >= 0

        pcci = (parClustID, clustIdx)
        # The pair is listed under both clusters, as changeClust() does.
        Part.clustIdx_pairClustIdxs.setdefault(parClustID, set()).add(pcci)
        Part.clustIdx_pairClustIdxs.setdefault(clustIdx, set()).add(pcci)

        pids = (parPart.getRelTreeRoot().getId(), self.getRelTreeRoot().getId())
        Part.pairClustIdxs_pairPartRootNodeIds.setdefault(pcci, set()).add(pids)

        dep = parPart.getArgument(parArgIdx)._path.getDep()

        if parClustID != clustIdx and dep.startswith('conj_'):
            pci = Part._conjPairKey(parClustID, clustIdx)
            # Clust declares this registry as pairClustIdx_conjCnt.
            conjCnt = Clust.pairClustIdx_conjCnt
            conjCnt[pci] = conjCnt.get(pci, 0) + 1

        return None

    def unsetParent(self):
        """Detach this part from its parent and undo the pair bookkeeping.

        Does nothing when the part has no parent.
        """
        parent = self.getParPart()
        if parent is None:
            return None

        clustIdx = self.getClustIdx()
        parClustID = parent.getClustIdx()
        pcci = (parClustID, clustIdx)

        pids = (parent.getRelTreeRoot().getId(),
                self.getRelTreeRoot().getId())
        pairs = Part.pairClustIdxs_pairPartRootNodeIds[pcci]
        pairs.remove(pids)

        # The cluster pair stays listed while other parts still form it.
        if len(pairs) == 0:
            for idx in (parClustID, clustIdx):
                Part.clustIdx_pairClustIdxs.get(idx, set()).discard(pcci)

        arg = parent.getArgument(self.getParArgIdx())
        dep = arg._path.getDep()

        if parClustID != clustIdx and dep.startswith('conj_'):
            pci = Part._conjPairKey(parClustID, clustIdx)
            conjCnt = Clust.pairClustIdx_conjCnt

            if pci in conjCnt:
                conjCnt[pci] -= 1
                if conjCnt[pci] == 0:
                    del conjCnt[pci]

        # Actually clear the link so getParPart() reports "no parent".
        self._parPart = None
        self._parArgIdx = -1

        return None
367 |
368 |
369 |
370 |
--------------------------------------------------------------------------------
/semantic/Parse.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from semantic.MLN import Argument, Clust, Part
4 | from semantic import Agenda, Executor, Scorer
5 | from syntax import StanfordParseReader
6 | from syntax.Nodes import TreeNode
7 | from syntax.Relations import Path
8 | from utils import Utils
9 |
class Parse(object):
    """Builds parts and clusters from parsed articles and reparses sentences."""

    def __init__(self, priorNumParam=None, priorNumConj=None):
        self.debug = False
        self.numSents = 0
        self.numTkns = 0

        # {article uid: article}
        self.id_article = {}

        self.rootTreeNodeIds = set()
        self.parseReader = StanfordParseReader()
        self.scorer = Scorer()
        self.agenda = Agenda()
        self.executor = Executor()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def part_from_node(ai, sj, sent, k):
        """Create the Part for token ``k`` and pick or create its cluster.

        Returns ``(part, clustIdx)``; the part is not yet put in the cluster.
        """
        node_id = Utils.genTreeNodeId(ai, sj, k)
        tn = TreeNode(node_id, sent.get_tokens(k))
        part = Part(tn)
        relTypeIdx = part.getRelTypeIdx()
        clustIdxs = Clust.getClustsWithRelType(relTypeIdx)

        if clustIdxs is not None:
            clustIdx = next(iter(clustIdxs))
        else:
            clustIdx = Clust.createClust(relTypeIdx)

        return part, clustIdx

    @staticmethod
    def isIgnore(sent, k):
        """Return True when token ``k`` does not hang under index 0.

        Follows ``sent.get_parent`` upwards until it raises KeyError and
        reports whether the last index reached is greater than zero.
        """
        while True:
            try:
                parent = sent.get_parent(k)
            except KeyError:
                break
            else:
                k = parent[1]

        return (k > 0)

    def _linkArgs(self, ai, sj, sent, idx, clust_only):
        """Attach the children of token ``idx`` as arguments, recursively."""
        nid = Utils.genTreeNodeId(ai, sj, idx)
        node = TreeNode.getTreeNode(nid)
        nodePart = Part.getPartByRootNodeId(nid)
        ncl = Clust.getClust(nodePart.getClustIdx())
        chds = sent.get_children(idx)

        if chds is None:
            return None

        for dep, cidx in chds:
            cid = Utils.genTreeNodeId(ai, sj, cidx)
            p = Path(dep)
            argTypeIdx = p.getArgType()
            cp = Part.getPartByRootNodeId(cid)

            # A child that already has a parent is left where it is.
            if cp.getParPart() is not None:
                continue

            arg = Argument(node, p, cp)
            argIdx = nodePart.addArgument(arg)
            cp.setParent(nodePart, argIdx)
            argClustIdxs = ncl.getArgClustIdxs(argTypeIdx)

            if argClustIdxs is None:
                argClustIdx = ncl.createArgClust(argTypeIdx)
            else:
                argClustIdx = next(iter(argClustIdxs))

            nodePart.setArgClust(argIdx, argClustIdx, clust_only=clust_only)
            self._linkArgs(ai, sj, sent, cidx, clust_only)

        return None

    # ------------------------------------------------------------------
    # Initial parse
    # ------------------------------------------------------------------

    def createArgs(self, ai, sj, sent, idx):
        """Attach arguments below token ``idx`` and update cluster statistics.

        The parent check used to be inverted relative to setArgs(), which
        skipped every child that had no parent yet, so nothing was attached.
        """
        return self._linkArgs(ai, sj, sent, idx, clust_only=False)

    def setArgs(self, aid, si, sent, idx):
        """Attach arguments below token ``idx`` without cluster statistics."""
        return self._linkArgs(aid, si, sent, idx, clust_only=True)

    def chkArgs(self):
        '''
        To Do: for debugging purposes
        '''

        return None

    def initialize(self, arts):
        """Index the articles and initialise every sentence in them."""
        #
        # Look to vectorize this
        #
        for art in arts:
            self.id_article[art.uid] = art
            self.numSents += len(art.sentences)

            for j, sent in enumerate(art.sentences):
                # The article uid is used as the article key so that the
                # node ids match those built by reparse(aid, si).
                self.initializeSent(art.uid, j, sent)

    def initializeSent(self, ai, sj, sent):
        """Create parts for one sentence and link them from its root."""
        self.numTkns += len(sent.get_tokens())

        if len(sent.tkn_children) < 1:
            return None

        for k, tok in enumerate(sent.tokens):
            if self.isIgnore(sent, k):
                continue

            part, clustIdx = self.part_from_node(ai, sj, sent, k)
            part.setClust(clustIdx)

        roots = sent.get_children(0)
        assert len(roots) == 1

        # NOTE(review): this method read the children as a dict while the
        # other methods iterate (dep, idx) pairs; both shapes are accepted
        # until the return type of get_children is confirmed.
        rootPairs = roots.items() if isinstance(roots, dict) else roots

        for dep, idx in rootPairs:
            sub_node_id = Utils.genTreeNodeId(ai, sj, idx)
            self.rootTreeNodeIds.add(sub_node_id)
            node_part = Part.getPartByRootNodeId(sub_node_id)
            if node_part is None:
                continue
            ncl = Clust.getClust(node_part.getClustIdx())
            ncl.incRootCnt()
            self.createArgs(ai, sj, sent, idx)

        return None

    def mergeArgs(self):
        """Greedily merge the argument clusters of every cluster.

        Argument clusters are visited from largest to smallest total count;
        each one is merged into the kept cluster with the best positive
        score, or kept itself when no score is positive.
        """
        for clustIdx in Clust.clusts:
            cl = Clust.getClust(clustIdx)
            newArgClusts = {}
            cnt_acis = [(acl._ttlArgCnt, aci)
                        for aci, acl in cl._argClusts.items()]
            cnt_acis.sort(reverse=True)

            for _, aci in cnt_acis:
                ac = cl._argClusts[aci]

                if len(newArgClusts) == 0:
                    # The first cluster seeds the kept set; without this
                    # continue it would be scored against itself.
                    newArgClusts[aci] = ac
                    continue

                maxScore = 0
                maxMap = -1

                for acix in newArgClusts:
                    score = self.scorer.scoreMergeArgs(cl, acix, aci)

                    if score > maxScore:
                        maxScore = score
                        maxMap = acix

                if maxMap >= 0:
                    self.executor.mergeArg(cl, maxMap, aci)
                else:
                    newArgClusts[aci] = ac

            cl._argClusts = newArgClusts

    def parse(self, files):
        """Read ``files``, initialise the model and process the agenda."""
        articles = [self.parseReader.readParse(file) for file in files]

        self.initialize(articles)
        self.mergeArgs()
        self.agenda.createAgenda()
        self.agenda.procAgenda()

        return None

    # ------------------------------------------------------------------
    # Reparse
    # ------------------------------------------------------------------

    def reparse(self, aid, si):
        """Rebuild the parts of sentence ``si`` of article ``aid``."""
        sent = self.id_article[aid].sentences[si]
        children = sent.get_children(0)

        if children is None or len(children) == 0:
            return None

        # Unregister the existing parts but keep them to undo their stats.
        old_nid_part = {}

        for ni in range(len(sent.get_tokens())):
            if self.isIgnore(sent, ni):
                continue
            nid = Utils.genTreeNodeId(aid, si, ni)
            old_nid_part[nid] = Part.getPartByRootNodeId(nid)
            # The registry declared on Part is rootNodeId_part.
            del Part.rootNodeId_part[nid]

        nid_part = {}

        for ni in range(len(sent.get_tokens())):
            if self.isIgnore(sent, ni):
                continue
            part, clustIdx = self.part_from_node(aid, si, sent, ni)
            nid_part[Utils.genTreeNodeId(aid, si, ni)] = part
            part.setClust(clustIdx, clust_only=True)

        roots = sent.get_children(0)
        assert len(roots) == 1

        dep_idx = next(iter(roots))
        idx = dep_idx[1]
        nid = Utils.genTreeNodeId(aid, si, idx)

        if Part.getPartByRootNodeId(nid) is not None:
            self.setArgs(aid, si, sent, idx)

        # Keep composing the best-scoring (part, argument) pair until no
        # composition has a positive score.
        while True:
            rp, ap = None, None
            maxImp = 0

            for prt in nid_part.values():
                for arg in prt.getArguments().values():
                    score = self.scorer.scoreOpComposePart(prt, arg)

                    if score > maxImp:
                        maxImp = score
                        rp, ap = prt, arg

            if maxImp <= 0:
                break

            self.executor.execComposePart(rp, ap)
            # ap is an Argument; the tree root belongs to its part.
            # NOTE(review): confirm the scorer/executor expect an Argument.
            del nid_part[ap.getPart().getRelTreeRoot().getId()]

        Clust.removePartAndUpdateStat(old_nid_part)
        Clust.updatePartStat(nid_part)

        return None
287 |
--------------------------------------------------------------------------------
/semantic/__init__.py:
--------------------------------------------------------------------------------
1 |
class Agenda(object):
    """Empty placeholder class; instances carry no state."""

    def __init__(self):
        # Nothing to initialise yet.
        pass
6 |
7 |
class Executor(object):
    """Empty placeholder class; instances carry no state."""

    def __init__(self):
        # Nothing to initialise yet.
        pass
12 |
13 |
class Scorer(object):
    """Empty placeholder class; instances carry no state."""

    def __init__(self):
        # Nothing to initialise yet.
        pass
18 |
19 |
class SearchOp(object):
    """Empty placeholder class; instances carry no state."""

    def __init__(self):
        # Nothing to initialise yet.
        pass
24 |
25 |
--------------------------------------------------------------------------------
/semantic/__pycache__/Agenda.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/__pycache__/Agenda.cpython-36.pyc
--------------------------------------------------------------------------------
/semantic/__pycache__/Argument.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/__pycache__/Argument.cpython-36.pyc
--------------------------------------------------------------------------------
/semantic/__pycache__/Clust.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/__pycache__/Clust.cpython-36.pyc
--------------------------------------------------------------------------------
/semantic/__pycache__/Executor.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/__pycache__/Executor.cpython-36.pyc
--------------------------------------------------------------------------------
/semantic/__pycache__/MLN.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/__pycache__/MLN.cpython-36.pyc
--------------------------------------------------------------------------------
/semantic/__pycache__/Parse.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/__pycache__/Parse.cpython-36.pyc
--------------------------------------------------------------------------------
/semantic/__pycache__/Part.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/__pycache__/Part.cpython-36.pyc
--------------------------------------------------------------------------------
/semantic/__pycache__/Scorer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/__pycache__/Scorer.cpython-36.pyc
--------------------------------------------------------------------------------
/semantic/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/semantic/src/Agenda.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/src/Agenda.py
--------------------------------------------------------------------------------
/semantic/src/Executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/src/Executor.py
--------------------------------------------------------------------------------
/semantic/src/Scorer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/src/Scorer.py
--------------------------------------------------------------------------------
/semantic/src/SearchOp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/semantic/src/SearchOp.py
--------------------------------------------------------------------------------
/syntax/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/.DS_Store
--------------------------------------------------------------------------------
/syntax/Nodes/Article.py:
--------------------------------------------------------------------------------
1 |
2 | from . import Sentence
3 |
class Article(object):
    '''
    An Article() is a collection of Sentences() (held in a list) plus an
    article id, which can be of any type but should be unique within a
    collection of Articles.
    '''
    def __init__(self, fn=None, sentences=None):
        '''
        fn:        the article id, stored as ``uid``.
        sentences: optional list to use as the sentence list; when omitted a
                   new empty list is created for this Article.
        '''
        self.uid = fn
        # A default of ``[]`` would be evaluated once and shared by every
        # Article built without an explicit list, so sentences appended to one
        # document would show up in all of them.
        self.sentences = [] if sentences is None else sentences

    def __repr__(self):
        return str(self.__dict__)
16 |
17 |
--------------------------------------------------------------------------------
/syntax/Nodes/Sentence.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | #
4 | # Sentence class
5 | #
6 |
7 | from . import Token
8 |
class Sentence(object):
    '''
    Container for one sentence's tokens and dependency links.

    _tokens:       list of the tokens in the sentence.
    _tkn_children: dictionary mapping a parent key (int) to a set of
                   (int, str) tuples describing its children; key 0 is
                   created up front with an empty set.
    _tkn_par:      dictionary mapping a child key (int) to its parent as a
                   (str, int) tuple.
    '''
    def __init__(self):
        self._tokens = []
        self._tkn_children = {0: set()}
        self._tkn_par = {}

    def __repr__(self):
        pieces = [
            'Tokens: ', str(self._tokens), '\n',
            'Parents: ', str(self._tkn_par), '\n',
            'Children: ', str(self._tkn_children),
        ]
        return ''.join(pieces)

    def get_tokens(self, idx=None):
        '''
        Return the whole token list when idx is None, or the tokens at the
        positions listed in idx when idx is a list. Any other idx raises
        ValueError.
        '''
        if idx is None:
            return self._tokens
        if not isinstance(idx, list):
            raise ValueError
        return [self._tokens[position] for position in idx]

    def get_token(self, idx):
        '''
        Return the single token stored at position idx.
        '''
        return self._tokens[idx]

    def add_token(self, tok):
        '''
        Append tok to the token list; tok must be a Token.
        '''
        assert isinstance(tok, Token)
        self._tokens.append(tok)

    def get_children(self, parent=None):
        '''
        Return the children set registered for parent, or None when that
        parent has no entry. Without an argument, return the full mapping.
        '''
        if parent is None:
            return self._tkn_children
        return self._tkn_children.get(parent)

    def set_children(self, parent, kids):
        '''
        Replace the children of parent with the set kids.
        '''
        assert isinstance(kids, set)
        self._tkn_children[parent] = kids

    def add_child(self, parent, kid):
        '''
        Add kid to the children of parent; parent must already have an entry.
        '''
        assert parent in self._tkn_children
        self._tkn_children[parent].add(kid)

    def get_parent(self, kid):
        '''
        Return the parent tuple recorded for kid (KeyError when absent).
        '''
        return self._tkn_par[kid]

    def set_parent(self, kid, parent):
        '''
        Record parent (a tuple) as the parent of kid, replacing any previous
        entry.
        '''
        assert isinstance(parent, tuple)
        self._tkn_par[kid] = parent
107 |
108 |
109 |
110 |
--------------------------------------------------------------------------------
/syntax/Nodes/Token.py:
--------------------------------------------------------------------------------
1 |
2 |
class Token(object):
    '''
    A single token described by a part-of-speech tag, a lemma and a surface
    form.
    '''
    def __init__(self, pos, lemma, form=None):
        '''
        pos:   part-of-speech tag (string).
        lemma: lemma of the token (string).
        form:  surface form; falls back to the lemma when not given.
        '''
        self._pos = pos
        self._lemma = lemma
        self._form = lemma if form is None else form
        self._tkn_cnt = dict()

    def __repr__(self):
        return self.toString()

    def getForm(self):
        return self._form

    def getPOS(self):
        return self._pos

    def getLemma(self):
        return self._lemma

    def isContent(self, pos=None):
        '''
        Return True when the tag is one of 'J', 'R', 'V' or 'N'.

        With no argument the token's own tag is tested; an explicit pos
        overrides it. The method previously had no ``self`` parameter, so
        ``tok.isContent()`` tested the Token object itself and was always
        False.
        '''
        if pos is None:
            # Keep the old unbound call style ``Token.isContent('N')`` working:
            # there the tag arrives in the ``self`` slot.
            pos = self if isinstance(self, str) else self._pos

        return pos in ('J', 'R', 'V', 'N')

    def isVerb(self):
        return self._pos.startswith('V')

    def isNoun(self):
        return self._pos.startswith('N') or self._pos.startswith('PRP')

    def compareTo(self, t):
        '''
        Order tokens by lemma, then by POS tag.

        Returns a negative number, zero or a positive number when this token
        sorts before, equal to or after t. Strings are compared
        lexicographically; summing code points would make anagrams such as
        'ab' and 'ba' compare equal.
        '''
        mine = (self._lemma, self._pos)
        theirs = (t.getLemma(), t.getPOS())

        return (mine > theirs) - (mine < theirs)

    def equals(self, t):
        return (self._pos == t.getPOS()) and (self._lemma == t.getLemma())

    def hashCode(self):
        # Hash the same fields equals() compares so equal tokens hash alike.
        return hash((self._pos, self._lemma))

    def toString(self):
        return (self._pos + ":" + self._lemma)
70 |
--------------------------------------------------------------------------------
/syntax/Nodes/TreeNode.py:
--------------------------------------------------------------------------------
1 |
2 | from . import Token
3 |
class TreeNode(object):
    '''
    A node of a dependency tree: an id, a token and its children grouped by
    dependency label. Every node registers itself in the class-level
    ``id_treeNodes`` dictionary on construction.
    '''
    # dictionary mapping {str: TreeNode}
    id_treeNodes = {}

    @staticmethod
    def getTreeNode(idx):
        '''
        Return the registered node with id idx (KeyError when unknown).
        '''
        return TreeNode.id_treeNodes[idx]

    def __init__(self, idx, tkn):
        self._id = idx
        self._tkn = tkn
        # dictionary mapping {dependency label: set of child TreeNodes}
        self._children = {}
        TreeNode.id_treeNodes[idx] = self

    def addChild(self, dep, child):
        '''
        Attach child under the dependency label dep.
        '''
        # set(child) would try to iterate the node, and set.add() returns
        # None, so build the set once and add to it in place.
        self._children.setdefault(dep, set()).add(child)

        return None

    def getId(self):
        return self._id

    def getToken(self):
        return self._tkn

    def getChildren(self):
        return self._children

    def compareTo(self, z):
        '''
        Compare this node with z by delegating to the tokens' compareTo.
        Raises ValueError when z is not a TreeNode.
        '''
        if not isinstance(z, TreeNode):
            raise ValueError

        return self._tkn.compareTo(z._tkn)

    def equals(self, o):
        return self.compareTo(o) == 0

    def toString(self):
        return self._tkn.toString()

    def getTreeStr(self):
        '''
        Return the lemmas of this subtree joined by single spaces.

        Children come first, in dictionary insertion order, and this node's
        own lemma is last. A child reached through a 'prep_*' or 'conj_*'
        dependency is prefixed with the label text after those five
        characters.
        '''
        id_str = {}

        for dep, nodes in self._children.items():
            is_collapsed = dep.startswith('prep_') or dep.startswith('conj_')
            prefix = dep[5:] + ' ' if is_collapsed else ''

            for node in nodes:
                # Build each child's string from scratch so one sibling's
                # text never leaks into the next.
                id_str[node.getId()] = prefix + node.getTreeStr()

        id_str[self._id] = self._tkn.getLemma()

        return ' '.join(id_str.values())
67 |
68 |
69 |
--------------------------------------------------------------------------------
/syntax/Nodes/__pycache__/Article.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/Nodes/__pycache__/Article.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/Nodes/__pycache__/Sentence.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/Nodes/__pycache__/Sentence.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/Nodes/__pycache__/Token.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/Nodes/__pycache__/Token.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/Nodes/__pycache__/TreeNode.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/Nodes/__pycache__/TreeNode.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/Relations/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/Relations/.DS_Store
--------------------------------------------------------------------------------
/syntax/Relations/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from ..Nodes import Token, TreeNode
3 |
class RelType(object):
    '''
    A relation type identified by the bracketed string form of a tree.
    Created types are kept in a class-level registry and addressed by their
    position in it.
    '''
    # Every RelType created so far; a type's index is its position here.
    _relTypes = []
    # Dictionary mapping {type string: index into _relTypes}
    _relTypeStr_idx = {}

    def __init__(self):
        self._str = None
        self._type = ''

    def getType(self):
        return self._type

    @staticmethod
    def getRelType(target):
        '''
        Look up or register a relation type.

        target is None -> returns None.
        target is int  -> returns the RelType stored at that index.
        otherwise      -> target is treated as a tree node; returns the
                          index of the RelType for its type string, creating
                          and registering it on first sight. New types get
                          type 'C' when the node's token reports isContent(),
                          else 'N'.
        '''
        if target is None:
            return None

        if isinstance(target, int):
            return RelType._relTypes[target]

        s = RelType.genTypeStr(target)

        if s not in RelType._relTypeStr_idx:
            t = RelType()
            t._str = s
            t._type = 'C' if target.getToken().isContent() else 'N'

            RelType._relTypes.append(t)
            RelType._relTypeStr_idx[s] = len(RelType._relTypes) - 1

        return RelType._relTypeStr_idx[s]

    @staticmethod
    def genTypeStr(tn):
        '''
        Return the bracketed string for the tree rooted at tn, of the shape
        '(token (dep child child) (dep child))'.
        '''
        parts = ['(', tn.getToken().toString()]

        for dep, nodes in tn.getChildren().items():
            parts.append(' (' + dep)

            for node in nodes:
                # Recurse through the class: a bare genTypeStr is not in
                # scope inside the method body.
                parts.append(' ' + RelType.genTypeStr(node))

            parts.append(')')

        parts.append(')')

        return ''.join(parts)

    def compareTo(self, z):
        '''
        Compare type strings lexicographically; returns -1, 0 or 1.
        '''
        other = z.toString()

        return (self._str > other) - (self._str < other)

    def equals(self, o):
        return self.compareTo(o) == 0

    def toString(self):
        return self._str
70 |
71 |
72 |
class Path(object):
    '''
    Holds a dependency label plus an optional tree root, argument node and
    second dependency label, and renders them as a '<...>' type string that
    is cached after the first toString() call.
    '''
    def __init__(self, dep, treeRoot=None, argNode=None, dep2=None):
        self._str = None
        self._argTypeIdx = -1
        self._dep, self._dep2 = dep, dep2
        self._treeRoot, self._argNode = treeRoot, argNode

    def getDep(self):
        return self._dep

    def getTreeRoot(self):
        return self._treeRoot

    def getArgNode(self):
        return self._argNode

    def getDep2(self):
        return self._dep2

    def getArgType(self):
        # Returns the stored index; nothing in this class changes it from -1.
        return self._argTypeIdx

    def toString(self):
        '''
        Return the type string, generating it on first use.
        '''
        cached = self._str

        if cached is None:
            cached = self._str = self.genTypeStr()

        return cached

    def genTypeStr(self):
        '''
        Build '<dep>' when there is no tree root, otherwise
        '<dep:REL:dep2>' where REL is RelType.genTypeStr of the root.
        '''
        root = self._treeRoot

        if root is None:
            return '<' + self._dep + '>'

        return ('<' + self._dep + ':' + RelType.genTypeStr(root) + ':' +
                self._dep2 + '>')
113 |
114 |
115 |
class ArgType(object):
    '''
    An argument type built from a path: a dependency label, an optional
    relation-type index and an optional second dependency label. Created
    types are kept in a class-level registry and addressed by index.
    '''
    # Every ArgType created so far; a type's index is its position here.
    argTypes = []
    # Dictionary mapping {str: int}
    argTypeStr_idx = {}

    # Registry indices of the root-less 'nsubj', 'dobj' and 'prep_in' types;
    # -1 until the corresponding type is first registered.
    ARGTYPEIDX_SUBJ = -1
    ARGTYPEIDX_OBJ = -1
    ARGTYPEIDX_IN = -1

    def __init__(self):
        self._dep = None
        self._relTypeIdx = -1
        self._dep2 = None
        self._str = None

    @staticmethod
    def getArgType(target):
        '''
        Look up or register an argument type.

        target is int -> returns the ArgType stored at that index.
        otherwise     -> target is treated as a path object exposing
                         toString(), getDep(), getDep2() and getTreeRoot();
                         returns the index of the ArgType for its string,
                         creating and registering it on first sight.
        '''
        if isinstance(target, int):
            return ArgType.argTypes[target]

        s = target.toString()

        if s not in ArgType.argTypeStr_idx:
            dep = target.getDep()
            root = target.getTreeRoot()

            t = ArgType()
            t._dep = dep
            t._dep2 = target.getDep2()
            t._relTypeIdx = -1

            if root is not None:
                t._relTypeIdx = RelType.getRelType(root)

            ArgType.argTypes.append(t)
            ati = len(ArgType.argTypes) - 1
            ArgType.argTypeStr_idx[s] = ati

            if root is None:
                # Assign on the class: a bare name here would only create a
                # local variable and leave the class constants at -1.
                if dep == 'nsubj':
                    ArgType.ARGTYPEIDX_SUBJ = ati
                elif dep == 'dobj':
                    ArgType.ARGTYPEIDX_OBJ = ati
                elif dep == 'prep_in':
                    ArgType.ARGTYPEIDX_IN = ati

        return ArgType.argTypeStr_idx[s]

    def compareTo(self, z):
        '''
        Compare with another ArgType by dependency label, then relation-type
        index, then second dependency label.

        Returns None when either dependency label is None; otherwise a
        negative number, zero or a positive number. Labels are compared
        lexicographically. When this type has a second label and z has none,
        the result is -1.
        '''
        # ArgType defines no getDep()/getDep2() accessors, so read the
        # attributes directly.
        if self._dep is None or z._dep is None:
            return None

        result = (self._dep > z._dep) - (self._dep < z._dep)

        if result == 0:
            result = self._relTypeIdx - z._relTypeIdx

        if result == 0 and self._dep2 is not None:
            if z._dep2 is None:
                result = -1
            else:
                result = (self._dep2 > z._dep2) - (self._dep2 < z._dep2)

        return result

    def equals(self, o):
        return self.compareTo(o) == 0

    def toString(self):
        '''
        Return '<dep>' or, when a relation-type index is set,
        '<dep:relTypeIdx:dep2>'. The string is cached after the first call.
        '''
        if self._str is None:
            self._str = '<' + self._dep

            if self._relTypeIdx >= 0:
                self._str += ':{}:{}'.format(self._relTypeIdx, self._dep2)

            self._str += '>'

        return self._str
199 |
200 |
201 |
202 |
203 |
--------------------------------------------------------------------------------
/syntax/Relations/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/Relations/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/Relations/src/ArgType.py:
--------------------------------------------------------------------------------
1 |
2 | from . import RelType
3 |
class ArgType(object):
    '''
    An argument type built from a path: a dependency label, an optional
    relation-type index and an optional second dependency label. Created
    types are kept in a class-level registry and addressed by index.
    '''
    # Every ArgType created so far; a type's index is its position here.
    argTypes = []
    # Dictionary mapping {str: int}
    argTypeStr_idx = {}

    # Registry indices of the root-less 'nsubj', 'dobj' and 'prep_in' types;
    # -1 until the corresponding type is first registered.
    ARGTYPEIDX_SUBJ = -1
    ARGTYPEIDX_OBJ = -1
    ARGTYPEIDX_IN = -1

    def __init__(self):
        self._dep = None
        self._relTypeIdx = -1
        self._dep2 = None
        self._str = None

    @staticmethod
    def getArgType(target):
        '''
        Look up or register an argument type.

        target is int -> returns the ArgType stored at that index.
        otherwise     -> target is treated as a path object exposing
                         toString(), getDep(), getDep2() and getTreeRoot();
                         returns the index of the ArgType for its string,
                         creating and registering it on first sight.
        '''
        if isinstance(target, int):
            return ArgType.argTypes[target]

        s = target.toString()

        if s not in ArgType.argTypeStr_idx:
            dep = target.getDep()
            root = target.getTreeRoot()

            t = ArgType()
            t._dep = dep
            t._dep2 = target.getDep2()
            t._relTypeIdx = -1

            if root is not None:
                t._relTypeIdx = RelType.getRelType(root)

            ArgType.argTypes.append(t)
            ati = len(ArgType.argTypes) - 1
            ArgType.argTypeStr_idx[s] = ati

            if root is None:
                # Assign on the class: a bare name here would only create a
                # local variable and leave the class constants at -1.
                if dep == 'nsubj':
                    ArgType.ARGTYPEIDX_SUBJ = ati
                elif dep == 'dobj':
                    ArgType.ARGTYPEIDX_OBJ = ati
                elif dep == 'prep_in':
                    ArgType.ARGTYPEIDX_IN = ati

        return ArgType.argTypeStr_idx[s]

    def compareTo(self, z):
        '''
        Compare with another ArgType by dependency label, then relation-type
        index, then second dependency label.

        Returns None when either dependency label is None; otherwise a
        negative number, zero or a positive number. Labels are compared
        lexicographically. When this type has a second label and z has none,
        the result is -1.
        '''
        # ArgType defines no getDep()/getDep2() accessors, so read the
        # attributes directly.
        if self._dep is None or z._dep is None:
            return None

        result = (self._dep > z._dep) - (self._dep < z._dep)

        if result == 0:
            result = self._relTypeIdx - z._relTypeIdx

        if result == 0 and self._dep2 is not None:
            if z._dep2 is None:
                result = -1
            else:
                result = (self._dep2 > z._dep2) - (self._dep2 < z._dep2)

        return result

    def equals(self, o):
        return self.compareTo(o) == 0

    def toString(self):
        '''
        Return '<dep>' or, when a relation-type index is set,
        '<dep:relTypeIdx:dep2>'. The string is cached after the first call.
        '''
        if self._str is None:
            self._str = '<' + self._dep

            if self._relTypeIdx >= 0:
                self._str += ':{}:{}'.format(self._relTypeIdx, self._dep2)

            self._str += '>'

        return self._str
87 |
88 |
89 |
90 |
91 |
--------------------------------------------------------------------------------
/syntax/Relations/src/Path.py:
--------------------------------------------------------------------------------
1 |
2 | from . import RelType
3 |
class Path(object):
    '''
    Holds a dependency label plus an optional tree root, argument node and
    second dependency label, and renders them as a '<...>' type string that
    is cached after the first toString() call.
    '''
    def __init__(self, dep, treeRoot=None, argNode=None, dep2=None):
        self._str = None
        self._argTypeIdx = -1
        self._dep, self._dep2 = dep, dep2
        self._treeRoot, self._argNode = treeRoot, argNode

    def getDep(self):
        return self._dep

    def getTreeRoot(self):
        return self._treeRoot

    def getArgNode(self):
        return self._argNode

    def getDep2(self):
        return self._dep2

    def getArgType(self):
        # Returns the stored index; nothing in this class changes it from -1.
        return self._argTypeIdx

    def toString(self):
        '''
        Return the type string, generating it on first use.
        '''
        cached = self._str

        if cached is None:
            cached = self._str = self.genTypeStr()

        return cached

    def genTypeStr(self):
        '''
        Build '<dep>' when there is no tree root, otherwise
        '<dep:REL:dep2>' where REL is RelType.genTypeStr of the root.
        '''
        root = self._treeRoot

        if root is None:
            return '<' + self._dep + '>'

        return ('<' + self._dep + ':' + RelType.genTypeStr(root) + ':' +
                self._dep2 + '>')
44 |
45 |
--------------------------------------------------------------------------------
/syntax/Relations/src/RelType.py:
--------------------------------------------------------------------------------
1 |
2 | from . import Token, TreeNode
3 |
class RelType(object):
    '''
    A relation type identified by the bracketed string form of a tree.
    Created types are kept in a class-level registry and addressed by their
    position in it.
    '''
    # Every RelType created so far; a type's index is its position here.
    _relTypes = []
    # Dictionary mapping {type string: index into _relTypes}
    _relTypeStr_idx = {}

    def __init__(self):
        self._str = None
        self._type = ''

    def getType(self):
        return self._type

    @staticmethod
    def getRelType(target):
        '''
        Look up or register a relation type.

        target is None -> returns None.
        target is int  -> returns the RelType stored at that index.
        otherwise      -> target is treated as a tree node; returns the
                          index of the RelType for its type string, creating
                          and registering it on first sight. New types get
                          type 'C' when the node's token reports isContent(),
                          else 'N'.
        '''
        if target is None:
            return None

        if isinstance(target, int):
            return RelType._relTypes[target]

        s = RelType.genTypeStr(target)

        if s not in RelType._relTypeStr_idx:
            t = RelType()
            t._str = s
            t._type = 'C' if target.getToken().isContent() else 'N'

            RelType._relTypes.append(t)
            RelType._relTypeStr_idx[s] = len(RelType._relTypes) - 1

        return RelType._relTypeStr_idx[s]

    @staticmethod
    def genTypeStr(tn):
        '''
        Return the bracketed string for the tree rooted at tn, of the shape
        '(token (dep child child) (dep child))'.
        '''
        parts = ['(', tn.getToken().toString()]

        for dep, nodes in tn.getChildren().items():
            parts.append(' (' + dep)

            for node in nodes:
                # Recurse through the class: a bare genTypeStr is not in
                # scope inside the method body.
                parts.append(' ' + RelType.genTypeStr(node))

            parts.append(')')

        parts.append(')')

        return ''.join(parts)

    def compareTo(self, z):
        '''
        Compare type strings lexicographically; returns -1, 0 or 1.
        '''
        other = z.toString()

        return (self._str > other) - (self._str < other)

    def equals(self, o):
        return self.compareTo(o) == 0

    def toString(self):
        return self._str
70 |
71 |
--------------------------------------------------------------------------------
/syntax/StanfordParseReader.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 |
4 | from .Nodes import Article, Sentence, Token
5 |
6 |
class StanfordParseReader(object):
    '''
    Replicates StanfordParseReader.java from USP implementation found at
    http://alchemy.cs.washington.edu/usp/

    Given a set of dependency, POS, and morphology files parsed from source
    documents, this compiles lists of tokens and dictionary mappings defining
    the dependency relationships in the sentences in a given document.
    '''
    def __init__(self):
        self._isDebug = False
        # Relations skipped by readDeps() when ignoreDep is true. "num",
        # "number", "quantmod" and "parataxis" are deliberately not listed.
        self._ignored_deps = {
            "aux", "auxpass", "det", "cop", "complm",
            "preconj", "predet", "punct",
            "expl", "mark",
        }

    def readParse(self, fileName, data_dir, ignoreDep=True):
        '''
        Given a filename of the type "$FILENAME.dep" gets the file and
        corresponding *.morph and *.input files and reads the Tokens and
        Dependency relationships by sentence in those files. Each file in such
        a trio should contain the same number of sentences represented as
        blocks of text with each dependency/token on its own line, separated
        by blank lines.

        fileName:  name of the dependency file inside data_dir.
        data_dir:  directory holding the three files.
        ignoreDep: when true, relations in the ignored set are dropped.
        Returns the populated Article.
        '''
        stem = os.path.splitext(fileName)[0]
        morph_file = os.path.join(data_dir, stem + '.morph')
        input_file = os.path.join(data_dir, stem + '.input')
        dep_file = os.path.join(data_dir, fileName)

        doc = Article(stem)
        doc = self.readTokens(doc, morph_file, input_file)
        doc = self.readDeps(doc, dep_file, ignoreDep)

        return doc

    def readTokens(self, doc, morph_file, input_file):
        '''
        Reads a morphology and input (POS tagged lemmas) file in lockstep,
        parsing one token per line into a Token() object and appending each
        Token to its Sentence(), which are collected in the Article() "doc".

        Each input line is split on '_': the first field is the word form and
        the second the POS tag. The lemma is the matching morph line,
        lower-cased with ':' replaced by '.'. A blank input line starts a new
        sentence, and every sentence begins with a ROOT token.
        '''
        isNew = True

        with open(morph_file, 'r') as mor, open(input_file, 'r') as inp:
            for mline in mor:
                mline = mline.strip()
                iline = inp.readline().strip()

                if iline == '':
                    isNew = True
                    continue

                ts = iline.split('_')

                if isNew:
                    sent = Sentence()
                    sent.add_token(Token('ROOT', 'ROOT'))
                    doc.sentences.append(sent)
                    isNew = False

                pos = ts[1]
                lemma = mline.replace(':', '.').lower()
                # The form is the word field, not the first character of the
                # raw line.
                form = ts[0]

                doc.sentences[-1].add_token(Token(pos, lemma, form))

        return doc

    @staticmethod
    def _attachRoots(sent, roots):
        '''
        Link every token index in roots to the ROOT token (index 0) of sent.
        '''
        dep_chds = sent.get_children(0)

        for i in roots:
            dep_chds.add((i, 'ROOT'))
            sent.set_parent(i, ('ROOT', 0))

        sent.set_children(0, dep_chds)

    def readDeps(self, doc, deps_file, ignoreDep):
        '''
        Reads a dependency relationships file and adds these relationships to
        their respective Sentence() objects in an Article() in the form of
        reciprocal python dictionaries. The updated Article() "doc" is then
        returned.

        Lines have the shape "rel(gov-i, dep-j)"; runs of blank lines separate
        sentences. Governors that never appear as a dependent within a
        sentence are attached to that sentence's ROOT token.
        '''
        blank = False
        senId = 0

        currSent = doc.sentences[senId]
        currNonRoots = set()
        currRoots = set()

        with open(deps_file, 'r') as d:
            for line in d:
                line = line.strip()

                if len(line) == 0:
                    # Finish the current sentence BEFORE advancing the index.
                    # The sentence is updated in place, so it is not written
                    # back into doc.sentences; doing that after the increment
                    # overwrote the next sentence and ran off the end of the
                    # list on a trailing blank line.
                    if currRoots is not None:
                        self._attachRoots(currSent, currRoots)

                    if not blank:
                        senId += 1

                    blank = True
                    currSent = None
                    currNonRoots = None
                    currRoots = None

                    continue

                if blank:
                    blank = False
                    currSent = doc.sentences[senId]
                    currNonRoots = set()
                    currRoots = set()

                rel = line[:line.index("(")]
                items = line[line.index('('):].replace('(', '').replace(')', '')
                items = items.split(', ')
                gov, dep = items[0], items[1]
                # "word-7" -> (7, "word")
                gov = (int(gov[gov.rfind('-')+1:]), gov[:gov.rfind('-')])
                dep = (int(dep[dep.rfind('-')+1:]), dep[:dep.rfind('-')])

                # Skip self-loops unless the relation is a conjunction.
                if ('conj' not in rel) and (gov[0] == dep[0]):
                    continue

                currNonRoots.add(dep[0])
                if dep[0] in currRoots:
                    currRoots.remove(dep[0])
                if gov[0] not in currNonRoots:
                    currRoots.add(gov[0])

                # Ignored relations still take part in root detection above.
                if ignoreDep and (rel in self._ignored_deps):
                    continue

                currSent.set_parent(dep[0], (rel, gov[0]))

                if gov[0] not in currSent.get_children():
                    currSent.set_children(gov[0], set())
                currSent.add_child(gov[0], (dep[0], rel))

        # The file may end without a closing blank line.
        if currRoots is not None:
            self._attachRoots(currSent, currRoots)

        return doc
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
--------------------------------------------------------------------------------
/syntax/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 |
4 | from .Nodes import Article, Sentence, Token
5 |
6 |
class StanfordParseReader(object):
    '''
    Replicates StanfordParseReader.java from USP implementation found at
    http://alchemy.cs.washington.edu/usp/

    Given a set of dependency, POS, and morphology files parsed from source
    documents, this compiles lists of tokens and dictionary mappings defining
    the dependency relationships in the sentences in a given document.
    '''
    def __init__(self):
        self._isDebug = False
        # Relations skipped by readDeps() when ignoreDep is true. "num",
        # "number", "quantmod" and "parataxis" are deliberately not listed.
        self._ignored_deps = {
            "aux", "auxpass", "det", "cop", "complm",
            "preconj", "predet", "punct",
            "expl", "mark",
        }

    def readParse(self, fileName, data_dir, ignoreDep=True):
        '''
        Given a filename of the type "$FILENAME.dep" gets the file and
        corresponding *.morph and *.input files and reads the Tokens and
        Dependency relationships by sentence in those files. Each file in such
        a trio should contain the same number of sentences represented as
        blocks of text with each dependency/token on its own line, separated
        by blank lines.

        fileName:  name of the dependency file inside data_dir.
        data_dir:  directory holding the three files.
        ignoreDep: when true, relations in the ignored set are dropped.
        Returns the populated Article.
        '''
        stem = os.path.splitext(fileName)[0]
        morph_file = os.path.join(data_dir, stem + '.morph')
        input_file = os.path.join(data_dir, stem + '.input')
        dep_file = os.path.join(data_dir, fileName)

        doc = Article(stem)
        doc = self.readTokens(doc, morph_file, input_file)
        doc = self.readDeps(doc, dep_file, ignoreDep)

        return doc

    def readTokens(self, doc, morph_file, input_file):
        '''
        Reads a morphology and input (POS tagged lemmas) file in lockstep,
        parsing one token per line into a Token() object and appending each
        Token to its Sentence(), which are collected in the Article() "doc".

        Each input line is split on '_': the first field is the word form and
        the second the POS tag. The lemma is the matching morph line,
        lower-cased with ':' replaced by '.'. A blank input line starts a new
        sentence, and every sentence begins with a ROOT token.
        '''
        isNew = True

        with open(morph_file, 'r') as mor, open(input_file, 'r') as inp:
            for mline in mor:
                mline = mline.strip()
                iline = inp.readline().strip()

                if iline == '':
                    isNew = True
                    continue

                ts = iline.split('_')

                if isNew:
                    sent = Sentence()
                    sent.add_token(Token('ROOT', 'ROOT'))
                    doc.sentences.append(sent)
                    isNew = False

                pos = ts[1]
                lemma = mline.replace(':', '.').lower()
                # The form is the word field, not the first character of the
                # raw line.
                form = ts[0]

                doc.sentences[-1].add_token(Token(pos, lemma, form))

        return doc

    @staticmethod
    def _attachRoots(sent, roots):
        '''
        Link every token index in roots to the ROOT token (index 0) of sent.
        '''
        dep_chds = sent.get_children(0)

        for i in roots:
            dep_chds.add((i, 'ROOT'))
            sent.set_parent(i, ('ROOT', 0))

        sent.set_children(0, dep_chds)

    def readDeps(self, doc, deps_file, ignoreDep):
        '''
        Reads a dependency relationships file and adds these relationships to
        their respective Sentence() objects in an Article() in the form of
        reciprocal python dictionaries. The updated Article() "doc" is then
        returned.

        Lines have the shape "rel(gov-i, dep-j)"; runs of blank lines separate
        sentences. Governors that never appear as a dependent within a
        sentence are attached to that sentence's ROOT token.
        '''
        blank = False
        senId = 0

        currSent = doc.sentences[senId]
        currNonRoots = set()
        currRoots = set()

        with open(deps_file, 'r') as d:
            for line in d:
                line = line.strip()

                if len(line) == 0:
                    # Finish the current sentence BEFORE advancing the index.
                    # The sentence is updated in place, so it is not written
                    # back into doc.sentences; doing that after the increment
                    # overwrote the next sentence and ran off the end of the
                    # list on a trailing blank line.
                    if currRoots is not None:
                        self._attachRoots(currSent, currRoots)

                    if not blank:
                        senId += 1

                    blank = True
                    currSent = None
                    currNonRoots = None
                    currRoots = None

                    continue

                if blank:
                    blank = False
                    currSent = doc.sentences[senId]
                    currNonRoots = set()
                    currRoots = set()

                rel = line[:line.index("(")]
                items = line[line.index('('):].replace('(', '').replace(')', '')
                items = items.split(', ')
                gov, dep = items[0], items[1]
                # "word-7" -> (7, "word")
                gov = (int(gov[gov.rfind('-')+1:]), gov[:gov.rfind('-')])
                dep = (int(dep[dep.rfind('-')+1:]), dep[:dep.rfind('-')])

                # Skip self-loops unless the relation is a conjunction.
                if ('conj' not in rel) and (gov[0] == dep[0]):
                    continue

                currNonRoots.add(dep[0])
                if dep[0] in currRoots:
                    currRoots.remove(dep[0])
                if gov[0] not in currNonRoots:
                    currRoots.add(gov[0])

                # Ignored relations still take part in root detection above.
                if ignoreDep and (rel in self._ignored_deps):
                    continue

                currSent.set_parent(dep[0], (rel, gov[0]))

                if gov[0] not in currSent.get_children():
                    currSent.set_children(gov[0], set())
                currSent.add_child(gov[0], (dep[0], rel))

        # The file may end without a closing blank line.
        if currRoots is not None:
            self._attachRoots(currSent, currRoots)

        return doc
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
--------------------------------------------------------------------------------
/syntax/__pycache__/Article.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/__pycache__/Article.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/__pycache__/Path.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/__pycache__/Path.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/__pycache__/RelType.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/__pycache__/RelType.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/__pycache__/Sentence.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/__pycache__/Sentence.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/__pycache__/StanfordParseReader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/__pycache__/StanfordParseReader.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/__pycache__/Token.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/__pycache__/Token.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/__pycache__/TreeNode.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/__pycache__/TreeNode.cpython-36.pyc
--------------------------------------------------------------------------------
/syntax/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/syntax/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/utils/Utils.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | #
4 | # Utility functions for pymln parsing
5 | #
6 |
7 |
def genTreeNodeID(aid, sid, wid):
    '''
    Builds a node identifier by converting the three given values to
    strings and joining them with ':' in the order aid, sid, wid.
    (Presumably article, sentence and word ids -- confirm with callers.)
    '''
    parts = (aid, sid, wid)

    return ':'.join(map(str, parts))
12 |
13 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/utils/__init__.py
--------------------------------------------------------------------------------
/utils/__pycache__/Utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/utils/__pycache__/Utils.cpython-36.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GallupGovt/pymln/d904fcd4f36c8c2409924e1bd4cc2b07242ae8cb/utils/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------