├── README.md
├── epimitheus_v1.py
├── epimitheus_v2.py
├── images
    ├── EpimitheusNeo4j.png
    ├── addEventIDs.png
    ├── blackListedObjects.png
    ├── filename.md
    └── windowsDefender.png
└── minidom
    ├── README.md
    ├── expatbuilder.py
    ├── expatbuilderFixed.png
    └── expatbuilderUnFixed.png


/README.md:
--------------------------------------------------------------------------------
 1 | # Epimitheus
 2 | Epimitheus is a Python tool that uses the Neo4j graph database to visualize Windows Events. The job of "epimitheus" is to read the exported Windows Events (including Sysmon) in XML form, create a new XML with the correct Event properties, and import it into Neo4j.
 3 | 
 4 | 
 5 | #### Import Windows Events to Neo4j
 6 | python3 epimitheus.py -i "bolt://localhost" -u "neo4j" -p "<password>" -x "Windows_Events.xml" -o "output.xml"
 7 | 
 8 | #### Import Windows Events/Sysmon to Neo4j
 9 | python3 epimitheus.py -i "bolt://localhost" -u "neo4j" -p "<password>" -x "Windows_Events.xml" -o "output.xml" -s
10 |   
11 | #### Delete data from Neo4j
12 | python3 epimitheus.py -i "bolt://localhost" -u "neo4j" -p "<password>" -D
13 | 
14 | #### Adding Events missing EventIDs
15 | 
16 | ![alt text](https://github.com/tasox/Epimitheus/blob/master/images/addEventIDs.png)
17 | 
18 | 
19 | ### Neo4j Queries - Examples
20 | More Neo4j queries are coming ...
21 | 
22 | #### RDP Connections (Sysmon and Windows Events)
23 | 
24 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.LogonType = '10' AND c.EventID='4624' RETURN p
25 | 
26 | #### Pass-The-Hash
27 | 
28 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.LogonProcessName = 'NtLmSsp ' AND NOT c.TargetUserName IN ['ANONYMOUS LOGON'] RETURN p
29 | 
30 | #### Runas (Potential)
31 | 
32 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.LogonType = '2' AND c.LogonProcessName = "seclogo" RETURN p
33 | 
34 | #### Lateral Movement - Pass-The-Hash /w Mimikatz
35 | 
36 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID IN ["4624","4672"] AND c.LogonType = "9" AND c.LogonProcessName = "seclogo" RETURN p
37 | 
38 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID IN ["4624","4672"] AND c.LogonType = "9" AND c.LogonProcessName = "seclogo" AND c.TargetLogonId=c.SubjectLogonId RETURN c.EventID,c.remoteHost,c.targetUser,c.TargetLogonId,c.targetServer,c.PrivilegeList,c.SystemTime
39 | 
40 | MATCH (c:Event),(d:Event) WHERE c.EventID = "4672" AND d.EventID="4688" AND c.SystemTime=d.SystemTime RETURN c.targetUser,d.SubjectUserName,d.targetServer,d.NewProcessName,d.TokenElevationType
41 | 
42 | MATCH (c:Event),(d:Event) WHERE c.EventID="4672" AND d.EventID="4688" AND c.SystemTime=d.SystemTime WITH [(c.EventID),(c.targetUser),(c.remoteHost),(c.SystemTime)] as Event4672,[(d.EventID),(d.targetUser),(d.remoteHost),(d.SystemTime)] as Event4688 RETURN Event4672,Event4688
43 | 
44 | 
45 | #### Memory dump (procdump)
46 | 
47 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID="10" AND c.TargetImage =~ ".*lsass.*" RETURN p - Sysmon 
48 | 
49 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) RETURN collect(c.TargetFilename)  - Sysmon
50 | 
51 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID="10" AND c.TargetImage="C:\\Windows\\system32\\lsass.exe" RETURN p - Sysmon
52 | 
53 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID="10" AND c.TargetImage="C:\\Windows\\system32\\lsass.exe" RETURN c.EventRecordID,c.targetUser, c.SourceImage,c.TargetImage,c.TargetFilename
54 | 
55 | #### Windows Defender
56 | 
57 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID = '1116' RETURN c.Path
58 | 
59 | #### PowerShell
60 | 
61 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.HostApplication =~ ".*Power.*" RETURN p LIMIT 10
62 | 
63 | #### Defense Evasion - PS Script Block Logging
64 | 
65 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.TargetObject="HKLM\\SOFTWARE\\Wow6432Node\\Policies\\Microsoft\\Windows\\PowerShell\\ScriptBlockLogging\\EnableScriptBlockLogging" RETURN p
66 | 
67 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.TargetObject="HKLM\\SOFTWARE\\Wow6432Node\\Policies\\Microsoft\\Windows\\PowerShell\\ScriptBlockLogging\\EnableScriptBlockLogging" RETURN c.EventID,c.targetUser,c.EventType,c.Details,c.targetServer,c.TargetObject
68 | 
69 | #### Defense Evasion - PPID Spoofing
70 | 
71 | MATCH (c:Event),(d:Event) WHERE c.EventID = "10" AND d.EventID ="1" AND c.TargetProcessId = d.ProcessId RETURN c.EventRecordID,c.targetUser, c.SourceImage,c.SourceProcessId,c.TargetProcessId,d.Image,d.targetUser
72 | 
73 | #### References
74 | https://medium.com/@pentesttas/windows-events-sysmon-visualization-using-neo4j-part-1-529ca5ab4593
75 | 
76 | https://medium.com/@pentesttas/windows-events-sysmon-visualization-using-neo4j-part-2-d4c2fd3c9413
77 | 


--------------------------------------------------------------------------------
/epimitheus_v1.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | from xml.dom import minidom
  4 | from xml.dom.minidom import Document
  5 | from neo4j import GraphDatabase, basic_auth
  6 | import os,sys,datetime,time,re, subprocess
  7 | import multiprocessing
  8 | from multiprocessing import Process,Lock
  9 | import argparse
 10 | 
 11 | 
 12 | 
 13 | def isDomain():
 14 | 
 15 |     #Url: https://github.com/zakird/pyad/blob/master/pyad/adbase.py
 16 |     #Lines: 11-42 - adbase.py
 17 |     #Fix the code: @taso_x
 18 |     if sys.platform != 'win32':
 19 |         raise Exception("Must be running Windows.")
 20 | 
 21 |     else:
 22 |         try:
 23 |             import win32api
 24 |             import pywintypes
 25 |             import win32com.client
 26 |             import win32security
 27 |         except ImportError:
 28 |             raise Exception("pywin32 library required. Download from http://sourceforge.net/projects/pywin32/")
 29 | 
 30 | 
 31 |         _adsi_provider = win32com.client.Dispatch('ADsNameSpaces')
 32 | 
 33 |         try:
 34 |             # Discover default domain and forest information
 35 |             __default_domain_obj = _adsi_provider.GetObject('', "LDAP://rootDSE")
 36 |             # connecting to rootDSE will connect to the domain that the
 37 |             # current logged-in user belongs to.. which is generally the
 38 |             # domain under question and therefore becomes the default domain.
 39 |             _default_detected_forest = __default_domain_obj.Get("rootDomainNamingContext")
 40 |             _default_detected_domain = __default_domain_obj.Get("defaultNamingContext")
 41 |             if(_default_detected_domain):
 42 |                 print("[+] Domain Found: "+_default_detected_domain)
 43 |             if(_default_detected_forest):
 44 |                 print("[+] Forest Found: "+_default_detected_forest)
 45 |             return True
 46 | 
 47 |         except:
 48 |             # If there was an error, this this computer might not be on a domain.
 49 |             __default_domain_obj = "None"
 50 |             _default_detected_forest = "None"
 51 |             _default_detected_domain = "None"
 52 |             print("[-] Couldn't connect with LDAP Server!")
 53 |             print("\r\n")
 54 |             return False
 55 | 
 56 | 
 57 | def sid2name(sid):
 58 |     dom = win32com.client.GetObject("LDAP://rootDSE").Get("defaultNamingContext")
 59 |     conn = win32com.client.Dispatch('ADODB.Connection')
 60 |     conn.Open("Provider=ADSDSOObject")
 61 |     query = "<LDAP://"+dom+">;(&(objectClass=*)(objectSid="+sid+"));sAMAccountName"
 62 |     record_set = conn.Execute(query)[0]
 63 |     targetUser=record_set.Fields("sAMAccountName").value
 64 |     return(targetUser)
 65 | 
 66 | 
 67 | 
 68 | def regEx(string):
 69 | 
 70 |     dotCounter=0
 71 |     if("@" in str(string)):
 72 |         s = re.findall("^\w+[^@]",str(string))[0]
 73 |     elif("\\" in str(string)):
 74 |         s = re.findall("[^\\\]*$",str(string))[0]
 75 |     elif("." in str(string)):
 76 |         for num,chr in enumerate(string):
 77 |             if chr == ".":
 78 |                 dotCounter=dotCounter+1
 79 |         if dotCounter == 1: #Example username.lastname or desktop111-maria.domain.com
 80 |             s = re.findall("^.\w+[.|-]\w+",string)[0]  #Result: username.lastname or dektop111-maria
 81 |         elif dotCounter == 2: #Example username.domain.com
 82 |             s = re.findall("^\w+",string)[0] #Result: username
 83 |         elif dotCounter >= 3: #Example username.lastname.domain.com
 84 |             s = re.findall("^\w+[.-]\w+",string)[0] #Result: username.lastname
 85 |         else:
 86 |             s = str(string)
 87 | 
 88 |     else:
 89 |         s = str(string)
 90 |     return(s.upper())
 91 | 
 92 | def regExIP(ip):
 93 |     ipAddress=str(ip)
 94 |     if(ipAddress.startswith(':')): #Example: ::ffff:192.168.100.50
 95 |         s = re.findall("\w+[.].*",ipAddress)[0] #Result: 192.168.100.50
 96 |     else:
 97 |         s = ipAddress
 98 | 
 99 |     return(s)
100 | 
def neo4jConn(neo4jUri,neo4jUser,neo4jPass):
    """Create a Neo4j driver for the given URI using basic authentication.

    Returns the driver object. If creating it raises, the error is printed
    with a ``[-]`` prefix and the process exits with status 1.
    """
    try:
        credentials = basic_auth(user=neo4jUser, password=neo4jPass)
        connection = GraphDatabase.driver(neo4jUri, auth=credentials)
    except Exception as failure:
        print("[-] %s" % failure)
        sys.exit(1)
    else:
        return connection
110 | 
111 | 
def eventParser(eventIDs):
    """Flatten the parsed Windows Events XML into per-event property lists.

    Reads the module-level ``rootDoc`` (the document element of the exported
    XML) and walks three levels down: event -> section -> property element.
    Each property element yields one single-entry dict:

    * element without attributes: ``{tag name: text}``
    * ``<Data>`` element:         ``{value of its last attribute: text}``
    * any other element:          ``{name of last attribute: its value}``

    ``text`` is the ``nodeValue`` of the element's first child, or None when
    the element is empty. Nodes that are not elements (whitespace text,
    comments) are skipped.

    Args:
        eventIDs: event IDs to keep; returned unchanged as the first item.

    Returns:
        A 5-tuple ``(eventIDs, localhostIPs, blacklistedUsers,
        blacklistedShareFolders, input_list)`` where ``input_list`` maps the
        1-based position of each event among ALL child nodes of ``rootDoc``
        (whitespace text nodes are counted too) to its list of property dicts.
    """
    input_list = {}

    try:
        for counter, eventNode in enumerate(rootDoc.childNodes, start=1):
            for sectionNode in eventNode.childNodes:
                for propNode in sectionNode.childNodes:
                    # Only elements carry attributes; text/comment nodes are
                    # skipped explicitly instead of via a swallowed exception.
                    if propNode.nodeType != propNode.ELEMENT_NODE:
                        continue

                    firstChild = propNode.firstChild
                    text = firstChild.nodeValue if firstChild is not None else None
                    attrs = list(propNode.attributes.items())

                    if not attrs:
                        key, value = propNode.nodeName, text
                    else:
                        # When several attributes exist only the last one is
                        # kept (same result as the previous loop over attrs).
                        attrName, attrValue = attrs[-1]
                        if propNode.nodeName != 'Data':
                            key, value = attrName, attrValue
                        else:
                            key, value = attrValue, text

                    input_list.setdefault(counter, []).append({key: value})
    except Exception as e:
        # Best effort: report the problem and return what was collected.
        print(e)

    filterEvents = eventIDs
    localhostIPs=["","-","::1","127.0.0.1","localhost"]
    blacklistedUsers=["DWM-3","UMFD-3","UMFD-2","DWM-2","UMFD-0","UMFD-1","DWM-1"]
    blacklistedShareFolders=["\\\\*\\SYSVOL","\\\\*\\IPC$"]

    return (filterEvents, localhostIPs, blacklistedUsers, blacklistedShareFolders, input_list)
182 | 
# (label inside the rendered <Message> text, property name stored on the event)
_MESSAGE_FIELDS = (
    ("Error Code", "ErrorCode"),
    ("Impersonation Level", "ImpersonationLevelTranslate"),
    ("Restricted Admin Mode", "RestrictedAdminMode"),
    ("Virtual Account", "VirtualAccount"),
    ("Elevated Token", "ElevatedToken"),
)


def _contextInfoField(label, contextInfo, default="-"):
    """Return the text after ``<label> = `` in a ContextInfo blob, or ``default``."""
    try:
        return re.findall(label + r' = \w+.*', contextInfo)[0].split("= ")[1]
    except (IndexError, TypeError):
        # No such field, or ContextInfo missing altogether.
        return default


def _fillSysmonEvent(t, bListedUsers):
    """Add targetUser/remoteHost/targetServer/name to a Sysmon event.

    Returns False when the event's user is blacklisted (event is discarded).
    """
    targetUser = t.get("User") or t.get("UserID") or t.get("SubjectUserName")
    if not targetUser:
        targetUser = "None"
        print("[-] Event ID %s with Record ID %s does not have a targetUser." % (t.get("EventID"),t.get("EventRecordID")))

    remoteHost = t.get("SourceIp") or t.get("SourceHostname") or t.get("Computer")
    targetServer = t.get("DestinationIp") or t.get("Computer")

    if targetUser in bListedUsers:
        print("[-] Event ID %s with Record ID %s discarded because the TargetUser %s is into the bListedUsers list." % (t.get("EventID"),t.get("EventRecordID"),targetUser))
        return False

    t.update({'targetUser':targetUser})
    t.update({'remoteHost':remoteHost})
    t.update({'targetServer':targetServer})
    # Every Event node gets a "name" (its EventID); Neo4j uses it to label the
    # node and it is also used in relationships.
    t.update({'name':t.get("EventID")})
    return True


def _fillWindowsEvent(t, lhostIPs, bListedUsers):
    """Add targetUser/remoteHost/targetServer/name (and <Message> extras) to a
    Windows (non-Sysmon) event.

    Returns False when the event's user is blacklisted (event is discarded).
    """
    # Default so that targetUser is always bound, even when a SID lookup below
    # is skipped or fails.
    targetUser = "NULL"
    if t.get("TargetUserName"):
        targetUser = t.get("TargetUserName")
    # NOTE(review): the chain below starts with "if", not "elif", so it
    # overrides TargetUserName whenever SubjectUserName, "Detection User" or
    # Computer is present. Kept as-is; confirm which field should win.
    if t.get("SubjectUserName"):
        targetUser = t.get("SubjectUserName")
    elif t.get("Detection User"):
        targetUser = t.get("Detection User")
    elif t.get("Computer"):
        targetUser = t.get("Computer")
    elif (t.get("EventID") not in ["4103","4104"]) and t.get("UserID"):
        sid = t.get("UserID")
        try:
            if (checkdom):
                # After converting sid->username check if user is blacklisted.
                resolved = sid2name(sid)
                targetUser = resolved if resolved not in bListedUsers else sid
        except Exception as e:
            print(e)
    elif t.get("EventID") in ["4103"]:
        # PowerShell events don't have a target user; parse it (and a few
        # other fields) out of the ContextInfo text.
        f = t.get("ContextInfo")
        contextUser = _contextInfoField("User", f, default="")
        if contextUser:
            targetUser = regEx(contextUser)
        t.update({'HostApplication':_contextInfoField("Host Application", f)})
        t.update({'ScriptName':_contextInfoField("Script Name", f)})
        t.update({'CommandPath':_contextInfoField("Command Path", f)})
        t.update({'SequenceNumber':_contextInfoField("Sequence Number", f)})
        t.update({'Severity':_contextInfoField("Severity", f)})
    elif t.get("EventID") in ["4104"]:
        sid = t.get("UserID")
        try:
            if (checkdom):
                # After converting sid->username check if user is blacklisted.
                resolved = sid2name(sid)
                if resolved not in bListedUsers:
                    targetUser = resolved
            else:
                targetUser = sid
        except Exception as e:
            print(e)
    else:
        targetUser = "NULL"
        print("[+] Event ID: "+str(t.get("EventID"))+" with Record ID: "+str(t.get("EventRecordID"))+" does not have targetUser tag!")

    # Remote host: prefer IpAddress; when it is a localhost value fall back to
    # Workstation, then Computer; without IpAddress use Computer.
    if t.get("IpAddress") and (t.get("IpAddress") in lhostIPs):
        if t.get("Workstation") and (t.get("Workstation") not in lhostIPs):
            t.update({'remoteHost':regExIP(t.get("Workstation"))})
        elif t.get("Computer") and (t.get("Computer") not in lhostIPs):
            t.update({'remoteHost':regExIP(t.get("Computer"))})
        else:
            print("[-] Event ID %s with Record ID %s does not have a remoteHost." % (t.get("EventID"),t.get("EventRecordID")))
    elif t.get("IpAddress"):
        t.update({'remoteHost':regExIP(t.get("IpAddress"))})
    else:
        t.update({'remoteHost':regExIP(t.get("Computer"))})

    t.update({'targetServer':regEx(t.get("Computer"))})
    t.update({'name':t.get("EventID")})

    # Pull selected "Label: value" pairs out of the rendered <Message> text.
    message = t.get("Message")
    if message:
        for label, propertyName in _MESSAGE_FIELDS:
            found = re.search(label + r':\s+[\w+-]*', message)
            if found:
                t.update({propertyName:found.group(0).split(":")[1].strip()})

    if targetUser in bListedUsers:
        print("[-] Event ID %s with Record ID %s discarded because the TargetUser %s is into the bListedUsers list." % (t.get("EventID"),t.get("EventRecordID"),targetUser))
        return False

    t.update({'targetUser':regEx(targetUser)})
    return True


def createXML(evIDs,lhostIPs,bListedUsers,bListedShareFolders,eventList,sysmonFile,outXMLFile):
    """Write the filtered, enriched events to ``outXMLFile`` as XML.

    For every event in ``eventList`` whose ``EventID`` is in ``evIDs`` the
    properties are merged into one dict, enriched with ``targetUser``,
    ``remoteHost``, ``targetServer`` and ``name``, and written as an
    ``<Event>`` element under a single ``<Events>`` root. The ``Message``
    property is not written; spaces are removed from tag names.

    Each event is built from a fresh dict, so properties of one event never
    carry over into the next. Events whose user is in ``bListedUsers`` are
    skipped entirely (no empty ``<Event/>`` element is written).

    Args:
        evIDs: event IDs to keep.
        lhostIPs: values treated as "localhost" when choosing the remote host.
        bListedUsers: user names whose events are discarded.
        bListedShareFolders: accepted for interface compatibility; not used.
        eventList: mapping of event key -> list of single-entry property dicts.
        sysmonFile: truthy when the input has the Sysmon structure.
        outXMLFile: path of the XML file to create (truncated on open).
    """
    # Open first (as before) so an unwritable path fails before any work is
    # done; the with-block guarantees the handle is closed on every path.
    with open(outXMLFile,"w") as file_handle:
        doc = Document()
        root = doc.createElement('Events')
        doc.appendChild(root)

        print("[+] Searching for TargetUsers, RemoteHosts, TargetHosts ...")
        for value in eventList.values():
            t = {}
            for eventValues in value:
                t.update(eventValues)
            if t.get("EventID") not in evIDs:
                continue

            if sysmonFile:
                keep = _fillSysmonEvent(t, bListedUsers)
            else:
                keep = _fillWindowsEvent(t, lhostIPs, bListedUsers)
            if not keep:
                continue

            createTagEvent = doc.createElement("Event")
            root.appendChild(createTagEvent)
            for tagName, tagValue in t.items():
                if tagName == "Message":
                    # <Message> is dropped from the output: too much info.
                    continue
                # Remove spaces from the tag name, e.g. "Product Name" -> ProductName.
                createTag = doc.createElement(str(tagName).replace(" ",""))
                createTag.appendChild(doc.createTextNode(str(tagValue).replace("«","")))
                createTagEvent.appendChild(createTag)

        print("[+] Creating XML for neo4j...")
        doc.writexml(file_handle)
405 | 
def neo4jXML(outXMLFile,neo4jUri,neo4jUser,neo4jPass):
    """Load the XML written for Neo4j and import its events into the graph.

    Parses ``outXMLFile`` (exits with status 1 if that fails), turns every
    non-empty child of the root into a property dict while leaving out the
    blacklisted property names, and then runs the import statements one by
    one, each in its own session. Errors raised while collecting or importing
    are printed; the driver is closed afterwards.
    """
    neo4jDriver=neo4jConn(neo4jUri,neo4jUser,neo4jPass)
    try:
        # Read the XML created through the -o/--out argument.
        xmlRoot = minidom.parse(outXMLFile).documentElement
    except Exception as parseError:
        print(parseError)
        sys.exit(1)

    # Property names that are not copied onto the Event nodes.
    blackListedEventProperties=["Opcode","Keywords","Version","Level","TransmittedServices","KeyLength","LmPackageName","Key Length","Message","LogonGuid","ThreadID","TargetLogonGuid","SubjectDomainName","Guid","Provider","VirtualAccount","TicketEncryptionType","TicketOptions","Keywords","Level","KeyLength","CertIssuerName","CertSerialNumber","CertThumbprint","Channel","ObjectServer","PreAuth Type","ActivityID","TargetOutboundDomainName","FWLink","Unused","Unused2","Unused3","Unused4","Unused5","Unused6","OriginID","OriginName","ErrorCode","TypeID","TypeName","StatusDescription","AdditionalActionsID","SubStatus","ContextInfo","Product"]

    try:
        # One dict per <Event>, e.g. {"EventID": "4624", "targetUser": "tasos"}.
        groupEvents = []
        for eventNode in xmlRoot.childNodes:
            if not eventNode.childNodes:
                continue
            properties = {}
            for propertyNode in eventNode.childNodes:
                if propertyNode.nodeName in blackListedEventProperties:
                    continue
                for textNode in propertyNode.childNodes:
                    properties[propertyNode.nodeName] = textNode.nodeValue
            groupEvents.append(properties)

        # (progress message or None, Cypher statement, statement parameters)
        importSteps = (
            ("[+] Adding the Events ...",
             "UNWIND $events as eventPros CREATE (e:Event) SET e=eventPros MERGE (r:RemoteHosts {name:e.remoteHost}) MERGE (u:TargetUser {remoteHost: e.remoteHost,EventRecordIDs: [ ],name:e.targetUser}) MERGE (t:TargetHost {name:e.targetServer})",
             {"events": groupEvents}),
            ("[+] Event Correlation ...",
             "MATCH (u:TargetUser),(e:Event),(r:RemoteHosts),(t:TargetHost) WHERE u.name=e.targetUser AND r.name=e.remoteHost AND t.name=e.targetServer AND u.remoteHost = r.name AND NOT e.EventRecordID IN u.EventRecordIDs SET u.EventRecordIDs=u.EventRecordIDs+e.EventRecordID",
             {}),
            ("[+] Delete Dublicates ...",
             "MATCH (t:TargetUser) WITH t.name as n, t.remoteHost as r, collect(t) as dublicateTargetUser where size(dublicateTargetUser) > 1 UNWIND dublicateTargetUser[1..] AS p DETACH DELETE p",
             {}),
            ("[+] Creating the Relationships ...",
             "MATCH (r:RemoteHosts),(u:TargetUser) WHERE u.remoteHost = r.name MERGE (r)-[r1:Source2DomainUser]->(u)",
             {}),
            (None,
             "MATCH (u:TargetUser),(e:Event) WHERE e.targetUser = u.name AND e.EventRecordID IN u.EventRecordIDs MERGE (u)-[r2:TargetUser2Event]->(e)",
             {}),
            (None,
             "MATCH (t:TargetHost),(e:Event) WHERE t.name = e.targetServer MERGE (e)-[r3:Event2Destination]->(t)",
             {}),
        )
        for progress, statement, parameters in importSteps:
            if progress is not None:
                print(progress)
            with neo4jDriver.session() as session:
                session.run(statement, **parameters)

    except Exception as e:
        print(e)

    # Close the connection with Neo4j.
    neo4jDriver.close()
459 | 
460 | 
def eventCounters(neo4jUri,neo4jUser,neo4jPass):
    """Print how many nodes and relationships are stored in Neo4j.

    Counts Event, RemoteHosts, TargetHost and TargetUser nodes plus all
    relationships, prints each count, their sum, and a finish timestamp.
    All queries run in one session that is closed when done, and the driver
    is closed even if a query raises.
    """
    neo4jDriver=neo4jConn(neo4jUri,neo4jUser,neo4jPass)

    # (label used in the printed line, count query)
    countQueries = (
        ("Events", "MATCH (n:Event) RETURN count(n)"),
        ("RemoteHosts", "MATCH (n:RemoteHosts) RETURN count(n)"),
        ("TargetHosts", "MATCH (n:TargetHost) RETURN count(n)"),
        ("TargetUsers", "MATCH (n:TargetUser) RETURN count(n)"),
        ("Relationships", "MATCH p=()-->() RETURN count(p)"),
    )

    try:
        total = 0
        with neo4jDriver.session() as session:
            for label, query in countQueries:
                count = 0
                # Records are read inside the with-block, before the session
                # is closed.
                for record in session.run(query):
                    print("[+] Added "+label+":"+str(record.value()))
                    count = int(record.value())
                total += count

        print("[+] Total: "+str(total))
        print('[+] Finished: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))
    finally:
        # Close the connection with Neo4j.
        neo4jDriver.close()
508 | 
509 | 
if __name__ == '__main__':
    # Command-line entry point.
    # With -D the Neo4j database is wiped; otherwise the exported Windows
    # Event XML is cleaned, parsed, converted to the Neo4j XML layout,
    # imported and finally the node/relationship counters are printed.

    parser = argparse.ArgumentParser(description='Filter Exported XML.')
    parser.add_argument('-e','--eventID', nargs='+', default=["400","800","1102","1006","1015","1040","1042","1116","4103","4104","4105","4624","4625","4634","4648","4662","4672","4673","4688","4697","4698","4702","4713","4723","4724","4735","4737","4739","4742","4755","4765","4766","4768","4769","4776","4780","4794","4798","4964","5136","5140","5145","5156","5805","7045","8004","8007","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","255"],help='Use comma to seperate eventIDs.')
    parser.add_argument('-x', '--xml',help='Windows Events Exported XML file.')
    parser.add_argument('-o', '--out',help='Save Neo4j XML file.')
    parser.add_argument('-i','--uri',help='neo4j host. Example: bolt://localhost',required=True)
    parser.add_argument('-D','--delete',help='Delete all data from Neo4j.',action='store_true')
    parser.add_argument('-u','--user',help='neo4j username.',required=True)
    parser.add_argument('-p','--passwd',help='neo4j password.',required=True)
    parser.add_argument('-s','--sysmon',help='Sysmon structure.',action='store_true')
    args = parser.parse_args()
    eventIDs=args.eventID
    neo4jUri=args.uri
    neo4jUser=args.user
    neo4jPass=args.passwd
    xmlFile = args.xml
    sysmonFile = args.sysmon
    delData = args.delete
    outXMLFile = args.out


    if(delData):
        neo4jDriver=neo4jConn(neo4jUri,neo4jUser,neo4jPass)
        print("[+] Connecting with neo4j ...")
        print("[+] Deleting all the data ...")
        try:
            with neo4jDriver.session() as session:
                session.run("MATCH (n) DETACH DELETE n")
        finally:
            # Close the connection with Neo4j even when the query fails.
            neo4jDriver.close()

    else:
        try:
            # Read the exported XML and strip the bad leading characters that
            # the Windows Event Viewer export leaves inside the file.
            with open(xmlFile,"r") as openXMLread:
                fixChars=re.sub(r"ï»¿", r"", openXMLread.read())

            # Write the cleaned XML next to the original file.
            xmlFile=xmlFile.replace(".xml","_epimitheus.xml")
            with open(xmlFile,"w") as openXMLwrite:
                openXMLwrite.write(fixChars)

            # Module-level name on purpose: kept exactly as the original script defines it.
            rootDoc = minidom.parse(xmlFile).documentElement #Open exported XML file.

        except Exception as e:
            # Missing -x/--xml, unreadable file or malformed XML all end here.
            print(e)
            sys.exit(1)

        #Parse Windows Event XML File - Process 1
        parl=multiprocessing.Lock()
        with parl:
            print("[+] Parsing XML file ...")
            print ('[+] Parsing Started: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))
            evIDs,lhostIPs,bListedUsers,bListedShareFolders,eventList = eventParser(eventIDs,)
            print ('[+] Parsing Finished: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))


        #Create neo4j XML - Process 2
        nl = multiprocessing.Lock()
        with nl:
            cnodes = Process(target=createXML, args=(evIDs,lhostIPs,bListedUsers,bListedShareFolders,eventList,sysmonFile,outXMLFile))
            cnodes.start()
            cnodes.join()

        #Read neo4j XML - Process 3
        ml = multiprocessing.Lock()
        with ml:
            mnodes = Process(target=neo4jXML,args=(outXMLFile,neo4jUri,neo4jUser,neo4jPass))
            print("[+] Loading neo4j XML ...")
            mnodes.start()
            mnodes.join()

        #Print Counters - Process 4
        cc=multiprocessing.Lock()
        with cc:
            ccounters=Process(target=eventCounters,args=(neo4jUri,neo4jUser,neo4jPass))
            ccounters.start()
            ccounters.join()


--------------------------------------------------------------------------------
/epimitheus_v2.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/python3
   2 | 
   3 | from logging import NullHandler
   4 | from xml.dom import minidom
   5 | from xml.dom.minidom import Document
   6 | from neo4j import GraphDatabase, basic_auth
   7 | import os,sys,time,re, subprocess
   8 | import multiprocessing
   9 | from multiprocessing import Process,Lock
  10 | import argparse
  11 | import collections
  12 | import Evtx.Evtx as evtx
  13 | import uuid
  14 | from pathlib import Path
  15 | from xml.etree.cElementTree import Element, ElementTree
  16 | from lxml import etree
  17 | from io import StringIO, BytesIO
  18 | import unicodedata,codecs
  19 | import datetime
  20 | 
  21 | 
  22 | def get_events(input_file, parse_xml=False):
  23 |    # https://chapinb.com/python-forensics-handbook/ch03_event_logs.html#iterate-over-record-xml-data-evtx
  24 | 
  25 |     with evtx.Evtx(input_file) as event_log:
  26 |         for record in event_log.records():
  27 |             if parse_xml:
  28 |                 evtxXML = record.lxml()
  29 |                 yield evtxXML
  30 |                 
  31 |             else:
  32 |                 evtxXML = record.xml()
  33 |                 yield evtxXML                 
  34 |     #return p
  35 |                 
  36 | def regEx(string):
  37 | 
  38 |     dotCounter=0
  39 |     if("@" in str(string)):
  40 |         s = re.findall("^\w+[^@]",str(string))[0]
  41 |     elif("\\" in str(string)):
  42 |         s = re.findall("[^\\\]*$",str(string))[0]
  43 |     elif("." in str(string)):
  44 |         for num,chr in enumerate(string):
  45 |             if chr == ".":
  46 |                 dotCounter=dotCounter+1
  47 |         if dotCounter == 1: #Example username.lastname or desktop111-maria.domain.com
  48 |             s = re.findall("^.\w+[.|-]\w+",string)[0]  #Result: username.lastname or dektop111-maria
  49 |         elif dotCounter == 2: #Example username.domain.com
  50 |             s = re.findall("^\w+",string)[0] #Result: username
  51 |         elif dotCounter >= 3: #Example username.lastname.domain.com
  52 |             s = re.findall("^\w+[.-]\w+",string)[0] #Result: username.lastname
  53 |         else:
  54 |             s = str(string)
  55 | 
  56 |     else:
  57 |         s = str(string)
  58 |     return(s.upper())
  59 | 
  60 | def regExIP(ip):
  61 |     ipAddress=str(ip)
  62 |     if(ipAddress.startswith(':')): #Example: ::ffff:192.168.100.50
  63 |         s = re.findall("\w+[.].*",ipAddress)[0] #Result: 192.168.100.50
  64 |     else:
  65 |         s = ipAddress
  66 | 
  67 |     return(s)
  68 | 
  69 | def neo4jConn(neo4jUri,neo4jUser,neo4jPass):
  70 | 
  71 |     try:
  72 |         driver = GraphDatabase.driver(neo4jUri, auth=basic_auth(user=neo4jUser, password=neo4jPass))
  73 |         #print("[+] Successful connection with database")
  74 |         return(driver)
  75 |     except Exception as e:
  76 |         print("[-] %s" % e)
  77 |         sys.exit(1)
  78 | 
  79 | 
  80 | def eventParser(eventIDs,xmlDoc):
  81 | 
  82 | 
  83 |     dict={}
  84 |     dict2={}
  85 |     dict3={}
  86 |     counter=0
  87 |     t=[]
  88 |     rootDoc = xmlDoc
  89 | 
  90 |     try:
  91 |         for p in rootDoc.childNodes:
  92 |             counter=counter+1
  93 |             
  94 |             for x in p.childNodes:
  95 |                 for y in x.childNodes:
  96 |                     tags=""
  97 |                     values=""
  98 |                     try:
  99 |                         if not y.firstChild:
 100 |                             tag=y.nodeName
 101 |                             attrs=y.attributes.items()
 102 |                             value=y.firstChild
 103 |                         else:
 104 |                             tag=y.nodeName
 105 |                             attrs=y.attributes.items()
 106 |                             value=y.firstChild.nodeValue
 107 | 
 108 |                         # Clean EventID tag from not useful attributes e.g <EventID Qualifer="0">.
 109 |                         # This happened when I used PowerShell events from CCPT.
 110 |                         # We need only the <EventID>
 111 |                         if tag == "EventID" and attrs == "": 
 112 |                             attrs=y.attributes.items()
 113 |                         elif tag == "EventID" and attrs != "":    
 114 |                             #trim the attributes of EventID tag
 115 |                             attrs=[]
 116 |                         else:
 117 |                             attrs=y.attributes.items()
 118 |                         #print(attrs) #[OK]                         
 119 |                         
 120 |                         dict={'Tags':tag,'Attrs':attrs,'Value':value}
 121 |                         #print(dict)
 122 |                         if not dict['Attrs'] and dict['Tags'] != 'Data':
 123 |                             #print ("[+]%s:%s" %(dict['Tags'],dict['Value'])) #[OK]
 124 |                             tags = dict['Tags']
 125 |                             values = dict['Value']
 126 | 
 127 |                         elif dict['Attrs'] and dict['Tags'] == 'Execution': # Then has 2 properties: ThreadID, ProcessID
 128 |                             #print("[+]%s:%s" % (key,value))
 129 |                             tags = dict['Tags']
 130 |                             dictExecution={}
 131 |                             for attrKey,attrValue in dict['Attrs']:
 132 |                                 attrKey=attrKey
 133 |                                 attrValue=attrValue
 134 |                                 dictExecution.update({attrKey:attrValue})
 135 |                             
 136 |                             values=dictExecution # Set ProcessID and ThreadID in Dict format.
 137 | 
 138 |                         elif dict['Attrs'] and dict['Tags'] != 'Data' and dict['Tags'] != 'Execution':
 139 |                             #print ("[+]%s:%s" %(dict['Tags'],dict['Value'])) #[OK]
 140 |                             for key,value in dict['Attrs']:
 141 |                                 
 142 |                                 if key == "SystemTime":
 143 |                                     valueDate= datetime.datetime.fromisoformat(value)
 144 |                                     valueConvert=valueDate.isoformat()
 145 |                                     value=valueConvert
 146 |                                 
 147 |                                 #print ("[+]%s:%s" %(key,value)) #[OK]
 148 |                                 if value:
 149 |                                     tags=key
 150 |                                     values=value
 151 |                                 else:
 152 |                                     tags="ActivityID"
 153 |                             
 154 |                         
 155 |                         elif dict['Attrs'] and dict['Tags'] == 'Data':
 156 |                             #print("[+]%s:%s" % (key,value))
 157 |                             tags = dict['Tags']
 158 |                             for attrKey,attrValue in dict['Attrs']:
 159 |                                 attrValue=attrValue
 160 |                             value = dict['Value']
 161 |                             values = {attrValue:value}
 162 |                         
 163 |                         # Some events don't have attributes on the tag 'DATA'
 164 |                         elif not dict['Attrs'] and dict['Tags'] == 'Data':
 165 |                             #print ("[+]%s:%s" %(dict['Tags'],dict['Value'])) #[OK]
 166 |                             tags = dict['Tags'] #<Data>
 167 |                             attrValue='ContextInfo' # <Data Name="<attrValue>" />
 168 |                             value = dict['Value']  # <Data Name="<attrValue>" /> <value> </Data>
 169 |                             values = {attrValue:value}
 170 | 
 171 |                         #print("[+] %s : %s" % (tags,values))    
 172 |                                                
 173 |                         #dict2=dict5
 174 |                         dict2={tags:values}
 175 |                         #print(dict2)
 176 |                                                 
 177 |                         dict3={counter:dict2}
 178 |                         #print(dict3)
 179 |                         
 180 |                         t.append(dict3)
 181 |                     except:
 182 |                         pass
 183 |     except Exception as e:
 184 |         print(e)
 185 | 
 186 |     # List before EventID filtering
 187 |     input_list = {}
 188 | 
 189 |     
 190 |     #Group events
 191 |     for x in range(len(t)):
 192 |         for k,v in t[x].items():
 193 |             if k not in input_list:
 194 |                 input_list[k]=[v]
 195 |             else:
 196 |                 input_list[k].append(v)
 197 |     #print(input_list)
 198 |     # Event filtering procedure.
 199 |     input_list2 = {}
 200 |     for key,value in input_list.items():
 201 |         for val in value:
 202 |             if eventIDs and val.get("EventID") in eventIDs.split(","):
 203 |                 input_list2[key]=value
 204 |             elif not eventIDs:
 205 |                 input_list2 =  input_list  
 206 |      
 207 |     filterEvents = eventIDs
 208 |     localhostIPs=["","-","::1","127.0.0.1","localhost"]
 209 |     blacklistedUsers=["DWM-3","UMFD-3","UMFD-2","DWM-2","UMFD-0","UMFD-1","DWM-1"]
 210 |     blacklistedShareFolders=["\\\\*\\SYSVOL","\\\\*\\IPC$"]
 211 | 
 212 |     return (filterEvents, localhostIPs, blacklistedUsers, blacklistedShareFolders, input_list2)
 213 | 
 214 | def createXML(evIDs,lhostIPs,bListedUsers,bListedShareFolders,eventList,outXMLFile):
 215 | 
 216 | 
 217 |     targetUserList=[]
 218 |     remoteHostsList=[]
 219 |     uniqueIPs=[]
 220 |     
 221 |     # Create a random file and add the parsing data on it. See line
 222 |     file_handle = open(outXMLFile,"w")
 223 | 
 224 |     doc = Document()
 225 |     root = doc.createElement('Events')
 226 |     doc.appendChild(root)
 227 |     counter=0 # Event counter
 228 |     #print(eventList.items()) #[OK]
 229 |     print("[+] Searching for TargetUsers/Hosts, SourceUsers/Hosts, RemoteHosts/Users, TargetHosts/Users ...")
 230 | 
 231 |     if len(eventList.items()) > 0:
 232 |         
 233 |         for key, value in eventList.items():
 234 |             
 235 |             t={} #This dictionary Holds the properties of every event.
 236 |             #Unpacking the List -> Dict Event's keys and values            
 237 |             for eventValue in value: # Value holds the Event data, Keys and Values in Dict format {'EventID':'4624'}
 238 |                 #https://stackoverflow.com/questions/54488095/python-3-dictionary-key-to-a-string-and-value-to-another-string
 239 |                 key, value = list(eventValue.items())[0]
 240 |                 #print(value)
 241 |                 # Add ProcessID and ThreadID from the Execution tag.
 242 |                 if key == "Execution":
 243 |                     for k,v in value.items():
 244 |                         t.update({k:v})
 245 | 
 246 |                 
 247 |                 # Unpack the 'Data' part of Event and update the 'Event' node.
 248 |                 if key == "Data":
 249 |                     t.update(value)
 250 |                 
 251 |                 #if <Data> tag exists dictionary of the Event then append the inside
 252 |                 
 253 |                 if "Data" in t:
 254 |                     t["Data"].append(value)
 255 |                 
 256 |                 #If <Data> tag non-exist on the dict then created but in this format
 257 |                 #e.g. {'Name':'PowerShell','Data':['log1','log2' etc.]}
 258 |                 
 259 |                 elif key == "Data":
 260 |                     t["Data"]=[]
 261 |                     t["Data"].append(value)
 262 | 
 263 |                    
 264 |                 #Otherwise, just update the dictionary
 265 |                 else:   
 266 |                     t.update(eventValue)
 267 | 
 268 |             ####################################REMOTE HOSTS######################################################
 269 |             #Extract remote IPs from Event, 
 270 |             # if IP source field does not exist then extact from the 'TargetServerName', 
 271 |             # if 'TargetServerName' does not exist then extract from 'Computer' tag.
 272 |             try:
 273 |                 if t.get("IpAddress") and (t.get("IpAddress") in lhostIPs):
 274 |                     if t.get("Workstation") and (t.get("Workstation") not in lhostIPs):
 275 |                         remoteHost = t.get("Workstation")
 276 |                         t.update({'remoteHost':regExIP(remoteHost.lower())})
 277 |                     elif t.get("Computer") and (t.get("Computer") not in lhostIPs):
 278 |                         remoteHost = t.get("Computer")
 279 |                         t.update({'remoteHost':regExIP(remoteHost.lower())})
 280 |                     else:
 281 |                         print("[-] Event ID %s with Record ID %s does not have a remoteHost." % (t.get("EventID"),t.get("EventRecordID")))
 282 | 
 283 |                 elif t.get("IpAddress") : #and (t.get("IpAddress") not in lhostIPs)
 284 |                     remoteHost = t.get("IpAddress")
 285 |                     t.update({'remoteHost':regExIP(remoteHost.lower())})
 286 |                     
 287 | 
 288 |                 #if Sysmon File is provided, then "SourceIp" is the correct tag.
 289 |                 elif t.get("SourceIp") and (t.get("SourceIp") not in lhostIPs):
 290 |                     remoteHost = t.get("SourceIp")
 291 |                     t.update({'remoteHost':regExIP(remoteHost.lower())})
 292 |                    
 293 | 
 294 |                 else:
 295 |                     remoteHost = t.get("Computer") #t.get("IpAddress")
 296 |                     t.update({'remoteHost':regExIP(remoteHost.lower())})
 297 |                                 
 298 |                 if t.get("SourceHostname"):
 299 |                     remoteSourceHostname = t.get("SourceHostname")
 300 |                     t.update({'remoteHostname':remoteSourceHostname.lower()})
 301 |                 else:
 302 |                     t.update({'remoteHostname':regExIP(remoteHost.lower())})
 303 | 
 304 |             except TypeError as te:
 305 |                 print("[!] Something went wrong to `remoteHost` clause.")
 306 |                 print(te)
 307 |             
 308 |             
 309 |             #print(remoteHost)
 310 |             
 311 |             ########################################END - REMOTE HOSTS####################################################
 312 |             
 313 |             ###############################MESSAGE TAG###########################################################
 314 |             #Get values from the following keys inside from <Message> tag.
 315 |             #Error Code, Impersonation Level, Restricted Admin Mode, Virtual Account, Elevated Token
 316 |             '''if t.get("Message"):
 317 |                 f = t.get("Message")
 318 |                 if (re.findall('Error Code:',f)):
 319 |                     ErrorCode = re.findall('Error Code:\s+[\w+-]*',f)[0].split(":")[1].strip()
 320 |                     t.update({'ErrorCode':ErrorCode})
 321 | 
 322 |                 if (re.findall('Impersonation Level:',f)):
 323 |                     ImpersonationLevel = re.findall('Impersonation Level:\s+[\w+-]*',f)[0].split(":")[1].strip()
 324 |                     t.update({'ImpersonationLevelTranslate':ImpersonationLevel})
 325 | 
 326 |                 if(re.findall('Restricted Admin Mode:',f)):
 327 |                     RestrictedAdminMode = re.findall('Restricted Admin Mode:\s+[\w+-]*',f)[0].split(":")[1].strip()
 328 |                     t.update({'RestrictedAdminMode':RestrictedAdminMode})
 329 | 
 330 |                 if (re.findall('Virtual Account:',f)):
 331 |                     VirtualAccount = re.findall('Virtual Account:\s+[\w+-]*',f)[0].split(":")[1].strip()
 332 |                     t.update({'VirtualAccount':VirtualAccount})
 333 | 
 334 |                 if (re.findall('Elevated Token:',f)):
 335 |                     ElevatedToken = re.findall('Elevated Token:\s+[\w+-]*',f)[0].split(":")[1].strip()
 336 |                     t.update({'ElevatedToken':ElevatedToken})'''
 337 |             #else:
 338 |             #	print("[-] Couldn't find <Message> tag on Event ID %s with EventRecordID %s." % (t.get("EventID"),t.get("EventRecordID")))
 339 | 
 340 |             ##################################END - MESSAGE TAG###################################################################
 341 |             
 342 |             if (t.get("EventID") not in ["4100","4103","4104","400","403","500","501","600","800"] and not "powershell" in t.get("Channel")): # Not In Powershell Events
 343 |                 
 344 |                 
 345 |                 try:
 346 |                     if t.get("TargetUserName"):
 347 |                         targetUser = t.get("TargetUserName")
 348 |                     elif t.get("TargetName"):
 349 |                         targetUser = t.get("TargetName")
 350 |                         if re.findall('=[a-zA-Z0-9@./]+',str(targetUser)):
 351 |                             targetUser = re.findall('=[a-zA-Z0-9@./]+',str(targetUser))
 352 |                             targetUser = ''.join(targetUser)
 353 |                             targetUser = targetUser.split("=")[1].strip()
 354 |                         else:
 355 |                             targetUser = t.get("TargetName")                        
 356 |                     elif t.get("SubjectUserName"):
 357 |                         targetUser = t.get("SubjectUserName")
 358 |                     # if Sysmon File is provided, then "User" is the correct tag.
 359 |                     elif t.get("User"):
 360 |                         targetUser = t.get("User")
 361 |                     elif t.get("Detection User"):
 362 |                         targetUser = t.get("Detection User")
 363 |                     # if Sysmon File is provided, then "UserID" is the correct tag.
 364 |                     elif t.get("UserID"):
 365 |                         targetUser = t.get("UserID")
 366 |                     elif t.get("Computer"):
 367 |                         targetUser = t.get("Computer")
 368 |                 except TypeError as te:
 369 |                     print(te)
 370 |                 
 371 |                 # If everything goes well then Update/Add the targetUser property to the Event.
 372 |                 if targetUser not in bListedUsers:
 373 |                     t.update({'targetUser':targetUser})
 374 |                 else:
 375 |                     print("[-] Event ID %s with Record ID %s discarded because the TargetUser %s is into the bListedUsers list." % (t.get("EventID"),t.get("EventRecordID"),targetUser))
 376 |                     
 377 | 
 378 |             # PowerShell logging cheatsheet: https://static1.squarespace.com/static/552092d5e4b0661088167e5c/t/5760096ecf80a129e0b17634/1465911664070/Windows+PowerShell+Logging+Cheat+Sheet+ver+June+2016+v2.pdf
 379 |             elif t.get("EventID") in ["4100","4103","4104","400","403","500","501","600","800"]:
 380 |                 
 381 |                 if t.get("Data"):
 382 |                     eventData = t.get("Data")
 383 | 
 384 | 
 385 |                 try:
 386 | 
 387 |                     #Check if the word "User=" or "UserId=" etc. exists inside the <Data> tag
 388 |                     # Before search unpack the Event data which are List format.
 389 |                     for eventX in eventData:                              
 390 |                         if eventX != None:
 391 |                             try:    
 392 |                                 # Try find usernames on Description part of the Event e.g 4103,4104,800 
 393 |                                 if eventX.get("ContextInfo"):
 394 |                                     if re.findall('Use[rId|rID|r]+.=.[a-zA-Z0-9]+.\w+.',str(eventX.get("ContextInfo"))):
 395 |                                         
 396 |                                         targetUser = re.findall('Use[rId|rID|r]+.=.\w+.[\w+]+[^\s]\w+.',str(eventX.get("ContextInfo")))
 397 |                                         targetUser=targetUser[0]
 398 |                                         targetUser = ''.join(targetUser)
 399 |                                         targetUser = targetUser.split("=")[1].strip()
 400 |                                     
 401 |                                         if targetUser in bListedUsers:
 402 |                                             print("[-] Event ID %s with Record ID %s discarded because the TargetUser %s is into the bListedUsers list." % (t.get("EventID"),t.get("EventRecordID"),targetUser))
 403 |                                                 
 404 |                                         else:
 405 |                                             targetUser=re.findall('[^\s]+',targetUser.lower())
 406 |                                             targetUser=targetUser[0]
 407 |                                             targetUser=''.join(targetUser)
 408 |                                             t.update({'targetUser':targetUser})
 409 | 
 410 |                                    
 411 |                                     #If ContextInfo exist as well as the UserID. 
 412 |                                     elif t.get("UserID"):                                    
 413 |                                         targetUser=t.get("UserID")
 414 |                                         t.update({'targetUser':targetUser})
 415 | 
 416 |                                     else:
 417 |                                         #Some PowerShell events doesn't have the UserId property.
 418 |                                         #In this case, use a generic user, which is called `PSGenericUser` 
 419 |                                         #Check if targeUser key hasn't already set.
 420 |                                         targetUser = "PSGenericUser"
 421 |                                         t.update({'targetUser':targetUser})    
 422 |                                 
 423 |                                 
 424 |                                 if not eventX.get("ContextInfo") and t.get("UserID") and not t.get("targetUser"):
 425 |                                     targetUser=t.get("UserID")
 426 |                                     t.update({'targetUser':targetUser})
 427 | 
 428 |                                 elif not eventX.get("ContextInfo") and t.get("UserID") and not t.get("targetUser"):
 429 |                                     targetUser = "PSGenericUser"
 430 |                                     t.update({'targetUser':targetUser})
 431 |                             
 432 |                             except Exception as error:
 433 |                                 print("[-] TargetUser RegEx error! %s" % error)
 434 |                                 
 435 |                             
 436 | 
 437 |                             try:
 438 |                             
 439 |                                 if eventX.get("ContextInfo"):
 440 |                                     if re.findall('HostApplication.*=.[\a-zA-Z0-9]+Engine',eventX.get("ContextInfo")):    
 441 |                                         HostApplication = re.findall('HostApplication.*=.[\a-zA-Z0-9]+Engine',str(eventX.get("ContextInfo")))
 442 |                                     else:
 443 |                                         HostApplication = re.findall('Host Application.*=.[\a-zA-Z0-9]+Engine',str(eventX.get("ContextInfo")))
 444 |                                     
 445 |                                     if HostApplication:
 446 |                                         HostApplication = ' '.join(HostApplication)
 447 |                                         HostApplication = HostApplication.replace("Engine","").strip()
 448 |                                         HostApplication = HostApplication.split("=")[1].strip()
 449 |                                         t.update({'HostApplication':HostApplication})
 450 |                             
 451 |                             except Exception as error:    
 452 |                                 print("[-] HostApplication RegEx error! %s" % error)
 453 | 
 454 |                             try:
 455 |                                 if eventX.get("ContextInfo"):
 456 |                                     if re.findall('ScriptName.*=.[\a-zA-Z0-9]+Command',eventX.get("ContextInfo")):
 457 |                                         ScriptName = re.findall('ScriptName.*=.[\a-zA-Z0-9]+Command',str(eventX.get("ContextInfo")))
 458 |                                     else:
 459 |                                         ScriptName = re.findall('Script Name.*=.[\a-zA-Z0-9]+Command',str(eventX.get("ContextInfo")))    
 460 |                                     
 461 |                                     if ScriptName:
 462 |                                         ScriptName = ' '.join(ScriptName)
 463 |                                         ScriptName = ScriptName.replace("Command","").strip()
 464 |                                         ScriptName = ScriptName.split("=")[1].strip()
 465 |                                         t.update({'ScriptName':ScriptName})
 466 |                                     #print(ScriptName)
 467 |                                     
 468 |                             except Exception as error:     
 469 |                                 print("[-] ScriptName RegEx error! %s" % error)
 470 | 
 471 |                             try:
 472 |                                 if eventX.get("ContextInfo"):
 473 |                                     if re.findall('CommandLine.*=.[\a-zA-Z0-9]+',eventX.get("ContextInfo")):
 474 |                                         CommandLine = re.findall('CommandLine.*=.[\a-zA-Z0-9]+',str(eventX.get("ContextInfo")))
 475 |                                         CommandLine = ' '.join(CommandLine)
 476 |                                         CommandLine = CommandLine.split("=")[1]
 477 |                                         t.update({'CommandLine':CommandLine})
 478 |                                         #print(CommandLine)
 479 |                             except Exception as error:
 480 |                                 print("[-] commandLine RegEx error! %s" % error)
 481 | 
 482 |                             try:    
 483 |                                 if eventX.get("ContextInfo"):
 484 |                                     if re.findall('CommandPath.*=.[\a-zA-Z0-9]+Sequence',eventX.get("ContextInfo")):
 485 |                                         CommandPath = re.findall('CommandPath.*=.[\a-zA-Z0-9]+Sequence',str(eventX.get("ContextInfo")))
 486 |                                     else:
 487 |                                         CommandPath = re.findall('Command Path.*=.[\a-zA-Z0-9]+Sequence',str(eventX.get("ContextInfo")))
 488 |                                     
 489 |                                     if CommandPath:
 490 |                                         CommandPath = ' '.join(CommandPath)
 491 |                                         CommandPath = CommandPath.replace("Sequence","").strip()
 492 |                                         CommandPath = CommandPath.split("=")[1]
 493 |                                         t.update({'CommandPath':CommandPath})
 494 |                                     
 495 |                             except Exception as error:
 496 |                                 print("[-] CommandPath RegEx error! %s" % error)
 497 | 
 498 |                             try:
 499 |                                 if eventX:
 500 |                                     contextInfo = re.findall('Severity.*=',str(eventX.get('ContextInfo')))
 501 |                                     if contextInfo != None:
 502 |                                         contextInfoSeverity = re.findall('Severity.*=.[a-zA-Z]+',str(contextInfo))
 503 |                                         if contextInfoSeverity and contextInfoSeverity != None:
 504 |                                             Severity = re.findall('Severity.*=.[a-zA-Z]+',str(contextInfo))
 505 |                                             Severity = ' '.join(Severity)
 506 |                                             Severity = Severity.split("=")[1].split(" ")[0]
 507 |                                             t.update({'Severity':Severity})
 508 |                                     else:
 509 |                                         contextInfoSeverity=""    
 510 |                                     
 511 |                                 
 512 |                             
 513 |                             except Exception as error:
 514 |                                 print("[-] Severity RegEx error! %s" % error)
 515 |                                 
 516 | 
 517 |                     # print(t.get('EventRecordID')+"-->"+t.get('targetUser')) [OK]
 518 |                 
 519 |                 except Exception as error:
 520 |                     print("[-] Something went wrong while parsing the PowerShell Events!")
 521 |                     print("[+] Event ID: "+str(t.get("EventID"))+" with Record ID: "+str(t.get("EventRecordID")))
 522 |                     print(error)
 523 | 
 524 |                 #print(t)
 525 |                                 
 526 |                 
 527 |             else:
 528 |                 targetUser = "NULL"
 529 |                 print("[+] Event ID: "+str(t.get("EventID"))+" with Record ID: "+str(t.get("EventRecordID"))+" does not have targetUser tag!")
 530 | 
 531 | 
 532 |             ########################################################################################
 533 |             #Add  'Attaking Hosts' into Neo4j
 534 |             # if Sysmon File is provided, then "DestinationIp" is the correct tag.
 535 |             if t.get("DestinationIp"):
 536 |                 targetServer = t.get("DestinationIp")
 537 |                 t.update({'targetServer':targetServer.lower()})
 538 |             elif t.get("Computer"):
 539 |                 targetServer = t.get("Computer")
 540 |                 t.update({'targetServer':targetServer.lower()})
 541 |             else:
 542 |                 print("[-] Something went wrong during the 'DestinationHost' parsing! ")
 543 |             #print("[-] Event ID %s with Record ID %s does not have a targetServer." % (t.get("EventID"),t.get("EventRecordID")))
 544 |             t.update({'name':t.get("EventID")})
 545 |             ##########################################################################################
 546 | 
 547 |             counter=counter+1 #How many events added!
 548 |             
 549 |             createTagEvent=doc.createElement("Event")
 550 |             doc.childNodes[0].appendChild(createTagEvent)
 551 |             for tagName in t.keys(): #Example of t.keys(): {"EventID":"4624","Version":"1"}
 552 |                 if tagName != "Message": #Remove <Message> tag from Exported Windows XML. Too much info :)
 553 |                     text = str(t.get(tagName))
 554 |                     tag = str(tagName)
 555 |                     createTag=doc.createElement(tag.replace(" ","")) #Remove SPACE from the Tag Name. Example: <Product Name>, <ProductName>
 556 |                     innerTXT = doc.createTextNode(text.replace("«",""))
 557 |                     createTag.appendChild(innerTXT)
 558 |                     createTagEvent.appendChild(createTag)
 559 | 
 560 |             #else:
 561 |             #	print("[-] Event ID "+str(t.get("EventID"))+" is missing.")
 562 | 
 563 | 
 564 | 
 565 |     print("[+] Creating XML for neo4j...")
 566 |     doc.writexml(file_handle)
 567 |     #doc.writexml(sys.stdout)
 568 |     file_handle.close()
 569 | 
 570 | #def neo4jXML(outXMLFile,neo4jUri,neo4jUser,neo4jPass):
 571 | def neo4jXML(outXMLFile,neo4jUri,neo4jUser,neo4jPass):
 572 |     
 573 |     neo4jDriver=neo4jConn(neo4jUri,neo4jUser,neo4jPass)
 574 |     try:
 575 |         #Read the created XML file with the UUID name. e.g. d1ba1cf8-0a30-42d1-ae6b-451289ca6c0d.xml
 576 |         neo4jDocXML = minidom.parse(outXMLFile).documentElement
 577 |     except Exception as e:
 578 |         print(e)
 579 |         sys.exit(1)
 580 | 
 581 |     blackListedEventProperties=[
 582 |         "Opcode",
 583 |         "Keywords",
 584 |         "Version",
 585 |         "Level",
 586 |         "TransmittedServices",
 587 |         "KeyLength",
 588 |         "LmPackageName",
 589 |         "Key Length",
 590 |         "Message",
 591 |         "SubjectDomainName",
 592 |         "TicketEncryptionType",
 593 |         "TicketOptions",
 594 |         "Keywords",
 595 |         "Level",
 596 |         "KeyLength",
 597 |         "CertIssuerName",
 598 |         "CertSerialNumber",
 599 |         "CertThumbprint",
 600 |         "ObjectServer",
 601 |         "PreAuth Type",
 602 |         "TargetOutboundDomainName",
 603 |         "FWLink",
 604 |         "Unused",
 605 |         "Unused2",
 606 |         "Unused3",
 607 |         "Unused4",
 608 |         "Unused5",
 609 |         "Unused6",
 610 |         "OriginID",
 611 |         "OriginName",
 612 |         "ErrorCode",
 613 |         "TypeID",
 614 |         "TypeName",
 615 |         "StatusDescription",
 616 |         "AdditionalActionsID",
 617 |         "SubStatus",
 618 |         "Product"
 619 |         ]
 620 | 
 621 |     counter=0
 622 |     groupEvents=[] #Example [{ EventId: "4624",targetUser:"tasos"},{EventId: "4625", targetUser: "tzonis"}]
 623 | 
 624 |     try:
 625 | 
 626 |         for eventTagNode in neo4jDocXML.childNodes:
 627 |             dictionaryEvents=dict() # {EventId: "4624",targetUser:"tasos"},{EventId: "4625", targetUser: "tzonis"}
 628 |             if eventTagNode.childNodes:
 629 |                 #print(eventTagNode.childNodes) #[OK]
 630 |                 for eventTags in eventTagNode.childNodes:
 631 |                     if (eventTags.nodeName not in blackListedEventProperties):
 632 |                         for eventValues in eventTags.childNodes:
 633 |                             #print(eventTags.nodeName,eventValues.nodeValue)
 634 |                             dictionaryEvents.update({eventTags.nodeName:eventValues.nodeValue})
 635 |                 #print("-------------------------")
 636 |                 groupEvents.append(dictionaryEvents)
 637 |         
 638 |         #print(groupEvents) #[OK]
 639 |         
 640 |         print("[+] Adding the Events ...")
 641 |         with neo4jDriver.session() as session:
 642 |             print("\n")
 643 |             print("=========Time Frame=========")
 644 |             total_time = 0
 645 |             start = time.time()
 646 |             # Create Neo4j Nodes
 647 |             insertEvents = session.run(
 648 |                 "UNWIND $events as eventPros "
 649 |                
 650 |                 "CREATE (e:Event) "
 651 |                 "SET e=eventPros "
 652 |                 "SET (CASE WHEN EXISTS(e.SubjectUserName) AND NOT EXISTS(e.TargetUserName) THEN e END).hasSubjectUser='false' "
 653 |                 "SET (CASE WHEN EXISTS(e.SubjectUserName) AND EXISTS(e.TargetUserName) THEN e END).hasSubjectUser='true' "
 654 |                 "SET (CASE WHEN EXISTS(e.SubjectUserName) AND EXISTS(e.TargetName) THEN e END).hasSubjectUser='true' "
 655 |                 "SET (CASE WHEN NOT EXISTS(e.SubjectUserName) AND EXISTS(e.TargetUserName) THEN e END).hasSubjectUser='false' "
 656 |                 #Example: PowerShell Events.
 657 |                 "SET (CASE WHEN NOT EXISTS(e.SubjectUserName) AND NOT EXISTS(e.TargetUserName) THEN e END).hasSubjectUser='false' "
 658 |                 "WITH e WHERE e.targetUser IS NOT NULL " #Avoid erros when targetUser is blacklisted and it's name will be NULL.
 659 |                 
 660 |                 #"MERGE (e:Event {EventRecordIDs:eventPros.EventRecordID}) SET e=eventPros " #Avoid dublicate Events with MERGE and filtering.
 661 |                 "MERGE (r:RemoteHosts {name:e.remoteHost,remoteHostname:e.remoteHostname}) "
 662 |                 
 663 |                 "MERGE (u:TargetUser {name:e.targetUser,remoteHost:e.remoteHost,targetServer:e.targetServer,hasSubjectUser:e.hasSubjectUser,EventRecordIDs: [ ]}) "
 664 |                 "SET u.EventRecordIDs=u.EventRecordIDs+e.EventRecordID " #Append the EventRecordIDs
 665 |                 "SET u.SubjectUsernames=[ ] "
 666 |                 "SET u.bindSubjectUserSids=[ ] "
 667 |                 
 668 |                 "MERGE (t:TargetHost {name:e.targetServer}) ",events=groupEvents)
 669 |             total_time += time.time() - start
 670 |             print("[1] Neo4j insertEvents query: %f " %(total_time))             
 671 | 
 672 | ###########################################Subject Users ###############################################################
 673 | 
 674 |         with neo4jDriver.session() as session:
 675 |             total_time = 0
 676 |             start = time.time()
 677 |             # Create 'SubjectUser' Node - Initialization
 678 |             createSubjectUsers=session.run(
 679 | 
 680 |                 "MATCH (e:Event) "
 681 |                 #"WHERE EXISTS(e.SubjectUserName) AND EXISTS(e.TargetUserName) "
 682 |                 "WHERE e.hasSubjectUser='true' "
 683 |                 "WITH collect(e.SubjectUserName) as SubjectUserNames,e "
 684 |                 "UNWIND SubjectUserNames as SubjectUserName "
 685 |                 "FOREACH(p in SubjectUserName | MERGE (s:SubjectUser {name:p,SubjectUserRealName:p,TargetUsernames: [ ],EventRecordIDs: [ ],bindTargetUserSids: [ ],IsSubjectUser:'true',remoteHost:e.remoteHost,targetServer:e.targetServer,hasTargetUsernameTag:'true',hasSubjectUsernameTag:'true'}) "
 686 |                 "SET s.IsCreated='true' "
 687 |                 "SET s.IsSubjectUser='true' "
 688 |                 "SET s.CreatedByEventRecordID=e.EventRecordID) "
 689 |             
 690 |             )
 691 | 
 692 |             total_time += time.time() - start
 693 |             print("[2] Neo4j createSubjectUsersNode query: %f " %(total_time)) 
 694 | 
 695 |         
 696 |         with neo4jDriver.session() as session:    
 697 |             total_time = 0
 698 |             start = time.time()
 699 |             # Update 'SubjectUser' node.
 700 |             UpdateSubjectUsers = session.run(
 701 |                 "MATCH (e:Event),(u:TargetUser),(s:SubjectUser) "
 702 |                 "WHERE s.name=e.SubjectUserName "
 703 |                 "AND u.name=e.targetUser "
 704 |                 "AND u.remoteHost=e.remoteHost "
 705 |                 "AND u.targetServer=e.targetServer "
 706 |                 "AND s.remoteHost=u.remoteHost "
 707 |                 "AND s.targetServer=u.targetServer "
 708 |                 "AND s.remoteHost=e.remoteHost "
 709 |                 "AND s.targetServer=e.targetServer "
 710 |                 "AND EXISTS(e.SubjectUserName) AND e.SubjectUserName IS NOT NULL "
 711 |                 "AND ((EXISTS(e.TargetUserName) AND e.TargetUserName IS NOT NULL) OR (EXISTS(e.TargetName) AND e.TargetName IS NOT NULL)) "
 712 |                 "SET s.EventRecordIDs=[e.EventRecordID] " #Adding the first matched EventRecordID. On the FOREACH part is adding the rest.
 713 |                 "WITH collect(e.SubjectUserName) as subjectUsernames, e "
 714 |                 "UNWIND subjectUsernames AS subjectUsername "
 715 |                 "FOREACH(p IN subjectUsername | MERGE (b:SubjectUser {name:p,remoteHost:e.remoteHost,targetServer:e.targetServer}) "
 716 |                 "SET b.IsSubjectUser='true' "
 717 |                 "SET (CASE WHEN NOT e.EventRecordID IN b.EventRecordIDs THEN b END).EventRecordIDs=b.EventRecordIDs+e.EventRecordID "
 718 |                 "SET (CASE WHEN NOT e.targetUser IN b.TargetUsernames THEN b END).TargetUsernames=b.TargetUsernames+e.targetUser "
 719 |                 "SET (CASE WHEN NOT e.TargetUserSid IN b.bindTargetUserSids THEN b END).bindTargetUserSids=b.bindTargetUserSids+e.TargetUserSid "
 720 |                 "SET b.SubjectUserRealName=e.SubjectUserName)"
 721 |             )
 722 | 
 723 |             deleteDublicateSubjectUsers= session.run(
 724 |                 
 725 |                 "MATCH (s:SubjectUser) "
 726 |                 "WITH collect(s) as nodes,s.EventRecordIDs as evIDs,s.remoteHost as remoteHost,s.targetServer as targetServer,s.TargetUsernames as targetUserNames "
 727 |                 "WHERE s.EventRecordIDs=evIDs "
 728 |                 "AND s.remoteHost=remoteHost "
 729 |                 "AND s.targetServer=targetServer "
 730 |                 "AND s.TargetUsernames=targetUserNames "
 731 |                 "AND size(nodes)>1 "
 732 |                 "UNWIND nodes[1..] as node "
 733 |                 "DETACH DELETE node"
 734 | 
 735 |             )    
 736 |             total_time += time.time() - start
 737 |             print("[3] Neo4j updateSubjectUserNode query: %f " %(total_time))
 738 | 
 739 |         
 740 | 
 741 | ############################### Target Users ########################################## 
 742 |         with neo4jDriver.session() as session:               
 743 |             total_time = 0
 744 |             start = time.time()
 745 |             updateTargetUserNode = session.run(
 746 |                 
 747 |                 "MATCH (s:SubjectUser),(t:TargetUser) "
 748 |                 "WHERE t.hasSubjectUser='true' "
 749 |                 "WITH s.EventRecordIDs as subjectUserEventRecordIDs,t.EventRecordIDs as targetUserEventRecordIDs,t,s "
 750 |                 "UNWIND subjectUserEventRecordIDs AS subjectUserEventRecordID "
 751 |                 "FOREACH(p IN subjectUserEventRecordID | "
 752 |                 "SET (CASE WHEN subjectUserEventRecordID IN targetUserEventRecordIDs THEN t END).SubjectUsernames=s.name)"
 753 |             )
 754 |             total_time += time.time() - start
 755 |             print("[4] Neo4j updateTargetUserNode query %f: " %(total_time))
 756 | 
 757 | 
 758 | ###################################################Relationships######################################################
 759 |   
 760 |         with neo4jDriver.session() as session:
 761 |             total_time = 0
 762 |             start = time.time()
 763 |             # Check if Event node has the 'SubjectUserName'. If yes, then the relationship is:
 764 |             # IsSubjectTarget = Means that Event contains 'SubjectUserName'  property but has the same value with 'targetUsername'
 765 |             # RemoteHost -> User -> TargetUser -> EventID -> targetServer
 766 |         #    allInOnerelationship = session.run("MATCH (u:TargetUser),(u2:TargetUser),(e:Event),(r:RemoteHosts),(t:TargetHost) WHERE u.name IN u2.subjectUsernames AND e.EventRecordID IN u.EventRecordIDs AND e.EventRecordID IN u2.EventRecordIDs AND u.name = e.SubjectUserName AND u.remoteHost = r.name AND u.IsSubjectUser = 'true' AND u.IsTargetUser IS NULL AND t.name = u2.targetServer MERGE (r)-[r1:RemoteHostTOSubjectUsername]-(u)-[r2:SubjectUsernameTOTargetuser]-(u2)-[r3:TargetUserTOEventID]-(e)-[r4:EventIDTOtargetHost]->(t)") # WITH collect(r1)[1..] as rels, collect(r2)[1..] as rels2 FOREACH (r1 in rels | DELETE r1) FOREACH (r2 in rels2 | DELETE r2) 
 767 |             SubjectUserTargetUserRelationship1 = session.run(
 768 | 
 769 |                 "MATCH (r:RemoteHosts),(t:TargetUser),(s:SubjectUser),(th:TargetHost),(e:Event) "
 770 |                 "WHERE t.hasSubjectUser='true' "
 771 |                 "AND e.remoteHost=r.name "
 772 |                 "AND s.remoteHost=r.name "
 773 |                 "AND t.remoteHost=s.remoteHost "
 774 |                 "AND s.name IN t.SubjectUsernames "
 775 |                 "AND t.targetServer=s.targetServer "
 776 |                 "AND e.hasSubjectUser='true' "
 777 |                 "AND e.EventRecordID IN s.EventRecordIDs "
 778 |                 "MERGE (r)-[r1:RemoteHostTOSubjectUsername]-(s)-[r2:SubjectUsernameTOTargetuser]->(t)"
 779 |             
 780 |             )
 781 |             total_time += time.time() - start
 782 |             print("[5] Neo4j SubjectUserTargetUserRelationship1 query: %f " %(total_time))
 783 |         
 784 |         with neo4jDriver.session() as session:
 785 |             total_time = 0
 786 |             start = time.time()
 787 |             SubjectUserTargetUserRelationship2 = session.run(
 788 | 
 789 |                 "MATCH (t:TargetUser),(e:Event),(th:TargetHost) "
 790 |                 "WHERE t.hasSubjectUser='true' "
 791 |                 "AND t.targetServer=e.targetServer "
 792 |                 "AND t.remoteHost=e.remoteHost "
 793 |                 "AND e.EventRecordID IN t.EventRecordIDs "
 794 |                 "AND e.targetServer=th.name "
 795 |                 "AND e.hasSubjectUser='true' "
 796 |                 "MERGE (t)-[r3:TargetUserTOEvent]-(e)-[r4:EventIDTOtargetHost]->(th)"
 797 | 
 798 |             )
 799 |             total_time += time.time() - start
 800 |             print("[6] Neo4j SubjectUserTargetUserRelationship2 query: %f " %(total_time))
 801 | 
 802 |         with neo4jDriver.session() as session:   
 803 |             total_time = 0
 804 |             start = time.time()
 805 |             #allInOnerelationship = session.run("MATCH (t:TargetUser),(th:TargetHost),(e:Event) WHERE e.targetUser=t.TargetRealName AND t.targetServer=th.name AND e.targetServer=th.name MERGE (t)-[m1:test1]-(e)-[m2:test2]->(th)")
 806 |             #deleteDublicates_AllInOnerelationship = session.run("MATCH (r:RemoteHosts)-[r1]-(t:SubjectUser)-[r2]->(s:TargetUser) with r,t,s,type(r1) as typ, tail(collect(r1)) as coll foreach(x in coll | delete x)")
 807 |             # Create relationships only for Users that NOT contains 'SubjectUserName'
 808 |             remoteHost2DomUserRelationship=session.run(
 809 |                 
 810 |                 "MATCH (r:RemoteHosts),(u:TargetUser),(e:Event) "
 811 |                 "WHERE u.remoteHost = r.name "
 812 |                 "AND e.hasSubjectUser='false' "
 813 |                 "AND u.hasSubjectUser='false' "
 814 |                 "AND e.EventRecordID IN u.EventRecordIDs "
 815 |                 "MERGE (r)-[r5:Source2TargetUser]->(u)"
 816 |             )
 817 |             total_time += time.time() - start
 818 |             print("[7] Neo4j remoteHost2DomUserRelationship query: %f " %(total_time))
 819 | 
 820 |         with neo4jDriver.session() as session:
 821 |             total_time = 0
 822 |             start = time.time()    
 823 |             targetUser2EventRelationship = session.run(
 824 |                 
 825 |                 "MATCH (u:TargetUser),(e:Event),(t:TargetHost) "
 826 |                 "WHERE e.targetUser=u.name "
 827 |                 "AND t.name=e.targetServer "
 828 |                 "AND u.targetServer=t.name "
 829 |                 "AND e.EventRecordID IN u.EventRecordIDs "
 830 |                 "AND u.hasSubjectUser='false' "
 831 |                 "MERGE (u)-[r7:TargetUser2Event]-(e)-[r8:Event2TargetHost]->(t)"
 832 |             )
 833 | 
 834 |             total_time += time.time() - start
 835 |             print("[8] Neo4j targetUser2EventRelationship query: %f " %(total_time))
 836 | 
 837 | ############################################END###########################################################################
 838 |             
 839 |             
 840 |     except Exception as e:
 841 |         print(e)
 842 | 
 843 |     #Close the connection with Neo4j
 844 |     print("[+] All queries pushed to Neo4j successfully")
 845 |     neo4jDriver.close()
 846 |     print("[+] Connection with Neo4j is closed.")
 847 | 
 848 | 
 849 | def eventCounters(neo4jUri,neo4jUser,neo4jPass):
 850 |     neo4jDriver=neo4jConn(neo4jUri,neo4jUser,neo4jPass) #Call the function
 851 |     #Count Events
 852 |     #with neo4jDriver.session() as session:
 853 |     k=neo4jDriver.session().run("MATCH (n:Event) RETURN count(n)")
 854 |     countEvents = 0
 855 |     for x in k:
 856 |         print("[+] Added Events:"+str(x.value()))
 857 |         countEvents = int(x.value())
 858 |     #Count RemoteHosts
 859 |     #with neo4jDriver.session() as session:
 860 |     k=neo4jDriver.session().run("MATCH (n:RemoteHosts) RETURN count(n)")
 861 |     countRemHosts = 0
 862 |     for x in k:
 863 |         print ("[+] Added RemoteHosts:"+str(x.value()))
 864 |         countRemHosts = int(x.value())
 865 | 
 866 |     #Count TargetHosts
 867 |     #with neo4jDriver.session() as session:
 868 |     k=neo4jDriver.session().run("MATCH (n:TargetHost) RETURN count(n)")
 869 |     countTargetHosts = 0
 870 |     for x in k:
 871 |         print ("[+] Added TargetHosts:"+str(x.value()))
 872 |         countTargetHosts = int(x.value())
 873 | 
 874 |     #Count TargetUsers
 875 |     #with neo4jDriver.session() as session:
 876 |     k=neo4jDriver.session().run("MATCH (n:TargetUser) RETURN count(n)")
 877 |     countTargetUsers = 0
 878 |     for x in k:
 879 |         print ("[+] Added TargetUsers:"+str(x.value()))
 880 |         countTargetUsers = int(x.value())
 881 |     
 882 |     #Count SubjectUsers
 883 |     #with neo4jDriver.session() as session:
 884 |     k=neo4jDriver.session().run("MATCH (n:SubjectUser) RETURN count(n)")
 885 |     countTargetUsers = 0
 886 |     for x in k:
 887 |         print ("[+] Added SubjectUsers:"+str(x.value()))
 888 |         countSubjectUsers = int(x.value())
 889 | 
 890 |     #Count Relatioships
 891 |     #with neo4jDriver.session() as session:
 892 |     k=neo4jDriver.session().run("MATCH p=()-->() RETURN count(p)")
 893 |     countRel = 0
 894 |     for x in k:
 895 |         print ("[+] Added Relationships:"+str(x.value()))
 896 |         countRel = int(x.value())
 897 | 
 898 |     print ("[+] Total: "+str(countEvents+countRemHosts+countRel+countTargetHosts+countTargetUsers+countSubjectUsers))
 899 |     print ('[+] Finished: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))
 900 | 
 901 |     #Close the connection with Neo4j
 902 |     neo4jDriver.close()
 903 | 
 904 | 
 905 | if __name__ == '__main__':
 906 | 
 907 |     parser = argparse.ArgumentParser(description='Filter the Windows Events file.')
 908 |     parser.add_argument('-e','--eventID',help="EventID filtering",nargs='?',type=str, default=[])
 909 |     parser.add_argument('-ev', '--events',help='Windows Events in XML OR EVTX format.')
 910 |     parser.add_argument('-i','--uri',help='neo4j host. Example: bolt://localhost',required=True)
 911 |     parser.add_argument('-D','--delete',help='Delete all data from Neo4j.',action='store_true')
 912 |     parser.add_argument('-u','--user',help='neo4j username.',required=True)
 913 |     parser.add_argument('-p','--passwd',help='neo4j password.',required=True)
 914 |     args = parser.parse_args()
 915 |     eventIDs=args.eventID
 916 |     neo4jUri=args.uri
 917 |     neo4jUser=args.user
 918 |     neo4jPass=args.passwd
 919 |     eventsFile = args.events
 920 |     delData = args.delete
 921 |     
 922 |     outXMLFileArray=[]
 923 |     
 924 | 
 925 |     def parsingFunction(fileName,xmlDoc,outXMLfile):
 926 |         
 927 |         #Parse Windows Event XML File - Process 1
 928 |         parl=multiprocessing.Lock()
 929 |         parl.acquire()
 930 |         print("[+] Parsing file %s " % fileName)
 931 |         print ('[+] Parsing Started: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))
 932 |         evIDs,lhostIPs,bListedUsers,bListedShareFolders,eventList = eventParser(eventIDs,xmlDoc)
 933 |         print ('[+] Parsing Finished: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))
 934 |         parl.release()
 935 | 
 936 |         #Create neo4j XML - Process 2
 937 |         nl = multiprocessing.Lock()
 938 |         nl.acquire()
 939 |         cnodes = Process(target=createXML, args=(evIDs,lhostIPs,bListedUsers,bListedShareFolders,eventList,outXMLfile))
 940 |         cnodes.start()
 941 |         cnodes.join()
 942 |         nl.release()
 943 | 
 944 |         #Read neo4j XML - Process 3
 945 |         ml = multiprocessing.Lock()
 946 |         ml.acquire()
 947 |         mnodes = Process(target=neo4jXML,args=(str(outXMLfile),neo4jUri,neo4jUser,neo4jPass))
 948 |         print("[+] Loading neo4j XML ...")
 949 |         mnodes.start()
 950 |         mnodes.join()
 951 |         ml.release()
 952 |     
 953 |     def generateOutXMLFileRandomName(providedPathFile):
 954 |         
 955 |         #Output directory of parsing file. It will be on same path with the running Python script.
 956 |         #Get the directory of the files that are listing under provided path.
 957 |         #cwd = os.path.dirname(providedPathFile)
 958 |         
 959 |         if os.path.isfile(providedPathFile) and providedPathFile.endswith((".xml",".evtx")):
 960 |             # Directory of the file
 961 |             cwd = os.path.dirname(providedPathFile)
 962 |             #Create an XML file with random number
 963 |             randomName = str(uuid.uuid4()) + ".xml"
 964 |             #Generates a random number that will be used on later steps.
 965 |             outXMLFile = cwd + "\\" + randomName
 966 |         else:
 967 |             #Is Directory
 968 |             cwd = Path(providedPathFile)
 969 |             #Create an XML file with random number
 970 |             randomName = str(uuid.uuid4()) + ".xml"
 971 |             #Generates a random number that will be used on later steps.
 972 |             outXMLFile = cwd / randomName
 973 | 
 974 |         return outXMLFile
 975 | 
 976 | 
 977 |     if(delData):
 978 |         neo4jDriver=neo4jConn(neo4jUri,neo4jUser,neo4jPass)
 979 |         print("[+] Connecting with neo4j ...")
 980 |         print("[+] Deleting all the data ...")
 981 |         with neo4jDriver.session() as session:
 982 |             delAll=session.run("MATCH (n) DETACH DELETE n")
 983 |         #Close the connection with Neo4j
 984 |         neo4jDriver.close()
 985 | 
 986 |     else:
 987 |         try:
 988 |             
 989 |             # Check first if the user provided PATH or FILE and if it is exist.
 990 |             
 991 |             if os.path.isdir(eventsFile):               
 992 |                 
 993 |                 #Enumerate the files under the specified directory.
 994 |                 eventsFolder = Path(eventsFile)
 995 |                 
 996 |                 dirFiles = os.listdir(eventsFolder)
 997 | 
 998 |                 for file in dirFiles:
 999 |                     
1000 |                     fileFullPath = eventsFolder / file
1001 |                     if os.path.isfile(fileFullPath) and file.endswith('.evtx'):
1002 |                         
1003 |                         #Get the file which all the events will be imported befored moved to neo4j.
1004 |                         # It just an empty file which will be filled in with Events
1005 |                         #print(fileFullPath) #OK
1006 |                         outXMLFile = generateOutXMLFileRandomName(eventsFolder)
1007 |                         
1008 |                         # Read the contents of the EVTX file.
1009 |                         evtxDoc = get_events(fileFullPath)
1010 | 
1011 |                         # Create an XML file with the same name as EVTX
1012 |                         #evtx2xml = str(file).replace(".evtx", ".xml")
1013 |                         evtx2xml = str(fileFullPath).replace(".evtx", ".xml")
1014 |                         print ('[+] Started: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))
1015 |                         print("[+] I'm fixing the fualty chars, I need sometime for that ...")
1016 |                         print("\n")
1017 |                         f = open(evtx2xml, "w")
1018 |                         f.write("<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>")
1019 |                         f.write("\n")
1020 |                         f.write("<Events>")
1021 |                         for x in evtxDoc:
1022 |                             #discard the unicode chars
1023 |                             if re.findall('&#\d+;',str(x)): 
1024 |                                 f.write(re.sub(r'&#\d+;',r'',x))
1025 |                             else:
1026 |                                 f.write(x) 
1027 |                         f.write("</Events>")    
1028 |                         f.close()
1029 |                         rootDoc = minidom.parse(evtx2xml).documentElement
1030 |                         print("\n")
1031 | 
1032 |                         rootDoc = minidom.parse(evtx2xml).documentElement
1033 |                         parsingFunction(evtx2xml,rootDoc,outXMLFile)
1034 |                         print("\n")
1035 | 
1036 |                         # Remove temp files
1037 |                         os.remove(outXMLFile)
1038 |                         os.remove(evtx2xml)
1039 |                         #os.remove(file)
1040 | 
1041 |                     if os.path.isfile(fileFullPath) and file.endswith('.xml') and not file.endswith('_fixed.xml'):
1042 |                         #Get the file which all the events will be imported befored moved to neo4j.
1043 |                         # It just an empty file which will be filled in with Events
1044 |                         #print(fileFullPath) #OK
1045 |                         outXMLFile = generateOutXMLFileRandomName(eventsFolder)                     
1046 |                         
1047 |                         #Open exported XML and remove those chars - Step 1
1048 |                         openXMLread=open(fileFullPath,"r",encoding="utf-8")
1049 |                         fixChars=re.sub(r"ï»¿", r"", openXMLread.read()) #When Events exported from Windows Event Viewer has those bad chars inside the XML.
1050 |                         fixChars=re.sub(r'&#\d+;',r'',fixChars) # Clean the Unicode chars.
1051 |                         # https://stackoverflow.com/questions/51710082/what-does-unicodedata-normalize-do-in-python
1052 |                         # https://godatadriven.com/blog/handling-encoding-issues-with-unicode-normalisation-in-python/
1053 |                         fixChars=unicodedata.normalize("NFKD", fixChars).encode('WINDOWS-1252', 'ignore').decode('utf-8')
1054 |                         openXMLread.close()
1055 | 
1056 |                         #Write again the XML without those chars -Step 2
1057 |                         file=str(fileFullPath).replace(".xml","_fixed.xml")
1058 |                         openXMLwrite=open(file,"w")
1059 |                         openXMLwrite.write(fixChars)
1060 |                         openXMLwrite.close()
1061 | 
1062 |                         rootDoc = minidom.parse(file).documentElement #Open exported XML file.
1063 |                         
1064 |                         parsingFunction(file,rootDoc,outXMLFile)
1065 |                         print("\n")
1066 | 
1067 |                         # Remove temp files
1068 |                         os.remove(outXMLFile)
1069 |                         os.remove(file)
1070 |                     
1071 |             # User provided a file and not a directory.
1072 |             else:
1073 |                 # Get the file name from -ev flag
1074 |                 file = eventsFile
1075 |                 # Get directory of the EVTX file.
1076 |                 #cwd = os.path.dirname(file)
1077 |                 if file.endswith('.evtx'):
1078 |                     
1079 |                     #Get the file which all the events will be imported befored moved to neo4j
1080 |                     outXMLFile = generateOutXMLFileRandomName(file)
1081 |                     # Read the contents of the EVTX file.
1082 |                     evtxDoc = get_events(file)
1083 |                     
1084 |                     # Create an XML file with the same name as EVTX
1085 |                     evtx2xml = str(file).replace(".evtx", ".xml")
1086 |                     print ('[+] Started: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))
1087 |                     print("[+] I'm fixing the fualty chars, I need sometime for that ...")
1088 |                     print("\n")
1089 |                     f = open(evtx2xml, "w")
1090 |                     f.write("<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>")
1091 |                     f.write("\n")
1092 |                     f.write("<Events>")
1093 |                     for x in evtxDoc:
1094 |                         #discard the unicode chars
1095 |                         if re.findall('&#\d+;',str(x)): 
1096 |                             f.write(re.sub(r'&#\d+;',r'',x))
1097 |                         else:
1098 |                             f.write(x) 
1099 |                     f.write("</Events>")    
1100 |                     f.close()
1101 |                     
1102 |                     rootDoc = minidom.parse(evtx2xml).documentElement
1103 |                     parsingFunction(evtx2xml,rootDoc,outXMLFile)
1104 |                     print("\n")
1105 | 
1106 |                     # Remove temp files
1107 |                     os.remove(outXMLFile)
1108 |                     os.remove(evtx2xml)
1109 |                     #os.remove(file) 
1110 |                 
1111 |                 elif file.endswith('.xml'):
1112 |                     
1113 |                     # Get the file which all the events will be imported befored moved to neo4j.
1114 |                     # It just an empty file which will be filled in with Events
1115 |                     outXMLFile = generateOutXMLFileRandomName(file)
1116 |                     #outXMLFileArray.append(outXMLFile)
1117 |                     
1118 |                     #Open exported XML and remove those chars
1119 |                     openXMLread=open(file,"r",encoding="utf-8")
1120 |                     fixChars=re.sub(r"ï»¿", r"", openXMLread.read()) #When Events exported from Windows Event Viewer has those bad chars inside the XML.
1121 |                     fixChars=re.sub(r'&#\d+;',r'',fixChars)
1122 |                     # https://stackoverflow.com/questions/51710082/what-does-unicodedata-normalize-do-in-python
1123 |                     # https://godatadriven.com/blog/handling-encoding-issues-with-unicode-normalisation-in-python/
1124 |                     fixChars=unicodedata.normalize("NFKD", fixChars).encode('WINDOWS-1252', 'ignore').decode('UTF-8')
1125 |                     openXMLread.close()
1126 |                     
1127 |                     #Write again the XML without those chars
1128 |                     file=file.replace(".xml","_fixed.xml")
1129 |                     openXMLwrite=open(file,"w")
1130 |                     openXMLwrite.write(fixChars)
1131 |                     openXMLwrite.close()
1132 |                     rootDoc = minidom.parse(file).documentElement #Open exported XML file.
1133 |                     parsingFunction(file,rootDoc,outXMLFile)
1134 |                     print("\n")
1135 | 
1136 |                     # Remove temp files
1137 |                     os.remove(outXMLFile)
1138 |                     os.remove(file)
1139 |                 
1140 |                 else:
1141 |                     print("[!] Provide an XML or EVTX file! ")
1142 |                     
1143 |         except Exception as e:
1144 |             print(e)
1145 |             #print("[-] Can't find the XML file or XML is not in the right format. Use -x/--xml to provide the Windows Event XML file.")
1146 |             sys.exit(1)
1147 | 
1148 |     
1149 |     #Print Counters - Process 4
1150 |     print("\n")
1151 |     print("========= Database Information ==========")
1152 |     cc=multiprocessing.Lock()
1153 |     cc.acquire()
1154 |     ccounters=Process(target=eventCounters,args=(neo4jUri,neo4jUser,neo4jPass))
1155 |     ccounters.start()
1156 |     #p=eventCounters()
1157 |     ccounters.join()
1158 |     cc.release()
1159 | 
1160 | 
1161 | 


--------------------------------------------------------------------------------
/images/EpimitheusNeo4j.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tasox/Epimitheus/f0c3202911968021c3762e291f4d793374f7423f/images/EpimitheusNeo4j.png


--------------------------------------------------------------------------------
/images/addEventIDs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tasox/Epimitheus/f0c3202911968021c3762e291f4d793374f7423f/images/addEventIDs.png


--------------------------------------------------------------------------------
/images/blackListedObjects.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tasox/Epimitheus/f0c3202911968021c3762e291f4d793374f7423f/images/blackListedObjects.png


--------------------------------------------------------------------------------
/images/filename.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/images/windowsDefender.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tasox/Epimitheus/f0c3202911968021c3762e291f4d793374f7423f/images/windowsDefender.png


--------------------------------------------------------------------------------
/minidom/README.md:
--------------------------------------------------------------------------------
 1 | #### Minidom failed to decode hex chars like \xb5
 2 | 
 3 | If you use wevtutil or any other tool to export Windows Events in XML form, the tool may fail to decode characters such as "\xb5" = μ (e.g. μTorrent). As a result of this oversight, minidom will also fail to parse your XML file.
 4 | 
 5 | ##### The code responsible for this problematic behavior is in the file "Lib/xml/dom/expatbuilder.py", at line 910.
 6 | 
 7 | ![alt text](https://github.com/tasox/Epimitheus/blob/master/minidom/expatbuilderUnFixed.png)
 8 | 
 9 | 
10 | #### Fixed
11 | ![alt text](https://github.com/tasox/Epimitheus/blob/master/minidom/expatbuilderFixed.png)
12 | 
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/minidom/expatbuilder.py:
--------------------------------------------------------------------------------
  1 | """Facility to use the Expat parser to load a minidom instance
  2 | from a string or file.
  3 | 
  4 | This avoids all the overhead of SAX and pulldom to gain performance.
  5 | """
  6 | 
  7 | # Warning!
  8 | #
  9 | # This module is tightly bound to the implementation details of the
 10 | # minidom DOM and can't be used with other DOM implementations.  This
 11 | # is due, in part, to a lack of appropriate methods in the DOM (there is
 12 | # no way to create Entity and Notation nodes via the DOM Level 2
 13 | # interface), and for performance.  The latter is the cause of some fairly
 14 | # cryptic code.
 15 | #
 16 | # Performance hacks:
 17 | #
 18 | #   -  .character_data_handler() has an extra case in which continuing
 19 | #      data is appended to an existing Text node; this can be a
 20 | #      speedup since pyexpat can break up character data into multiple
 21 | #      callbacks even though we set the buffer_text attribute on the
 22 | #      parser.  This also gives us the advantage that we don't need a
 23 | #      separate normalization pass.
 24 | #
 25 | #   -  Determining that a node exists is done using an identity comparison
 26 | #      with None rather than a truth test; this avoids searching for and
 27 | #      calling any methods on the node object if it exists.  (A rather
 28 | #      nice speedup is achieved this way as well!)
 29 | 
 30 | from xml.dom import xmlbuilder, minidom, Node
 31 | from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE
 32 | from xml.parsers import expat
 33 | from xml.dom.minidom import _append_child, _set_attribute_node
 34 | from xml.dom.NodeFilter import NodeFilter
 35 | 
# Module-level aliases for the Node type constants used by the builders below.
TEXT_NODE = Node.TEXT_NODE
CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE
DOCUMENT_NODE = Node.DOCUMENT_NODE

# Module-level aliases for the DOMBuilderFilter result codes.
FILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT
FILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT
FILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP
FILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT

# Shared DOM implementation; ExpatBuilder.reset() uses it to create each
# fresh, empty Document.
theDOMImplementation = minidom.getDOMImplementation()

# Expat typename -> TypeInfo
# Looked up by ElementInfo.getAttributeType(); "ENUM" is the entry used for
# attribute types whose declaration starts with "(".
_typeinfo_map = {
    "CDATA":    minidom.TypeInfo(None, "cdata"),
    "ENUM":     minidom.TypeInfo(None, "enumeration"),
    "ENTITY":   minidom.TypeInfo(None, "entity"),
    "ENTITIES": minidom.TypeInfo(None, "entities"),
    "ID":       minidom.TypeInfo(None, "id"),
    "IDREF":    minidom.TypeInfo(None, "idref"),
    "IDREFS":   minidom.TypeInfo(None, "idrefs"),
    "NMTOKEN":  minidom.TypeInfo(None, "nmtoken"),
    "NMTOKENS": minidom.TypeInfo(None, "nmtokens"),
    }
 59 | 
 60 | class ElementInfo(object):
 61 |     __slots__ = '_attr_info', '_model', 'tagName'
 62 | 
 63 |     def __init__(self, tagName, model=None):
 64 |         self.tagName = tagName
 65 |         self._attr_info = []
 66 |         self._model = model
 67 | 
 68 |     def __getstate__(self):
 69 |         return self._attr_info, self._model, self.tagName
 70 | 
 71 |     def __setstate__(self, state):
 72 |         self._attr_info, self._model, self.tagName = state
 73 | 
 74 |     def getAttributeType(self, aname):
 75 |         for info in self._attr_info:
 76 |             if info[1] == aname:
 77 |                 t = info[-2]
 78 |                 if t[0] == "(":
 79 |                     return _typeinfo_map["ENUM"]
 80 |                 else:
 81 |                     return _typeinfo_map[info[-2]]
 82 |         return minidom._no_type
 83 | 
 84 |     def getAttributeTypeNS(self, namespaceURI, localName):
 85 |         return minidom._no_type
 86 | 
 87 |     def isElementContent(self):
 88 |         if self._model:
 89 |             type = self._model[0]
 90 |             return type not in (expat.model.XML_CTYPE_ANY,
 91 |                                 expat.model.XML_CTYPE_MIXED)
 92 |         else:
 93 |             return False
 94 | 
 95 |     def isEmpty(self):
 96 |         if self._model:
 97 |             return self._model[0] == expat.model.XML_CTYPE_EMPTY
 98 |         else:
 99 |             return False
100 | 
101 |     def isId(self, aname):
102 |         for info in self._attr_info:
103 |             if info[1] == aname:
104 |                 return info[-2] == "ID"
105 |         return False
106 | 
107 |     def isIdNS(self, euri, ename, auri, aname):
108 |         # not sure this is meaningful
109 |         return self.isId((auri, aname))
110 | 
def _intern(builder, s):
    """Return the copy of *s* stored in the builder's intern table.

    *s* itself is stored and returned when no equal string is present yet.
    """
    setdefault = builder._intern_setdefault
    return setdefault(s, s)
113 | 
def _parse_ns_name(builder, name):
    """Split a space-separated namespace name into its components.

    *name* is either "uri localname" or "uri localname prefix".  Returns
    the tuple ``(uri, localname, prefix, qname)`` with the strings passed
    through the builder's intern table; the prefix is EMPTY_PREFIX when
    none is given.  Raises ValueError for any other number of parts.
    """
    assert ' ' in name
    pieces = name.split(' ')
    intern = builder._intern_setdefault
    count = len(pieces)
    if count not in (2, 3):
        raise ValueError("Unsupported syntax: spaces in URIs not supported: %r" % name)
    if count == 3:
        uri, localname, prefix = pieces
        prefix = intern(prefix, prefix)
        qname = "%s:%s" % (prefix, localname)
        qname = intern(qname, qname)
        localname = intern(localname, localname)
    else:
        uri, localname = pieces
        prefix = EMPTY_PREFIX
        localname = intern(localname, localname)
        qname = localname
    return intern(uri, uri), localname, prefix, qname
131 | 
132 | 
class ExpatBuilder:
    """Document builder that uses Expat to build a ParsedXML.DOM document
    instance.

    The builder installs its own methods as Expat callbacks (see
    ``install``) and grows ``self.document`` as the parser reports events.
    ``self.curNode`` is the node that newly created children are appended
    to.  When ``options.filter`` is set, nodes are offered to it through a
    FilterVisibilityController and removed again when it rejects them.
    """

    def __init__(self, options=None):
        """Create a builder; *options* defaults to ``xmlbuilder.Options()``."""
        if options is None:
            options = xmlbuilder.Options()
        self._options = options
        if self._options.filter is not None:
            self._filter = FilterVisibilityController(self._options.filter)
        else:
            self._filter = None
            # This *really* doesn't do anything in this case, so
            # override it with something fast & minimal.
            self._finish_start_element = id
        self._parser = None
        self.reset()

    def createParser(self):
        """Create a new parser object."""
        return expat.ParserCreate()

    def getParser(self):
        """Return the parser object, creating a new one if needed."""
        if not self._parser:
            self._parser = self.createParser()
            self._intern_setdefault = self._parser.intern.setdefault
            self._parser.buffer_text = True
            # start_element_handler() relies on the flat
            # [name, value, name, value, ...] attribute list.
            self._parser.ordered_attributes = True
            self._parser.specified_attributes = True
            self.install(self._parser)
        return self._parser

    def reset(self):
        """Free all data structures used during DOM construction."""
        self.document = theDOMImplementation.createDocument(
            EMPTY_NAMESPACE, None, None)
        self.curNode = self.document
        self._elem_info = self.document._elem_info
        self._cdata = False

    def install(self, parser):
        """Install the callbacks needed to build the DOM into the parser."""
        # This creates circular references!
        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
        parser.StartElementHandler = self.first_element_handler
        parser.EndElementHandler = self.end_element_handler
        parser.ProcessingInstructionHandler = self.pi_handler
        if self._options.entities:
            parser.EntityDeclHandler = self.entity_decl_handler
        parser.NotationDeclHandler = self.notation_decl_handler
        if self._options.comments:
            parser.CommentHandler = self.comment_handler
        if self._options.cdata_sections:
            parser.StartCdataSectionHandler = self.start_cdata_section_handler
            parser.EndCdataSectionHandler = self.end_cdata_section_handler
            parser.CharacterDataHandler = self.character_data_handler_cdata
        else:
            parser.CharacterDataHandler = self.character_data_handler
        parser.ExternalEntityRefHandler = self.external_entity_ref_handler
        parser.XmlDeclHandler = self.xml_decl_handler
        parser.ElementDeclHandler = self.element_decl_handler
        parser.AttlistDeclHandler = self.attlist_decl_handler

    def parseFile(self, file):
        """Parse a document from a file object, returning the document
        node."""
        parser = self.getParser()
        first_buffer = True
        try:
            while 1:
                buffer = file.read(16*1024)
                if not buffer:
                    break
                parser.Parse(buffer, 0)
                # Only the first chunk is searched for an internal subset.
                if first_buffer and self.document.documentElement:
                    self._setup_subset(buffer)
                first_buffer = False
            parser.Parse("", True)
        except ParseEscape:
            # Raised when a filter returns FILTER_INTERRUPT; keep what was
            # built so far.
            pass
        doc = self.document
        self.reset()
        self._parser = None
        return doc

    def parseString(self, string):
        """Parse a document from a string, returning the document node."""
        parser = self.getParser()
        try:
            parser.Parse(string, True)
            self._setup_subset(string)
        except ParseEscape:
            pass
        doc = self.document
        self.reset()
        self._parser = None
        return doc

    def _setup_subset(self, buffer):
        """Load the internal subset if there might be one."""
        if self.document.doctype:
            extractor = InternalSubsetExtractor()
            extractor.parseString(buffer)
            subset = extractor.getSubset()
            self.document.doctype.internalSubset = subset

    def start_doctype_decl_handler(self, doctypeName, systemId, publicId,
                                   has_internal_subset):
        """Create the DocumentType node and attach it to the document.

        If the filter rejects the doctype it is detached again and the
        entity/notation declaration callbacks are switched off.  While an
        internal subset is being read, comment and processing-instruction
        callbacks are disabled until ``end_doctype_decl_handler`` runs.
        """
        doctype = self.document.implementation.createDocumentType(
            doctypeName, publicId, systemId)
        doctype.ownerDocument = self.document
        _append_child(self.document, doctype)
        self.document.doctype = doctype
        if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT:
            self.document.doctype = None
            del self.document.childNodes[-1]
            doctype = None
            self._parser.EntityDeclHandler = None
            self._parser.NotationDeclHandler = None
        if has_internal_subset:
            if doctype is not None:
                doctype.entities._seq = []
                doctype.notations._seq = []
            self._parser.CommentHandler = None
            self._parser.ProcessingInstructionHandler = None
            self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler

    def end_doctype_decl_handler(self):
        """Re-enable the callbacks disabled while reading the internal subset."""
        if self._options.comments:
            self._parser.CommentHandler = self.comment_handler
        self._parser.ProcessingInstructionHandler = self.pi_handler
        if not (self._elem_info or self._filter):
            # Nothing to do at element end: swap in a cheap no-op.
            self._finish_end_element = id

    def pi_handler(self, target, data):
        """Append a processing instruction unless the filter rejects it."""
        node = self.document.createProcessingInstruction(target, data)
        _append_child(self.curNode, node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            self.curNode.removeChild(node)

    def character_data_handler_cdata(self, data):
        """Character-data callback used when CDATA sections are preserved.

        Inside a CDATA section the data goes into a CDATASection node
        (continuing the previous one when possible); outside, it is merged
        into a trailing Text node or put in a new one.
        """
        childNodes = self.curNode.childNodes
        if self._cdata:
            if (  self._cdata_continue
                  and childNodes[-1].nodeType == CDATA_SECTION_NODE):
                childNodes[-1].appendData(data)
                return
            node = self.document.createCDATASection(data)
            self._cdata_continue = True
        elif childNodes and childNodes[-1].nodeType == TEXT_NODE:
            node = childNodes[-1]
            value = node.data + data
            node.data = value
            return
        else:
            node = minidom.Text()
            node.data = data
            node.ownerDocument = self.document
        _append_child(self.curNode, node)

    def character_data_handler(self, data):
        """Append *data* to a trailing Text node, or add a new Text node."""
        childNodes = self.curNode.childNodes
        if childNodes and childNodes[-1].nodeType == TEXT_NODE:
            node = childNodes[-1]
            node.data = node.data + data
            return
        node = minidom.Text()
        node.data = node.data + data
        node.ownerDocument = self.document
        _append_child(self.curNode, node)

    def entity_decl_handler(self, entityName, is_parameter_entity, value,
                            base, systemId, publicId, notationName):
        """Record a general entity declaration on the doctype.

        Parameter entities are ignored, and nothing is recorded when the
        ``entities`` option is off.  The entity is dropped again if the
        filter rejects it.
        """
        if is_parameter_entity:
            # we don't care about parameter entities for the DOM
            return
        if not self._options.entities:
            return
        node = self.document._create_entity(entityName, publicId,
                                            systemId, notationName)
        if value is not None:
            # internal entity
            # node *should* be readonly, but we'll cheat
            child = self.document.createTextNode(value)
            node.childNodes.append(child)
        self.document.doctype.entities._seq.append(node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            del self.document.doctype.entities._seq[-1]

    def notation_decl_handler(self, notationName, base, systemId, publicId):
        """Record a notation declaration on the doctype.

        The notation is dropped again only if the filter rejects it, the
        same rule the entity, comment and processing-instruction handlers
        apply.
        """
        node = self.document._create_notation(notationName, publicId, systemId)
        self.document.doctype.notations._seq.append(node)
        # Fixed: this used to compare against FILTER_ACCEPT, which removed
        # every notation the filter accepted and kept the rejected ones.
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            del self.document.doctype.notations._seq[-1]

    def comment_handler(self, data):
        """Append a comment node unless the filter rejects it."""
        node = self.document.createComment(data)
        _append_child(self.curNode, node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            self.curNode.removeChild(node)

    def start_cdata_section_handler(self):
        """Mark that the following character data belongs to a CDATA section."""
        self._cdata = True
        self._cdata_continue = False

    def end_cdata_section_handler(self):
        """Mark the end of the current CDATA section."""
        self._cdata = False
        self._cdata_continue = False

    def external_entity_ref_handler(self, context, base, systemId, publicId):
        """Return 1 without reading the referenced external entity."""
        return 1

    def first_element_handler(self, name, attributes):
        """Handle the first start tag, then hand over to start_element_handler."""
        if self._filter is None and not self._elem_info:
            # Nothing to do at element end: swap in a cheap no-op.
            self._finish_end_element = id
        self.getParser().StartElementHandler = self.start_element_handler
        self.start_element_handler(name, attributes)

    def start_element_handler(self, name, attributes):
        """Create an element with its attributes and make it the current node.

        *attributes* is a flat list alternating attribute names and values.
        """
        node = self.document.createElement(name)
        _append_child(self.curNode, node)
        self.curNode = node

        if attributes:
            for i in range(0, len(attributes), 2):
                a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
                                 None, EMPTY_PREFIX)
                value = attributes[i+1]
                a.value = value
                a.ownerDocument = self.document
                _set_attribute_node(node, a)

        if node is not self.document.documentElement:
            self._finish_start_element(node)

    def _finish_start_element(self, node):
        """Apply the filter's startContainer() verdict to a new element.

        On FILTER_REJECT or FILTER_SKIP a Rejecter/Skipper takes over the
        element callbacks and *node* is removed from the tree.  __init__
        replaces this method with a no-op when no filter is configured.
        """
        if self._filter:
            # To be general, we'd have to call isSameNode(), but this
            # is sufficient for minidom:
            if node is self.document.documentElement:
                return
            filt = self._filter.startContainer(node)
            if filt == FILTER_REJECT:
                # ignore this node & all descendents
                Rejecter(self)
            elif filt == FILTER_SKIP:
                # ignore this node, but make it's children become
                # children of the parent node
                Skipper(self)
            else:
                return
            self.curNode = node.parentNode
            node.parentNode.removeChild(node)
            node.unlink()

    # If this ever changes, Namespaces.end_element_handler() needs to
    # be changed to match.
    #
    def end_element_handler(self, name):
        """Close the current element and make its parent the current node."""
        curNode = self.curNode
        self.curNode = curNode.parentNode
        self._finish_end_element(curNode)

    def _finish_end_element(self, curNode):
        """Post-process a closed element.

        Strips ignorable whitespace when element information is known and
        removes the element if the filter rejects it.  The document
        element is never offered to the filter.
        """
        info = self._elem_info.get(curNode.tagName)
        if info:
            self._handle_white_text_nodes(curNode, info)
        if self._filter:
            if curNode is self.document.documentElement:
                return
            if self._filter.acceptNode(curNode) == FILTER_REJECT:
                self.curNode.removeChild(curNode)
                curNode.unlink()

    def _handle_white_text_nodes(self, node, info):
        """Remove whitespace-only text children of *node* when allowed.

        Nothing is removed if the ``whitespace_in_element_content`` option
        is set or *info* does not describe element content.
        """
        if (self._options.whitespace_in_element_content
            or not info.isElementContent()):
            return

        # We have element type information and should remove ignorable
        # whitespace; identify for text nodes which contain only
        # whitespace.
        L = []
        for child in node.childNodes:
            if child.nodeType == TEXT_NODE and not child.data.strip():
                L.append(child)

        # Remove ignorable whitespace from the tree.
        for child in L:
            node.removeChild(child)

    def element_decl_handler(self, name, model):
        """Store the content model declared for element type *name*."""
        info = self._elem_info.get(name)
        if info is None:
            self._elem_info[name] = ElementInfo(name, model)
        else:
            assert info._model is None
            info._model = model

    def attlist_decl_handler(self, elem, name, type, default, required):
        """Record an attribute declaration for element type *elem*."""
        info = self._elem_info.get(elem)
        if info is None:
            info = ElementInfo(elem)
            self._elem_info[elem] = info
        # ElementInfo reads the name at index 1 and the type at index -2.
        info._attr_info.append(
            [None, name, None, None, default, 0, type, required])

    def xml_decl_handler(self, version, encoding, standalone):
        """Copy the XML declaration's values onto the document."""
        self.document.version = version
        self.document.encoding = encoding
        # This is still a little ugly, thanks to the pyexpat API. ;-(
        # A negative value leaves document.standalone untouched.
        if standalone >= 0:
            if standalone:
                self.document.standalone = True
            else:
                self.document.standalone = False
451 | 
# Values a filter callback may legitimately return to
# FilterVisibilityController; anything else raises ValueError there.
# Don't include FILTER_INTERRUPT, since that's checked separately
# where allowed.
_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP)
455 | 
class FilterVisibilityController(object):
    """Wrapper around a DOMBuilderFilter which implements the checks
    to make the whatToShow filter attribute work."""

    __slots__ = 'filter',

    # Node type -> the whatToShow bit that makes the filter see that type.
    _nodetype_mask = {
        Node.ELEMENT_NODE:                NodeFilter.SHOW_ELEMENT,
        Node.ATTRIBUTE_NODE:              NodeFilter.SHOW_ATTRIBUTE,
        Node.TEXT_NODE:                   NodeFilter.SHOW_TEXT,
        Node.CDATA_SECTION_NODE:          NodeFilter.SHOW_CDATA_SECTION,
        Node.ENTITY_REFERENCE_NODE:       NodeFilter.SHOW_ENTITY_REFERENCE,
        Node.ENTITY_NODE:                 NodeFilter.SHOW_ENTITY,
        Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION,
        Node.COMMENT_NODE:                NodeFilter.SHOW_COMMENT,
        Node.DOCUMENT_NODE:               NodeFilter.SHOW_DOCUMENT,
        Node.DOCUMENT_TYPE_NODE:          NodeFilter.SHOW_DOCUMENT_TYPE,
        Node.DOCUMENT_FRAGMENT_NODE:      NodeFilter.SHOW_DOCUMENT_FRAGMENT,
        Node.NOTATION_NODE:               NodeFilter.SHOW_NOTATION,
        }

    def __init__(self, filter):
        self.filter = filter

    def startContainer(self, node):
        """Ask the filter about an element that has just been opened.

        Node types hidden by ``whatToShow`` are accepted without consulting
        the filter.  Raises ParseEscape on FILTER_INTERRUPT and ValueError
        on any value outside the allowed return codes.
        """
        bit = self._nodetype_mask[node.nodeType]
        if not (self.filter.whatToShow & bit):
            return FILTER_ACCEPT
        verdict = self.filter.startContainer(node)
        if verdict == FILTER_INTERRUPT:
            raise ParseEscape
        if verdict not in _ALLOWED_FILTER_RETURNS:
            raise ValueError(
                  "startContainer() returned illegal value: " + repr(verdict))
        return verdict

    def acceptNode(self, node):
        """Ask the filter about a completed node.

        Node types hidden by ``whatToShow`` are accepted without consulting
        the filter.  FILTER_SKIP re-parents the node's children onto its
        parent and is reported as FILTER_REJECT so the caller removes the
        node itself.  Raises ParseEscape on FILTER_INTERRUPT and ValueError
        on any value outside the allowed return codes.
        """
        bit = self._nodetype_mask[node.nodeType]
        if not (self.filter.whatToShow & bit):
            return FILTER_ACCEPT
        verdict = self.filter.acceptNode(node)
        if verdict == FILTER_INTERRUPT:
            raise ParseEscape
        if verdict == FILTER_SKIP:
            # move all child nodes to the parent, and remove this node
            parent = node.parentNode
            # Iterate over a copy: appendChild() detaches each child from
            # the list being walked.
            for child in list(node.childNodes):
                parent.appendChild(child)
            # node is handled by the caller
            return FILTER_REJECT
        if verdict not in _ALLOWED_FILTER_RETURNS:
            raise ValueError(
                  "acceptNode() returned illegal value: " + repr(verdict))
        return verdict
512 | 
513 | 
class FilterCrutch(object):
    """Base for helpers that temporarily take over a builder's element callbacks.

    On construction the parser's current start/end element handlers are
    saved and replaced with this object's own ``start_element_handler`` and
    ``end_element_handler``, which subclasses must provide.
    """

    __slots__ = '_builder', '_level', '_old_start', '_old_end'

    def __init__(self, builder):
        self._level = 0
        self._builder = builder
        target = builder._parser
        # Remember the displaced handlers so they can be restored later.
        self._old_start = target.StartElementHandler
        self._old_end = target.EndElementHandler
        target.StartElementHandler = self.start_element_handler
        target.EndElementHandler = self.end_element_handler
525 | 
class Rejecter(FilterCrutch):
    """Swallow a rejected element together with everything inside it.

    While active, the content callbacks are cleared and only nesting depth
    is tracked; when the rejected element's own end tag arrives the
    builder's callbacks are reinstalled and the saved element handlers are
    put back.
    """

    __slots__ = ()

    def __init__(self, builder):
        FilterCrutch.__init__(self, builder)
        silenced = (
            "ProcessingInstructionHandler",
            "CommentHandler",
            "CharacterDataHandler",
            "StartCdataSectionHandler",
            "EndCdataSectionHandler",
            "ExternalEntityRefHandler",
        )
        parser = builder._parser
        for handler_name in silenced:
            setattr(parser, handler_name, None)

    def start_element_handler(self, *args):
        # A nested element opened inside the rejected one.
        self._level += 1

    def end_element_handler(self, *args):
        if self._level != 0:
            # Closing a nested element; still inside the rejected one.
            self._level -= 1
            return
        # restore the old handlers
        parser = self._builder._parser
        self._builder.install(parser)
        parser.StartElementHandler = self._old_start
        parser.EndElementHandler = self._old_end
553 | 
class Skipper(FilterCrutch):
    """Crutch that forwards element events to the saved handlers.

    Start events are passed to the saved start handler; the depth counter
    only grows when that call changed the builder's ``curNode``.  End
    events are forwarded while the depth is non-zero.  An end event at
    depth zero puts the saved handlers back and drops the builder
    reference.
    """

    __slots__ = ()

    def start_element_handler(self, *args):
        node_before = self._builder.curNode
        self._old_start(*args)
        if self._builder.curNode is not node_before:
            self._level += 1

    def end_element_handler(self, *args):
        if self._level:
            self._level -= 1
            self._old_end(*args)
            return
        # We're popping back out of the node we're skipping, so we
        # shouldn't need to do anything but reset the handlers.
        expat_parser = self._builder._parser
        expat_parser.StartElementHandler = self._old_start
        expat_parser.EndElementHandler = self._old_end
        self._builder = None
573 | 
574 | 
# Framework document used by the fragment builder.
#
# The template is formatted in two steps.  The "%" applied below fills in
# the internal system id of the entity declaration and collapses every
# "%%s" to a plain "%s".  The three remaining "%s" slots are filled later,
# in order, with: the doctype's external identifier string, the internal
# subset string, and the namespace attributes string.

# System id of the placeholder entity that the wrapper document references.
_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \
    "http://xml.python.org/entities/fragment-builder/internal"

_FRAGMENT_BUILDER_TEMPLATE = (
    '''\
<!DOCTYPE wrapper
  %%s [
  <!ENTITY fragment-builder-internal
    SYSTEM "%s">
%%s
]>
<wrapper %%s
>&fragment-builder-internal;</wrapper>'''
    % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID)
592 | 
593 | 
594 | class FragmentBuilder(ExpatBuilder):
595 |     """Builder which constructs document fragments given XML source
596 |     text and a context node.
597 | 
598 |     The context node is expected to provide information about the
599 |     namespace declarations which are in scope at the start of the
600 |     fragment.
601 |     """
602 | 
603 |     def __init__(self, context, options=None):
604 |         if context.nodeType == DOCUMENT_NODE:
605 |             self.originalDocument = context
606 |             self.context = context
607 |         else:
608 |             self.originalDocument = context.ownerDocument
609 |             self.context = context
610 |         ExpatBuilder.__init__(self, options)
611 | 
612 |     def reset(self):
613 |         ExpatBuilder.reset(self)
614 |         self.fragment = None
615 | 
616 |     def parseFile(self, file):
617 |         """Parse a document fragment from a file object, returning the
618 |         fragment node."""
619 |         return self.parseString(file.read())
620 | 
621 |     def parseString(self, string):
622 |         """Parse a document fragment from a string, returning the
623 |         fragment node."""
624 |         self._source = string
625 |         parser = self.getParser()
626 |         doctype = self.originalDocument.doctype
627 |         ident = ""
628 |         if doctype:
629 |             subset = doctype.internalSubset or self._getDeclarations()
630 |             if doctype.publicId:
631 |                 ident = ('PUBLIC "%s" "%s"'
632 |                          % (doctype.publicId, doctype.systemId))
633 |             elif doctype.systemId:
634 |                 ident = 'SYSTEM "%s"' % doctype.systemId
635 |         else:
636 |             subset = ""
637 |         nsattrs = self._getNSattrs() # get ns decls from node's ancestors
638 |         document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs)
639 |         try:
640 |             parser.Parse(document, 1)
641 |         except:
642 |             self.reset()
643 |             raise
644 |         fragment = self.fragment
645 |         self.reset()
646 | ##         self._parser = None
647 |         return fragment
648 | 
649 |     def _getDeclarations(self):
650 |         """Re-create the internal subset from the DocumentType node.
651 | 
652 |         This is only needed if we don't already have the
653 |         internalSubset as a string.
654 |         """
655 |         doctype = self.context.ownerDocument.doctype
656 |         s = ""
657 |         if doctype:
658 |             for i in range(doctype.notations.length):
659 |                 notation = doctype.notations.item(i)
660 |                 if s:
661 |                     s = s + "\n  "
662 |                 s = "%s<!NOTATION %s" % (s, notation.nodeName)
663 |                 if notation.publicId:
664 |                     s = '%s PUBLIC "%s"\n             "%s">' \
665 |                         % (s, notation.publicId, notation.systemId)
666 |                 else:
667 |                     s = '%s SYSTEM "%s">' % (s, notation.systemId)
668 |             for i in range(doctype.entities.length):
669 |                 entity = doctype.entities.item(i)
670 |                 if s:
671 |                     s = s + "\n  "
672 |                 s = "%s<!ENTITY %s" % (s, entity.nodeName)
673 |                 if entity.publicId:
674 |                     s = '%s PUBLIC "%s"\n             "%s"' \
675 |                         % (s, entity.publicId, entity.systemId)
676 |                 elif entity.systemId:
677 |                     s = '%s SYSTEM "%s"' % (s, entity.systemId)
678 |                 else:
679 |                     s = '%s "%s"' % (s, entity.firstChild.data)
680 |                 if entity.notationName:
681 |                     s = "%s NOTATION %s" % (s, entity.notationName)
682 |                 s = s + ">"
683 |         return s
684 | 
685 |     def _getNSattrs(self):
686 |         return ""
687 | 
688 |     def external_entity_ref_handler(self, context, base, systemId, publicId):
689 |         if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID:
690 |             # this entref is the one that we made to put the subtree
691 |             # in; all of our given input is parsed in here.
692 |             old_document = self.document
693 |             old_cur_node = self.curNode
694 |             parser = self._parser.ExternalEntityParserCreate(context)
695 |             # put the real document back, parse into the fragment to return
696 |             self.document = self.originalDocument
697 |             self.fragment = self.document.createDocumentFragment()
698 |             self.curNode = self.fragment
699 |             try:
700 |                 parser.Parse(self._source, 1)
701 |             finally:
702 |                 self.curNode = old_cur_node
703 |                 self.document = old_document
704 |                 self._source = None
705 |             return -1
706 |         else:
707 |             return ExpatBuilder.external_entity_ref_handler(
708 |                 self, context, base, systemId, publicId)
709 | 
710 | 
class Namespaces:
    """Mix-in class for builders; adds support for namespaces."""

    def _initNamespaces(self):
        # Pending (prefix, uri) declarations.  They are turned into
        # namespace attributes on the next element that starts, and the
        # list is emptied at that point.
        self._ns_ordered_prefixes = []

    def createParser(self):
        """Create a new namespace-handling parser."""
        ns_parser = expat.ParserCreate(namespace_separator=" ")
        ns_parser.namespace_prefixes = True
        return ns_parser

    def install(self, parser):
        """Insert the namespace-handlers onto the parser."""
        ExpatBuilder.install(self, parser)
        if not self._options.namespace_declarations:
            return
        parser.StartNamespaceDeclHandler = self.start_namespace_decl_handler

    def start_namespace_decl_handler(self, prefix, uri):
        """Push this namespace declaration on our storage."""
        declaration = (prefix, uri)
        self._ns_ordered_prefixes.append(declaration)

    def start_element_handler(self, name, attributes):
        """Create the element for a start tag and make it the current node.

        Pending namespace declarations become xmlns attributes on the new
        element; `attributes` is consumed as a flat sequence of
        name, value, name, value, ...
        """
        # A space in the name means the parser attached namespace info.
        if ' ' in name:
            elem_uri, elem_local, elem_prefix, elem_qname = \
                _parse_ns_name(self, name)
        else:
            elem_uri, elem_local = EMPTY_NAMESPACE, None
            elem_prefix, elem_qname = EMPTY_PREFIX, name
        element = minidom.Element(elem_qname, elem_uri,
                                  elem_prefix, elem_local)
        element.ownerDocument = self.document
        _append_child(self.curNode, element)
        self.curNode = element

        pending = self._ns_ordered_prefixes
        if pending:
            for ns_prefix, ns_uri in pending:
                if ns_prefix:
                    decl = minidom.Attr(_intern(self, 'xmlns:' + ns_prefix),
                                        XMLNS_NAMESPACE, ns_prefix, "xmlns")
                else:
                    decl = minidom.Attr("xmlns", XMLNS_NAMESPACE,
                                        "xmlns", EMPTY_PREFIX)
                decl.value = ns_uri
                decl.ownerDocument = self.document
                _set_attribute_node(element, decl)
            # Empty the shared list in place; it is reused for later tags.
            del pending[:]

        if not attributes:
            return
        element._ensure_attributes()
        by_qname = element._attrs
        by_ns = element._attrsNS
        for raw_name, raw_value in zip(attributes[0::2], attributes[1::2]):
            if ' ' in raw_name:
                a_uri, a_local, a_prefix, a_qname = \
                    _parse_ns_name(self, raw_name)
                attr = minidom.Attr(a_qname, a_uri, a_local, a_prefix)
                by_qname[a_qname] = attr
                by_ns[(a_uri, a_local)] = attr
            else:
                attr = minidom.Attr(raw_name, EMPTY_NAMESPACE,
                                    raw_name, EMPTY_PREFIX)
                by_qname[raw_name] = attr
                by_ns[(EMPTY_NAMESPACE, raw_name)] = attr
            # The value is assigned before the owner element, exactly as
            # in the statement order this code has always used.
            attr.ownerDocument = self.document
            attr.value = raw_value
            attr.ownerElement = element

    if __debug__:
        # This only adds some asserts to the original
        # end_element_handler(), so we only define this when -O is not
        # used.  If changing one, be sure to check the other to see if
        # it needs to be changed as well.
        #
        def end_element_handler(self, name):
            closing = self.curNode
            if ' ' in name:
                uri, localname, prefix, qname = _parse_ns_name(self, name)
                assert (closing.namespaceURI == uri
                        and closing.localName == localname
                        and closing.prefix == prefix), \
                        "element stack messed up! (namespace)"
            else:
                assert closing.nodeName == name, \
                       "element stack messed up - bad nodeName"
                assert closing.namespaceURI == EMPTY_NAMESPACE, \
                       "element stack messed up - bad namespaceURI"
            self.curNode = closing.parentNode
            self._finish_end_element(closing)
804 | 
805 | 
class ExpatBuilderNS(Namespaces, ExpatBuilder):
    """Document builder that supports namespaces."""

    def reset(self):
        """Reset the document-building state, then the pending namespace
        declarations."""
        ExpatBuilder.reset(self)
        self._initNamespaces()
812 | 
813 | 
class FragmentBuilderNS(Namespaces, FragmentBuilder):
    """Fragment builder that supports namespaces."""

    def reset(self):
        """Reset the fragment-building state, then the pending namespace
        declarations."""
        FragmentBuilder.reset(self)
        self._initNamespaces()

    def _getNSattrs(self):
        """Return string of namespace attributes from this element and
        ancestors.

        The context node and then each parentNode are inspected for a
        `_ns_prefix_uri` mapping of prefix -> URI.  The first declaration
        seen for a prefix (the one nearest the context) wins.  URIs are
        escaped so that characters such as "&", "<" or "'" cannot break
        the single-quoted attribute they are written into.
        """
        # XXX This needs to be re-written to walk the ancestors of the
        # context to build up the namespace information from
        # declarations, elements, and attributes found in context.
        # Otherwise we have to store a bunch more data on the DOM
        # (though that *might* be more reliable -- not clear).
        from xml.sax.saxutils import escape

        seen = set()
        decls = []
        context = self.context
        while context:
            if hasattr(context, '_ns_prefix_uri'):
                for prefix, uri in context._ns_prefix_uri.items():
                    # add every new NS decl from context to the result
                    if prefix in seen:
                        continue
                    seen.add(prefix)
                    declname = "xmlns:" + prefix if prefix else "xmlns"
                    value = escape(str(uri), {"'": "&apos;"})
                    decls.append("%s='%s'" % (declname, value))
            context = context.parentNode
        if not decls:
            return ""
        # Leading space, then one declaration per line.
        return " " + "\n    ".join(decls)
849 | 
850 | 
class ParseEscape(Exception):
    """Exception raised to short-circuit parsing in InternalSubsetExtractor."""
854 | 
class InternalSubsetExtractor(ExpatBuilder):
    """XML processor which can rip out the internal document type subset.

    Parsing is abandoned (via ParseEscape) as soon as the answer is known:
    at a doctype without an internal subset, at the end of the doctype
    declaration, or at the first element.
    """

    # Stays None unless an internal subset is found.
    subset = None

    def getSubset(self):
        """Return the internal subset as a string."""
        return self.subset

    def parseFile(self, file):
        """Scan a file object for the subset; returns None."""
        try:
            ExpatBuilder.parseFile(self, file)
        except ParseEscape:
            return None

    def parseString(self, string):
        """Scan a string for the subset; returns None."""
        try:
            ExpatBuilder.parseString(self, string)
        except ParseEscape:
            return None

    def install(self, parser):
        """Install only the two handlers needed to find the subset."""
        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
        parser.StartElementHandler = self.start_element_handler

    def start_doctype_decl_handler(self, name, publicId, systemId,
                                   has_internal_subset):
        """Begin collecting subset text, or bail out if there is none."""
        if not has_internal_subset:
            raise ParseEscape()
        expat_parser = self.getParser()
        pieces = []
        self.subset = pieces
        # Nothing else is handled, so the default handler receives the
        # subset text piece by piece.
        expat_parser.DefaultHandler = pieces.append
        expat_parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler

    def end_doctype_decl_handler(self):
        """Join the collected pieces, normalise line ends, stop parsing."""
        text = ''.join(self.subset)
        for line_end in ('\r\n', '\r'):
            text = text.replace(line_end, '\n')
        self.subset = text
        raise ParseEscape()

    def start_element_handler(self, name, attrs):
        """Reaching an element means the prolog is over; stop parsing."""
        raise ParseEscape()
897 | 
898 | 
def parse(file, namespaces=True):
    """Parse a document, returning the resulting Document node.

    'file' may be either a file name or an open file object.  A
    namespace-aware builder is used unless 'namespaces' is false.
    """
    builder = ExpatBuilderNS() if namespaces else ExpatBuilder()

    if not isinstance(file, str):
        # Already an open file object: hand it straight to the builder.
        return builder.parseFile(file)

    # A file name is opened in text mode and decoded as latin1, so each
    # byte of the file becomes exactly one character.
    # NOTE(review): presumably a deliberate choice for this project's
    # input files -- confirm before changing the mode or encoding.
    with open(file,"r", encoding="latin1") as stream:
        return builder.parseFile(stream)
915 | 
916 | 
def parseString(string, namespaces=True):
    """Parse a document from a string, returning the resulting
    Document node.

    A namespace-aware builder is used unless 'namespaces' is false.
    """
    builder = ExpatBuilderNS() if namespaces else ExpatBuilder()
    return builder.parseString(string)
926 | 
927 | 
def parseFragment(file, context, namespaces=True):
    """Parse a fragment of a document, given the context from which it
    was originally extracted.  context should be the parent of the
    node(s) which are in the fragment.

    'file' may be either a file name or an open file object.
    """
    builder_class = FragmentBuilderNS if namespaces else FragmentBuilder
    builder = builder_class(context)

    if not isinstance(file, str):
        # Already an open file object.
        return builder.parseFile(file)

    # A file name is opened in binary mode.
    with open(file, 'rb') as stream:
        return builder.parseFile(stream)
946 | 
947 | 
def parseFragmentString(string, context, namespaces=True):
    """Parse a fragment of a document from a string, given the context
    from which it was originally extracted.  context should be the
    parent of the node(s) which are in the fragment.
    """
    builder_class = FragmentBuilderNS if namespaces else FragmentBuilder
    return builder_class(context).parseString(string)
958 | 
959 | 
def makeBuilder(options):
    """Create a builder based on an Options object.

    options.namespaces selects the namespace-aware builder class; the
    options object is passed on to whichever class is chosen.
    """
    builder_class = ExpatBuilderNS if options.namespaces else ExpatBuilder
    return builder_class(options)
966 | 


--------------------------------------------------------------------------------
/minidom/expatbuilderFixed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tasox/Epimitheus/f0c3202911968021c3762e291f4d793374f7423f/minidom/expatbuilderFixed.png


--------------------------------------------------------------------------------
/minidom/expatbuilderUnFixed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tasox/Epimitheus/f0c3202911968021c3762e291f4d793374f7423f/minidom/expatbuilderUnFixed.png


--------------------------------------------------------------------------------