├── README.md ├── epimitheus_v1.py ├── epimitheus_v2.py ├── images ├── EpimitheusNeo4j.png ├── addEventIDs.png ├── blackListedObjects.png ├── filename.md └── windowsDefender.png └── minidom ├── README.md ├── expatbuilder.py ├── expatbuilderFixed.png └── expatbuilderUnFixed.png /README.md: -------------------------------------------------------------------------------- 1 | # Epimitheus 2 | Epimitheus is a Python tool that uses the Neo4j graph database to visualize Windows Events. The job of "epimitheus" is to read the exported Windows Events (including Sysmon) in XML form, create a new XML with the correct Event properties, and import it into Neo4j. 3 | 4 | 5 | #### Import Windows Events to Neo4j 6 | python3 epimitheus.py -i "bolt://localhost" -u "neo4j" -p "" -x "Windows_Events.xml" -o "output.xml" 7 | 8 | #### Import Windows Events/Sysmon to Neo4j 9 | python3 epimitheus.py -i "bolt://localhost" -u "neo4j" -p "" -x "Windows_Events.xml" -o "output.xml" -s 10 | 11 | #### Delete data from Neo4j 12 | python3 epimitheus.py -i "bolt://localhost" -u "neo4j" -p "" -D 13 | 14 | #### Adding Events missing EventIDs 15 | 16 | ![alt text](https://github.com/tasox/Epimitheus/blob/master/images/addEventIDs.png) 17 | 18 | 19 | ### Neo4j Queries - Examples 20 | More Neo4j queries are coming ... 
21 | 22 | #### RDP Connections (Sysmon and Windows Events) 23 | 24 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.LogonType = '10' AND c.EventID='4624' RETURN p 25 | 26 | #### Pass-The-Hash 27 | 28 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.LogonProcessName = 'NtLmSsp ' AND NOT c.TargetUserName IN ['ANONYMOUS LOGON'] RETURN p 29 | 30 | #### Runas (Potential) 31 | 32 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.LogonType = '2' ANd c.LogonProcessName = "seclogo" RETURN p 33 | 34 | #### Lateral Movement - Pass-The-Hash /w Mimikatz 35 | 36 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID IN ["4624","4672"] AND c.LogonType = "9" AND c.LogonProcessName = "seclogo" RETURN p 37 | 38 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID IN ["4624","4672"] AND c.LogonType = "9" AND c.LogonProcessName = "seclogo" AND c.TargetLogonId=c.SubjectLogonId RETURN c.EventID,c.remoteHost,c.targetUser,c.TargetLogonId,c.targetServer,c.PrivilegeList,c.SystemTime 39 | 40 | MATCH (c:Event),(d:Event) WHERE c.EventID = "4672" AND d.EventID="4688" AND c.SystemTime=d.SystemTime RETURN c.targetUser,d.SubjectUserName,d.targetServer,d.NewProcessName,d.TokenElevationType 41 | 42 | MATCH (c:Event),(d:Event) WHERE c.EventID="4672" AND d.EventID="4688" AND c.SystemTime=d.SystemTime WITH [(c.EventID),(c.targetUser),(c.remoteHost),(c.SystemTime)] as Event4672,[(d.EventID),(d.targetUser),(d.remoteHost),(d.SystemTime)] as Event4688 RETURN Event4672,Event4688 43 | 44 | 45 | #### Memory dump (procdump) 46 | 47 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID="10" AND c.TargetImage =~ ".*lsass.*" RETURN p - Sysmon 48 | 49 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) RETURN collect(c.TargetFilename) - Sysmon 50 | 51 | MATCH 
p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID="10" AND c.TargetImage="C:\\Windows\\system32\\lsass.exe" RETURN p - Sysmon 52 | 53 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID="10" AND c.TargetImage="C:\\Windows\\system32\\lsass.exe" RETURN c.EventRecordID,c.targetUser, c.SourceImage,c.TargetImage,c.TargetFilename 54 | 55 | #### Windows Defender 56 | 57 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.EventID = '1116' RETURN c.Path 58 | 59 | #### PowerShell 60 | 61 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.HostApplication =~ ".*Power.*" RETURN p LIMIT 10 62 | 63 | #### Defense Evasion - PowerShell Script Block Logging 64 | 65 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.TargetObject="HKLM\\SOFTWARE\\Wow6432Node\\Policies\\Microsoft\\Windows\\PowerShell\\ScriptBlockLogging\\EnableScriptBlockLogging" RETURN p 66 | 67 | MATCH p=(a:RemoteHosts)-->(b:TargetUser)-->(c:Event)-->(d:TargetHost) WHERE c.TargetObject="HKLM\\SOFTWARE\\Wow6432Node\\Policies\\Microsoft\\Windows\\PowerShell\\ScriptBlockLogging\\EnableScriptBlockLogging" RETURN c.EventID,c.targetUser,c.EventType,c.Details,c.targetServer,c.TargetObject 68 | 69 | #### Defense Evasion - PPID Spoofing 70 | 71 | MATCH (c:Event),(d:Event) WHERE c.EventID = "10" AND d.EventID ="1" AND c.TargetProcessId = d.ProcessId RETURN c.EventRecordID,c.targetUser, c.SourceImage,c.SourceProcessId,c.TargetProcessId,d.Image,d.targetUser 72 | 73 | #### References 74 | https://medium.com/@pentesttas/windows-events-sysmon-visualization-using-neo4j-part-1-529ca5ab4593 75 | 76 | https://medium.com/@pentesttas/windows-events-sysmon-visualization-using-neo4j-part-2-d4c2fd3c9413 77 | -------------------------------------------------------------------------------- /epimitheus_v1.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/python3 2 | 3 | from xml.dom import minidom 4 | from xml.dom.minidom import Document 5 | from neo4j import GraphDatabase, basic_auth 6 | import os,sys,datetime,time,re, subprocess 7 | import multiprocessing 8 | from multiprocessing import Process,Lock 9 | import argparse 10 | 11 | 12 | 13 | def isDomain(): 14 | 15 | #Url: https://github.com/zakird/pyad/blob/master/pyad/adbase.py 16 | #Lines: 11-42 - adbase.py 17 | #Fix the code: @taso_x 18 | if sys.platform != 'win32': 19 | raise Exception("Must be running Windows.") 20 | 21 | else: 22 | try: 23 | import win32api 24 | import pywintypes 25 | import win32com.client 26 | import win32security 27 | except ImportError: 28 | raise Exception("pywin32 library required. Download from http://sourceforge.net/projects/pywin32/") 29 | 30 | 31 | _adsi_provider = win32com.client.Dispatch('ADsNameSpaces') 32 | 33 | try: 34 | # Discover default domain and forest information 35 | __default_domain_obj = _adsi_provider.GetObject('', "LDAP://rootDSE") 36 | # connecting to rootDSE will connect to the domain that the 37 | # current logged-in user belongs to.. which is generally the 38 | # domain under question and therefore becomes the default domain. 39 | _default_detected_forest = __default_domain_obj.Get("rootDomainNamingContext") 40 | _default_detected_domain = __default_domain_obj.Get("defaultNamingContext") 41 | if(_default_detected_domain): 42 | print("[+] Domain Found: "+_default_detected_domain) 43 | if(_default_detected_forest): 44 | print("[+] Forest Found: "+_default_detected_forest) 45 | return True 46 | 47 | except: 48 | # If there was an error, this this computer might not be on a domain. 
def regEx(string):
    """Reduce an account or host name to its short form and upper-case it.

    The input is converted with ``str()`` first, so ``None`` becomes ``"NONE"``.

    Handled shapes:
      * ``user@domain``            -> everything before the first ``@``
      * ``DOMAIN\\user``           -> everything after the last backslash
      * one dot   (``first.last``, ``desktop-1.domain``) -> ``first.last`` / ``desktop-1``
      * two dots  (``host.domain.com``)                  -> ``host``
      * three or more dots (``first.last.domain.com``)   -> ``first.last``
      * anything else is returned unchanged (upper-cased).

    Whenever a pattern does not match, the whole input is returned instead
    of raising ``IndexError``.
    """
    text = str(string)

    def firstMatch(pattern):
        # Fall back to the untouched text when the pattern finds nothing;
        # indexing an empty findall() result used to abort the whole import.
        found = re.search(pattern, text)
        return found.group(0) if found else text

    if "@" in text:
        # Keep the complete local part, so "john.doe@corp" stays "john.doe"
        # (the previous pattern stopped after the first non-word character).
        s = text.partition("@")[0] or text
    elif "\\" in text:
        # Trailing segment after the last backslash (may be empty).
        s = text.rpartition("\\")[2]
    elif "." in text:
        dotCounter = text.count(".")
        if dotCounter == 1:    #Example username.lastname or desktop111-maria.domain
            # '|' is a literal inside the character class; kept for compatibility.
            s = firstMatch(r"^.\w+[.|-]\w+")
        elif dotCounter == 2:  #Example username.domain.com
            # Hyphens are allowed so WIN-ABC.corp.local is not cut down to WIN,
            # matching what the one-dot and three-dot branches already return.
            s = firstMatch(r"^[\w-]+")
        else:                  #Example username.lastname.domain.com
            s = firstMatch(r"^\w+[.-]\w+")
    else:
        s = text
    return s.upper()
def eventParser(eventIDs):
    """Flatten the exported Windows Event XML into per-event property lists.

    Reads the module-level ``rootDoc`` (the document element parsed in the
    ``__main__`` section) and walks it three levels deep:
    root -> event -> section -> property element.

    For every property element one single-key dict is recorded:
      * element without attributes        -> {tag name: text}
      * ``Data`` element with attributes  -> {last attribute value: text}
      * any other element with attributes -> {last attribute name: its value}
    ``text`` is ``None`` for an empty element.

    NOTE(review): the original indentation was lost in this copy of the
    file; "only the last attribute is recorded" is the reading that matches
    the property names used elsewhere in the script - confirm against the
    upstream source.

    Returns a 5-tuple ``(eventIDs, localhostIPs, blacklistedUsers,
    blacklistedShareFolders, events)`` where ``events`` maps the 1-based
    position of each child of ``rootDoc`` to its list of single-key dicts.
    Children that yield no property (e.g. whitespace text nodes) consume a
    position but get no entry.
    """
    input_list = {}
    counter = 0

    try:
        for eventNode in rootDoc.childNodes:
            counter = counter + 1

            for section in eventNode.childNodes:
                for item in section.childNodes:
                    # Text/comment nodes carry no attributes; they used to be
                    # skipped only as a side effect of a bare "except: pass".
                    if item.nodeType != item.ELEMENT_NODE:
                        continue

                    tag = item.nodeName
                    attrs = list(item.attributes.items())
                    text = item.firstChild.nodeValue if item.firstChild else None

                    if not attrs:
                        key, value = tag, text
                    else:
                        lastName, lastValue = attrs[-1]
                        if tag != 'Data':
                            key, value = lastName, lastValue
                        else:
                            # <Data Name="X">text</Data>  ->  {"X": text}
                            key, value = lastValue, text

                    input_list.setdefault(counter, []).append({key: value})
    except Exception as e:
        # Best effort, as before: report the problem and return what was collected.
        print(e)

    filterEvents = eventIDs
    localhostIPs=["","-","::1","127.0.0.1","localhost"]
    blacklistedUsers=["DWM-3","UMFD-3","UMFD-2","DWM-2","UMFD-0","UMFD-1","DWM-1"]
    blacklistedShareFolders=["\\\\*\\SYSVOL","\\\\*\\IPC$"]

    return (filterEvents, localhostIPs, blacklistedUsers, blacklistedShareFolders, input_list)
targetUserList=[] 187 | remoteHostsList=[] 188 | uniqueIPs=[] 189 | file_handle = open(outXMLFile,"w") 190 | 191 | doc = Document() 192 | root = doc.createElement('Events') 193 | doc.appendChild(root) 194 | 195 | #print(eventList.items()) [OK] 196 | print("[+] Searching for TargetUsers, RemoteHosts, TargetHosts ...") 197 | if len(eventList.items()) > 0: 198 | t={} 199 | counter=0 200 | for key, value in eventList.items(): 201 | for eventValues in value: 202 | t.update(eventValues) 203 | if t.get("EventID") in evIDs: 204 | 205 | if sysmonFile: #User provided Sysmon xml file. 206 | if t.get("User"): 207 | targetUser = t.get("User") 208 | elif t.get("UserID"): 209 | targetUser = t.get("UserID") 210 | elif t.get("SubjectUserName"): 211 | targetUser = t.get("SubjectUserName") 212 | else: 213 | targetUser = "None" 214 | print("[-] Event ID %s with Record ID %s does not have a targetUser." % (t.get("EventID"),t.get("EventRecordID"))) 215 | 216 | 217 | 218 | if t.get("SourceIp"): 219 | remoteHost = t.get("SourceIp") 220 | elif t.get("SourceHostname"): 221 | remoteHost = t.get("SourceHostname") 222 | else: 223 | remoteHost = t.get("Computer") 224 | 225 | 226 | if t.get("DestinationIp"): 227 | targetServer = t.get("DestinationIp") 228 | else: 229 | targetServer = t.get("Computer") 230 | 231 | 232 | 233 | if targetUser in bListedUsers: 234 | print("[-] Event ID %s with Record ID %s discarded because the TargetUser %s is into the bListedUsers list." % (t.get("EventID"),t.get("EventRecordID"),targetUser)) 235 | t.clear() 236 | else: #targetUser is not in bListedUsers list then update the values in Neo4j. 237 | t.update({'targetUser':targetUser}) 238 | t.update({'remoteHost':remoteHost}) 239 | t.update({'targetServer':targetServer}) 240 | 241 | #Push name for every Event node because Neo4j needs it for naming the node else would be null. In addition, i use "name" in relationships. 
242 | t.update({'name':t.get("EventID")}) 243 | 244 | else: 245 | if t.get("TargetUserName"): 246 | targetUser = t.get("TargetUserName") 247 | if t.get("SubjectUserName"): 248 | targetUser = t.get("SubjectUserName") 249 | elif t.get("Detection User"): 250 | targetUser = t.get("Detection User") 251 | elif t.get("Computer"): 252 | targetUser = t.get("Computer") 253 | elif (t.get("EventID") not in ["4103","4104"]) and t.get("UserID"): 254 | sid = t.get("UserID") 255 | try: 256 | if (checkdom): 257 | #After converting sid->username check if user is blacklisted. 258 | if sid2name(sid) not in bListedUsers: 259 | targetUser=sid2name(sid) 260 | else: 261 | targetUser = sid 262 | except Exception as e: 263 | print(e) 264 | 265 | elif t.get("EventID") in ["4103"]: #Powershell Events don't have target user. ["4103","4104","4105","4106"] 266 | f = t.get("ContextInfo") 267 | if (re.findall('User = \w+.*',f)[0].split("= ")[1]): 268 | regX = re.findall('User = \w+.*',f)[0].split("= ")[1] 269 | targetUser = regEx(regX) 270 | try: 271 | HostApplication = re.findall('Host Application = \w+.*',f)[0].split("= ")[1] #Get Host Application from ContextInfo tag. 272 | except: 273 | HostApplication = "-" 274 | try: 275 | ScriptName = re.findall('Script Name = \w+.*',f)[0].split("= ")[1] #Get Script Name from ContextInfo tag. 276 | except: 277 | ScriptName = "-" 278 | try: 279 | CommandPath = re.findall('Command Path = \w+.*',f)[0].split("= ")[1] #Get Command Path from ContextInfo tag. 280 | except: 281 | CommandPath = "-" 282 | try: 283 | SequenceNumber = re.findall('Sequence Number = \w+.*',f)[0].split("= ")[1] #Get Sequence Number from ContextInfo tag. 284 | except: 285 | SequenceNumber = "-" 286 | try: 287 | Severity = re.findall('Severity = \w+.*',f)[0].split("= ")[1] #Get Sequence Number from ContextInfo tag. 
288 | except: 289 | Severity = "-" 290 | t.update({'HostApplication':HostApplication}) 291 | t.update({'ScriptName':ScriptName}) 292 | t.update({'CommandPath':CommandPath}) 293 | t.update({'SequenceNumber':SequenceNumber}) 294 | t.update({'Severity':Severity}) 295 | 296 | elif t.get("EventID") in ["4104"]: 297 | sid = t.get("UserID") 298 | try: 299 | if (checkdom): 300 | #After converting sid->username check if user is blacklisted. 301 | if sid2name(sid) not in bListedUsers: 302 | targetUser=sid2name(sid) 303 | else: 304 | targetUser = sid 305 | except Exception as e: 306 | print(e) 307 | 308 | else: 309 | targetUser = "NULL" 310 | print("[+] Event ID: "+str(t.get("EventID"))+" with Record ID: "+str(t.get("EventRecordID"))+" does not have targetUser tag!") 311 | 312 | 313 | ########################################################################################## 314 | #Extract remote IPs from Event, if IP source field does not exist then extact from the 'TargetServerName', if 'TargetServerName' does not exist then extract from 'Computer' tag. 315 | if t.get("IpAddress") and (t.get("IpAddress") in lhostIPs): 316 | if t.get("Workstation") and (t.get("Workstation") not in lhostIPs): 317 | remoteHost = t.get("Workstation") 318 | t.update({'remoteHost':regExIP(remoteHost)}) 319 | elif t.get("Computer") and (t.get("Computer") not in lhostIPs): 320 | remoteHost = t.get("Computer") 321 | t.update({'remoteHost':regExIP(remoteHost)}) 322 | else: 323 | print("[-] Event ID %s with Record ID %s does not have a remoteHost." 
% (t.get("EventID"),t.get("EventRecordID"))) 324 | 325 | elif t.get("IpAddress") : #and (t.get("IpAddress") not in lhostIPs) 326 | remoteHost = t.get("IpAddress") 327 | t.update({'remoteHost':regExIP(remoteHost)}) 328 | else: 329 | remoteHost = t.get("Computer") #t.get("IpAddress") 330 | t.update({'remoteHost':regExIP(remoteHost)}) 331 | 332 | 333 | 334 | 335 | ######################################################################################## 336 | 337 | 338 | #Add 'Attaking Hosts' into Neo4j 339 | targetServer = t.get("Computer") 340 | t.update({'targetServer':regEx(targetServer)}) 341 | 342 | #print("[-] Event ID %s with Record ID %s does not have a targetServer." % (t.get("EventID"),t.get("EventRecordID"))) 343 | t.update({'name':t.get("EventID")}) 344 | 345 | ########################################################################################## 346 | 347 | 348 | ###############################MESSAGE TAG########################################################### 349 | #Get values from the following keys inside from tag. 
350 | #Error Code, Impersonation Level, Restricted Admin Mode, Virtual Account, Elevated Token 351 | if t.get("Message"): 352 | f = t.get("Message") 353 | if (re.findall('Error Code:',f)): 354 | ErrorCode = re.findall('Error Code:\s+[\w+-]*',f)[0].split(":")[1].strip() 355 | t.update({'ErrorCode':ErrorCode}) 356 | 357 | if (re.findall('Impersonation Level:',f)): 358 | ImpersonationLevel = re.findall('Impersonation Level:\s+[\w+-]*',f)[0].split(":")[1].strip() 359 | t.update({'ImpersonationLevelTranslate':ImpersonationLevel}) 360 | 361 | if(re.findall('Restricted Admin Mode:',f)): 362 | RestrictedAdminMode = re.findall('Restricted Admin Mode:\s+[\w+-]*',f)[0].split(":")[1].strip() 363 | t.update({'RestrictedAdminMode':RestrictedAdminMode}) 364 | 365 | if (re.findall('Virtual Account:',f)): 366 | VirtualAccount = re.findall('Virtual Account:\s+[\w+-]*',f)[0].split(":")[1].strip() 367 | t.update({'VirtualAccount':VirtualAccount}) 368 | 369 | if (re.findall('Elevated Token:',f)): 370 | ElevatedToken = re.findall('Elevated Token:\s+[\w+-]*',f)[0].split(":")[1].strip() 371 | t.update({'ElevatedToken':ElevatedToken}) 372 | #else: 373 | # print("[-] Couldn't find tag on Event ID %s with EventRecordID %s." % (t.get("EventID"),t.get("EventRecordID"))) 374 | 375 | 376 | ##################################################################################################### 377 | if targetUser in bListedUsers: 378 | print("[-] Event ID %s with Record ID %s discarded because the TargetUser %s is into the bListedUsers list." % (t.get("EventID"),t.get("EventRecordID"),targetUser)) 379 | t.clear() 380 | else: 381 | t.update({'targetUser':regEx(targetUser)}) 382 | 383 | counter=counter+1 #How many events added! 384 | 385 | createTagEvent=doc.createElement("Event") 386 | doc.childNodes[0].appendChild(createTagEvent) 387 | for tagName in t.keys(): #Example of t.keys(): {"EventID":"4624","Version":"1"} 388 | if tagName != "Message": #Remove tag from Exported Windows XML. 
Too much info :) 389 | text = str(t.get(tagName)) 390 | tag = str(tagName) 391 | createTag=doc.createElement(tag.replace(" ","")) #Remove SPACE from the Tag Name. Example: , 392 | innerTXT = doc.createTextNode(text.replace("«","")) 393 | createTag.appendChild(innerTXT) 394 | createTagEvent.appendChild(createTag) 395 | 396 | #else: 397 | # print("[-] Event ID "+str(t.get("EventID"))+" is missing.") 398 | 399 | 400 | 401 | print("[+] Creating XML for neo4j...") 402 | doc.writexml(file_handle) 403 | #doc.writexml(sys.stdout) 404 | file_handle.close() 405 | 406 | def neo4jXML(outXMLFile,neo4jUri,neo4jUser,neo4jPass): 407 | 408 | neo4jDriver=neo4jConn(neo4jUri,neo4jUser,neo4jPass) 409 | try: 410 | #Read the created XML from -o/--out argument. 411 | neo4jDocXML = minidom.parse(outXMLFile).documentElement 412 | except Exception as e: 413 | print(e) 414 | sys.exit(1) 415 | 416 | blackListedEventProperties=["Opcode","Keywords","Version","Level","TransmittedServices","KeyLength","LmPackageName","Key Length","Message","LogonGuid","ThreadID","TargetLogonGuid","SubjectDomainName","Guid","Provider","VirtualAccount","TicketEncryptionType","TicketOptions","Keywords","Level","KeyLength","CertIssuerName","CertSerialNumber","CertThumbprint","Channel","ObjectServer","PreAuth Type","ActivityID","TargetOutboundDomainName","FWLink","Unused","Unused2","Unused3","Unused4","Unused5","Unused6","OriginID","OriginName","ErrorCode","TypeID","TypeName","StatusDescription","AdditionalActionsID","SubStatus","ContextInfo","Product"] 417 | 418 | counter=0 419 | groupEvents=[] #Example [{ EventId: "4624",targetUser:"tasos"},{EventId: "4625", targetUser: "tzonis"}] 420 | 421 | try: 422 | 423 | for eventTagNode in neo4jDocXML.childNodes: 424 | dictionaryEvents=dict() # {EventId: "4624",targetUser:"tasos"},{EventId: "4625", targetUser: "tzonis"} 425 | if eventTagNode.childNodes: 426 | #print(eventTagNode.childNodes) [OK] 427 | for eventTags in eventTagNode.childNodes: 428 | #print(eventTags.nodeName) 
429 | if (eventTags.nodeName not in blackListedEventProperties): 430 | for eventValues in eventTags.childNodes: 431 | #print(eventTags.nodeName,eventValues.nodeValue) 432 | dictionaryEvents.update({eventTags.nodeName:eventValues.nodeValue}) 433 | #print("-------------------------") 434 | groupEvents.append(dictionaryEvents) 435 | #print(groupEvents) #[OK] 436 | 437 | print("[+] Adding the Events ...") 438 | with neo4jDriver.session() as session: 439 | insertEvents = session.run("UNWIND $events as eventPros CREATE (e:Event) SET e=eventPros MERGE (r:RemoteHosts {name:e.remoteHost}) MERGE (u:TargetUser {remoteHost: e.remoteHost,EventRecordIDs: [ ],name:e.targetUser}) MERGE (t:TargetHost {name:e.targetServer})",events=groupEvents) 440 | print("[+] Event Correlation ...") 441 | with neo4jDriver.session() as session: 442 | test = session.run("MATCH (u:TargetUser),(e:Event),(r:RemoteHosts),(t:TargetHost) WHERE u.name=e.targetUser AND r.name=e.remoteHost AND t.name=e.targetServer AND u.remoteHost = r.name AND NOT e.EventRecordID IN u.EventRecordIDs SET u.EventRecordIDs=u.EventRecordIDs+e.EventRecordID") 443 | print("[+] Delete Dublicates ...") 444 | with neo4jDriver.session() as session: 445 | deleteDublicates = session.run("MATCH (t:TargetUser) WITH t.name as n, t.remoteHost as r, collect(t) as dublicateTargetUser where size(dublicateTargetUser) > 1 UNWIND dublicateTargetUser[1..] 
def eventCounters(neo4jUri,neo4jUser,neo4jPass):
    """Print how many nodes of each kind and how many relationships are in Neo4j.

    Connects through ``neo4jConn``, runs one count query per category, prints
    each count, then the grand total and a "Finished" timestamp.

    All queries share one session that is closed by the ``with`` block, and
    the driver is closed even when a query raises. Previously each query
    opened a new session that was never closed.
    """
    countQueries = (
        ("Events", "MATCH (n:Event) RETURN count(n)"),
        ("RemoteHosts", "MATCH (n:RemoteHosts) RETURN count(n)"),
        ("TargetHosts", "MATCH (n:TargetHost) RETURN count(n)"),
        ("TargetUsers", "MATCH (n:TargetUser) RETURN count(n)"),
        ("Relationships", "MATCH p=()-->() RETURN count(p)"),
    )

    neo4jDriver = neo4jConn(neo4jUri, neo4jUser, neo4jPass)
    total = 0
    try:
        with neo4jDriver.session() as session:
            for label, query in countQueries:
                count = 0
                # The result is fully consumed before the next query is run.
                for record in session.run(query):
                    print("[+] Added " + label + ":" + str(record.value()))
                    count = int(record.value())
                total = total + count

        print("[+] Total: " + str(total))
        print('[+] Finished: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))
    finally:
        #Close the connection with Neo4j
        neo4jDriver.close()
Example: bolt://localhost',required=True) 517 | parser.add_argument('-D','--delete',help='Delete all data from Neo4j.',action='store_true') 518 | parser.add_argument('-u','--user',help='neo4j username.',required=True) 519 | parser.add_argument('-p','--passwd',help='neo4j password.',required=True) 520 | parser.add_argument('-s','--sysmon',help='Sysmon structure.',action='store_true') 521 | args = parser.parse_args() 522 | eventIDs=args.eventID 523 | neo4jUri=args.uri 524 | neo4jUser=args.user 525 | neo4jPass=args.passwd 526 | xmlFile = args.xml 527 | sysmonFile = args.sysmon 528 | delData = args.delete 529 | outXMLFile = args.out 530 | 531 | 532 | if(delData): 533 | neo4jDriver=neo4jConn(neo4jUri,neo4jUser,neo4jPass) 534 | print("[+] Connecting with neo4j ...") 535 | print("[+] Deleting all the data ...") 536 | with neo4jDriver.session() as session: 537 | delAll=session.run("MATCH (n) DETACH DELETE n") 538 | #Close the connection with Neo4j 539 | neo4jDriver.close() 540 | 541 | else: 542 | try: 543 | #Open exported XML and remove those chars 544 | openXMLread=open(xmlFile,"r") 545 | fixChars=re.sub(r"", r"", openXMLread.read()) #When Events exported from Windows Event Viewer has tose bad chars inside the XML. 546 | openXMLread.close() 547 | 548 | #Write again the XML without those chars 549 | xmlFile=xmlFile.replace(".xml","_epimitheus.xml") 550 | openXMLwrite=open(xmlFile,"w") 551 | openXMLwrite.write(fixChars) 552 | openXMLwrite.close() 553 | 554 | rootDoc = minidom.parse(xmlFile).documentElement #Open exported XML file. 555 | 556 | except Exception as e: 557 | print(e) 558 | #print("[-] Can't find the XML file or XML is not in the right format. 
Use -x/--xml to provide the Windows Event XML file.") 559 | sys.exit(1) 560 | 561 | #Check if the script is running in a Domain 562 | #checkdom = isDomain() 563 | 564 | 565 | #Parse Windows Event XML File - Process 1 566 | parl=multiprocessing.Lock() 567 | parl.acquire() 568 | print("[+] Parsing XML file ...") 569 | print ('[+] Parsing Started: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now())) 570 | evIDs,lhostIPs,bListedUsers,bListedShareFolders,eventList = eventParser(eventIDs,) 571 | print ('[+] Parsing Finished: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now())) 572 | parl.release() 573 | 574 | 575 | #Create neo4j XML - Process 2 576 | nl = multiprocessing.Lock() 577 | nl.acquire() 578 | cnodes = Process(target=createXML, args=(evIDs,lhostIPs,bListedUsers,bListedShareFolders,eventList,sysmonFile,outXMLFile)) 579 | cnodes.start() 580 | cnodes.join() 581 | nl.release() 582 | 583 | #Read neo4j XML - Process 3 584 | ml = multiprocessing.Lock() 585 | ml.acquire() 586 | mnodes = Process(target=neo4jXML,args=(outXMLFile,neo4jUri,neo4jUser,neo4jPass)) 587 | print("[+] Loading neo4j XML ...") 588 | mnodes.start() 589 | mnodes.join() 590 | ml.release() 591 | 592 | #Print Counters - Process 4 593 | cc=multiprocessing.Lock() 594 | cc.acquire() 595 | ccounters=Process(target=eventCounters,args=(neo4jUri,neo4jUser,neo4jPass)) 596 | ccounters.start() 597 | #p=eventCounters() 598 | ccounters.join() 599 | cc.release() 600 | -------------------------------------------------------------------------------- /epimitheus_v2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | from logging import NullHandler 4 | from xml.dom import minidom 5 | from xml.dom.minidom import Document 6 | from neo4j import GraphDatabase, basic_auth 7 | import os,sys,time,re, subprocess 8 | import multiprocessing 9 | from multiprocessing import Process,Lock 10 | import argparse 11 | import collections 12 | import Evtx.Evtx as evtx 13 | 
def get_events(input_file, parse_xml=False):
    """Yield every record of the EVTX file *input_file*.

    Each record is yielded as ``record.lxml()`` when *parse_xml* is true,
    otherwise as ``record.xml()``.
    """
    # https://chapinb.com/python-forensics-handbook/ch03_event_logs.html#iterate-over-record-xml-data-evtx
    with evtx.Evtx(input_file) as event_log:
        for record in event_log.records():
            yield record.lxml() if parse_xml else record.xml()


def regEx(string):
    """Return the upper-cased account/host part extracted from *string*.

    The argument is converted with ``str()`` first, then:

    * ``user@domain``  -> everything before the first ``@``
    * ``DOMAIN\\user`` -> everything after the last backslash
    * dotted names     -> a prefix chosen by the number of dots:
        - 1 dot   : ``username.lastname`` / ``desktop111-maria``
        - 2 dots  : ``username`` (from ``username.domain.com``)
        - 3+ dots : ``username.lastname`` (from ``username.lastname.domain.com``)
    * anything else    -> the text unchanged

    When a dotted name does not fit the expected pattern the whole text is
    returned instead of raising ``IndexError``.
    """
    text = str(string)

    if "@" in text:
        # Keep the complete local part; the old pattern cut dotted or
        # hyphenated names short ("john.smith@x" became "john.").
        account = text.partition("@")[0]
    elif "\\" in text:
        account = text.rpartition("\\")[2]
    elif "." in text:
        dots = text.count(".")
        if dots == 1:
            pattern = r"^.\w+[.|-]\w+"
        elif dots == 2:
            pattern = r"^\w+"
        else:
            pattern = r"^\w+[.-]\w+"
        found = re.match(pattern, text)
        # Fall back to the full text when the name has an unexpected shape.
        account = found.group(0) if found else text
    else:
        account = text

    return account.upper()


def regExIP(ip):
    """Return *ip* as a string, unwrapping an IPv4 address embedded in IPv6.

    ``::ffff:192.168.100.50`` becomes ``192.168.100.50``.  Addresses that start
    with ``:`` but contain no dotted part (for example ``::1``) are returned
    unchanged instead of raising ``IndexError``.
    """
    address = str(ip)
    if address.startswith(":"):
        embedded = re.search(r"\w+[.].*", address)
        if embedded:
            return embedded.group(0)
    return address


def neo4jConn(neo4jUri,neo4jUser,neo4jPass):
    """Create and return a Neo4j driver for *neo4jUri* using basic auth.

    Any exception raised while creating the driver is printed and the
    process exits with status 1.
    """
    try:
        driver = GraphDatabase.driver(neo4jUri, auth=basic_auth(user=neo4jUser, password=neo4jPass))
        return driver
    except Exception as e:
        print("[-] %s" % e)
        sys.exit(1)
def _parse_system_time(raw):
    """Return *raw* normalised to ISO-8601, or *raw* itself if it cannot be parsed."""
    try:
        return datetime.datetime.fromisoformat(raw).isoformat()
    except (TypeError, ValueError):
        # Keep the original text rather than losing the timestamp entirely.
        return raw


def _event_property(node):
    """Turn one grandchild node of an event into a single-entry ``{name: value}`` dict.

    Returns ``None`` for nodes without an attribute map (text, comments).
    """
    if node.attributes is None:
        return None

    tag = node.nodeName
    child = node.firstChild
    text = child.nodeValue if child is not None else None
    # Attributes on <EventID> (e.g. Qualifiers) are not useful; only its text is kept.
    attrs = [] if tag == "EventID" else list(node.attributes.items())

    if tag == "Data":
        # The value of the (last) attribute names the datum; attribute-less
        # <Data> elements are stored under 'ContextInfo'.
        name = attrs[-1][1] if attrs else "ContextInfo"
        return {"Data": {name: text}}
    if not attrs:
        return {tag: text}
    if tag == "Execution":
        # Keeps its attributes (e.g. ProcessID, ThreadID) together as a dict.
        return {tag: dict(attrs)}

    # Any other element with attributes: the last non-empty attribute wins;
    # an empty attribute relabels the entry as 'ActivityID'.
    key, value = "", ""
    for attr_name, attr_value in attrs:
        if attr_name == "SystemTime":
            attr_value = _parse_system_time(attr_value)
        if attr_value:
            key, value = attr_name, attr_value
        else:
            key = "ActivityID"
    return {key: value}


def eventParser(eventIDs,xmlDoc):
    """Flatten a DOM tree of events into per-event property lists.

    Parameters:
        eventIDs: comma-separated EventID filter (e.g. ``"4624,4625"``);
            a falsy value keeps every event.
        xmlDoc: DOM node whose child nodes are the events; each event's
            children are sections whose child elements become properties.

    Returns:
        ``(eventIDs, localhostIPs, blacklistedUsers, blacklistedShareFolders,
        events)`` where ``events`` maps the 1-based position of an event among
        ``xmlDoc.childNodes`` to a list of single-entry dicts, in document
        order.  With a filter, only events having an ``EventID`` entry in the
        filter are kept.

    A failure while walking the tree is printed and whatever was parsed up to
    that point is still returned.
    """
    parsed = []  # (event number, property dict) in document order

    try:
        for number, event in enumerate(xmlDoc.childNodes, start=1):
            for section in event.childNodes:
                for node in section.childNodes:
                    prop = _event_property(node)
                    if prop is not None:
                        parsed.append((number, prop))
    except Exception as e:
        print(e)

    # Group the properties by the event they belong to.
    grouped = {}
    for number, prop in parsed:
        grouped.setdefault(number, []).append(prop)

    # Event filtering procedure.
    if eventIDs:
        wanted = {event_id.strip() for event_id in eventIDs.split(",")}
        selected = {
            number: props
            for number, props in grouped.items()
            if any(prop.get("EventID") in wanted for prop in props)
        }
    else:
        selected = grouped

    filterEvents = eventIDs
    localhostIPs=["","-","::1","127.0.0.1","localhost"]
    blacklistedUsers=["DWM-3","UMFD-3","UMFD-2","DWM-2","UMFD-0","UMFD-1","DWM-1"]
    blacklistedShareFolders=["\\\\*\\SYSVOL","\\\\*\\IPC$"]

    return (filterEvents, localhostIPs, blacklistedUsers, blacklistedShareFolders, selected)
199 | input_list2 = {} 200 | for key,value in input_list.items(): 201 | for val in value: 202 | if eventIDs and val.get("EventID") in eventIDs.split(","): 203 | input_list2[key]=value 204 | elif not eventIDs: 205 | input_list2 = input_list 206 | 207 | filterEvents = eventIDs 208 | localhostIPs=["","-","::1","127.0.0.1","localhost"] 209 | blacklistedUsers=["DWM-3","UMFD-3","UMFD-2","DWM-2","UMFD-0","UMFD-1","DWM-1"] 210 | blacklistedShareFolders=["\\\\*\\SYSVOL","\\\\*\\IPC$"] 211 | 212 | return (filterEvents, localhostIPs, blacklistedUsers, blacklistedShareFolders, input_list2) 213 | 214 | def createXML(evIDs,lhostIPs,bListedUsers,bListedShareFolders,eventList,outXMLFile): 215 | 216 | 217 | targetUserList=[] 218 | remoteHostsList=[] 219 | uniqueIPs=[] 220 | 221 | # Create a random file and add the parsing data on it. See line 222 | file_handle = open(outXMLFile,"w") 223 | 224 | doc = Document() 225 | root = doc.createElement('Events') 226 | doc.appendChild(root) 227 | counter=0 # Event counter 228 | #print(eventList.items()) #[OK] 229 | print("[+] Searching for TargetUsers/Hosts, SourceUsers/Hosts, RemoteHosts/Users, TargetHosts/Users ...") 230 | 231 | if len(eventList.items()) > 0: 232 | 233 | for key, value in eventList.items(): 234 | 235 | t={} #This dictionary Holds the properties of every event. 236 | #Unpacking the List -> Dict Event's keys and values 237 | for eventValue in value: # Value holds the Event data, Keys and Values in Dict format {'EventID':'4624'} 238 | #https://stackoverflow.com/questions/54488095/python-3-dictionary-key-to-a-string-and-value-to-another-string 239 | key, value = list(eventValue.items())[0] 240 | #print(value) 241 | # Add ProcessID and ThreadID from the Execution tag. 242 | if key == "Execution": 243 | for k,v in value.items(): 244 | t.update({k:v}) 245 | 246 | 247 | # Unpack the 'Data' part of Event and update the 'Event' node. 
248 | if key == "Data": 249 | t.update(value) 250 | 251 | #if tag exists dictionary of the Event then append the inside 252 | 253 | if "Data" in t: 254 | t["Data"].append(value) 255 | 256 | #If tag non-exist on the dict then created but in this format 257 | #e.g. {'Name':'PowerShell','Data':['log1','log2' etc.]} 258 | 259 | elif key == "Data": 260 | t["Data"]=[] 261 | t["Data"].append(value) 262 | 263 | 264 | #Otherwise, just update the dictionary 265 | else: 266 | t.update(eventValue) 267 | 268 | ####################################REMOTE HOSTS###################################################### 269 | #Extract remote IPs from Event, 270 | # if IP source field does not exist then extact from the 'TargetServerName', 271 | # if 'TargetServerName' does not exist then extract from 'Computer' tag. 272 | try: 273 | if t.get("IpAddress") and (t.get("IpAddress") in lhostIPs): 274 | if t.get("Workstation") and (t.get("Workstation") not in lhostIPs): 275 | remoteHost = t.get("Workstation") 276 | t.update({'remoteHost':regExIP(remoteHost.lower())}) 277 | elif t.get("Computer") and (t.get("Computer") not in lhostIPs): 278 | remoteHost = t.get("Computer") 279 | t.update({'remoteHost':regExIP(remoteHost.lower())}) 280 | else: 281 | print("[-] Event ID %s with Record ID %s does not have a remoteHost." % (t.get("EventID"),t.get("EventRecordID"))) 282 | 283 | elif t.get("IpAddress") : #and (t.get("IpAddress") not in lhostIPs) 284 | remoteHost = t.get("IpAddress") 285 | t.update({'remoteHost':regExIP(remoteHost.lower())}) 286 | 287 | 288 | #if Sysmon File is provided, then "SourceIp" is the correct tag. 
289 | elif t.get("SourceIp") and (t.get("SourceIp") not in lhostIPs): 290 | remoteHost = t.get("SourceIp") 291 | t.update({'remoteHost':regExIP(remoteHost.lower())}) 292 | 293 | 294 | else: 295 | remoteHost = t.get("Computer") #t.get("IpAddress") 296 | t.update({'remoteHost':regExIP(remoteHost.lower())}) 297 | 298 | if t.get("SourceHostname"): 299 | remoteSourceHostname = t.get("SourceHostname") 300 | t.update({'remoteHostname':remoteSourceHostname.lower()}) 301 | else: 302 | t.update({'remoteHostname':regExIP(remoteHost.lower())}) 303 | 304 | except TypeError as te: 305 | print("[!] Something went wrong to `remoteHost` clause.") 306 | print(te) 307 | 308 | 309 | #print(remoteHost) 310 | 311 | ########################################END - REMOTE HOSTS#################################################### 312 | 313 | ###############################MESSAGE TAG########################################################### 314 | #Get values from the following keys inside from tag. 315 | #Error Code, Impersonation Level, Restricted Admin Mode, Virtual Account, Elevated Token 316 | '''if t.get("Message"): 317 | f = t.get("Message") 318 | if (re.findall('Error Code:',f)): 319 | ErrorCode = re.findall('Error Code:\s+[\w+-]*',f)[0].split(":")[1].strip() 320 | t.update({'ErrorCode':ErrorCode}) 321 | 322 | if (re.findall('Impersonation Level:',f)): 323 | ImpersonationLevel = re.findall('Impersonation Level:\s+[\w+-]*',f)[0].split(":")[1].strip() 324 | t.update({'ImpersonationLevelTranslate':ImpersonationLevel}) 325 | 326 | if(re.findall('Restricted Admin Mode:',f)): 327 | RestrictedAdminMode = re.findall('Restricted Admin Mode:\s+[\w+-]*',f)[0].split(":")[1].strip() 328 | t.update({'RestrictedAdminMode':RestrictedAdminMode}) 329 | 330 | if (re.findall('Virtual Account:',f)): 331 | VirtualAccount = re.findall('Virtual Account:\s+[\w+-]*',f)[0].split(":")[1].strip() 332 | t.update({'VirtualAccount':VirtualAccount}) 333 | 334 | if (re.findall('Elevated Token:',f)): 335 | 
ElevatedToken = re.findall('Elevated Token:\s+[\w+-]*',f)[0].split(":")[1].strip() 336 | t.update({'ElevatedToken':ElevatedToken})''' 337 | #else: 338 | # print("[-] Couldn't find tag on Event ID %s with EventRecordID %s." % (t.get("EventID"),t.get("EventRecordID"))) 339 | 340 | ##################################END - MESSAGE TAG################################################################### 341 | 342 | if (t.get("EventID") not in ["4100","4103","4104","400","403","500","501","600","800"] and not "powershell" in t.get("Channel")): # Not In Powershell Events 343 | 344 | 345 | try: 346 | if t.get("TargetUserName"): 347 | targetUser = t.get("TargetUserName") 348 | elif t.get("TargetName"): 349 | targetUser = t.get("TargetName") 350 | if re.findall('=[a-zA-Z0-9@./]+',str(targetUser)): 351 | targetUser = re.findall('=[a-zA-Z0-9@./]+',str(targetUser)) 352 | targetUser = ''.join(targetUser) 353 | targetUser = targetUser.split("=")[1].strip() 354 | else: 355 | targetUser = t.get("TargetName") 356 | elif t.get("SubjectUserName"): 357 | targetUser = t.get("SubjectUserName") 358 | # if Sysmon File is provided, then "User" is the correct tag. 359 | elif t.get("User"): 360 | targetUser = t.get("User") 361 | elif t.get("Detection User"): 362 | targetUser = t.get("Detection User") 363 | # if Sysmon File is provided, then "UserID" is the correct tag. 364 | elif t.get("UserID"): 365 | targetUser = t.get("UserID") 366 | elif t.get("Computer"): 367 | targetUser = t.get("Computer") 368 | except TypeError as te: 369 | print(te) 370 | 371 | # If everything goes well then Update/Add the targetUser property to the Event. 372 | if targetUser not in bListedUsers: 373 | t.update({'targetUser':targetUser}) 374 | else: 375 | print("[-] Event ID %s with Record ID %s discarded because the TargetUser %s is into the bListedUsers list." 
% (t.get("EventID"),t.get("EventRecordID"),targetUser)) 376 | 377 | 378 | # PowerShell logging cheatsheet: https://static1.squarespace.com/static/552092d5e4b0661088167e5c/t/5760096ecf80a129e0b17634/1465911664070/Windows+PowerShell+Logging+Cheat+Sheet+ver+June+2016+v2.pdf 379 | elif t.get("EventID") in ["4100","4103","4104","400","403","500","501","600","800"]: 380 | 381 | if t.get("Data"): 382 | eventData = t.get("Data") 383 | 384 | 385 | try: 386 | 387 | #Check if the word "User=" or "UserId=" etc. exists inside the tag 388 | # Before search unpack the Event data which are List format. 389 | for eventX in eventData: 390 | if eventX != None: 391 | try: 392 | # Try find usernames on Description part of the Event e.g 4103,4104,800 393 | if eventX.get("ContextInfo"): 394 | if re.findall('Use[rId|rID|r]+.=.[a-zA-Z0-9]+.\w+.',str(eventX.get("ContextInfo"))): 395 | 396 | targetUser = re.findall('Use[rId|rID|r]+.=.\w+.[\w+]+[^\s]\w+.',str(eventX.get("ContextInfo"))) 397 | targetUser=targetUser[0] 398 | targetUser = ''.join(targetUser) 399 | targetUser = targetUser.split("=")[1].strip() 400 | 401 | if targetUser in bListedUsers: 402 | print("[-] Event ID %s with Record ID %s discarded because the TargetUser %s is into the bListedUsers list." % (t.get("EventID"),t.get("EventRecordID"),targetUser)) 403 | 404 | else: 405 | targetUser=re.findall('[^\s]+',targetUser.lower()) 406 | targetUser=targetUser[0] 407 | targetUser=''.join(targetUser) 408 | t.update({'targetUser':targetUser}) 409 | 410 | 411 | #If ContextInfo exist as well as the UserID. 412 | elif t.get("UserID"): 413 | targetUser=t.get("UserID") 414 | t.update({'targetUser':targetUser}) 415 | 416 | else: 417 | #Some PowerShell events doesn't have the UserId property. 418 | #In this case, use a generic user, which is called `PSGenericUser` 419 | #Check if targeUser key hasn't already set. 
420 | targetUser = "PSGenericUser" 421 | t.update({'targetUser':targetUser}) 422 | 423 | 424 | if not eventX.get("ContextInfo") and t.get("UserID") and not t.get("targetUser"): 425 | targetUser=t.get("UserID") 426 | t.update({'targetUser':targetUser}) 427 | 428 | elif not eventX.get("ContextInfo") and t.get("UserID") and not t.get("targetUser"): 429 | targetUser = "PSGenericUser" 430 | t.update({'targetUser':targetUser}) 431 | 432 | except Exception as error: 433 | print("[-] TargetUser RegEx error! %s" % error) 434 | 435 | 436 | 437 | try: 438 | 439 | if eventX.get("ContextInfo"): 440 | if re.findall('HostApplication.*=.[\a-zA-Z0-9]+Engine',eventX.get("ContextInfo")): 441 | HostApplication = re.findall('HostApplication.*=.[\a-zA-Z0-9]+Engine',str(eventX.get("ContextInfo"))) 442 | else: 443 | HostApplication = re.findall('Host Application.*=.[\a-zA-Z0-9]+Engine',str(eventX.get("ContextInfo"))) 444 | 445 | if HostApplication: 446 | HostApplication = ' '.join(HostApplication) 447 | HostApplication = HostApplication.replace("Engine","").strip() 448 | HostApplication = HostApplication.split("=")[1].strip() 449 | t.update({'HostApplication':HostApplication}) 450 | 451 | except Exception as error: 452 | print("[-] HostApplication RegEx error! %s" % error) 453 | 454 | try: 455 | if eventX.get("ContextInfo"): 456 | if re.findall('ScriptName.*=.[\a-zA-Z0-9]+Command',eventX.get("ContextInfo")): 457 | ScriptName = re.findall('ScriptName.*=.[\a-zA-Z0-9]+Command',str(eventX.get("ContextInfo"))) 458 | else: 459 | ScriptName = re.findall('Script Name.*=.[\a-zA-Z0-9]+Command',str(eventX.get("ContextInfo"))) 460 | 461 | if ScriptName: 462 | ScriptName = ' '.join(ScriptName) 463 | ScriptName = ScriptName.replace("Command","").strip() 464 | ScriptName = ScriptName.split("=")[1].strip() 465 | t.update({'ScriptName':ScriptName}) 466 | #print(ScriptName) 467 | 468 | except Exception as error: 469 | print("[-] ScriptName RegEx error! 
%s" % error) 470 | 471 | try: 472 | if eventX.get("ContextInfo"): 473 | if re.findall('CommandLine.*=.[\a-zA-Z0-9]+',eventX.get("ContextInfo")): 474 | CommandLine = re.findall('CommandLine.*=.[\a-zA-Z0-9]+',str(eventX.get("ContextInfo"))) 475 | CommandLine = ' '.join(CommandLine) 476 | CommandLine = CommandLine.split("=")[1] 477 | t.update({'CommandLine':CommandLine}) 478 | #print(CommandLine) 479 | except Exception as error: 480 | print("[-] commandLine RegEx error! %s" % error) 481 | 482 | try: 483 | if eventX.get("ContextInfo"): 484 | if re.findall('CommandPath.*=.[\a-zA-Z0-9]+Sequence',eventX.get("ContextInfo")): 485 | CommandPath = re.findall('CommandPath.*=.[\a-zA-Z0-9]+Sequence',str(eventX.get("ContextInfo"))) 486 | else: 487 | CommandPath = re.findall('Command Path.*=.[\a-zA-Z0-9]+Sequence',str(eventX.get("ContextInfo"))) 488 | 489 | if CommandPath: 490 | CommandPath = ' '.join(CommandPath) 491 | CommandPath = CommandPath.replace("Sequence","").strip() 492 | CommandPath = CommandPath.split("=")[1] 493 | t.update({'CommandPath':CommandPath}) 494 | 495 | except Exception as error: 496 | print("[-] CommandPath RegEx error! %s" % error) 497 | 498 | try: 499 | if eventX: 500 | contextInfo = re.findall('Severity.*=',str(eventX.get('ContextInfo'))) 501 | if contextInfo != None: 502 | contextInfoSeverity = re.findall('Severity.*=.[a-zA-Z]+',str(contextInfo)) 503 | if contextInfoSeverity and contextInfoSeverity != None: 504 | Severity = re.findall('Severity.*=.[a-zA-Z]+',str(contextInfo)) 505 | Severity = ' '.join(Severity) 506 | Severity = Severity.split("=")[1].split(" ")[0] 507 | t.update({'Severity':Severity}) 508 | else: 509 | contextInfoSeverity="" 510 | 511 | 512 | 513 | except Exception as error: 514 | print("[-] Severity RegEx error! 
%s" % error) 515 | 516 | 517 | # print(t.get('EventRecordID')+"-->"+t.get('targetUser')) [OK] 518 | 519 | except Exception as error: 520 | print("[-] Something went wrong while parsing the PowerShell Events!") 521 | print("[+] Event ID: "+str(t.get("EventID"))+" with Record ID: "+str(t.get("EventRecordID"))) 522 | print(error) 523 | 524 | #print(t) 525 | 526 | 527 | else: 528 | targetUser = "NULL" 529 | print("[+] Event ID: "+str(t.get("EventID"))+" with Record ID: "+str(t.get("EventRecordID"))+" does not have targetUser tag!") 530 | 531 | 532 | ######################################################################################## 533 | #Add 'Attaking Hosts' into Neo4j 534 | # if Sysmon File is provided, then "DestinationIp" is the correct tag. 535 | if t.get("DestinationIp"): 536 | targetServer = t.get("DestinationIp") 537 | t.update({'targetServer':targetServer.lower()}) 538 | elif t.get("Computer"): 539 | targetServer = t.get("Computer") 540 | t.update({'targetServer':targetServer.lower()}) 541 | else: 542 | print("[-] Something went wrong during the 'DestinationHost' parsing! ") 543 | #print("[-] Event ID %s with Record ID %s does not have a targetServer." % (t.get("EventID"),t.get("EventRecordID"))) 544 | t.update({'name':t.get("EventID")}) 545 | ########################################################################################## 546 | 547 | counter=counter+1 #How many events added! 548 | 549 | createTagEvent=doc.createElement("Event") 550 | doc.childNodes[0].appendChild(createTagEvent) 551 | for tagName in t.keys(): #Example of t.keys(): {"EventID":"4624","Version":"1"} 552 | if tagName != "Message": #Remove tag from Exported Windows XML. Too much info :) 553 | text = str(t.get(tagName)) 554 | tag = str(tagName) 555 | createTag=doc.createElement(tag.replace(" ","")) #Remove SPACE from the Tag Name. 
Example: , 556 | innerTXT = doc.createTextNode(text.replace("«","")) 557 | createTag.appendChild(innerTXT) 558 | createTagEvent.appendChild(createTag) 559 | 560 | #else: 561 | # print("[-] Event ID "+str(t.get("EventID"))+" is missing.") 562 | 563 | 564 | 565 | print("[+] Creating XML for neo4j...") 566 | doc.writexml(file_handle) 567 | #doc.writexml(sys.stdout) 568 | file_handle.close() 569 | 570 | #def neo4jXML(outXMLFile,neo4jUri,neo4jUser,neo4jPass): 571 | def neo4jXML(outXMLFile,neo4jUri,neo4jUser,neo4jPass): 572 | 573 | neo4jDriver=neo4jConn(neo4jUri,neo4jUser,neo4jPass) 574 | try: 575 | #Read the created XML file with the UUID name. e.g. d1ba1cf8-0a30-42d1-ae6b-451289ca6c0d.xml 576 | neo4jDocXML = minidom.parse(outXMLFile).documentElement 577 | except Exception as e: 578 | print(e) 579 | sys.exit(1) 580 | 581 | blackListedEventProperties=[ 582 | "Opcode", 583 | "Keywords", 584 | "Version", 585 | "Level", 586 | "TransmittedServices", 587 | "KeyLength", 588 | "LmPackageName", 589 | "Key Length", 590 | "Message", 591 | "SubjectDomainName", 592 | "TicketEncryptionType", 593 | "TicketOptions", 594 | "Keywords", 595 | "Level", 596 | "KeyLength", 597 | "CertIssuerName", 598 | "CertSerialNumber", 599 | "CertThumbprint", 600 | "ObjectServer", 601 | "PreAuth Type", 602 | "TargetOutboundDomainName", 603 | "FWLink", 604 | "Unused", 605 | "Unused2", 606 | "Unused3", 607 | "Unused4", 608 | "Unused5", 609 | "Unused6", 610 | "OriginID", 611 | "OriginName", 612 | "ErrorCode", 613 | "TypeID", 614 | "TypeName", 615 | "StatusDescription", 616 | "AdditionalActionsID", 617 | "SubStatus", 618 | "Product" 619 | ] 620 | 621 | counter=0 622 | groupEvents=[] #Example [{ EventId: "4624",targetUser:"tasos"},{EventId: "4625", targetUser: "tzonis"}] 623 | 624 | try: 625 | 626 | for eventTagNode in neo4jDocXML.childNodes: 627 | dictionaryEvents=dict() # {EventId: "4624",targetUser:"tasos"},{EventId: "4625", targetUser: "tzonis"} 628 | if eventTagNode.childNodes: 629 | 
#print(eventTagNode.childNodes) #[OK] 630 | for eventTags in eventTagNode.childNodes: 631 | if (eventTags.nodeName not in blackListedEventProperties): 632 | for eventValues in eventTags.childNodes: 633 | #print(eventTags.nodeName,eventValues.nodeValue) 634 | dictionaryEvents.update({eventTags.nodeName:eventValues.nodeValue}) 635 | #print("-------------------------") 636 | groupEvents.append(dictionaryEvents) 637 | 638 | #print(groupEvents) #[OK] 639 | 640 | print("[+] Adding the Events ...") 641 | with neo4jDriver.session() as session: 642 | print("\n") 643 | print("=========Time Frame=========") 644 | total_time = 0 645 | start = time.time() 646 | # Create Neo4j Nodes 647 | insertEvents = session.run( 648 | "UNWIND $events as eventPros " 649 | 650 | "CREATE (e:Event) " 651 | "SET e=eventPros " 652 | "SET (CASE WHEN EXISTS(e.SubjectUserName) AND NOT EXISTS(e.TargetUserName) THEN e END).hasSubjectUser='false' " 653 | "SET (CASE WHEN EXISTS(e.SubjectUserName) AND EXISTS(e.TargetUserName) THEN e END).hasSubjectUser='true' " 654 | "SET (CASE WHEN EXISTS(e.SubjectUserName) AND EXISTS(e.TargetName) THEN e END).hasSubjectUser='true' " 655 | "SET (CASE WHEN NOT EXISTS(e.SubjectUserName) AND EXISTS(e.TargetUserName) THEN e END).hasSubjectUser='false' " 656 | #Example: PowerShell Events. 657 | "SET (CASE WHEN NOT EXISTS(e.SubjectUserName) AND NOT EXISTS(e.TargetUserName) THEN e END).hasSubjectUser='false' " 658 | "WITH e WHERE e.targetUser IS NOT NULL " #Avoid erros when targetUser is blacklisted and it's name will be NULL. 659 | 660 | #"MERGE (e:Event {EventRecordIDs:eventPros.EventRecordID}) SET e=eventPros " #Avoid dublicate Events with MERGE and filtering. 
661 | "MERGE (r:RemoteHosts {name:e.remoteHost,remoteHostname:e.remoteHostname}) " 662 | 663 | "MERGE (u:TargetUser {name:e.targetUser,remoteHost:e.remoteHost,targetServer:e.targetServer,hasSubjectUser:e.hasSubjectUser,EventRecordIDs: [ ]}) " 664 | "SET u.EventRecordIDs=u.EventRecordIDs+e.EventRecordID " #Append the EventRecordIDs 665 | "SET u.SubjectUsernames=[ ] " 666 | "SET u.bindSubjectUserSids=[ ] " 667 | 668 | "MERGE (t:TargetHost {name:e.targetServer}) ",events=groupEvents) 669 | total_time += time.time() - start 670 | print("[1] Neo4j insertEvents query: %f " %(total_time)) 671 | 672 | ###########################################Subject Users ############################################################### 673 | 674 | with neo4jDriver.session() as session: 675 | total_time = 0 676 | start = time.time() 677 | # Create 'SubjectUser' Node - Initialization 678 | createSubjectUsers=session.run( 679 | 680 | "MATCH (e:Event) " 681 | #"WHERE EXISTS(e.SubjectUserName) AND EXISTS(e.TargetUserName) " 682 | "WHERE e.hasSubjectUser='true' " 683 | "WITH collect(e.SubjectUserName) as SubjectUserNames,e " 684 | "UNWIND SubjectUserNames as SubjectUserName " 685 | "FOREACH(p in SubjectUserName | MERGE (s:SubjectUser {name:p,SubjectUserRealName:p,TargetUsernames: [ ],EventRecordIDs: [ ],bindTargetUserSids: [ ],IsSubjectUser:'true',remoteHost:e.remoteHost,targetServer:e.targetServer,hasTargetUsernameTag:'true',hasSubjectUsernameTag:'true'}) " 686 | "SET s.IsCreated='true' " 687 | "SET s.IsSubjectUser='true' " 688 | "SET s.CreatedByEventRecordID=e.EventRecordID) " 689 | 690 | ) 691 | 692 | total_time += time.time() - start 693 | print("[2] Neo4j createSubjectUsersNode query: %f " %(total_time)) 694 | 695 | 696 | with neo4jDriver.session() as session: 697 | total_time = 0 698 | start = time.time() 699 | # Update 'SubjectUser' node. 
700 | UpdateSubjectUsers = session.run( 701 | "MATCH (e:Event),(u:TargetUser),(s:SubjectUser) " 702 | "WHERE s.name=e.SubjectUserName " 703 | "AND u.name=e.targetUser " 704 | "AND u.remoteHost=e.remoteHost " 705 | "AND u.targetServer=e.targetServer " 706 | "AND s.remoteHost=u.remoteHost " 707 | "AND s.targetServer=u.targetServer " 708 | "AND s.remoteHost=e.remoteHost " 709 | "AND s.targetServer=e.targetServer " 710 | "AND EXISTS(e.SubjectUserName) AND e.SubjectUserName IS NOT NULL " 711 | "AND ((EXISTS(e.TargetUserName) AND e.TargetUserName IS NOT NULL) OR (EXISTS(e.TargetName) AND e.TargetName IS NOT NULL)) " 712 | "SET s.EventRecordIDs=[e.EventRecordID] " #Adding the first matched EventRecordID. On the FOREACH part is adding the rest. 713 | "WITH collect(e.SubjectUserName) as subjectUsernames, e " 714 | "UNWIND subjectUsernames AS subjectUsername " 715 | "FOREACH(p IN subjectUsername | MERGE (b:SubjectUser {name:p,remoteHost:e.remoteHost,targetServer:e.targetServer}) " 716 | "SET b.IsSubjectUser='true' " 717 | "SET (CASE WHEN NOT e.EventRecordID IN b.EventRecordIDs THEN b END).EventRecordIDs=b.EventRecordIDs+e.EventRecordID " 718 | "SET (CASE WHEN NOT e.targetUser IN b.TargetUsernames THEN b END).TargetUsernames=b.TargetUsernames+e.targetUser " 719 | "SET (CASE WHEN NOT e.TargetUserSid IN b.bindTargetUserSids THEN b END).bindTargetUserSids=b.bindTargetUserSids+e.TargetUserSid " 720 | "SET b.SubjectUserRealName=e.SubjectUserName)" 721 | ) 722 | 723 | deleteDublicateSubjectUsers= session.run( 724 | 725 | "MATCH (s:SubjectUser) " 726 | "WITH collect(s) as nodes,s.EventRecordIDs as evIDs,s.remoteHost as remoteHost,s.targetServer as targetServer,s.TargetUsernames as targetUserNames " 727 | "WHERE s.EventRecordIDs=evIDs " 728 | "AND s.remoteHost=remoteHost " 729 | "AND s.targetServer=targetServer " 730 | "AND s.TargetUsernames=targetUserNames " 731 | "AND size(nodes)>1 " 732 | "UNWIND nodes[1..] 
as node " 733 | "DETACH DELETE node" 734 | 735 | ) 736 | total_time += time.time() - start 737 | print("[3] Neo4j updateSubjectUserNode query: %f " %(total_time)) 738 | 739 | 740 | 741 | ############################### Target Users ########################################## 742 | with neo4jDriver.session() as session: 743 | total_time = 0 744 | start = time.time() 745 | updateTargetUserNode = session.run( 746 | 747 | "MATCH (s:SubjectUser),(t:TargetUser) " 748 | "WHERE t.hasSubjectUser='true' " 749 | "WITH s.EventRecordIDs as subjectUserEventRecordIDs,t.EventRecordIDs as targetUserEventRecordIDs,t,s " 750 | "UNWIND subjectUserEventRecordIDs AS subjectUserEventRecordID " 751 | "FOREACH(p IN subjectUserEventRecordID | " 752 | "SET (CASE WHEN subjectUserEventRecordID IN targetUserEventRecordIDs THEN t END).SubjectUsernames=s.name)" 753 | ) 754 | total_time += time.time() - start 755 | print("[4] Neo4j updateTargetUserNode query %f: " %(total_time)) 756 | 757 | 758 | ###################################################Relationships###################################################### 759 | 760 | with neo4jDriver.session() as session: 761 | total_time = 0 762 | start = time.time() 763 | # Check if Event node has the 'SubjectUserName'. 
If yes, then the relationship is: 764 | # IsSubjectTarget = Means that Event contains 'SubjectUserName' property but has the same value with 'targetUsername' 765 | # RemoteHost -> User -> TargetUser -> EventID -> targetServer 766 | # allInOnerelationship = session.run("MATCH (u:TargetUser),(u2:TargetUser),(e:Event),(r:RemoteHosts),(t:TargetHost) WHERE u.name IN u2.subjectUsernames AND e.EventRecordID IN u.EventRecordIDs AND e.EventRecordID IN u2.EventRecordIDs AND u.name = e.SubjectUserName AND u.remoteHost = r.name AND u.IsSubjectUser = 'true' AND u.IsTargetUser IS NULL AND t.name = u2.targetServer MERGE (r)-[r1:RemoteHostTOSubjectUsername]-(u)-[r2:SubjectUsernameTOTargetuser]-(u2)-[r3:TargetUserTOEventID]-(e)-[r4:EventIDTOtargetHost]->(t)") # WITH collect(r1)[1..] as rels, collect(r2)[1..] as rels2 FOREACH (r1 in rels | DELETE r1) FOREACH (r2 in rels2 | DELETE r2) 767 | SubjectUserTargetUserRelationship1 = session.run( 768 | 769 | "MATCH (r:RemoteHosts),(t:TargetUser),(s:SubjectUser),(th:TargetHost),(e:Event) " 770 | "WHERE t.hasSubjectUser='true' " 771 | "AND e.remoteHost=r.name " 772 | "AND s.remoteHost=r.name " 773 | "AND t.remoteHost=s.remoteHost " 774 | "AND s.name IN t.SubjectUsernames " 775 | "AND t.targetServer=s.targetServer " 776 | "AND e.hasSubjectUser='true' " 777 | "AND e.EventRecordID IN s.EventRecordIDs " 778 | "MERGE (r)-[r1:RemoteHostTOSubjectUsername]-(s)-[r2:SubjectUsernameTOTargetuser]->(t)" 779 | 780 | ) 781 | total_time += time.time() - start 782 | print("[5] Neo4j SubjectUserTargetUserRelationship1 query: %f " %(total_time)) 783 | 784 | with neo4jDriver.session() as session: 785 | total_time = 0 786 | start = time.time() 787 | SubjectUserTargetUserRelationship2 = session.run( 788 | 789 | "MATCH (t:TargetUser),(e:Event),(th:TargetHost) " 790 | "WHERE t.hasSubjectUser='true' " 791 | "AND t.targetServer=e.targetServer " 792 | "AND t.remoteHost=e.remoteHost " 793 | "AND e.EventRecordID IN t.EventRecordIDs " 794 | "AND e.targetServer=th.name " 
795 | "AND e.hasSubjectUser='true' " 796 | "MERGE (t)-[r3:TargetUserTOEvent]-(e)-[r4:EventIDTOtargetHost]->(th)" 797 | 798 | ) 799 | total_time += time.time() - start 800 | print("[6] Neo4j SubjectUserTargetUserRelationship2 query: %f " %(total_time)) 801 | 802 | with neo4jDriver.session() as session: 803 | total_time = 0 804 | start = time.time() 805 | #allInOnerelationship = session.run("MATCH (t:TargetUser),(th:TargetHost),(e:Event) WHERE e.targetUser=t.TargetRealName AND t.targetServer=th.name AND e.targetServer=th.name MERGE (t)-[m1:test1]-(e)-[m2:test2]->(th)") 806 | #deleteDublicates_AllInOnerelationship = session.run("MATCH (r:RemoteHosts)-[r1]-(t:SubjectUser)-[r2]->(s:TargetUser) with r,t,s,type(r1) as typ, tail(collect(r1)) as coll foreach(x in coll | delete x)") 807 | # Create relationships only for Users that NOT contains 'SubjectUserName' 808 | remoteHost2DomUserRelationship=session.run( 809 | 810 | "MATCH (r:RemoteHosts),(u:TargetUser),(e:Event) " 811 | "WHERE u.remoteHost = r.name " 812 | "AND e.hasSubjectUser='false' " 813 | "AND u.hasSubjectUser='false' " 814 | "AND e.EventRecordID IN u.EventRecordIDs " 815 | "MERGE (r)-[r5:Source2TargetUser]->(u)" 816 | ) 817 | total_time += time.time() - start 818 | print("[7] Neo4j remoteHost2DomUserRelationship query: %f " %(total_time)) 819 | 820 | with neo4jDriver.session() as session: 821 | total_time = 0 822 | start = time.time() 823 | targetUser2EventRelationship = session.run( 824 | 825 | "MATCH (u:TargetUser),(e:Event),(t:TargetHost) " 826 | "WHERE e.targetUser=u.name " 827 | "AND t.name=e.targetServer " 828 | "AND u.targetServer=t.name " 829 | "AND e.EventRecordID IN u.EventRecordIDs " 830 | "AND u.hasSubjectUser='false' " 831 | "MERGE (u)-[r7:TargetUser2Event]-(e)-[r8:Event2TargetHost]->(t)" 832 | ) 833 | 834 | total_time += time.time() - start 835 | print("[8] Neo4j targetUser2EventRelationship query: %f " %(total_time)) 836 | 837 | 
def eventCounters(neo4jUri,neo4jUser,neo4jPass):
    """Print how many nodes of each label and how many relationships Neo4j holds.

    Connects through ``neo4jConn``, runs one ``count`` query per node label
    plus one for relationships, prints each figure, then prints the grand
    total and a completion timestamp.

    Args:
        neo4jUri: Bolt URI passed to ``neo4jConn``.
        neo4jUser: Neo4j username passed to ``neo4jConn``.
        neo4jPass: Neo4j password passed to ``neo4jConn``.
    """
    # (label printed to the user, Cypher query), kept in the reporting order.
    countQueries = (
        ("Events", "MATCH (n:Event) RETURN count(n)"),
        ("RemoteHosts", "MATCH (n:RemoteHosts) RETURN count(n)"),
        ("TargetHosts", "MATCH (n:TargetHost) RETURN count(n)"),
        ("TargetUsers", "MATCH (n:TargetUser) RETURN count(n)"),
        ("SubjectUsers", "MATCH (n:SubjectUser) RETURN count(n)"),
        ("Relationships", "MATCH p=()-->() RETURN count(p)"),
    )

    neo4jDriver = neo4jConn(neo4jUri,neo4jUser,neo4jPass)  # Call the function
    try:
        total = 0
        # A single session is reused and closed deterministically.
        with neo4jDriver.session() as session:
            for label, query in countQueries:
                # Every counter starts at 0, so an empty result can neither
                # raise NameError nor clobber a previously computed figure.
                count = 0
                for record in session.run(query):
                    print("[+] Added " + label + ":" + str(record.value()))
                    count = int(record.value())
                total += count

        print("[+] Total: " + str(total))
        print('[+] Finished: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))
    finally:
        # Close the connection with Neo4j even when a query fails.
        neo4jDriver.close()
if __name__ == '__main__':
    # Command-line entry point: either delete everything stored in Neo4j (-D)
    # or import the Windows Events found in the file/directory given with -ev.

    parser = argparse.ArgumentParser(description='Filter the Windows Events file.')
    parser.add_argument('-e','--eventID',help="EventID filtering",nargs='?',type=str, default=[])
    parser.add_argument('-ev', '--events',help='Windows Events in XML OR EVTX format.')
    parser.add_argument('-i','--uri',help='neo4j host. Example: bolt://localhost',required=True)
    parser.add_argument('-D','--delete',help='Delete all data from Neo4j.',action='store_true')
    parser.add_argument('-u','--user',help='neo4j username.',required=True)
    parser.add_argument('-p','--passwd',help='neo4j password.',required=True)
    args = parser.parse_args()
    eventIDs=args.eventID
    neo4jUri=args.uri
    neo4jUser=args.user
    neo4jPass=args.passwd
    eventsFile = args.events
    delData = args.delete

    outXMLFileArray=[]

    # Numeric character references (e.g. "&#11;") are discarded from every
    # event before the XML is handed to minidom. Compiled once, as a raw
    # string, so "\d" is not an invalid string escape.
    numericCharRef = re.compile(r'&#\d+;')


    def parsingFunction(fileName,xmlDoc,outXMLfile):
        """Parse one events document and load it into Neo4j.

        Runs ``eventParser`` in this process, then ``createXML`` and
        ``neo4jXML`` each in its own child process. Every child is joined
        before the next step starts, so the steps run strictly in sequence.

        Args:
            fileName: Path of the XML file being parsed (used for logging).
            xmlDoc: Root element of the parsed events document.
            outXMLfile: Path of the intermediate XML file written by
                ``createXML`` and read back by ``neo4jXML``.
        """
        #Parse Windows Event XML File - Process 1
        print("[+] Parsing file %s " % fileName)
        print ('[+] Parsing Started: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))
        evIDs,lhostIPs,bListedUsers,bListedShareFolders,eventList = eventParser(eventIDs,xmlDoc)
        print ('[+] Parsing Finished: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))

        #Create neo4j XML - Process 2
        cnodes = Process(target=createXML, args=(evIDs,lhostIPs,bListedUsers,bListedShareFolders,eventList,outXMLfile))
        cnodes.start()
        cnodes.join()

        #Read neo4j XML - Process 3
        mnodes = Process(target=neo4jXML,args=(str(outXMLfile),neo4jUri,neo4jUser,neo4jPass))
        print("[+] Loading neo4j XML ...")
        mnodes.start()
        mnodes.join()


    def generateOutXMLFileRandomName(providedPathFile):
        """Return a random ``<uuid4>.xml`` path next to the provided input.

        Args:
            providedPathFile: Either an existing ``.xml``/``.evtx`` file or a
                directory.

        Returns:
            A ``str`` path in the file's directory when a file is given,
            otherwise a ``Path`` inside the given directory.
        """
        randomName = str(uuid.uuid4()) + ".xml"
        if os.path.isfile(providedPathFile) and str(providedPathFile).endswith((".xml",".evtx")):
            # os.path.join uses the platform separator and keeps the result
            # relative when the input has no directory component.
            return os.path.join(os.path.dirname(providedPathFile), randomName)
        # Is Directory
        return Path(providedPathFile) / randomName


    def _removeTempFile(path):
        """Delete *path* if it exists (it may be missing when a step failed early)."""
        if os.path.exists(path):
            os.remove(path)


    def _importEvtx(evtxPath, outXMLFile):
        """Convert one EVTX file to a temporary XML file and import it."""
        # Read the contents of the EVTX file.
        evtxDoc = get_events(evtxPath)

        # Create an XML file with the same name as EVTX (extension swapped only).
        evtx2xml = os.path.splitext(str(evtxPath))[0] + ".xml"
        print ('[+] Started: {:%d-%m-%Y %H:%M:%S}'.format(datetime.datetime.now()))
        print("[+] I'm fixing the fualty chars, I need sometime for that ...")
        print("\n")
        try:
            with open(evtx2xml, "w") as f:
                # NOTE(review): the three literals below are empty in this copy
                # of the file; presumably the XML header/wrapper markup was
                # stripped on extraction -- confirm against the repository.
                f.write("")
                f.write("\n")
                f.write("")
                for x in evtxDoc:
                    #discard the unicode chars
                    f.write(numericCharRef.sub('', x))
                f.write("")

            # Parse once; the original parsed the same file twice.
            rootDoc = minidom.parse(evtx2xml).documentElement
            parsingFunction(evtx2xml,rootDoc,outXMLFile)
            print("\n")
        finally:
            # Remove temp files
            _removeTempFile(outXMLFile)
            _removeTempFile(evtx2xml)


    def _importXml(xmlPath, outXMLFile):
        """Clean one exported XML file into ``*_fixed.xml`` and import it."""
        #Open exported XML and remove those chars - Step 1
        with open(xmlPath,"r",encoding="utf-8") as openXMLread:
            # NOTE(review): the pattern is empty in this copy of the file;
            # presumably markup was stripped on extraction -- confirm.
            fixChars=re.sub(r"", r"", openXMLread.read()) #When Events exported from Windows Event Viewer has those bad chars inside the XML.
        fixChars=numericCharRef.sub('',fixChars) # Clean the Unicode chars.
        # https://stackoverflow.com/questions/51710082/what-does-unicodedata-normalize-do-in-python
        # https://godatadriven.com/blog/handling-encoding-issues-with-unicode-normalisation-in-python/
        # cp1252 bytes are not always valid UTF-8 (e.g. a lone 0xA3 for a
        # pound sign); such bytes are dropped instead of aborting the whole
        # import with UnicodeDecodeError.
        fixChars=unicodedata.normalize("NFKD", fixChars).encode('WINDOWS-1252', 'ignore').decode('utf-8', 'ignore')

        #Write again the XML without those chars -Step 2
        fixedFile=os.path.splitext(str(xmlPath))[0] + "_fixed.xml"
        try:
            with open(fixedFile,"w") as openXMLwrite:
                openXMLwrite.write(fixChars)

            rootDoc = minidom.parse(fixedFile).documentElement #Open exported XML file.
            parsingFunction(fixedFile,rootDoc,outXMLFile)
            print("\n")
        finally:
            # Remove temp files
            _removeTempFile(outXMLFile)
            _removeTempFile(fixedFile)


    if(delData):
        neo4jDriver=neo4jConn(neo4jUri,neo4jUser,neo4jPass)
        print("[+] Connecting with neo4j ...")
        print("[+] Deleting all the data ...")
        try:
            with neo4jDriver.session() as session:
                session.run("MATCH (n) DETACH DELETE n")
        finally:
            #Close the connection with Neo4j
            neo4jDriver.close()

    else:
        if not eventsFile:
            # Without -ev there is nothing to import; say so explicitly
            # instead of surfacing a TypeError from os.path.isdir(None).
            print("[!] Provide an XML or EVTX file! ")
            sys.exit(1)

        try:
            # Check first if the user provided PATH or FILE and if it is exist.
            if os.path.isdir(eventsFile):
                #Enumerate the files under the specified directory.
                eventsFolder = Path(eventsFile)
                for file in os.listdir(eventsFolder):
                    fileFullPath = eventsFolder / file
                    if not os.path.isfile(fileFullPath):
                        continue
                    if file.endswith('.evtx'):
                        _importEvtx(fileFullPath, generateOutXMLFileRandomName(eventsFolder))
                    elif file.endswith('.xml') and not file.endswith('_fixed.xml'):
                        _importXml(fileFullPath, generateOutXMLFileRandomName(eventsFolder))

            # User provided a file and not a directory.
            elif eventsFile.endswith('.evtx'):
                _importEvtx(eventsFile, generateOutXMLFileRandomName(eventsFile))
            elif eventsFile.endswith('.xml'):
                _importXml(eventsFile, generateOutXMLFileRandomName(eventsFile))
            else:
                print("[!] Provide an XML or EVTX file! ")

        except Exception as e:
            print(e)
            sys.exit(1)

        #Print Counters - Process 4
        # NOTE(review): the nesting of this section is not recoverable from the
        # flattened source; it is assumed to belong to the import branch.
        print("\n")
        print("========= Database Information ==========")
        ccounters=Process(target=eventCounters,args=(neo4jUri,neo4jUser,neo4jPass))
        ccounters.start()
        ccounters.join()
-------------------------------------------------------------------------------- 1 | #### Minidom fails to decode hex chars like \xb5 2 | 3 | If you use wevtutil or any other tool to export the Windows Events in XML form, these tools may fail to decode chars like "\xb5"=μ (e.g. μTorrent). As a result of this oversight, minidom will also fail to parse your XML file. 4 | 5 | ##### The code responsible for this problematic behavior is in the file "Lib/xml/dom/expatbuilder.py", at line 910. 6 | 7 | ![alt text](https://github.com/tasox/Epimitheus/blob/master/minidom/expatbuilderUnFixed.png) 8 | 9 | 10 | #### Fixed 11 | ![alt text](https://github.com/tasox/Epimitheus/blob/master/minidom/expatbuilderFixed.png) 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /minidom/expatbuilder.py: -------------------------------------------------------------------------------- 1 | """Facility to use the Expat parser to load a minidom instance 2 | from a string or file. 3 | 4 | This avoids all the overhead of SAX and pulldom to gain performance. 5 | """ 6 | 7 | # Warning! 8 | # 9 | # This module is tightly bound to the implementation details of the 10 | # minidom DOM and can't be used with other DOM implementations. This 11 | # is due, in part, to a lack of appropriate methods in the DOM (there is 12 | # no way to create Entity and Notation nodes via the DOM Level 2 13 | # interface), and for performance. The latter is the cause of some fairly 14 | # cryptic code. 15 | # 16 | # Performance hacks: 17 | # 18 | # - .character_data_handler() has an extra case in which continuing 19 | # data is appended to an existing Text node; this can be a 20 | # speedup since pyexpat can break up character data into multiple 21 | # callbacks even though we set the buffer_text attribute on the 22 | # parser. This also gives us the advantage that we don't need a 23 | # separate normalization pass. 
class ElementInfo(object):
    """Declaration data recorded for one element type.

    Stores the element's tag name, its content model (``None`` until one is
    supplied) and a list of attribute-declaration records. In each record
    index 1 is the attribute name and index -2 its declared type.
    """

    __slots__ = '_attr_info', '_model', 'tagName'

    def __init__(self, tagName, model=None):
        self.tagName = tagName
        self._attr_info = []
        self._model = model

    def __getstate__(self):
        return self._attr_info, self._model, self.tagName

    def __setstate__(self, state):
        self._attr_info, self._model, self.tagName = state

    def _declaration_for(self, aname):
        """Return the first attribute record named *aname*, or ``None``."""
        for record in self._attr_info:
            if record[1] == aname:
                return record
        return None

    def getAttributeType(self, aname):
        """Return the ``TypeInfo`` for attribute *aname*.

        Undeclared attributes yield ``minidom._no_type``; a declared type
        starting with ``"("`` is reported as an enumeration.
        """
        record = self._declaration_for(aname)
        if record is None:
            return minidom._no_type
        declared = record[-2]
        key = "ENUM" if declared[0] == "(" else declared
        return _typeinfo_map[key]

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Always return ``minidom._no_type``."""
        return minidom._no_type

    def isElementContent(self):
        """Return True when a model is known and is neither ANY nor MIXED."""
        if not self._model:
            return False
        content_kind = self._model[0]
        return content_kind not in (expat.model.XML_CTYPE_ANY,
                                    expat.model.XML_CTYPE_MIXED)

    def isEmpty(self):
        """Return True when a model is known and declares EMPTY content."""
        if not self._model:
            return False
        return self._model[0] == expat.model.XML_CTYPE_EMPTY

    def isId(self, aname):
        """Return True when attribute *aname* is declared with type ``ID``."""
        record = self._declaration_for(aname)
        if record is None:
            return False
        return record[-2] == "ID"

    def isIdNS(self, euri, ename, auri, aname):
        # not sure this is meaningful
        return self.isId((auri, aname))
def _intern(builder, s):
    """Return the builder's shared copy of *s*, registering it on first use."""
    return builder._intern_setdefault(s, s)


def _parse_ns_name(builder, name):
    """Split a space-separated namespace name into its components.

    *name* must be ``"uri localname"`` or ``"uri localname prefix"``. Every
    returned string is passed through the builder's intern table.

    Returns:
        ``(uri, localname, prefix, qname)``; for the two-part form the prefix
        is ``EMPTY_PREFIX`` and the qualified name equals the local name.

    Raises:
        ValueError: if *name* splits into more than three parts.
    """
    assert ' ' in name
    pieces = name.split(' ')
    remember = builder._intern_setdefault
    piece_count = len(pieces)

    if piece_count not in (2, 3):
        raise ValueError("Unsupported syntax: spaces in URIs not supported: %r" % name)

    if piece_count == 3:
        uri, localname, prefix = pieces
        prefix = remember(prefix, prefix)
        qualified = "%s:%s" % (prefix, localname)
        qname = remember(qualified, qualified)
        localname = remember(localname, localname)
    else:
        uri, localname = pieces
        prefix = EMPTY_PREFIX
        localname = remember(localname, localname)
        qname = localname

    return remember(uri, uri), localname, prefix, qname
class ExpatBuilder:
    """Document builder that uses Expat to build a ParsedXML.DOM document
    instance."""

    def __init__(self, options=None):
        """Store *options* (default ``xmlbuilder.Options()``) and reset state."""
        if options is None:
            options = xmlbuilder.Options()
        self._options = options
        if self._options.filter is not None:
            self._filter = FilterVisibilityController(self._options.filter)
        else:
            self._filter = None
            # This *really* doesn't do anything in this case, so
            # override it with something fast & minimal.
            self._finish_start_element = id
        self._parser = None
        self.reset()

    def createParser(self):
        """Create a new parser object."""
        return expat.ParserCreate()

    def getParser(self):
        """Return the parser object, creating a new one if needed."""
        if not self._parser:
            self._parser = self.createParser()
            self._intern_setdefault = self._parser.intern.setdefault
            self._parser.buffer_text = True
            self._parser.ordered_attributes = True
            self._parser.specified_attributes = True
            self.install(self._parser)
        return self._parser

    def reset(self):
        """Free all data structures used during DOM construction."""
        self.document = theDOMImplementation.createDocument(
            EMPTY_NAMESPACE, None, None)
        self.curNode = self.document
        self._elem_info = self.document._elem_info
        self._cdata = False

    def install(self, parser):
        """Install the callbacks needed to build the DOM into the parser."""
        # This creates circular references!
        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
        parser.StartElementHandler = self.first_element_handler
        parser.EndElementHandler = self.end_element_handler
        parser.ProcessingInstructionHandler = self.pi_handler
        if self._options.entities:
            parser.EntityDeclHandler = self.entity_decl_handler
        parser.NotationDeclHandler = self.notation_decl_handler
        if self._options.comments:
            parser.CommentHandler = self.comment_handler
        if self._options.cdata_sections:
            parser.StartCdataSectionHandler = self.start_cdata_section_handler
            parser.EndCdataSectionHandler = self.end_cdata_section_handler
            parser.CharacterDataHandler = self.character_data_handler_cdata
        else:
            parser.CharacterDataHandler = self.character_data_handler
        parser.ExternalEntityRefHandler = self.external_entity_ref_handler
        parser.XmlDeclHandler = self.xml_decl_handler
        parser.ElementDeclHandler = self.element_decl_handler
        parser.AttlistDeclHandler = self.attlist_decl_handler

    def parseFile(self, file):
        """Parse a document from a file object, returning the document
        node."""
        parser = self.getParser()
        first_buffer = True
        try:
            while 1:
                buffer = file.read(16*1024)
                if not buffer:
                    break
                parser.Parse(buffer, 0)
                # Only the first buffer is scanned for an internal subset.
                if first_buffer and self.document.documentElement:
                    self._setup_subset(buffer)
                first_buffer = False
            parser.Parse("", True)
        except ParseEscape:
            # Raised when a filter interrupts parsing; keep what was built.
            pass
        doc = self.document
        self.reset()
        self._parser = None
        return doc

    def parseString(self, string):
        """Parse a document from a string, returning the document node."""
        parser = self.getParser()
        try:
            parser.Parse(string, True)
            self._setup_subset(string)
        except ParseEscape:
            pass
        doc = self.document
        self.reset()
        self._parser = None
        return doc

    def _setup_subset(self, buffer):
        """Load the internal subset if there might be one."""
        if self.document.doctype:
            extractor = InternalSubsetExtractor()
            extractor.parseString(buffer)
            subset = extractor.getSubset()
            self.document.doctype.internalSubset = subset

    def start_doctype_decl_handler(self, doctypeName, systemId, publicId,
                                   has_internal_subset):
        """Create the DocumentType node, unless the filter rejects it."""
        doctype = self.document.implementation.createDocumentType(
            doctypeName, publicId, systemId)
        doctype.ownerDocument = self.document
        _append_child(self.document, doctype)
        self.document.doctype = doctype
        if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT:
            self.document.doctype = None
            del self.document.childNodes[-1]
            doctype = None
            # With no doctype node there is nowhere to store declarations.
            self._parser.EntityDeclHandler = None
            self._parser.NotationDeclHandler = None
        if has_internal_subset:
            if doctype is not None:
                doctype.entities._seq = []
                doctype.notations._seq = []
            # Comments and PIs are not collected while inside the subset;
            # end_doctype_decl_handler() re-installs these handlers.
            self._parser.CommentHandler = None
            self._parser.ProcessingInstructionHandler = None
            self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler

    def end_doctype_decl_handler(self):
        """Re-enable the handlers suspended for the internal subset."""
        if self._options.comments:
            self._parser.CommentHandler = self.comment_handler
        self._parser.ProcessingInstructionHandler = self.pi_handler
        if not (self._elem_info or self._filter):
            # Nothing to do at element end; use a cheap no-op.
            self._finish_end_element = id

    def pi_handler(self, target, data):
        """Append a processing instruction, dropping it if the filter rejects it."""
        node = self.document.createProcessingInstruction(target, data)
        _append_child(self.curNode, node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            self.curNode.removeChild(node)

    def character_data_handler_cdata(self, data):
        """Character-data handler used when CDATA sections are preserved."""
        childNodes = self.curNode.childNodes
        if self._cdata:
            if (self._cdata_continue
                    and childNodes[-1].nodeType == CDATA_SECTION_NODE):
                childNodes[-1].appendData(data)
                return
            node = self.document.createCDATASection(data)
            self._cdata_continue = True
        elif childNodes and childNodes[-1].nodeType == TEXT_NODE:
            # Continuation of the previous text node: extend it in place.
            node = childNodes[-1]
            value = node.data + data
            node.data = value
            return
        else:
            node = minidom.Text()
            node.data = data
            node.ownerDocument = self.document
        _append_child(self.curNode, node)

    def character_data_handler(self, data):
        """Character-data handler used when CDATA sections are not preserved."""
        childNodes = self.curNode.childNodes
        if childNodes and childNodes[-1].nodeType == TEXT_NODE:
            node = childNodes[-1]
            node.data = node.data + data
            return
        node = minidom.Text()
        node.data = node.data + data
        node.ownerDocument = self.document
        _append_child(self.curNode, node)

    def entity_decl_handler(self, entityName, is_parameter_entity, value,
                            base, systemId, publicId, notationName):
        """Record a general entity declaration on the doctype."""
        if is_parameter_entity:
            # we don't care about parameter entities for the DOM
            return
        if not self._options.entities:
            return
        node = self.document._create_entity(entityName, publicId,
                                            systemId, notationName)
        if value is not None:
            # internal entity
            # node *should* be readonly, but we'll cheat
            child = self.document.createTextNode(value)
            node.childNodes.append(child)
        self.document.doctype.entities._seq.append(node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            del self.document.doctype.entities._seq[-1]

    def notation_decl_handler(self, notationName, base, systemId, publicId):
        """Record a notation declaration on the doctype."""
        node = self.document._create_notation(notationName, publicId, systemId)
        self.document.doctype.notations._seq.append(node)
        # The notation is dropped only when the filter REJECTS it, matching
        # entity_decl_handler(), comment_handler() and pi_handler(). The
        # previous test against FILTER_ACCEPT discarded accepted notations
        # and kept rejected ones.
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            del self.document.doctype.notations._seq[-1]

    def comment_handler(self, data):
        """Append a comment node, dropping it if the filter rejects it."""
        node = self.document.createComment(data)
        _append_child(self.curNode, node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            self.curNode.removeChild(node)

    def start_cdata_section_handler(self):
        self._cdata = True
        self._cdata_continue = False

    def end_cdata_section_handler(self):
        self._cdata = False
        self._cdata_continue = False

    def external_entity_ref_handler(self, context, base, systemId, publicId):
        return 1

    def first_element_handler(self, name, attributes):
        """Handle the first start tag, then switch to start_element_handler()."""
        if self._filter is None and not self._elem_info:
            self._finish_end_element = id
        self.getParser().StartElementHandler = self.start_element_handler
        self.start_element_handler(name, attributes)

    def start_element_handler(self, name, attributes):
        """Create an element with its attributes and make it the current node."""
        node = self.document.createElement(name)
        _append_child(self.curNode, node)
        self.curNode = node

        if attributes:
            # ordered_attributes is set, so *attributes* is a flat
            # [name, value, name, value, ...] list.
            for i in range(0, len(attributes), 2):
                a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
                                 None, EMPTY_PREFIX)
                value = attributes[i+1]
                a.value = value
                a.ownerDocument = self.document
                _set_attribute_node(node, a)

        if node is not self.document.documentElement:
            self._finish_start_element(node)

    def _finish_start_element(self, node):
        """Ask the filter whether *node* should be rejected or skipped."""
        if self._filter:
            # To be general, we'd have to call isSameNode(), but this
            # is sufficient for minidom:
            if node is self.document.documentElement:
                return
            filt = self._filter.startContainer(node)
            if filt == FILTER_REJECT:
                # ignore this node & all descendents
                Rejecter(self)
            elif filt == FILTER_SKIP:
                # ignore this node, but make it's children become
                # children of the parent node
                Skipper(self)
            else:
                return
            self.curNode = node.parentNode
            node.parentNode.removeChild(node)
            node.unlink()

    # If this ever changes, Namespaces.end_element_handler() needs to
    # be changed to match.
    #
    def end_element_handler(self, name):
        """Pop the current node and run the end-of-element processing."""
        curNode = self.curNode
        self.curNode = curNode.parentNode
        self._finish_end_element(curNode)

    def _finish_end_element(self, curNode):
        """Strip ignorable whitespace and apply the filter to a closed element."""
        info = self._elem_info.get(curNode.tagName)
        if info:
            self._handle_white_text_nodes(curNode, info)
        if self._filter:
            if curNode is self.document.documentElement:
                return
            if self._filter.acceptNode(curNode) == FILTER_REJECT:
                self.curNode.removeChild(curNode)
                curNode.unlink()

    def _handle_white_text_nodes(self, node, info):
        """Remove whitespace-only text children of an element-content node."""
        if (self._options.whitespace_in_element_content
                or not info.isElementContent()):
            return

        # We have element type information and should remove ignorable
        # whitespace; identify for text nodes which contain only
        # whitespace.
        ignorable = [
            child for child in node.childNodes
            if child.nodeType == TEXT_NODE and not child.data.strip()
        ]

        # Remove ignorable whitespace from the tree.
        for child in ignorable:
            node.removeChild(child)

    def element_decl_handler(self, name, model):
        """Record the content model declared for element type *name*."""
        info = self._elem_info.get(name)
        if info is None:
            self._elem_info[name] = ElementInfo(name, model)
        else:
            assert info._model is None
            info._model = model

    def attlist_decl_handler(self, elem, name, type, default, required):
        """Record an attribute declaration for element type *elem*."""
        info = self._elem_info.get(elem)
        if info is None:
            info = ElementInfo(elem)
            self._elem_info[elem] = info
        info._attr_info.append(
            [None, name, None, None, default, 0, type, required])

    def xml_decl_handler(self, version, encoding, standalone):
        """Copy the XML declaration's fields onto the document."""
        self.document.version = version
        self.document.encoding = encoding
        # This is still a little ugly, thanks to the pyexpat API. ;-(
        if standalone >= 0:
            if standalone:
                self.document.standalone = True
            else:
                self.document.standalone = False
# Don't include FILTER_INTERRUPT, since that's checked separately
# where allowed.
_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP)


class FilterVisibilityController(object):
    """Adapter around a DOMBuilderFilter that honours its ``whatToShow`` mask.

    Nodes whose type is not selected by ``whatToShow`` are accepted without
    consulting the wrapped filter.
    """

    __slots__ = 'filter',

    # Node type -> the NodeFilter.SHOW_* bit that selects it.
    _nodetype_mask = {
        Node.ELEMENT_NODE:                NodeFilter.SHOW_ELEMENT,
        Node.ATTRIBUTE_NODE:              NodeFilter.SHOW_ATTRIBUTE,
        Node.TEXT_NODE:                   NodeFilter.SHOW_TEXT,
        Node.CDATA_SECTION_NODE:          NodeFilter.SHOW_CDATA_SECTION,
        Node.ENTITY_REFERENCE_NODE:       NodeFilter.SHOW_ENTITY_REFERENCE,
        Node.ENTITY_NODE:                 NodeFilter.SHOW_ENTITY,
        Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION,
        Node.COMMENT_NODE:                NodeFilter.SHOW_COMMENT,
        Node.DOCUMENT_NODE:               NodeFilter.SHOW_DOCUMENT,
        Node.DOCUMENT_TYPE_NODE:          NodeFilter.SHOW_DOCUMENT_TYPE,
        Node.DOCUMENT_FRAGMENT_NODE:      NodeFilter.SHOW_DOCUMENT_FRAGMENT,
        Node.NOTATION_NODE:               NodeFilter.SHOW_NOTATION,
    }

    def __init__(self, filter):
        self.filter = filter

    def startContainer(self, node):
        """Return the filter's verdict for the start of *node*.

        Raises:
            ParseEscape: if the filter returns ``FILTER_INTERRUPT``.
            ValueError: if the filter returns an unknown value.
        """
        mask = self._nodetype_mask[node.nodeType]
        if not (self.filter.whatToShow & mask):
            return FILTER_ACCEPT

        verdict = self.filter.startContainer(node)
        if verdict == FILTER_INTERRUPT:
            raise ParseEscape
        if verdict not in _ALLOWED_FILTER_RETURNS:
            raise ValueError(
                "startContainer() returned illegal value: " + repr(verdict))
        return verdict

    def acceptNode(self, node):
        """Return the filter's verdict for the completed *node*.

        A ``FILTER_SKIP`` verdict re-parents the node's children onto its
        parent and is reported as ``FILTER_REJECT``, leaving removal of the
        node itself to the caller.

        Raises:
            ParseEscape: if the filter returns ``FILTER_INTERRUPT``.
            ValueError: if the filter returns an unknown value.
        """
        mask = self._nodetype_mask[node.nodeType]
        if not (self.filter.whatToShow & mask):
            return FILTER_ACCEPT

        verdict = self.filter.acceptNode(node)
        if verdict == FILTER_INTERRUPT:
            raise ParseEscape
        if verdict == FILTER_SKIP:
            # move all child nodes to the parent, and remove this node
            new_home = node.parentNode
            for orphan in list(node.childNodes):
                new_home.appendChild(orphan)
            # node is handled by the caller
            return FILTER_REJECT
        if verdict not in _ALLOWED_FILTER_RETURNS:
            raise ValueError(
                "acceptNode() returned illegal value: " + repr(verdict))
        return verdict
NodeFilter.SHOW_ENTITY_REFERENCE, 504 | Node.ENTITY_NODE: NodeFilter.SHOW_ENTITY, 505 | Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION, 506 | Node.COMMENT_NODE: NodeFilter.SHOW_COMMENT, 507 | Node.DOCUMENT_NODE: NodeFilter.SHOW_DOCUMENT, 508 | Node.DOCUMENT_TYPE_NODE: NodeFilter.SHOW_DOCUMENT_TYPE, 509 | Node.DOCUMENT_FRAGMENT_NODE: NodeFilter.SHOW_DOCUMENT_FRAGMENT, 510 | Node.NOTATION_NODE: NodeFilter.SHOW_NOTATION, 511 | } 512 | 513 | 514 | class FilterCrutch(object): 515 | __slots__ = '_builder', '_level', '_old_start', '_old_end' 516 | 517 | def __init__(self, builder): 518 | self._level = 0 519 | self._builder = builder 520 | parser = builder._parser 521 | self._old_start = parser.StartElementHandler 522 | self._old_end = parser.EndElementHandler 523 | parser.StartElementHandler = self.start_element_handler 524 | parser.EndElementHandler = self.end_element_handler 525 | 526 | class Rejecter(FilterCrutch): 527 | __slots__ = () 528 | 529 | def __init__(self, builder): 530 | FilterCrutch.__init__(self, builder) 531 | parser = builder._parser 532 | for name in ("ProcessingInstructionHandler", 533 | "CommentHandler", 534 | "CharacterDataHandler", 535 | "StartCdataSectionHandler", 536 | "EndCdataSectionHandler", 537 | "ExternalEntityRefHandler", 538 | ): 539 | setattr(parser, name, None) 540 | 541 | def start_element_handler(self, *args): 542 | self._level = self._level + 1 543 | 544 | def end_element_handler(self, *args): 545 | if self._level == 0: 546 | # restore the old handlers 547 | parser = self._builder._parser 548 | self._builder.install(parser) 549 | parser.StartElementHandler = self._old_start 550 | parser.EndElementHandler = self._old_end 551 | else: 552 | self._level = self._level - 1 553 | 554 | class Skipper(FilterCrutch): 555 | __slots__ = () 556 | 557 | def start_element_handler(self, *args): 558 | node = self._builder.curNode 559 | self._old_start(*args) 560 | if self._builder.curNode is not node: 561 | self._level = 
self._level + 1 562 | 563 | def end_element_handler(self, *args): 564 | if self._level == 0: 565 | # We're popping back out of the node we're skipping, so we 566 | # shouldn't need to do anything but reset the handlers. 567 | self._builder._parser.StartElementHandler = self._old_start 568 | self._builder._parser.EndElementHandler = self._old_end 569 | self._builder = None 570 | else: 571 | self._level = self._level - 1 572 | self._old_end(*args) 573 | 574 | 575 | # framework document used by the fragment builder. 576 | # Takes a string for the doctype, subset string, and namespace attrs string. 577 | 578 | _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \ 579 | "http://xml.python.org/entities/fragment-builder/internal" 580 | 581 | _FRAGMENT_BUILDER_TEMPLATE = ( 582 | '''\ 583 | 587 | %%s 588 | ]> 589 | &fragment-builder-internal;''' 591 | % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID) 592 | 593 | 594 | class FragmentBuilder(ExpatBuilder): 595 | """Builder which constructs document fragments given XML source 596 | text and a context node. 597 | 598 | The context node is expected to provide information about the 599 | namespace declarations which are in scope at the start of the 600 | fragment. 
601 | """ 602 | 603 | def __init__(self, context, options=None): 604 | if context.nodeType == DOCUMENT_NODE: 605 | self.originalDocument = context 606 | self.context = context 607 | else: 608 | self.originalDocument = context.ownerDocument 609 | self.context = context 610 | ExpatBuilder.__init__(self, options) 611 | 612 | def reset(self): 613 | ExpatBuilder.reset(self) 614 | self.fragment = None 615 | 616 | def parseFile(self, file): 617 | """Parse a document fragment from a file object, returning the 618 | fragment node.""" 619 | return self.parseString(file.read()) 620 | 621 | def parseString(self, string): 622 | """Parse a document fragment from a string, returning the 623 | fragment node.""" 624 | self._source = string 625 | parser = self.getParser() 626 | doctype = self.originalDocument.doctype 627 | ident = "" 628 | if doctype: 629 | subset = doctype.internalSubset or self._getDeclarations() 630 | if doctype.publicId: 631 | ident = ('PUBLIC "%s" "%s"' 632 | % (doctype.publicId, doctype.systemId)) 633 | elif doctype.systemId: 634 | ident = 'SYSTEM "%s"' % doctype.systemId 635 | else: 636 | subset = "" 637 | nsattrs = self._getNSattrs() # get ns decls from node's ancestors 638 | document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs) 639 | try: 640 | parser.Parse(document, 1) 641 | except: 642 | self.reset() 643 | raise 644 | fragment = self.fragment 645 | self.reset() 646 | ## self._parser = None 647 | return fragment 648 | 649 | def _getDeclarations(self): 650 | """Re-create the internal subset from the DocumentType node. 651 | 652 | This is only needed if we don't already have the 653 | internalSubset as a string. 
654 | """ 655 | doctype = self.context.ownerDocument.doctype 656 | s = "" 657 | if doctype: 658 | for i in range(doctype.notations.length): 659 | notation = doctype.notations.item(i) 660 | if s: 661 | s = s + "\n " 662 | s = "%s' \ 665 | % (s, notation.publicId, notation.systemId) 666 | else: 667 | s = '%s SYSTEM "%s">' % (s, notation.systemId) 668 | for i in range(doctype.entities.length): 669 | entity = doctype.entities.item(i) 670 | if s: 671 | s = s + "\n " 672 | s = "%s" 683 | return s 684 | 685 | def _getNSattrs(self): 686 | return "" 687 | 688 | def external_entity_ref_handler(self, context, base, systemId, publicId): 689 | if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID: 690 | # this entref is the one that we made to put the subtree 691 | # in; all of our given input is parsed in here. 692 | old_document = self.document 693 | old_cur_node = self.curNode 694 | parser = self._parser.ExternalEntityParserCreate(context) 695 | # put the real document back, parse into the fragment to return 696 | self.document = self.originalDocument 697 | self.fragment = self.document.createDocumentFragment() 698 | self.curNode = self.fragment 699 | try: 700 | parser.Parse(self._source, 1) 701 | finally: 702 | self.curNode = old_cur_node 703 | self.document = old_document 704 | self._source = None 705 | return -1 706 | else: 707 | return ExpatBuilder.external_entity_ref_handler( 708 | self, context, base, systemId, publicId) 709 | 710 | 711 | class Namespaces: 712 | """Mix-in class for builders; adds support for namespaces.""" 713 | 714 | def _initNamespaces(self): 715 | # list of (prefix, uri) ns declarations. Namespace attrs are 716 | # constructed from this and added to the element's attrs. 
717 | self._ns_ordered_prefixes = [] 718 | 719 | def createParser(self): 720 | """Create a new namespace-handling parser.""" 721 | parser = expat.ParserCreate(namespace_separator=" ") 722 | parser.namespace_prefixes = True 723 | return parser 724 | 725 | def install(self, parser): 726 | """Insert the namespace-handlers onto the parser.""" 727 | ExpatBuilder.install(self, parser) 728 | if self._options.namespace_declarations: 729 | parser.StartNamespaceDeclHandler = ( 730 | self.start_namespace_decl_handler) 731 | 732 | def start_namespace_decl_handler(self, prefix, uri): 733 | """Push this namespace declaration on our storage.""" 734 | self._ns_ordered_prefixes.append((prefix, uri)) 735 | 736 | def start_element_handler(self, name, attributes): 737 | if ' ' in name: 738 | uri, localname, prefix, qname = _parse_ns_name(self, name) 739 | else: 740 | uri = EMPTY_NAMESPACE 741 | qname = name 742 | localname = None 743 | prefix = EMPTY_PREFIX 744 | node = minidom.Element(qname, uri, prefix, localname) 745 | node.ownerDocument = self.document 746 | _append_child(self.curNode, node) 747 | self.curNode = node 748 | 749 | if self._ns_ordered_prefixes: 750 | for prefix, uri in self._ns_ordered_prefixes: 751 | if prefix: 752 | a = minidom.Attr(_intern(self, 'xmlns:' + prefix), 753 | XMLNS_NAMESPACE, prefix, "xmlns") 754 | else: 755 | a = minidom.Attr("xmlns", XMLNS_NAMESPACE, 756 | "xmlns", EMPTY_PREFIX) 757 | a.value = uri 758 | a.ownerDocument = self.document 759 | _set_attribute_node(node, a) 760 | del self._ns_ordered_prefixes[:] 761 | 762 | if attributes: 763 | node._ensure_attributes() 764 | _attrs = node._attrs 765 | _attrsNS = node._attrsNS 766 | for i in range(0, len(attributes), 2): 767 | aname = attributes[i] 768 | value = attributes[i+1] 769 | if ' ' in aname: 770 | uri, localname, prefix, qname = _parse_ns_name(self, aname) 771 | a = minidom.Attr(qname, uri, localname, prefix) 772 | _attrs[qname] = a 773 | _attrsNS[(uri, localname)] = a 774 | else: 775 | a = 
minidom.Attr(aname, EMPTY_NAMESPACE, 776 | aname, EMPTY_PREFIX) 777 | _attrs[aname] = a 778 | _attrsNS[(EMPTY_NAMESPACE, aname)] = a 779 | a.ownerDocument = self.document 780 | a.value = value 781 | a.ownerElement = node 782 | 783 | if __debug__: 784 | # This only adds some asserts to the original 785 | # end_element_handler(), so we only define this when -O is not 786 | # used. If changing one, be sure to check the other to see if 787 | # it needs to be changed as well. 788 | # 789 | def end_element_handler(self, name): 790 | curNode = self.curNode 791 | if ' ' in name: 792 | uri, localname, prefix, qname = _parse_ns_name(self, name) 793 | assert (curNode.namespaceURI == uri 794 | and curNode.localName == localname 795 | and curNode.prefix == prefix), \ 796 | "element stack messed up! (namespace)" 797 | else: 798 | assert curNode.nodeName == name, \ 799 | "element stack messed up - bad nodeName" 800 | assert curNode.namespaceURI == EMPTY_NAMESPACE, \ 801 | "element stack messed up - bad namespaceURI" 802 | self.curNode = curNode.parentNode 803 | self._finish_end_element(curNode) 804 | 805 | 806 | class ExpatBuilderNS(Namespaces, ExpatBuilder): 807 | """Document builder that supports namespaces.""" 808 | 809 | def reset(self): 810 | ExpatBuilder.reset(self) 811 | self._initNamespaces() 812 | 813 | 814 | class FragmentBuilderNS(Namespaces, FragmentBuilder): 815 | """Fragment builder that supports namespaces.""" 816 | 817 | def reset(self): 818 | FragmentBuilder.reset(self) 819 | self._initNamespaces() 820 | 821 | def _getNSattrs(self): 822 | """Return string of namespace attributes from this element and 823 | ancestors.""" 824 | # XXX This needs to be re-written to walk the ancestors of the 825 | # context to build up the namespace information from 826 | # declarations, elements, and attributes found in context. 827 | # Otherwise we have to store a bunch more data on the DOM 828 | # (though that *might* be more reliable -- not clear). 
829 | attrs = "" 830 | context = self.context 831 | L = [] 832 | while context: 833 | if hasattr(context, '_ns_prefix_uri'): 834 | for prefix, uri in context._ns_prefix_uri.items(): 835 | # add every new NS decl from context to L and attrs string 836 | if prefix in L: 837 | continue 838 | L.append(prefix) 839 | if prefix: 840 | declname = "xmlns:" + prefix 841 | else: 842 | declname = "xmlns" 843 | if attrs: 844 | attrs = "%s\n %s='%s'" % (attrs, declname, uri) 845 | else: 846 | attrs = " %s='%s'" % (declname, uri) 847 | context = context.parentNode 848 | return attrs 849 | 850 | 851 | class ParseEscape(Exception): 852 | """Exception raised to short-circuit parsing in InternalSubsetExtractor.""" 853 | pass 854 | 855 | class InternalSubsetExtractor(ExpatBuilder): 856 | """XML processor which can rip out the internal document type subset.""" 857 | 858 | subset = None 859 | 860 | def getSubset(self): 861 | """Return the internal subset as a string.""" 862 | return self.subset 863 | 864 | def parseFile(self, file): 865 | try: 866 | ExpatBuilder.parseFile(self, file) 867 | except ParseEscape: 868 | pass 869 | 870 | def parseString(self, string): 871 | try: 872 | ExpatBuilder.parseString(self, string) 873 | except ParseEscape: 874 | pass 875 | 876 | def install(self, parser): 877 | parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler 878 | parser.StartElementHandler = self.start_element_handler 879 | 880 | def start_doctype_decl_handler(self, name, publicId, systemId, 881 | has_internal_subset): 882 | if has_internal_subset: 883 | parser = self.getParser() 884 | self.subset = [] 885 | parser.DefaultHandler = self.subset.append 886 | parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler 887 | else: 888 | raise ParseEscape() 889 | 890 | def end_doctype_decl_handler(self): 891 | s = ''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n') 892 | self.subset = s 893 | raise ParseEscape() 894 | 895 | def start_element_handler(self, name, attrs): 896 | 
raise ParseEscape() 897 | 898 | 899 | def parse(file, namespaces=True): 900 | """Parse a document, returning the resulting Document node. 901 | 902 | 'file' may be either a file name or an open file object. 903 | """ 904 | if namespaces: 905 | builder = ExpatBuilderNS() 906 | else: 907 | builder = ExpatBuilder() 908 | 909 | if isinstance(file, str): 910 | with open(file,"r", encoding="latin1") as fp: 911 | result = builder.parseFile(fp) 912 | else: 913 | result = builder.parseFile(file) 914 | return result 915 | 916 | 917 | def parseString(string, namespaces=True): 918 | """Parse a document from a string, returning the resulting 919 | Document node. 920 | """ 921 | if namespaces: 922 | builder = ExpatBuilderNS() 923 | else: 924 | builder = ExpatBuilder() 925 | return builder.parseString(string) 926 | 927 | 928 | def parseFragment(file, context, namespaces=True): 929 | """Parse a fragment of a document, given the context from which it 930 | was originally extracted. context should be the parent of the 931 | node(s) which are in the fragment. 932 | 933 | 'file' may be either a file name or an open file object. 934 | """ 935 | if namespaces: 936 | builder = FragmentBuilderNS(context) 937 | else: 938 | builder = FragmentBuilder(context) 939 | 940 | if isinstance(file, str): 941 | with open(file, 'rb') as fp: 942 | result = builder.parseFile(fp) 943 | else: 944 | result = builder.parseFile(file) 945 | return result 946 | 947 | 948 | def parseFragmentString(string, context, namespaces=True): 949 | """Parse a fragment of a document from a string, given the context 950 | from which it was originally extracted. context should be the 951 | parent of the node(s) which are in the fragment. 
952 | """ 953 | if namespaces: 954 | builder = FragmentBuilderNS(context) 955 | else: 956 | builder = FragmentBuilder(context) 957 | return builder.parseString(string) 958 | 959 | 960 | def makeBuilder(options): 961 | """Create a builder based on an Options object.""" 962 | if options.namespaces: 963 | return ExpatBuilderNS(options) 964 | else: 965 | return ExpatBuilder(options) 966 | -------------------------------------------------------------------------------- /minidom/expatbuilderFixed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tasox/Epimitheus/f0c3202911968021c3762e291f4d793374f7423f/minidom/expatbuilderFixed.png -------------------------------------------------------------------------------- /minidom/expatbuilderUnFixed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tasox/Epimitheus/f0c3202911968021c3762e291f4d793374f7423f/minidom/expatbuilderUnFixed.png --------------------------------------------------------------------------------