├── .gitignore ├── requirements.txt ├── fileType.py ├── CypherScripts ├── ClearConstraintsIndexes.cypher ├── CPEs.cypher ├── CAPECs_reference.cypher ├── CWEs_reference.cypher ├── ConstraintsIndexes.cypher ├── CAPECs_category.cypher ├── CWEs_category.cypher ├── CWEs_view.cypher ├── CAPECs_view.cypher ├── CAPECs_attack.cypher ├── CVEs.cypher ├── CWEs_weakness.cypher ├── CAPECs.cypher └── CWEs.cypher ├── SchemaUtil.py ├── CPEInserter.py ├── CVEInserter.py ├── Util.py ├── DatabaseUtil.py ├── main.py ├── CWEInserter.py ├── CAPECInserter.py ├── README.md ├── scraper.py └── GraphKer.svg /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | xmltodict~=0.12.0 2 | neo4j~=4.3.4 3 | requests~=2.26.0 4 | beautifulsoup4~=4.9.3 -------------------------------------------------------------------------------- /fileType.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class FileType(Enum): 4 | CPE = 1 5 | 6 | CVE = 2 7 | 8 | CWE_REFERENCE = 4 9 | CWE_WEAKNESS = 5 10 | CWE_CATEGORY = 6 11 | CWE_VIEW = 7 12 | 13 | CAPEC_REFERENCE = 8 14 | CAPEC_ATTACK = 9 15 | CAPEC_CATEGORY = 10 16 | CAPEC_VIEW = 11 -------------------------------------------------------------------------------- /CypherScripts/ClearConstraintsIndexes.cypher: -------------------------------------------------------------------------------- 1 | DROP CONSTRAINT cpe if exists; 2 | 3 | DROP CONSTRAINT cve if exists; 4 | 5 | DROP CONSTRAINT cwe if exists; 6 | 7 | DROP CONSTRAINT reference if exists; 8 | 9 | DROP CONSTRAINT cvss3 if exists; 10 | 11 | DROP CONSTRAINT cvss2 if exists; 12 | 13 | DROP CONSTRAINT externalReferencecwe if exists; 14 | 15 | DROP CONSTRAINT Consequence if exists; 16 | 17 | DROP CONSTRAINT Mitigation if exists; 18 | 19 | DROP CONSTRAINT DetectionMethod if exists; 20 | 21 | DROP CONSTRAINT capec if exists; 22 | 23 | DROP CONSTRAINT cweview if exists; 24 | 25 | DROP CONSTRAINT stakeholder if exists; 26 | 27 | DROP INDEX AppPlatformType if exists; 28 | 29 | DROP CONSTRAINT externalReferencecapec if exists; 30 | 31 | DROP CONSTRAINT capecview if exists; 32 | 33 | -------------------------------------------------------------------------------- /CypherScripts/CPEs.cypher: -------------------------------------------------------------------------------- 1 | // Insert CPEs and CPEs Children - Cypher Script 2 | UNWIND [cpeFilesToImport] AS files 3 | CALL apoc.periodic.iterate( 4 | 'CALL apoc.load.json($files) YIELD value RETURN value', 5 | ' 6 | WITH value 7 | MERGE (cpe:CPE { 8 | uri: value.cpe23Uri 9 | }) 10 | 11 | FOREACH (value_child IN value.cpe_name | 12 | MERGE (child:CPE { 13 | uri: value_child.cpe23Uri 14 | }) 15 | MERGE (cpe)-[:parentOf]->(child) 16 | ) 17 | ', 18 | {batchSize:1000, params: {files:files}} 19 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 20 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; -------------------------------------------------------------------------------- /CypherScripts/CAPECs_reference.cypher: 
-------------------------------------------------------------------------------- 1 | // Insert CAPECs Catalog - Cypher Script 2 | 3 | UNWIND [capecReferenceFilesToImport] AS files 4 | 5 | CALL apoc.periodic.iterate( 6 | 'CALL apoc.load.json($files) YIELD value AS reference RETURN reference', 7 | ' 8 | // Insert External References for CAPECs 9 | MERGE (r:External_Reference_CAPEC {Reference_ID: reference.Reference_ID}) 10 | SET r.Author = [value IN reference.Author | value], r.Title = reference.Title, 11 | r.Edition = reference.Edition, r.URL = reference.URL, 12 | r.Publication_Year = reference.Publication_Year, r.Publisher = reference.Publisher 13 | ', 14 | {batchSize:200, params: {files:files}} 15 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 16 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; -------------------------------------------------------------------------------- /CypherScripts/CWEs_reference.cypher: -------------------------------------------------------------------------------- 1 | // Insert CWEs Catalog - Cypher Script 2 | 3 | UNWIND [cweReferenceFilesToImport] AS files 4 | CALL apoc.periodic.iterate( 5 | 'CALL apoc.load.json($files) YIELD value AS reference RETURN reference', 6 | ' 7 | // Insert External References for CWEs 8 | MERGE (r:External_Reference_CWE {Reference_ID: reference.Reference_ID}) 9 | ON CREATE SET r.Author = [value IN reference.Author | value], 10 | r.Title = reference.Title, 11 | r.Edition = reference.Edition, r.URL = reference.URL, 12 | r.Publication_Year = reference.Publication_Year, r.Publisher = reference.Publisher 13 | ', 14 | {batchSize:200, params: {files:files}} 15 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 16 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; -------------------------------------------------------------------------------- /SchemaUtil.py: -------------------------------------------------------------------------------- 1 | from fileType import FileType 2 | from Util import Util 3 | 4 | class SchemaUtil: 5 | 6 | def __init__(self, driver): 7 | self.driver = driver 8 | 9 | # Clear Database 10 | def clear(self): 11 | # Clear Database from existing nodes and relationships 12 | query = """CALL apoc.periodic.iterate('MATCH (n) RETURN n', 'DETACH DELETE n', {batchSize:2000})""" 13 | session = self.driver.session() 14 | session.run(query) 15 | print("\nPrevious Data have been deleted.") 16 | 17 | self.clearSchema() 18 | print("\nDatabase is clear and ready for imports.") 19 | 20 | # Clear Schema 21 | def clearSchema(self): 22 | # Clear Database from existing constraints and indexes 23 | query = """CALL apoc.schema.assert({}, {}, true)""" 24 | session = self.driver.session() 25 | session.run(query) 26 | print("\nPrevious Schema has been deleted.") 27 | 28 | # Constraints and Indexes 29 | def schema_script(self): 30 | # Create Constraints and Indexes 31 | query = """CALL apoc.cypher.runSchemaFile("ConstraintsIndexes.cypher")""" 32 | session = self.driver.session() 33 | session.run(query) 34 | print("\nSchema with Constraints and Indexes insertion completed.") 
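A minimal usage sketch for the SchemaUtil helper above (not part of the repository; the bolt URL and credentials are placeholders, and ConstraintsIndexes.cypher is assumed to already sit in the Neo4j import directory, as the other scripts expect):

```
# Hypothetical usage sketch for SchemaUtil -- not part of the repository.
# Bolt URL and credentials are placeholders.
from neo4j import GraphDatabase
from SchemaUtil import SchemaUtil

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
schema_util = SchemaUtil(driver)
schema_util.clear()          # delete existing nodes/relationships, then drop constraints and indexes
schema_util.schema_script()  # recreate constraints and indexes from ConstraintsIndexes.cypher
driver.close()
```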
-------------------------------------------------------------------------------- /CypherScripts/ConstraintsIndexes.cypher: -------------------------------------------------------------------------------- 1 | CREATE CONSTRAINT cpe if NOT exists ON (cpe:CPE) ASSERT cpe.uri IS UNIQUE; 2 | 3 | CREATE CONSTRAINT cve if NOT exists ON (cve:CVE) ASSERT cve.Name IS UNIQUE; 4 | 5 | CREATE CONSTRAINT cwe if NOT exists ON (cwe:CWE) ASSERT cwe.Name IS UNIQUE; 6 | 7 | CREATE CONSTRAINT reference if NOT exists ON (ref:Reference_Data) ASSERT ref.url IS UNIQUE; 8 | 9 | CREATE CONSTRAINT cvss3 if NOT exists ON (cvss3:CVSS_3) ASSERT cvss3.Name IS UNIQUE; 10 | 11 | CREATE CONSTRAINT cvss2 if NOT exists ON (cvss2:CVSS_2) ASSERT cvss2.Name IS UNIQUE; 12 | 13 | CREATE CONSTRAINT externalReferencecwe if NOT exists ON (ref:External_Reference_CWE) ASSERT ref.Reference_ID IS UNIQUE; 14 | 15 | CREATE CONSTRAINT Consequence if NOT exists ON (con:Consequence) ASSERT con.Scope IS UNIQUE; 16 | 17 | CREATE CONSTRAINT Mitigation if NOT exists ON (mit:Mitigation) ASSERT mit.Description IS UNIQUE; 18 | 19 | CREATE CONSTRAINT DetectionMethod if NOT exists ON (dec:Detection_Method) ASSERT dec.Method IS UNIQUE; 20 | 21 | CREATE CONSTRAINT capec if NOT exists ON (cp:CAPEC) ASSERT cp.Name IS UNIQUE; 22 | 23 | CREATE CONSTRAINT cweview if NOT exists ON (v:CWE_VIEW) ASSERT v.ViewID IS UNIQUE; 24 | 25 | CREATE CONSTRAINT stakeholder if NOT exists ON (s:Stakeholder) ASSERT s.Type IS UNIQUE; 26 | 27 | CREATE INDEX AppPlatformType if NOT exists FOR (n:Applicable_Platform) ON (n.Type); 28 | 29 | CREATE CONSTRAINT externalReferencecapec if NOT exists ON (ref:External_Reference_CAPEC) ASSERT ref.Reference_ID IS UNIQUE; 30 | 31 | CREATE CONSTRAINT capecview if NOT exists ON (v:CAPEC_VIEW) ASSERT v.ViewID IS UNIQUE; 32 | 33 | -------------------------------------------------------------------------------- /CypherScripts/CAPECs_category.cypher: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------------ 2 | // Insert Categories for CAPECs 3 | UNWIND [capecCategoryFilesToImport] AS files 4 | 5 | CALL apoc.periodic.iterate( 6 | 'CALL apoc.load.json($files) YIELD value AS category RETURN category', 7 | ' 8 | MERGE (c:CAPEC {Name: "CAPEC-" + category.ID}) 9 | SET c.Extended_Name = category.Name, 10 | c.Status = category.Status, 11 | c.Summary = apoc.convert.toString(category.Summary), 12 | c.Notes = apoc.convert.toString(category.Notes), 13 | c.Submission_Name = category.Content_History.Submission.Submission_Name, 14 | c.Submission_Date = category.Content_History.Submission.Submission_Date, 15 | c.Submission_Organization = category.Content_History.Submission.Submission_Organization, 16 | c.Modification = [value IN category.Content_History.Modification | apoc.convert.toString(value)] 17 | 18 | // Insert Members for each Category 19 | WITH c, category 20 | FOREACH (members IN category.Relationships.Has_Member | 21 | MERGE (MemberAP:CAPEC {Name: "CAPEC-" + members.CAPEC_ID}) 22 | MERGE (c)-[:hasMember]->(MemberAP) 23 | ) 24 | 25 | WITH c, category 26 | FOREACH (categoryExReference IN category.References.Reference | 27 | MERGE (catRef:External_Reference_CAPEC {Reference_ID: categoryExReference.External_Reference_ID}) 28 | MERGE (c)-[rel:hasExternal_Reference]->(catRef) 29 | SET rel.Section = categoryExReference.Section 30 | ) 31 | ', 32 | {batchSize:200, params: {files:files}} 33 | ) YIELD 
batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 34 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; -------------------------------------------------------------------------------- /CypherScripts/CWEs_category.cypher: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------------ 2 | // Insert Categories for CWEs 3 | UNWIND [cweCategoryFilesToImport] AS files 4 | CALL apoc.periodic.iterate( 5 | 'CALL apoc.load.json($files) YIELD value AS category RETURN category', 6 | ' 7 | MERGE (c:CWE { 8 | Name: "CWE-" + category.ID 9 | }) 10 | SET c.Extended_Name = category.Name, 11 | c.Status = category.Status, 12 | c.Summary = apoc.convert.toString(category.Summary), 13 | c.Notes = apoc.convert.toString(category.Notes), 14 | c.Submission_Name = category.Content_History.Submission.Submission_Name, 15 | c.Submission_Date = category.Content_History.Submission.Submission_Date, 16 | c.Submission_Organization = category.Content_History.Submission.Submission_Organization, 17 | c.Modification = [value IN category.Content_History.Modification | apoc.convert.toString(value)] 18 | 19 | // Insert Members for each Category 20 | WITH c, category 21 | FOREACH (member IN category.Relationships.Has_Member | 22 | MERGE (MemberWeak:CWE {Name: "CWE-" + member.CWE_ID}) 23 | MERGE (c)-[:hasMember {ViewID: member.View_ID}]->(MemberWeak) 24 | ) 25 | 26 | // ------------------------------------------------------------------------ 27 | // Insert Public References for each Category 28 | WITH c, category 29 | FOREACH (categoryExReference IN category.References.Reference | 30 | MERGE (catRef:External_Reference_CWE {Reference_ID: categoryExReference.External_Reference_ID}) 31 | MERGE (c)-[:hasExternal_Reference]->(catRef) 32 | ) 33 | ', 34 | {batchSize:200, params: {files:files}} 35 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 36 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; -------------------------------------------------------------------------------- /CPEInserter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import fnmatch 4 | from neo4j import exceptions 5 | 6 | class CPEInserter: 7 | 8 | def __init__(self, driver, import_path): 9 | self.driver = driver 10 | self.import_path = import_path 11 | 12 | # Configure CPE Files and CPE Cypher Script for insertion 13 | def cpe_insertion(self): 14 | print("\nInserting CPE Files to Database...") 15 | files = self.files_to_insert_cpe() 16 | for f in files: 17 | print('Inserting ' + f) 18 | self.query_cpe_script(f) 19 | 20 | # Cypher Query to insert CPE Cypher Script 21 | def query_cpe_script(self, file): 22 | start_time = time.time() 23 | # Insert file with CPE Query Script to Database 24 | cpes_cypher_file = open(self.import_path + "CPEs.cypher", "r") 25 | query = cpes_cypher_file.read() 26 | query = query.replace('cpeFilesToImport', f"'{file}'") 27 | try: 28 | with self.driver.session() as session: 29 | session.run(query) 30 | except 
exceptions.CypherError as e: 31 | print(f"CypherError: {e}") 32 | except exceptions.DriverError as e: 33 | print(f"DriverError: {e}") 34 | except Exception as e: 35 | # Handle other exceptions 36 | print(f"An error occurred: {e}") 37 | 38 | end_time = time.time() 39 | 40 | print(f"\nCPE Files: {file} insertion completed. within {end_time - start_time}\n----------") 41 | 42 | # Define which Dataset and Cypher files will be imported on CPE Insertion 43 | def files_to_insert_cpe(self): 44 | listOfFiles = os.listdir(self.import_path + "nist/cpe/splitted/") 45 | pattern = "*.json" 46 | cpe_files = [] 47 | for entry in listOfFiles: 48 | if fnmatch.fnmatch(entry, pattern): 49 | if entry.startswith("cpe_output"): 50 | cpe_files.append("nist/cpe/splitted/" + entry) 51 | else: 52 | continue 53 | 54 | return cpe_files -------------------------------------------------------------------------------- /CVEInserter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import fnmatch 4 | from fileType import FileType 5 | from Util import Util 6 | from neo4j import exceptions 7 | 8 | class CVEInserter: 9 | 10 | def __init__(self, driver, import_path): 11 | self.driver = driver 12 | self.import_path = import_path 13 | 14 | # Configure CVE Files and CVE Cypher Script for insertion 15 | def cve_insertion(self): 16 | print("\nInserting CVE Files to Database...") 17 | files = self.files_to_insert_cve() 18 | for f in files: 19 | print('Inserting ' + f) 20 | self.query_cve_script(f) 21 | 22 | # Cypher Query to insert CVE Cypher Script 23 | def query_cve_script(self, file): 24 | start_time = time.time() 25 | cves_cypher_file = open(self.import_path + "CVEs.cypher", "r") 26 | query = cves_cypher_file.read() 27 | query = query.replace('cveFilesToImport', f"'{file}'") 28 | 29 | try: 30 | with self.driver.session() as session: 31 | session.run(query) 32 | except exceptions.CypherError as e: 33 | print(f"CypherError: {e}") 34 | except exceptions.DriverError as e: 35 | print(f"DriverError: {e}") 36 | except Exception as e: 37 | # Handle other exceptions 38 | print(f"An error occurred: {e}") 39 | 40 | end_time = time.time() 41 | 42 | print(f"\nCVE Files: { file } insertion completed within { end_time - start_time }\n----------") 43 | 44 | # Define which Dataset and Cypher files will be imported on CVE Insertion 45 | def files_to_insert_cve(self): 46 | listOfFiles = os.listdir(self.import_path + "nist/cve/splitted/") 47 | pattern = "*.json" 48 | cve_files = [] 49 | for entry in listOfFiles: 50 | if fnmatch.fnmatch(entry, pattern): 51 | if entry.startswith("cve_output"): 52 | cve_files.append("nist/cve/splitted/" + entry) 53 | else: 54 | continue 55 | 56 | return cve_files -------------------------------------------------------------------------------- /CypherScripts/CWEs_view.cypher: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------------ 2 | // Insert Views for CWEs 3 | UNWIND [cweViewFilesToImport] AS files 4 | CALL apoc.periodic.iterate( 5 | 'CALL apoc.load.json($files) YIELD value AS view RETURN view', 6 | ' 7 | MERGE (v:CWE_VIEW {ViewID: view.ID}) 8 | SET v.Name = view.Name, 9 | v.Type = view.Type, 10 | v.Status = view.Status, 11 | v.Objective = apoc.convert.toString(view.Objective), 12 | v.Filter = view.Filter, 13 | v.Notes = apoc.convert.toString(view.Notes), 14 | v.Submission_Name = view.Content_History.Submission.Submission_Name, 15 | 
v.Submission_Date = view.Content_History.Submission.Submission_Date, 16 | v.Submission_Organization = view.Content_History.Submission.Submission_Organization, 17 | v.Modification = [value IN view.Content_History.Modification | apoc.convert.toString(value)] 18 | 19 | // Insert Stakeholders for each View 20 | FOREACH (value IN view.Audience.Stakeholder | 21 | MERGE (st:Stakeholder {Type: value.Type}) 22 | MERGE (v)-[rel:usefulFor]->(st) 23 | SET rel.Description = value.Description 24 | ) 25 | 26 | // Insert Members for each View 27 | WITH v, view 28 | FOREACH (member IN view.Members.Has_Member | 29 | MERGE (MemberWeak:CWE {Name: "CWE-" + member.CWE_ID}) 30 | MERGE (v)-[:hasMember]->(MemberWeak) 31 | ) 32 | 33 | // ------------------------------------------------------------------------ 34 | // Insert Public References for each View 35 | WITH v, view 36 | FOREACH (viewExReference IN view.References.Reference | 37 | MERGE (viewRef:External_Reference_CWE {Reference_ID: viewExReference.External_Reference_ID}) 38 | MERGE (v)-[:hasExternal_Reference]->(viewRef) 39 | ) 40 | ', 41 | {batchSize:200, params: {files:files}} 42 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 43 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; -------------------------------------------------------------------------------- /CypherScripts/CAPECs_view.cypher: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------------ 2 | // Insert Views for CAPECs 3 | 4 | UNWIND [capecViewFilesToImport] AS files 5 | CALL apoc.periodic.iterate( 6 | 'CALL apoc.load.json($files) YIELD value AS view RETURN view', 7 | ' 8 | MERGE (v:CAPEC_VIEW {ViewID: view.ID}) 9 | SET v.Name = view.Name, v.Type = view.Type, v.Status = view.Status, 10 | v.Objective = apoc.convert.toString(view.Objective), v.Filter = view.Filter, 11 | v.Notes = apoc.convert.toString(view.Notes), 12 | v.Submission_Name = view.Content_History.Submission.Submission_Name, 13 | v.Submission_Date = view.Content_History.Submission.Submission_Date, 14 | v.Submission_Organization = view.Content_History.Submission.Submission_Organization, 15 | v.Modification = [value IN view.Content_History.Modification | apoc.convert.toString(value)] 16 | 17 | // Insert Stakeholders for each View 18 | FOREACH (value IN view.Audience.Stakeholder | 19 | MERGE (st:Stakeholder {Type: value.Type}) 20 | MERGE (v)-[rel:usefulFor]->(st) 21 | SET rel.Description = value.Description 22 | ) 23 | 24 | // Insert Members for each View 25 | WITH v, view 26 | FOREACH (members IN view.Members.Has_Member | 27 | MERGE (MemberAP:CAPEC {Name: "CAPEC-" + members.CAPEC_ID}) 28 | MERGE (v)-[:hasMember]->(MemberAP) 29 | ) 30 | 31 | 32 | // ------------------------------------------------------------------------ 33 | // Insert Public References for each View 34 | WITH v, view 35 | FOREACH (viewExReference IN view.References.Reference | 36 | MERGE (v:CAPEC_VIEW {ViewID: view.ID}) 37 | MERGE (viewRef:External_Reference_CAPEC {Reference_ID: viewExReference.External_Reference_ID}) 38 | MERGE (v)-[:hasExternal_Reference]->(viewRef) 39 | ) 40 | ', 41 | {batchSize:200, params: {files:files}} 42 | ) YIELD 
batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 43 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; 44 | -------------------------------------------------------------------------------- /Util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import shutil 4 | 5 | class Util: 6 | 7 | @staticmethod 8 | def replace_placeholder_with_value(line, files_by_type): 9 | for key in files_by_type.keys(): 10 | if key in line: 11 | return line.replace(key, Util.string_to_insert_from_files(files_by_type[key])) 12 | return line 13 | 14 | @staticmethod 15 | def string_to_insert_from_files(files): 16 | stringToInsert = "\"" 17 | for file in files: 18 | stringToInsert += file + "\", \"" 19 | stringToInsert = stringToInsert[:-3] 20 | return stringToInsert 21 | 22 | # Clear Import Directory 23 | def clear_directory(path): 24 | try: 25 | # List all files and directories inside the specified directory 26 | directory_contents = os.listdir(path) 27 | 28 | # Delete each file and subdirectory within the directory 29 | for item in directory_contents: 30 | item_path = os.path.join(path, item) 31 | if os.path.isfile(item_path): 32 | os.remove(item_path) 33 | elif os.path.isdir(item_path): 34 | shutil.rmtree(item_path) 35 | 36 | print(f"Contents of '{path}' have been deleted.") 37 | except FileNotFoundError: 38 | print(f"Directory not found: {path}") 39 | except Exception as e: 40 | print(f"Error occurred: {e}") 41 | 42 | # Set Import Directory 43 | def set_import_path(directory): 44 | current_os = platform.system() 45 | if (current_os == "Linux" or current_os == "Darwin"): 46 | return directory 47 | elif current_os == "Windows": 48 | return directory.replace("\\", "\\\\") + "\\\\" 49 | 50 | 51 | # Copy Cypher Script Schema Files to Import Path 52 | def copy_files_cypher_script(to_path): 53 | current_path = os.getcwd() 54 | current_os = platform.system() 55 | if (current_os == "Linux" or current_os == "Darwin"): 56 | current_path += "/CypherScripts/" 57 | elif current_os == "Windows": 58 | current_path += "\CypherScripts\\" 59 | 60 | shutil.copy2(current_path + "ConstraintsIndexes.cypher", to_path) 61 | shutil.copy2(current_path + "ClearConstraintsIndexes.cypher", to_path) -------------------------------------------------------------------------------- /DatabaseUtil.py: -------------------------------------------------------------------------------- 1 | import time 2 | from neo4j import exceptions 3 | 4 | class DatabaseUtil: 5 | 6 | def __init__(self, driver): 7 | self.driver = driver 8 | 9 | # Clear Database 10 | def clear(self): 11 | # Clear Database from existing nodes and relationships 12 | start_time = time.time() 13 | print(f"\Start cleaning Data from from existing nodes and relationships") 14 | labels = ["CPE", "CVE", "CVSS_2", "CVSS_3", "Reference_Data", "CWE", "Detection_Method", "Demonstrative_Example", "External_Reference_CWE", "CWE_VIEW", "Stakeholder", "Applicable_Platform", "Mitigation", "Consequence", "CAPEC", "External_Reference_ID", "CAPEC_VIEW"] 15 | for label in labels: 16 | print(f"Deleting {label}") 17 | query = "CALL apoc.periodic.iterate('MATCH (n:" + label + ") RETURN n', 'DETACH DELETE n', {batchSize:2000})" 18 | try: 19 | with self.driver.session() as session: 20 | session.run(query) 21 | except 
exceptions.CypherError as e: 22 | print(f"CypherError: {e}") 23 | print(f"{label} deleted successfuly") 24 | 25 | end_time = time.time() 26 | 27 | print(f"\nPrevious Data have been deleted within {end_time - start_time}") 28 | 29 | self.clearSchema() 30 | print("\nDatabase is clear and ready for imports.") 31 | 32 | # Clear Schema 33 | def clearSchema(self): 34 | # Clear Database from existing constraints and indexes 35 | print(f"\Start cleaning Data from existing constraints and indexes") 36 | start_time = time.time() 37 | query = """CALL apoc.cypher.runSchemaFile("ClearConstraintsIndexes.cypher")""" 38 | try: 39 | with self.driver.session() as session: 40 | session.run(query) 41 | except exceptions.CypherError as e: 42 | print(f"CypherError: {e}") 43 | end_time = time.time() 44 | print(f"\nPrevious Schema has been deleted {end_time - start_time}") 45 | 46 | # Constraints and Indexes 47 | def schema_script(self): 48 | # Create Constraints and Indexes 49 | print(f"\Start creating Constraints and Indexes") 50 | start_time = time.time() 51 | query = """CALL apoc.cypher.runSchemaFile("ConstraintsIndexes.cypher")""" 52 | try: 53 | with self.driver.session() as session: 54 | session.run(query) 55 | except exceptions.CypherError as e: 56 | print(f"CypherError: {e}") 57 | end_time = time.time() 58 | print(f"\nSchema with Constraints and Indexes insertion completed {end_time - start_time}") -------------------------------------------------------------------------------- /CypherScripts/CAPECs_attack.cypher: -------------------------------------------------------------------------------- 1 | // Insert CAPECs 2 | UNWIND [capecAttackFilesToImport] AS files 3 | 4 | CALL apoc.periodic.iterate( 5 | 'CALL apoc.load.json($files) YIELD value AS capec RETURN capec', 6 | ' 7 | // Insert Attack Patterns for CAPECs 8 | MERGE (cp:CAPEC { 9 | Name: "CAPEC-" + capec.ID 10 | }) 11 | SET cp.ExtendedName = capec.Name, 12 | cp.Abstraction = capec.Abstraction, 13 | cp.Status = capec.Status, 14 | cp.Description = apoc.convert.toString(capec.Description), 15 | cp.Likelihood_Of_Attack = capec.Likelihood_Of_Attack, 16 | cp.Typical_Severity = capec.Typical_Severity, 17 | cp.Alternate_Terms = [value IN capec.Alternate_Terms.Alternate_Term | value.Term], 18 | cp.Prerequisites = [value IN capec.Prerequisites.Prerequisite | apoc.convert.toString(value)], 19 | cp.Skills_Required = [value IN capec.Skills_Required.Skill | value.Level], 20 | cp.Skills_Required_Description = [value IN capec.Skills_Required.Skill | coalesce(apoc.convert.toString(value.text), " NOT SET ")], 21 | cp.Mitigations = [value IN capec.Mitigations.Mitigation | apoc.convert.toString(value)], 22 | cp.Examples = [value IN capec.Example_Instances.Example | apoc.convert.toString(value)], 23 | cp.Notes = [value IN capec.Notes.Note | apoc.convert.toString(value)], 24 | cp.Submission_Date = capec.Content_History.Submission.Submission_Date, 25 | cp.Submission_Name = capec.Content_History.Submission.Submission_Name, 26 | cp.Submission_Organization = capec.Content_History.Submission.Submission_Organization, 27 | cp.Modifications = [value IN capec.Content_History.Modification | apoc.convert.toString(value)], 28 | cp.Resources_Required = [value IN capec.Resources_Required.Resource | apoc.convert.toString(value)], 29 | cp.Indicators = [value IN capec.Indicators.Indicator | apoc.convert.toString(value)] 30 | 31 | // Consequences 32 | FOREACH (consequence IN capec.Consequences.Consequence | 33 | MERGE (con:Consequence {Scope: [value IN consequence.Scope | value]}) 34 | MERGE 
(cp)-[rel:hasConsequence]->(con) 35 | ON CREATE SET rel.Impact = [value IN consequence.Impact | value], 36 | rel.Note = consequence.Note, 37 | rel.Likelihood = consequence.Likelihood 38 | ) 39 | 40 | // Mitigations 41 | FOREACH (mit IN capec.Mitigations.Mitigation | 42 | MERGE (m:Mitigation { 43 | Description: apoc.convert.toString(mit) 44 | }) 45 | MERGE (cp)-[:hasMitigation]->(m) 46 | ) 47 | 48 | // Related Attack Patterns 49 | WITH cp, capec 50 | FOREACH (Rel_AP IN capec.Related_Attack_Patterns.Related_Attack_Pattern | 51 | MERGE (pec:CAPEC { Name: "CAPEC-" + Rel_AP.CAPEC_ID }) 52 | MERGE (cp)-[:RelatedAttackPattern {Nature: Rel_AP.Nature}]->(pec) 53 | ) 54 | 55 | // Public References for CAPECs 56 | WITH cp, capec 57 | FOREACH (ExReference IN capec.References.Reference | 58 | MERGE (Ref:External_Reference_CAPEC {Reference_ID: ExReference.External_Reference_ID}) 59 | MERGE (cp)-[rel:hasExternal_Reference {CAPEC_ID: cp.Name}]->(Ref) 60 | ) 61 | ', 62 | {batchSize:1000, params: {files:files}} 63 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 64 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import webbrowser 3 | from neo4j import GraphDatabase 4 | import scraper 5 | import time 6 | from Util import Util 7 | from CPEInserter import CPEInserter 8 | from CWEInserter import CWEInserter 9 | from CVEInserter import CVEInserter 10 | from CAPECInserter import CAPECInserter 11 | from DatabaseUtil import DatabaseUtil 12 | 13 | # Define the functions that will be running 14 | def run(url_db, username, password, directory, neo4jbrowser, graphlytic): 15 | try: 16 | start_time = time.time() 17 | 18 | import_path = Util.set_import_path(directory) 19 | 20 | Util.clear_directory(import_path) 21 | scraper.download_datasets(import_path) 22 | 23 | Util.copy_files_cypher_script(import_path) 24 | 25 | driver = GraphDatabase.driver(url_db, auth=(username, password)) 26 | 27 | cpeInserter = CPEInserter(driver, import_path) 28 | cveInserter = CVEInserter(driver, import_path) 29 | cweInserter = CWEInserter(driver, import_path) 30 | capecInserter = CAPECInserter(driver, import_path) 31 | databaseUtil = DatabaseUtil(driver) 32 | 33 | databaseUtil.clear() 34 | databaseUtil.schema_script() 35 | cpeInserter.cpe_insertion() 36 | capecInserter.capec_insertion() 37 | cveInserter.cve_insertion() 38 | cweInserter.cwe_insertion() 39 | 40 | driver.close() 41 | 42 | end_time = time.time() 43 | 44 | execution_time = end_time - start_time 45 | print(f"Import finished in: {execution_time:.6f} seconds") 46 | 47 | except Exception as e: 48 | print(f"Error occurred: {e}") 49 | driver.close() 50 | 51 | if neo4jbrowser: 52 | webbrowser.open("http://localhost:7474") 53 | if graphlytic: 54 | webbrowser.open("http://localhost:8110/") 55 | return 56 | 57 | 58 | def main(): 59 | # Initialize the parser 60 | parser = argparse.ArgumentParser( 61 | description=" +-+-+-+-+-+-+-+-+ \n |G|r|a|p|h|K|e|r| \n +-+-+-+-+-+-+-+-+" 62 | "\n \nWith GraphKer you can have the most recent update of cyber-security vulnerabilities, weaknesses, attack patterns and platforms " 63 | "from MITRE and NIST, in an very useful and user 
friendly way provided by neo4j graph databases! \n \n" 64 | "--Search, Export Data and Analytics, Enrich your Skills-- \n \n" 65 | "**Created by Adamantios - Marios Berzovitis, Cybersecurity Expert MSc, BSc** \n" 66 | "Diploma Research - MSc @ Distributed Systems, Security and Emerging Information Technologies | University Of Piraeus \n" 67 | "Co-Working with Cyber Security Research Lab | University Of Piraeus \n" 68 | "LinkedIn:https://tinyurl.com/p57w4ntu \n" 69 | "Github:https://github.com/amberzovitis \n \n" 70 | "Enjoy! Provide Feedback!", formatter_class=argparse.RawTextHelpFormatter 71 | ) 72 | 73 | # Add Parameters 74 | parser.add_argument('-u', '--urldb', required=True, 75 | help="Insert bolt url of your neo4j graph database.") 76 | parser.add_argument('-n', '--username', required=True, 77 | help="Insert username of your graph database.") 78 | parser.add_argument('-p', '--password', required=True, 79 | help="Insert password of your graph database.") 80 | parser.add_argument('-d', '--directory', required=True, 81 | help="Insert import path of your graph database.") 82 | parser.add_argument('-b', '--neo4jbrowser', choices=['y', 'Y'], 83 | help="Press y or Y to open neo4jbrowser after the insertion of elements in your graph database.") 84 | parser.add_argument('-g', '--graphlytic', choices=['y', 'Y'], 85 | help="Press y or Y to open Graphlytic app after the insertion of elements in your graph database.") 86 | 87 | args = parser.parse_args() 88 | if args.neo4jbrowser == "y" or args.neo4jbrowser == "Y": 89 | neo4jbrowser_open = True 90 | else: 91 | neo4jbrowser_open = False 92 | if args.graphlytic == "y" or args.graphlytic == "Y": 93 | graphlytic_open = True 94 | else: 95 | graphlytic_open = False 96 | run(args.urldb, args.username, args.password, 97 | args.directory, neo4jbrowser_open, graphlytic_open) 98 | return 99 | 100 | 101 | if __name__ == '__main__': 102 | main() -------------------------------------------------------------------------------- /CypherScripts/CVEs.cypher: -------------------------------------------------------------------------------- 1 | // Insert CVEs - Cypher Script 2 | UNWIND [cveFilesToImport] AS files 3 | CALL apoc.periodic.iterate( 4 | 'CALL apoc.load.json($files) YIELD value AS item RETURN item', 5 | ' 6 | MERGE (a:CVE { 7 | Name: item.cve.CVE_data_meta.ID 8 | }) 9 | ON CREATE SET a.Assigner = item.cve.CVE_data_meta.ASSIGNER, 10 | a.Description = [desc IN item.cve.description.description_data WHERE desc.lang = "en" | desc.value], 11 | a.Published_Date = item.publishedDate, 12 | a.Last_Modified_Date = item.lastModifiedDate 13 | 14 | // In which CPE is applicable 15 | FOREACH (node IN item.configurations.nodes | 16 | FOREACH (child IN node.children | 17 | FOREACH (cpe_value IN child.cpe_match | 18 | MERGE (cpe:CPE { 19 | uri: cpe_value.cpe23Uri 20 | }) 21 | MERGE (a)-[:applicableIn {Vulnerable: cpe_value.vulnerable}]->(cpe) 22 | ) 23 | ) 24 | ) 25 | 26 | // To which CWE belongs 27 | FOREACH (problemtype_data IN item.cve.problemtype.problemtype_data | 28 | FOREACH (CWE IN problemtype_data.description | 29 | MERGE (c:CWE { 30 | Name: CWE.value 31 | }) 32 | ON CREATE SET c.Language = CWE.lang 33 | MERGE (a)-[:Problem_Type]->(c) 34 | ) 35 | ) 36 | 37 | // CVSS3 38 | MERGE (p:CVSS_3 { 39 | Name: item.cve.CVE_data_meta.ID + "_CVSS3" 40 | }) 41 | ON CREATE SET p.Version = item.impact.baseMetricV3.cvssV3.version, p.Vector_String = item.impact.baseMetricV3. 
42 | cvssV3.vectorString, 43 | p.Attack_Vector = item.impact.baseMetricV3.cvssV3.attackVector, p.Attack_Complexity = item.impact.baseMetricV3. 44 | cvssV3.attackComplexity, 45 | p.Privileges_Required = item.impact.baseMetricV3.cvssV3.privilegesRequired, p.User_Interaction = item.impact. 46 | baseMetricV3.cvssV3.userInteraction, 47 | p.Scope = item.impact.baseMetricV3.cvssV3.scope, p.Confidentiality_Impact = item.impact.baseMetricV3.cvssV3. 48 | confidentialityImpact, 49 | p.Integrity_Impact = item.impact.baseMetricV3.cvssV3.integrityImpact, p.Availability_Impact = item.impact. 50 | baseMetricV3.cvssV3.availabilityImpact, 51 | p.Base_Score = item.impact.baseMetricV3.cvssV3.baseScore, p.Base_Severity = item.impact.baseMetricV3.cvssV3. 52 | baseSeverity, 53 | p.Exploitability_Score = item.cve.impact.baseMetricV3.exploitabilityScore, 54 | p.Impact_Score = item.cve.impact.baseMetricV3.impactScore 55 | MERGE (a)-[:CVSS3_Impact]->(p) 56 | 57 | // CVSS2 58 | MERGE (l:CVSS_2 { 59 | Name: item.cve.CVE_data_meta.ID + "_CVSS2" 60 | }) 61 | ON CREATE SET l.Version = item.impact.baseMetricV2.cvssV2.version, l.Vector_String = item.impact.baseMetricV2. 62 | cvssV2.vectorString, 63 | l.Access_Vector = item.impact.baseMetricV2.cvssV2.accessVector, l.Access_Complexity = item.impact.baseMetricV2. 64 | cvssV2.accessComplexity, 65 | l.Authentication = item.impact.baseMetricV2.cvssV2.authentication, 66 | l.Confidentiality_Impact = item.impact.baseMetricV2.cvssV2.confidentialityImpact, 67 | l.Integrity_Impact = item.impact.baseMetricV2.cvssV2.integrityImpact, 68 | l.Availability_Impact = item.impact.baseMetricV2.cvssV2.availabilityImpact, 69 | l.Base_Score = item.impact.baseMetricV2.cvssV2.baseScore, 70 | l.Exploitability_Score = item.cve.impact.baseMetricV2.exploitabilityScore, 71 | l.Severity = item.cve.impact.baseMetricV2.severity, l.Impact_Score = item.cve.impact.baseMetricV2.impactScore, 72 | l.acInsufInfo = item.cve.impact.baseMetricV2.acInsufInfo, 73 | l.Obtain_All_Privileges = item.cve.impact.baseMetricV2.obtainAllPrivileges, 74 | l.Obtain_User_Privileges = item.cve.impact.baseMetricV2.obtainUserPrivileges, 75 | l.Obtain_Other_Privileges = item.cve.impact.baseMetricV2.obtainOtherPrivileges, 76 | l.User_Interaction_Required = item.cve.impact.baseMetricV2.userInteractionRequired 77 | MERGE (a)-[:CVSS2_Impact]->(l) 78 | 79 | // Public References 80 | FOREACH (reference_data IN item.cve.references.reference_data | 81 | MERGE (r:Reference_Data { 82 | url: reference_data.url 83 | }) 84 | ON CREATE SET r.Name = reference_data.name, r.refSource = reference_data.refsource 85 | MERGE (a)-[:referencedBy]->(r) 86 | ) 87 | ', 88 | {batchSize:200, params: {files: files}} 89 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 90 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; -------------------------------------------------------------------------------- /CWEInserter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fnmatch 3 | from neo4j import exceptions 4 | 5 | class CWEInserter: 6 | 7 | def __init__(self, driver, import_path): 8 | self.driver = driver 9 | self.import_path = import_path 10 | 11 | # Cypher Query to insert CWE reference Cypher Script 12 | def query_cwe_reference_script(self, file): 13 | cwes_cypher_file = 
open(self.import_path + "CWEs_reference.cypher", "r") 14 | query = cwes_cypher_file.read() 15 | query = query.replace('cweReferenceFilesToImport', f"'{file}'") 16 | 17 | try: 18 | with self.driver.session() as session: 19 | session.run(query) 20 | except exceptions.CypherError as e: 21 | print(f"CypherError: {e}") 22 | except exceptions.DriverError as e: 23 | print(f"DriverError: {e}") 24 | except Exception as e: 25 | # Handle other exceptions 26 | print(f"An error occurred: {e}") 27 | 28 | print("\nCWE Files: " + file + " insertion completed. \n----------") 29 | 30 | # Cypher Query to insert CWE weakness Cypher Script 31 | def query_cwe_weakness_script(self, file): 32 | cwes_cypher_file = open(self.import_path + "CWEs_weakness.cypher", "r") 33 | query = cwes_cypher_file.read() 34 | query = query.replace('cweWeaknessFilesToImport', f"'{file}'") 35 | 36 | try: 37 | with self.driver.session() as session: 38 | session.run(query) 39 | except exceptions.CypherError as e: 40 | print(f"CypherError: {e}") 41 | except exceptions.DriverError as e: 42 | print(f"DriverError: {e}") 43 | except Exception as e: 44 | # Handle other exceptions 45 | print(f"An error occurred: {e}") 46 | 47 | print("\nCWE Files: " + file + " insertion completed. \n----------") 48 | 49 | # Cypher Query to insert CWE category Cypher Script 50 | def query_cwe_category_script(self, file): 51 | cwes_cypher_file = open(self.import_path + "CWEs_category.cypher", "r") 52 | query = cwes_cypher_file.read() 53 | query = query.replace('cweCategoryFilesToImport', f"'{file}'") 54 | 55 | try: 56 | with self.driver.session() as session: 57 | session.run(query) 58 | except exceptions.CypherError as e: 59 | print(f"CypherError: {e}") 60 | except exceptions.DriverError as e: 61 | print(f"DriverError: {e}") 62 | except Exception as e: 63 | # Handle other exceptions 64 | print(f"An error occurred: {e}") 65 | 66 | print("\nCWE Files: " + file + " insertion completed. \n----------") 67 | 68 | # Cypher Query to insert CWE view Cypher Script 69 | def query_cwe_view_script(self, file): 70 | cwes_cypher_file = open(self.import_path + "CWEs_view.cypher", "r") 71 | query = cwes_cypher_file.read() 72 | query = query.replace('cweViewFilesToImport', f"'{file}'") 73 | 74 | try: 75 | with self.driver.session() as session: 76 | session.run(query) 77 | except exceptions.CypherError as e: 78 | print(f"CypherError: {e}") 79 | except exceptions.DriverError as e: 80 | print(f"DriverError: {e}") 81 | except Exception as e: 82 | # Handle other exceptions 83 | print(f"An error occurred: {e}") 84 | 85 | print("\nCWE Files: " + file + " insertion completed. 
\n----------") 86 | 87 | # Configure CWE Files and CWE Cypher Script for insertion 88 | def cwe_insertion(self): 89 | print("\nInserting CWE Files to Database...") 90 | files = self.files_to_insert_cwe_reference() 91 | for f in files: 92 | print('Inserting ' + f) 93 | self.query_cwe_reference_script(f) 94 | 95 | files = self.files_to_insert_cwe_weakness() 96 | for f in files: 97 | print('Inserting ' + f) 98 | self.query_cwe_weakness_script(f) 99 | 100 | files = self.files_to_insert_cwe_category() 101 | for f in files: 102 | print('Inserting ' + f) 103 | self.query_cwe_category_script(f) 104 | 105 | files = self.files_to_insert_cwe_view() 106 | for f in files: 107 | print('Inserting ' + f) 108 | self.query_cwe_view_script(f) 109 | 110 | # Define which Dataset and Cypher files will be imported on CWE reference Insertion 111 | def files_to_insert_cwe_reference(self): 112 | listOfFiles = os.listdir(self.import_path + "mitre_cwe/splitted/") 113 | pattern = "*.json" 114 | 115 | reference_files = [] 116 | 117 | for entry in listOfFiles: 118 | if fnmatch.fnmatch(entry, pattern): 119 | if entry.startswith("cwe_reference"): 120 | reference_files.append("mitre_cwe/splitted/" + entry) 121 | else: 122 | continue 123 | 124 | return reference_files 125 | 126 | # Define which Dataset and Cypher files will be imported on CWE weakness Insertion 127 | def files_to_insert_cwe_weakness(self): 128 | listOfFiles = os.listdir(self.import_path + "mitre_cwe/splitted/") 129 | pattern = "*.json" 130 | weakness_files = [] 131 | for entry in listOfFiles: 132 | if fnmatch.fnmatch(entry, pattern): 133 | if entry.startswith("cwe_weakness"): 134 | weakness_files.append("mitre_cwe/splitted/" + entry) 135 | else: 136 | continue 137 | 138 | return weakness_files 139 | 140 | 141 | # Define which Dataset and Cypher files will be imported on CWE category Insertion 142 | def files_to_insert_cwe_category(self): 143 | listOfFiles = os.listdir(self.import_path + "mitre_cwe/splitted/") 144 | pattern = "*.json" 145 | category_files = [] 146 | for entry in listOfFiles: 147 | if fnmatch.fnmatch(entry, pattern): 148 | if entry.startswith("cwe_category"): 149 | category_files.append("mitre_cwe/splitted/" + entry) 150 | else: 151 | continue 152 | 153 | return category_files 154 | 155 | 156 | # Define which Dataset and Cypher files will be imported on CWE view Insertion 157 | def files_to_insert_cwe_view(self): 158 | listOfFiles = os.listdir(self.import_path + "mitre_cwe/splitted/") 159 | pattern = "*.json" 160 | view_files = [] 161 | for entry in listOfFiles: 162 | if fnmatch.fnmatch(entry, pattern): 163 | if entry.startswith("cwe_view"): 164 | view_files.append("mitre_cwe/splitted/" + entry) 165 | else: 166 | continue 167 | 168 | return view_files -------------------------------------------------------------------------------- /CAPECInserter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fnmatch 3 | from neo4j import exceptions 4 | 5 | class CAPECInserter: 6 | 7 | def __init__(self, driver, import_path): 8 | self.driver = driver 9 | self.import_path = import_path 10 | 11 | # Cypher Query to insert CAPEC refrence Cypher Script 12 | def query_capec_reference_script(self, file): 13 | capecs_cypher_file = open(self.import_path + "CAPECs_reference.cypher", "r") 14 | query = capecs_cypher_file.read() 15 | query = query.replace('capecReferenceFilesToImport', f"'{file}'") 16 | try: 17 | with self.driver.session() as session: 18 | session.run(query) 19 | except exceptions.CypherError 
as e: 20 | print(f"CypherError: {e}") 21 | except exceptions.DriverError as e: 22 | print(f"DriverError: {e}") 23 | except Exception as e: 24 | # Handle other exceptions 25 | print(f"An error occurred: {e}") 26 | 27 | print("\nCAPEC Files: " + file + " insertion completed. \n----------") 28 | 29 | # Cypher Query to insert CAPEC attack Cypher Script 30 | def query_capec_attack_script(self, file): 31 | capecs_cypher_file = open(self.import_path + "CAPECs_attack.cypher", "r") 32 | query = capecs_cypher_file.read() 33 | 34 | query = query.replace('capecAttackFilesToImport', f"'{file}'") 35 | try: 36 | with self.driver.session() as session: 37 | session.run(query) 38 | except exceptions.CypherError as e: 39 | print(f"CypherError: {e}") 40 | except exceptions.DriverError as e: 41 | print(f"DriverError: {e}") 42 | except Exception as e: 43 | # Handle other exceptions 44 | print(f"An error occurred: {e}") 45 | 46 | 47 | print("\nCAPEC Files: " + file + " insertion completed. \n----------") 48 | 49 | # Cypher Query to insert CAPEC category Cypher Script 50 | def query_capec_category_script(self, file): 51 | capecs_cypher_file = open(self.import_path + "CAPECs_category.cypher", "r") 52 | query = capecs_cypher_file.read() 53 | query = query.replace('capecCategoryFilesToImport', f"'{file}'") 54 | 55 | try: 56 | with self.driver.session() as session: 57 | session.run(query) 58 | except exceptions.CypherError as e: 59 | print(f"CypherError: {e}") 60 | except exceptions.DriverError as e: 61 | print(f"DriverError: {e}") 62 | except Exception as e: 63 | # Handle other exceptions 64 | print(f"An error occurred: {e}") 65 | 66 | 67 | print("\nCAPEC Files: " + file + " insertion completed. \n----------") 68 | 69 | # Cypher Query to insert CAPEC view Cypher Script 70 | def query_capec_view_script(self, file): 71 | capecs_cypher_file = open(self.import_path + "CAPECs_view.cypher", "r") 72 | query = capecs_cypher_file.read() 73 | query = query.replace('capecViewFilesToImport', f"'{file}'") 74 | 75 | try: 76 | with self.driver.session() as session: 77 | session.run(query) 78 | except exceptions.CypherError as e: 79 | print(f"CypherError: {e}") 80 | except exceptions.DriverError as e: 81 | print(f"DriverError: {e}") 82 | except Exception as e: 83 | # Handle other exceptions 84 | print(f"An error occurred: {e}") 85 | 86 | 87 | print("\nCAPEC Files: " + file + " insertion completed. 
\n----------") 88 | 89 | # Configure CAPEC Files and CAPEC Cypher Script for insertion 90 | def capec_insertion(self): 91 | print("\nInserting CAPEC Files to Database...") 92 | files = self.files_to_insert_capec_reference() 93 | for f in files: 94 | print('Inserting ' + f) 95 | self.query_capec_reference_script(f) 96 | 97 | files = self.files_to_insert_capec_attack() 98 | for f in files: 99 | print('Inserting ' + f) 100 | self.query_capec_attack_script(f) 101 | 102 | files = self.files_to_insert_capec_category() 103 | for f in files: 104 | print('Inserting ' + f) 105 | self.query_capec_category_script(f) 106 | 107 | files = self.files_to_insert_capec_view() 108 | for f in files: 109 | print('Inserting ' + f) 110 | self.query_capec_view_script(f) 111 | 112 | # Define which Dataset and Cypher files will be imported on CAPEC refrence Insertion 113 | def files_to_insert_capec_reference(self): 114 | listOfFiles = os.listdir(self.import_path + "mitre_capec/splitted/") 115 | pattern = "*.json" 116 | reference_files = [] 117 | for entry in listOfFiles: 118 | if fnmatch.fnmatch(entry, pattern): 119 | if entry.startswith("capec_reference"): 120 | reference_files.append("mitre_capec/splitted/" + entry) 121 | else: 122 | continue 123 | 124 | return reference_files 125 | 126 | # Define which Dataset and Cypher files will be imported on CAPEC attack Insertion 127 | def files_to_insert_capec_attack(self): 128 | listOfFiles = os.listdir(self.import_path + "mitre_capec/splitted/") 129 | pattern = "*.json" 130 | attack_pattern_files = [] 131 | for entry in listOfFiles: 132 | if fnmatch.fnmatch(entry, pattern): 133 | if entry.startswith("capec_attack_pattern"): 134 | attack_pattern_files.append("mitre_capec/splitted/" + entry) 135 | else: 136 | continue 137 | 138 | return attack_pattern_files 139 | 140 | # Define which Dataset and Cypher files will be imported on CAPEC category Insertion 141 | def files_to_insert_capec_category(self): 142 | listOfFiles = os.listdir(self.import_path + "mitre_capec/splitted/") 143 | pattern = "*.json" 144 | category_files = [] 145 | for entry in listOfFiles: 146 | if fnmatch.fnmatch(entry, pattern): 147 | if entry.startswith("capec_category"): 148 | category_files.append("mitre_capec/splitted/" + entry) 149 | else: 150 | continue 151 | 152 | return category_files 153 | 154 | # Define which Dataset and Cypher files will be imported on CAPEC view Insertion 155 | def files_to_insert_capec_view(self): 156 | listOfFiles = os.listdir(self.import_path + "mitre_capec/splitted/") 157 | pattern = "*.json" 158 | view_files = [] 159 | for entry in listOfFiles: 160 | if fnmatch.fnmatch(entry, pattern): 161 | if entry.startswith("capec_view"): 162 | view_files.append("mitre_capec/splitted/" + entry) 163 | else: 164 | continue 165 | 166 | return view_files -------------------------------------------------------------------------------- /CypherScripts/CWEs_weakness.cypher: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------------ 2 | // Insert Weaknesses for CWEs 3 | UNWIND [cweWeaknessFilesToImport] AS files 4 | CALL apoc.periodic.iterate( 5 | 'CALL apoc.load.json($files) YIELD value AS weakness RETURN weakness', 6 | ' 7 | // Insert CWEs 8 | MERGE (w:CWE { 9 | Name: "CWE-" + weakness.ID 10 | }) 11 | SET w.Extended_Name = weakness.Name, 12 | w.Abstraction = weakness.Abstraction, 13 | w.Structure = weakness.Structure, 14 | w.Status = weakness.Status, 15 | w.Description = weakness.Description, 
16 | w.Extended_Description = CASE apoc.meta.type(weakness.Extended_Description) 17 | WHEN "STRING" THEN apoc.convert.toString(weakness.Extended_Description) 18 | WHEN "MAP" THEN apoc.convert.toString(weakness.Extended_Description.`xhtml:p`) 19 | ELSE null 20 | END, 21 | w.Likelihood_Of_Exploit = weakness.Likelihood_Of_Exploit, 22 | w.Background_Details = apoc.convert.toString(weakness.Background_Details.Background_Detail), 23 | w.Modes_Of_Introduction = [value IN weakness.Modes_Of_Introduction.Introduction | value.Phase], 24 | w.Submission_Date = weakness.Content_History.Submission.Submission_Date, 25 | w.Submission_Name = weakness.Content_History.Submission.Submission_Name, 26 | w.Submission_Organization = weakness.Content_History.Submission.Submission_Organization, 27 | w.Modifications = [value IN weakness.Content_History.Modification | apoc.convert.toString(value)], 28 | w.Alternate_Terms = apoc.convert.toString(weakness.Alternate_Terms), 29 | w.Notes = [value IN weakness.Notes.Note | apoc.convert.toString(value)], 30 | w.Affected_Resources = [value IN weakness.Affected_Resources.Affected_Resource | value], 31 | w.Functional_Areas = [value IN weakness.Functional_Areas.Functional_Area | value] 32 | 33 | // Insert Related Weaknesses CWE --> CWE 34 | WITH w, weakness 35 | FOREACH (Rel_Weakness IN weakness.Related_Weaknesses.Related_Weakness | 36 | MERGE (cwe:CWE {Name: "CWE-" + Rel_Weakness.CWE_ID}) 37 | MERGE (w)-[:Related_Weakness {Nature: Rel_Weakness.Nature}]->(cwe) 38 | ) 39 | 40 | // Insert Applicable Platforms for CWEs 41 | WITH w, weakness 42 | FOREACH (lg IN weakness.Applicable_Platforms.Language | 43 | MERGE (ap:Applicable_Platform {Type: "Language", Prevalence: lg.Prevalence, 44 | Name: coalesce(lg.Name, " NOT SET "), Class: coalesce(lg.Class, " NOT SET ")}) 45 | MERGE (w)-[:Applicable_Platform]->(ap) 46 | ) 47 | 48 | WITH w, weakness 49 | FOREACH (tch IN weakness.Applicable_Platforms.Technology | 50 | MERGE (ap:Applicable_Platform {Type: "Technology", Prevalence: tch.Prevalence, 51 | Name: coalesce(tch.Name, " NOT SET "), Class: coalesce(tch.Class, " NOT SET ")}) 52 | MERGE (w)-[:Applicable_Platform]->(ap) 53 | ) 54 | 55 | WITH w, weakness 56 | FOREACH (arc IN weakness.Applicable_Platforms.Architecture | 57 | MERGE (ap:Applicable_Platform {Type: "Architecture", Prevalence: arc.Prevalence, 58 | Name: coalesce(arc.Name, " NOT SET "), Class: coalesce(arc.Class, " NOT SET ")}) 59 | MERGE (w)-[:Applicable_Platform]->(ap) 60 | ) 61 | 62 | WITH w, weakness 63 | FOREACH (os IN weakness.Applicable_Platforms.Operating_System | 64 | MERGE (ap:Applicable_Platform {Type: "Operating System", Prevalence: os.Prevalence, 65 | Name: coalesce(os.Name, " NOT SET "), Class: coalesce(os.Class, " NOT SET ")}) 66 | MERGE (w)-[:Applicable_Platform]->(ap) 67 | ) 68 | 69 | // Insert Demonstrative Examples for CWEs 70 | WITH w, weakness 71 | FOREACH (example IN weakness.Demonstrative_Examples.Demonstrative_Example | 72 | MERGE (ex:Demonstrative_Example { 73 | Intro_Text: apoc.convert.toString(example.Intro_Text) 74 | }) 75 | MERGE (w)-[r:hasExample]->(ex) 76 | SET r.Body_Text = [value IN example.Body_Text | apoc.convert.toString(value)], 77 | r.Example_Code = [value IN example.Example_Code | apoc.convert.toString(value)] 78 | ) 79 | 80 | // Insert Consequences for CWEs 81 | WITH w, weakness 82 | FOREACH (consequence IN weakness.Common_Consequences.Consequence | 83 | MERGE (con:Consequence {Scope: [value IN consequence.Scope | value]}) 84 | MERGE (w)-[rel:hasConsequence]->(con) 85 | SET rel.Impact = 
[value IN consequence.Impact | value], 86 | rel.Note = consequence.Note, rel.Likelihood = consequence.Likelihood 87 | ) 88 | 89 | // Insert Detection Methods for CWEs 90 | WITH w, weakness 91 | FOREACH (dec IN weakness.Detection_Methods.Detection_Method | 92 | MERGE (d:Detection_Method { 93 | Method: dec.Method 94 | }) 95 | MERGE (w)-[wd:canBeDetected]->(d) 96 | SET wd.Description = CASE apoc.meta.type(dec.Description) 97 | WHEN "STRING" THEN apoc.convert.toString(dec.Description) 98 | WHEN "MAP" THEN apoc.convert.toString(dec.Description.`xhtml:p`) 99 | ELSE null 100 | END 101 | SET wd.Effectiveness = dec.Effectiveness, 102 | wd.Effectiveness_Notes = CASE apoc.meta.type(dec.Effectiveness_Notes) 103 | WHEN "STRING" THEN apoc.convert.toString(dec.Effectiveness_Notes) 104 | WHEN "MAP" THEN apoc.convert.toString(dec.Effectiveness_Notes.`xhtml:p`) 105 | ELSE null 106 | END, 107 | wd.Detection_Method_ID = dec.Detection_Method_ID 108 | ) 109 | 110 | // Insert Potential Mitigations for CWEs 111 | WITH w, weakness 112 | FOREACH (mit IN weakness.Potential_Mitigations.Mitigation | 113 | MERGE (m:Mitigation {Description: apoc.convert.toString(mit.Description)}) 114 | SET m.Phase = [value IN mit.Phase | value], 115 | m.Strategy = mit.Strategy, 116 | m.Effectiveness = mit.Effectiveness, 117 | m.Effectiveness_Notes = CASE apoc.meta.type(mit.Effectiveness_Notes) 118 | WHEN "STRING" THEN apoc.convert.toString(mit.Effectiveness_Notes) 119 | WHEN "MAP" THEN apoc.convert.toString(mit.Effectiveness_Notes.`xhtml:p`) 120 | ELSE null 121 | END, 122 | m.Mitigation_ID = mit.Mitigation_ID 123 | MERGE (w)-[:hasMitigation]->(m) 124 | ) 125 | 126 | // Insert Related Attack Patterns - CAPEC for CWEs 127 | WITH w, weakness 128 | FOREACH (rap IN weakness.Related_Attack_Patterns.Related_Attack_Pattern | 129 | MERGE (cp:CAPEC { 130 | Name: "CAPEC-" + rap.CAPEC_ID 131 | }) 132 | MERGE (w)-[:RelatedAttackPattern]->(cp) 133 | ) 134 | 135 | // Public References for CWEs 136 | WITH w, weakness 137 | FOREACH (exReference IN weakness.References.Reference | 138 | MERGE (ref:External_Reference_CWE {Reference_ID: exReference.External_Reference_ID}) 139 | MERGE (w)-[:hasExternal_Reference]->(ref) 140 | ) 141 | ', 142 | {batchSize:200, params: {files:files}} 143 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 144 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GraphKer 2 | Open Source Tool - Cybersecurity Graph Database in Neo4j 3 | 4 | 5 | **|G|r|a|p|h|K|e|r|** 6 | 7 | { open source tool for a cybersecurity graph database in neo4j } 8 | 9 | With GraphKer you can have the most recent update of cyber-security vulnerabilities, weaknesses, attack patterns and platforms from MITRE and NIST, in an very useful and user friendly way provided by Neo4j graph databases! 10 | 11 | # **Data model** 12 | 13 | ![GraphKer.svg](GraphKer.svg) 14 | 15 | # **Prerequisites** 16 | 17 | _3 + 1 Steps to run GraphKer Tool_ 18 | 19 | ### **1) Download and Install Neo4j Desktop** 20 | - Windows Users: https://neo4j.com/download/ 21 | 22 | Create an account to get the license (totally free), download and install Neo4j Desktop. 
23 | 24 | Useful Video: https://tinyurl.com/yjjbn8jx 25 | - Linux Users: 26 | 27 | ``` 28 | sudo apt update 29 | sudo apt install apt-transport-https ca-certificates curl software-properties-common 30 | curl -fsSL https://debian.neo4j.com/neotechnology.gpg.key | sudo apt-key add - 31 | sudo add-apt-repository "deb https://debian.neo4j.com stable 4.1" 32 | sudo apt install neo4j 33 | sudo systemctl enable neo4j.service 34 | sudo systemctl status neo4j.service 35 | ``` 36 | 37 | You should have output that is similar to the following: 38 | ``` 39 | ● neo4j.service - Neo4j Graph Database 40 | Loaded: loaded (/lib/systemd/system/neo4j.service; enabled; vendor preset: enabled) 41 | Active: active (running) since Fri 2020-08-07 01:43:00 UTC; 6min ago 42 | Main PID: 21915 (java) 43 | Tasks: 45 (limit: 1137) 44 | Memory: 259.3M 45 | CGroup: /system.slice/neo4j.service 46 | . . . 47 | ``` 48 | Useful Video: https://tinyurl.com/vvpjf3dr 49 | 50 | ### **2) Create and Configure the Database** 51 | - Create Database: 52 | - Windows Users: 53 | 54 | You can create databases in whichever version you want (the latest version is preferable), through the GUI or the Neo4j Terminal. 55 | - Create a new database in GUI: Just click the (+), set DB Name, Username and Password. Useful Tutorial: https://www.sqlshack.com/getting-started-with-the-neo4j-graph-database/ 56 | - Through Neo4j Shell: https://neo4j.com/docs/cypher-manual/current/databases/ 57 | - Linux Users: After starting neo4j through systemctl, type ``` cypher-shell ```, then ``` create database NAME; ```. Now set this database as the default, so that it starts automatically with neo4j: go to /etc/neo4j/neo4j.conf, uncomment ``` dbms.default_database=neo4j ``` and change it to your new database name. Restart the neo4j service and you are ready. 58 | - Configure Database: 59 | - Install APOC Plugin: 60 | - Windows Users: In Neo4j Desktop Main Page --> Choose your Database --> Click Plugins --> APOC --> Install 61 | - Linux Users: 62 | - Download APOC jar File: https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases (*-*-all.jar file) 63 | - Place it in Plugins Folder --> check every folder path in Neo4j: https://neo4j.com/docs/operations-manual/current/configuration/file-locations/ 64 | - Modify the Database Configuration File to allow APOC procedures. 65 | 66 | Uncomment: ``` dbms.directories.plugins=plugins ``` 67 | 68 | Uncomment and Modify: 69 | ``` 70 | dbms.security.procedures.unrestricted=apoc.* 71 | dbms.security.procedures.whitelist=apoc.*,apoc.coll.*,apoc.load.* 72 | #loads unrestricted and white-listed procedures/plugins to the server 73 | ``` 74 | 75 | Restart Neo4j: ```systemctl restart neo4j``` 76 | - Configure Database Settings File: 77 | - Windows Users: In Neo4j Desktop Main Page --> Choose your Database --> ...
(Three Dots) --> Settings --> Go to last line and set the commands below --> Apply and Restart the Database 78 | 79 | ``` 80 | apoc.export.file.enabled=true 81 | apoc.import.file.enabled=true 82 | apoc.import.file.user_neo4j_config=false 83 | cypher.lenient_create_relationship = true 84 | ``` 85 | 86 | - Linux Users: Same as above, in the neo4j.conf file --> check every folder path in Neo4j: https://neo4j.com/docs/operations-manual/current/configuration/file-locations/ 87 | 88 | - Configure Memory Usage: 89 | 90 | In Neo4j Configuration File (neo4j.conf): 91 | You can run GraphKer with only 1G of heap and 512M of page cache: 92 | ``` 93 | dbms.memory.heap.initial_size=512M 94 | dbms.memory.heap.max_size=1G 95 | dbms.memory.pagecache.size=512M 96 | ``` 97 | 98 | ### **3) Install requirements.txt** 99 | - GraphKer Uses: xmltodict, neo4j, requests, beautifulsoup4 100 | - ``` pip install -r requirements.txt ``` 101 | 102 | ### **4) Install Applications Created for Neo4j** 103 | - There are several applications created especially for Neo4j that you can use for a better experience and workflow. 104 | - Neo4j Bloom: Application for better graph presentations. Free and Easy to use. 105 | - Graphlytic: Third-Party App with better graph presentations and, most importantly, automated analytics and statistics. Free and Paid Editions; the free edition covers most local use. Learn More: https://graphlytic.biz/ 106 | - Neo4j Database Analyzer: Third-Party App, Free, provides great analysis tools for your Data and Schema. Learn More: https://community.neo4j.com/t/introducing-the-neo4j-database-analyzer/6197 107 | 108 | # **Run GraphKer** 109 | 110 | ``` 111 | // Default 112 | python main.py -u BOLT_URL -n USERNAME -p PASSWORD -d IMPORT_PATH 113 | // Run and Open Neo4j Browser 114 | python main.py -u BOLT_URL -n USERNAME -p PASSWORD -d IMPORT_PATH -b y 115 | // Run and Open Graphlytic App 116 | python main.py -u BOLT_URL -n USERNAME -p PASSWORD -d IMPORT_PATH -g y 117 | // Default Run Example in Ubuntu 118 | sudo python3 main.py -u BOLT_URL -n USERNAME -p PASSWORD -d /var/lib/neo4j/import/ 119 | ``` 120 | 121 | _Default Bolt URL for Neo4j: bolt://localhost:7687_ 122 | 123 | _Default Username in Neo4j Databases: neo4j_ 124 | 125 | _For Neo4j Import Folder check the link above with File Locations._ 126 | 127 | Estimated Runtime: **6-15 Minutes**, depending on hardware. 128 | 129 | _**You will need at least 8GB of free space on your hard drive.**_ 130 | 131 | # 132 | You can check out an existing example of the graph database that GraphKer creates. Just download the dump file from the repo: https://github.com/amberzovitis/GraphKer-DBMS-Dump and import it into an existing or new graph database in Neo4j. The dump consists of the CVEs from 2021 (with related CPEs) and all CWEs and CAPECs. 133 | 134 | # 135 | 136 | You can access the CVE and CPE Datasets in the National Vulnerability Database by NIST (https://nvd.nist.gov/vuln/data-feeds), the CWE Dataset at MITRE (https://cwe.mitre.org/data/downloads.html) and the CAPEC Dataset at MITRE (https://capec.mitre.org/data/downloads.html).
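As a quick example of what you can do once the import has finished, the minimal Python sketch below queries the resulting graph with the official neo4j driver (already listed in requirements.txt). It only uses labels, properties and relationships that the Cypher scripts in this repository create (CWE, CAPEC, Mitigation, RelatedAttackPattern, hasMitigation); the bolt URL, credentials and CWE ID are placeholders you should adjust to your own setup.

```python
# query_example.py - hypothetical helper, not part of GraphKer itself.
# Lists the CAPEC attack patterns and some mitigations linked to one CWE.
from neo4j import GraphDatabase

QUERY = """
MATCH (w:CWE {Name: $cwe})-[:RelatedAttackPattern]->(cp:CAPEC)
OPTIONAL MATCH (w)-[:hasMitigation]->(m:Mitigation)
RETURN cp.Name AS capec, cp.ExtendedName AS pattern,
       collect(DISTINCT m.Description)[..3] AS sample_mitigations
"""

def related_attack_patterns(uri, user, password, cwe_id):
    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            for record in session.run(QUERY, cwe="CWE-" + cwe_id):
                print(record["capec"], "-", record["pattern"])
                print("   sample mitigations:", record["sample_mitigations"])
    finally:
        driver.close()

if __name__ == "__main__":
    # Default bolt URL and username; replace the password and CWE ID as needed.
    related_attack_patterns("bolt://localhost:7687", "neo4j", "password", "79")
```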
137 | 138 | # 139 | --Search, Export Data and Analytics, Enrich your Skills-- 140 | 141 | **Created by Adamantios - Marios Berzovitis, Cybersecurity Expert MSc, BSc** 142 | 143 | _Diploma Research - MSc @ Distributed Systems, Security and Emerging Information Technologies | University Of Piraeus --> https://www.cs.unipi.gr/distributed/_ 144 | 145 | _Co-Working with Cyber Security Research Lab | University Of Piraeus --> https://seclab.cs.unipi.gr/_ 146 | 147 | Facebook: https://www.facebook.com/GraphKerTool/ 148 | 149 | LinkedIn: https://tinyurl.com/p57w4ntu 150 | 151 | Github: https://github.com/amberzovitis 152 | 153 | Enjoy! Provide Feedback! 154 | -------------------------------------------------------------------------------- /CypherScripts/CAPECs.cypher: -------------------------------------------------------------------------------- 1 | // Insert CAPECs Catalog - Cypher Script 2 | 3 | UNWIND [capecReferenceFilesToImport] AS files 4 | 5 | CALL apoc.periodic.iterate( 6 | 'CALL apoc.load.json($files) YIELD value AS reference RETURN reference', 7 | ' 8 | // Insert External References for CAPECs 9 | MERGE (r:External_Reference_CAPEC {Reference_ID: reference.Reference_ID}) 10 | SET r.Author = [value IN reference.Author | value], r.Title = reference.Title, 11 | r.Edition = reference.Edition, r.URL = reference.URL, 12 | r.Publication_Year = reference.Publication_Year, r.Publisher = reference.Publisher 13 | ', 14 | {batchSize:200, params: {files:files}} 15 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 16 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; 17 | 18 | 19 | // Insert CAPECs 20 | UNWIND [capecAttackFilesToImport] AS files 21 | 22 | CALL apoc.periodic.iterate( 23 | 'CALL apoc.load.json($files) YIELD value AS capec RETURN capec', 24 | ' 25 | // Insert Attack Patterns for CAPECs 26 | MERGE (cp:CAPEC { 27 | Name: "CAPEC-" + capec.ID 28 | }) 29 | SET cp.ExtendedName = capec.Name, 30 | cp.Abstraction = capec.Abstraction, 31 | cp.Status = capec.Status, 32 | cp.Description = apoc.convert.toString(capec.Description), 33 | cp.Likelihood_Of_Attack = capec.Likelihood_Of_Attack, 34 | cp.Typical_Severity = capec.Typical_Severity, 35 | cp.Alternate_Terms = [value IN capec.Alternate_Terms.Alternate_Term | value.Term], 36 | cp.Prerequisites = [value IN capec.Prerequisites.Prerequisite | apoc.convert.toString(value)], 37 | cp.Skills_Required = [value IN capec.Skills_Required.Skill | value.Level], 38 | cp.Skills_Required_Description = [value IN capec.Skills_Required.Skill | coalesce(apoc.convert.toString(value.text), " NOT SET ")], 39 | cp.Mitigations = [value IN capec.Mitigations.Mitigation | apoc.convert.toString(value)], 40 | cp.Examples = [value IN capec.Example_Instances.Example | apoc.convert.toString(value)], 41 | cp.Notes = [value IN capec.Notes.Note | apoc.convert.toString(value)], 42 | cp.Submission_Date = capec.Content_History.Submission.Submission_Date, 43 | cp.Submission_Name = capec.Content_History.Submission.Submission_Name, 44 | cp.Submission_Organization = capec.Content_History.Submission.Submission_Organization, 45 | cp.Modifications = [value IN capec.Content_History.Modification | apoc.convert.toString(value)], 46 | cp.Resources_Required = [value IN capec.Resources_Required.Resource | apoc.convert.toString(value)], 47 | cp.Indicators = [value IN 
capec.Indicators.Indicator | apoc.convert.toString(value)] 48 | 49 | // Consequences 50 | FOREACH (consequence IN capec.Consequences.Consequence | 51 | MERGE (con:Consequence {Scope: [value IN consequence.Scope | value]}) 52 | MERGE (cp)-[rel:hasConsequence]->(con) 53 | ON CREATE SET rel.Impact = [value IN consequence.Impact | value], 54 | rel.Note = consequence.Note, 55 | rel.Likelihood = consequence.Likelihood 56 | ) 57 | 58 | // Mitigations 59 | FOREACH (mit IN capec.Mitigations.Mitigation | 60 | MERGE (m:Mitigation { 61 | Description: apoc.convert.toString(mit) 62 | }) 63 | MERGE (cp)-[:hasMitigation]->(m) 64 | ) 65 | 66 | // Related Attack Patterns 67 | WITH cp, capec 68 | FOREACH (Rel_AP IN capec.Related_Attack_Patterns.Related_Attack_Pattern | 69 | MERGE (pec:CAPEC { Name: "CAPEC-" + Rel_AP.CAPEC_ID }) 70 | MERGE (cp)-[:RelatedAttackPattern {Nature: Rel_AP.Nature}]->(pec) 71 | ) 72 | 73 | // Public References for CAPECs 74 | WITH cp, capec 75 | FOREACH (ExReference IN capec.References.Reference | 76 | MERGE (Ref:External_Reference_CAPEC {Reference_ID: ExReference.External_Reference_ID}) 77 | MERGE (cp)-[rel:hasExternal_Reference {CAPEC_ID: cp.Name}]->(Ref) 78 | ) 79 | ', 80 | {batchSize:1000, params: {files:files}} 81 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 82 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; 83 | 84 | // ------------------------------------------------------------------------ 85 | 86 | // ------------------------------------------------------------------------ 87 | // Insert Categories for CAPECs 88 | UNWIND [capecCategoryFilesToImport] AS files 89 | 90 | CALL apoc.periodic.iterate( 91 | 'CALL apoc.load.json($files) YIELD value AS category RETURN category', 92 | ' 93 | MERGE (c:CAPEC {Name: "CAPEC-" + category.ID}) 94 | SET c.Extended_Name = category.Name, 95 | c.Status = category.Status, 96 | c.Summary = apoc.convert.toString(category.Summary), 97 | c.Notes = apoc.convert.toString(category.Notes), 98 | c.Submission_Name = category.Content_History.Submission.Submission_Name, 99 | c.Submission_Date = category.Content_History.Submission.Submission_Date, 100 | c.Submission_Organization = category.Content_History.Submission.Submission_Organization, 101 | c.Modification = [value IN category.Content_History.Modification | apoc.convert.toString(value)] 102 | 103 | // Insert Members for each Category 104 | WITH c, category 105 | FOREACH (members IN category.Relationships.Has_Member | 106 | MERGE (MemberAP:CAPEC {Name: "CAPEC-" + members.CAPEC_ID}) 107 | MERGE (c)-[:hasMember]->(MemberAP) 108 | ) 109 | 110 | WITH c, category 111 | FOREACH (categoryExReference IN category.References.Reference | 112 | MERGE (catRef:External_Reference_CAPEC {Reference_ID: categoryExReference.External_Reference_ID}) 113 | MERGE (c)-[rel:hasExternal_Reference]->(catRef) 114 | SET rel.Section = categoryExReference.Section 115 | ) 116 | ', 117 | {batchSize:200, params: {files:files}} 118 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 119 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; 120 | 121 | // 
------------------------------------------------------------------------ 122 | // Insert Views for CAPECs 123 | 124 | UNWIND [capecViewFilesToImport] AS files 125 | CALL apoc.periodic.iterate( 126 | 'CALL apoc.load.json($files) YIELD value AS view RETURN view', 127 | ' 128 | MERGE (v:CAPEC_VIEW {ViewID: view.ID}) 129 | SET v.Name = view.Name, v.Type = view.Type, v.Status = view.Status, 130 | v.Objective = apoc.convert.toString(view.Objective), v.Filter = view.Filter, 131 | v.Notes = apoc.convert.toString(view.Notes), 132 | v.Submission_Name = view.Content_History.Submission.Submission_Name, 133 | v.Submission_Date = view.Content_History.Submission.Submission_Date, 134 | v.Submission_Organization = view.Content_History.Submission.Submission_Organization, 135 | v.Modification = [value IN view.Content_History.Modification | apoc.convert.toString(value)] 136 | 137 | // Insert Stakeholders for each View 138 | FOREACH (value IN view.Audience.Stakeholder | 139 | MERGE (st:Stakeholder {Type: value.Type}) 140 | MERGE (v)-[rel:usefulFor]->(st) 141 | SET rel.Description = value.Description 142 | ) 143 | 144 | // Insert Members for each View 145 | WITH v, view 146 | FOREACH (members IN view.Members.Has_Member | 147 | MERGE (MemberAP:CAPEC {Name: "CAPEC-" + members.CAPEC_ID}) 148 | MERGE (v)-[:hasMember]->(MemberAP) 149 | ) 150 | 151 | 152 | // ------------------------------------------------------------------------ 153 | // Insert Public References for each View (v is already bound above, so it must not be re-merged here) 154 | WITH v, view 155 | FOREACH (viewExReference IN view.References.Reference | 156 | MERGE (viewRef:External_Reference_CAPEC {Reference_ID: viewExReference.External_Reference_ID}) 157 | MERGE (v)-[:hasExternal_Reference]->(viewRef) 158 | ) 159 | ', 160 | {batchSize:200, params: {files:files}} 161 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 162 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; 163 | -------------------------------------------------------------------------------- /CypherScripts/CWEs.cypher: -------------------------------------------------------------------------------- 1 | // Insert CWEs Catalog - Cypher Script 2 | 3 | UNWIND [cweReferenceFilesToImport] AS files 4 | CALL apoc.periodic.iterate( 5 | 'CALL apoc.load.json($files) YIELD value AS reference RETURN reference', 6 | ' 7 | // Insert External References for CWEs 8 | MERGE (r:External_Reference_CWE {Reference_ID: reference.Reference_ID}) 9 | ON CREATE SET r.Author = [value IN reference.Author | value], 10 | r.Title = reference.Title, 11 | r.Edition = reference.Edition, r.URL = reference.URL, 12 | r.Publication_Year = reference.Publication_Year, r.Publisher = reference.Publisher 13 | ', 14 | {batchSize:200, params: {files:files}} 15 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 16 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; 17 | 18 | // ------------------------------------------------------------------------ 19 | // Insert Weaknesses for CWEs 20 | UNWIND [cweWeaknessFilesToImport] AS files 21 | CALL apoc.periodic.iterate( 22 | 'CALL
apoc.load.json($files) YIELD value AS weakness RETURN weakness', 23 | ' 24 | // Insert CWEs 25 | MERGE (w:CWE { 26 | Name: "CWE-" + weakness.ID 27 | }) 28 | SET w.Extended_Name = weakness.Name, 29 | w.Abstraction = weakness.Abstraction, 30 | w.Structure = weakness.Structure, 31 | w.Status = weakness.Status, 32 | w.Description = weakness.Description, 33 | w.Extended_Description = CASE apoc.meta.type(weakness.Extended_Description) 34 | WHEN "STRING" THEN apoc.convert.toString(weakness.Extended_Description) 35 | WHEN "MAP" THEN apoc.convert.toString(weakness.Extended_Description.`xhtml:p`) 36 | ELSE null 37 | END, 38 | w.Likelihood_Of_Exploit = weakness.Likelihood_Of_Exploit, 39 | w.Background_Details = apoc.convert.toString(weakness.Background_Details.Background_Detail), 40 | w.Modes_Of_Introduction = [value IN weakness.Modes_Of_Introduction.Introduction | value.Phase], 41 | w.Submission_Date = weakness.Content_History.Submission.Submission_Date, 42 | w.Submission_Name = weakness.Content_History.Submission.Submission_Name, 43 | w.Submission_Organization = weakness.Content_History.Submission.Submission_Organization, 44 | w.Modifications = [value IN weakness.Content_History.Modification | apoc.convert.toString(value)], 45 | w.Alternate_Terms = apoc.convert.toString(weakness.Alternate_Terms), 46 | w.Notes = [value IN weakness.Notes.Note | apoc.convert.toString(value)], 47 | w.Affected_Resources = [value IN weakness.Affected_Resources.Affected_Resource | value], 48 | w.Functional_Areas = [value IN weakness.Functional_Areas.Functional_Area | value] 49 | 50 | // Insert Related Weaknesses CWE --> CWE 51 | WITH w, weakness 52 | FOREACH (Rel_Weakness IN weakness.Related_Weaknesses.Related_Weakness | 53 | MERGE (cwe:CWE {Name: "CWE-" + Rel_Weakness.CWE_ID}) 54 | MERGE (w)-[:Related_Weakness {Nature: Rel_Weakness.Nature}]->(cwe) 55 | ) 56 | 57 | // Insert Applicable Platforms for CWEs 58 | WITH w, weakness 59 | FOREACH (lg IN weakness.Applicable_Platforms.Language | 60 | MERGE (ap:Applicable_Platform {Type: "Language", Prevalence: lg.Prevalence, 61 | Name: coalesce(lg.Name, " NOT SET "), Class: coalesce(lg.Class, " NOT SET ")}) 62 | MERGE (w)-[:Applicable_Platform]->(ap) 63 | ) 64 | 65 | WITH w, weakness 66 | FOREACH (tch IN weakness.Applicable_Platforms.Technology | 67 | MERGE (ap:Applicable_Platform {Type: "Technology", Prevalence: tch.Prevalence, 68 | Name: coalesce(tch.Name, " NOT SET "), Class: coalesce(tch.Class, " NOT SET ")}) 69 | MERGE (w)-[:Applicable_Platform]->(ap) 70 | ) 71 | 72 | WITH w, weakness 73 | FOREACH (arc IN weakness.Applicable_Platforms.Architecture | 74 | MERGE (ap:Applicable_Platform {Type: "Architecture", Prevalence: arc.Prevalence, 75 | Name: coalesce(arc.Name, " NOT SET "), Class: coalesce(arc.Class, " NOT SET ")}) 76 | MERGE (w)-[:Applicable_Platform]->(ap) 77 | ) 78 | 79 | WITH w, weakness 80 | FOREACH (os IN weakness.Applicable_Platforms.Operating_System | 81 | MERGE (ap:Applicable_Platform {Type: "Operating System", Prevalence: os.Prevalence, 82 | Name: coalesce(os.Name, " NOT SET "), Class: coalesce(os.Class, " NOT SET ")}) 83 | MERGE (w)-[:Applicable_Platform]->(ap) 84 | ) 85 | 86 | // Insert Demonstrative Examples for CWEs 87 | WITH w, weakness 88 | FOREACH (example IN weakness.Demonstrative_Examples.Demonstrative_Example | 89 | MERGE (ex:Demonstrative_Example { 90 | Intro_Text: apoc.convert.toString(example.Intro_Text) 91 | }) 92 | MERGE (w)-[r:hasExample]->(ex) 93 | SET r.Body_Text = [value IN example.Body_Text | apoc.convert.toString(value)], 94 | r.Example_Code = 
[value IN example.Example_Code | apoc.convert.toString(value)] 95 | ) 96 | 97 | // Insert Consequences for CWEs 98 | WITH w, weakness 99 | FOREACH (consequence IN weakness.Common_Consequences.Consequence | 100 | MERGE (con:Consequence {Scope: [value IN consequence.Scope | value]}) 101 | MERGE (w)-[rel:hasConsequence]->(con) 102 | SET rel.Impact = [value IN consequence.Impact | value], 103 | rel.Note = consequence.Note, rel.Likelihood = consequence.Likelihood 104 | ) 105 | 106 | // Insert Detection Methods for CWEs 107 | WITH w, weakness 108 | FOREACH (dec IN weakness.Detection_Methods.Detection_Method | 109 | MERGE (d:Detection_Method { 110 | Method: dec.Method 111 | }) 112 | MERGE (w)-[wd:canBeDetected]->(d) 113 | SET wd.Description = CASE apoc.meta.type(dec.Description) 114 | WHEN "STRING" THEN apoc.convert.toString(dec.Description) 115 | WHEN "MAP" THEN apoc.convert.toString(dec.Description.`xhtml:p`) 116 | ELSE null 117 | END 118 | SET wd.Effectiveness = dec.Effectiveness, 119 | wd.Effectiveness_Notes = CASE apoc.meta.type(dec.Effectiveness_Notes) 120 | WHEN "STRING" THEN apoc.convert.toString(dec.Effectiveness_Notes) 121 | WHEN "MAP" THEN apoc.convert.toString(dec.Effectiveness_Notes.`xhtml:p`) 122 | ELSE null 123 | END, 124 | wd.Detection_Method_ID = dec.Detection_Method_ID 125 | ) 126 | 127 | // Insert Potential Mitigations for CWEs 128 | WITH w, weakness 129 | FOREACH (mit IN weakness.Potential_Mitigations.Mitigation | 130 | MERGE (m:Mitigation {Description: apoc.convert.toString(mit.Description)}) 131 | SET m.Phase = [value IN mit.Phase | value], 132 | m.Strategy = mit.Strategy, 133 | m.Effectiveness = mit.Effectiveness, 134 | m.Effectiveness_Notes = CASE apoc.meta.type(mit.Effectiveness_Notes) 135 | WHEN "STRING" THEN apoc.convert.toString(mit.Effectiveness_Notes) 136 | WHEN "MAP" THEN apoc.convert.toString(mit.Effectiveness_Notes.`xhtml:p`) 137 | ELSE null 138 | END, 139 | m.Mitigation_ID = mit.Mitigation_ID 140 | MERGE (w)-[:hasMitigation]->(m) 141 | ) 142 | 143 | // Insert Related Attack Patterns - CAPEC for CWEs 144 | WITH w, weakness 145 | FOREACH (rap IN weakness.Related_Attack_Patterns.Related_Attack_Pattern | 146 | MERGE (cp:CAPEC { 147 | Name: "CAPEC-" + rap.CAPEC_ID 148 | }) 149 | MERGE (w)-[:RelatedAttackPattern]->(cp) 150 | ) 151 | 152 | // Public References for CWEs 153 | WITH w, weakness 154 | FOREACH (exReference IN weakness.References.Reference | 155 | MERGE (ref:External_Reference_CWE {Reference_ID: exReference.External_Reference_ID}) 156 | MERGE (w)-[:hasExternal_Reference]->(ref) 157 | ) 158 | ', 159 | {batchSize:200, params: {files:files}} 160 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 161 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; 162 | 163 | 164 | // ------------------------------------------------------------------------ 165 | // Insert Categories for CWEs 166 | UNWIND [cweCategoryFilesToImport] AS files 167 | CALL apoc.periodic.iterate( 168 | 'CALL apoc.load.json($files) YIELD value AS category RETURN category', 169 | ' 170 | MERGE (c:CWE { 171 | Name: "CWE-" + category.ID 172 | }) 173 | SET c.Extended_Name = category.Name, 174 | c.Status = category.Status, 175 | c.Summary = apoc.convert.toString(category.Summary), 176 | c.Notes = apoc.convert.toString(category.Notes), 177 | c.Submission_Name = 
category.Content_History.Submission.Submission_Name, 178 | c.Submission_Date = category.Content_History.Submission.Submission_Date, 179 | c.Submission_Organization = category.Content_History.Submission.Submission_Organization, 180 | c.Modification = [value IN category.Content_History.Modification | apoc.convert.toString(value)] 181 | 182 | // Insert Members for each Category 183 | WITH c, category 184 | FOREACH (member IN category.Relationships.Has_Member | 185 | MERGE (MemberWeak:CWE {Name: "CWE-" + member.CWE_ID}) 186 | MERGE (c)-[:hasMember {ViewID: member.View_ID}]->(MemberWeak) 187 | ) 188 | 189 | // ------------------------------------------------------------------------ 190 | // Insert Public References for each Category 191 | WITH c, category 192 | FOREACH (categoryExReference IN category.References.Reference | 193 | MERGE (catRef:External_Reference_CWE {Reference_ID: categoryExReference.External_Reference_ID}) 194 | MERGE (c)-[:hasExternal_Reference]->(catRef) 195 | ) 196 | ', 197 | {batchSize:200, params: {files:files}} 198 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 199 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; 200 | 201 | // ------------------------------------------------------------------------ 202 | // Insert Views for CWEs 203 | UNWIND [cweViewFilesToImport] AS files 204 | CALL apoc.periodic.iterate( 205 | 'CALL apoc.load.json($files) YIELD value AS view RETURN view', 206 | ' 207 | MERGE (v:CWE_VIEW {ViewID: view.ID}) 208 | SET v.Name = view.Name, 209 | v.Type = view.Type, 210 | v.Status = view.Status, 211 | v.Objective = apoc.convert.toString(view.Objective), 212 | v.Filter = view.Filter, 213 | v.Notes = apoc.convert.toString(view.Notes), 214 | v.Submission_Name = view.Content_History.Submission.Submission_Name, 215 | v.Submission_Date = view.Content_History.Submission.Submission_Date, 216 | v.Submission_Organization = view.Content_History.Submission.Submission_Organization, 217 | v.Modification = [value IN view.Content_History.Modification | apoc.convert.toString(value)] 218 | 219 | // Insert Stakeholders for each View 220 | FOREACH (value IN view.Audience.Stakeholder | 221 | MERGE (st:Stakeholder {Type: value.Type}) 222 | MERGE (v)-[rel:usefulFor]->(st) 223 | SET rel.Description = value.Description 224 | ) 225 | 226 | // Insert Members for each View 227 | WITH v, view 228 | FOREACH (member IN view.Members.Has_Member | 229 | MERGE (MemberWeak:CWE {Name: "CWE-" + member.CWE_ID}) 230 | MERGE (v)-[:hasMember]->(MemberWeak) 231 | ) 232 | 233 | // ------------------------------------------------------------------------ 234 | // Insert Public References for each View 235 | WITH v, view 236 | FOREACH (viewExReference IN view.References.Reference | 237 | MERGE (viewRef:External_Reference_CWE {Reference_ID: viewExReference.External_Reference_ID}) 238 | MERGE (v)-[:hasExternal_Reference]->(viewRef) 239 | ) 240 | ', 241 | {batchSize:200, params: {files:files}} 242 | ) YIELD batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics 243 | RETURN batches,total,timeTaken,committedOperations,failedOperations,failedBatches,retries,errorMessages,batch,operations,wasTerminated,failedParams,updateStatistics; 
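Note that all the Cypher scripts above are templates: bracketed tokens such as [cweWeaknessFilesToImport] or [capecViewFilesToImport] stand in for the list of split JSON files that scraper.py produces, and they have to be substituted before the script is sent to Neo4j. The Inserter classes in this repository take care of that step; the snippet below is only a minimal, hypothetical sketch of the idea (placeholder substitution, then running each statement through the neo4j driver), not the actual implementation.

```python
# run_template.py - hypothetical sketch, NOT the actual CWEInserter/CAPECInserter code.
import json
from neo4j import GraphDatabase

def run_template(driver, script_path, placeholder, file_names):
    """Replace a placeholder token with a JSON file list and run the script statement by statement."""
    with open(script_path, "r", encoding="utf-8") as f:
        template = f.read()
    # e.g. UNWIND [cweViewFilesToImport] AS files
    #  ->  UNWIND ["cwe_view_output_file_1.json", ...] AS files
    file_list = ", ".join(json.dumps(name) for name in file_names)
    script = template.replace(placeholder, file_list)
    with driver.session() as session:
        # The scripts contain one or more ';'-terminated statements; run them one by one.
        for statement in (s.strip() for s in script.split(";")):
            if statement:
                session.run(statement)

if __name__ == "__main__":
    # Assumes CWEs_view.cypher uses the [cweViewFilesToImport] placeholder shown
    # in the view section of CWEs.cypher above; adjust credentials and paths.
    driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
    run_template(driver, "CypherScripts/CWEs_view.cypher", "cweViewFilesToImport",
                 ["mitre_cwe/splitted/cwe_view_output_file_1.json"])
    driver.close()
```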
-------------------------------------------------------------------------------- /scraper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import zipfile 4 | from bs4 import BeautifulSoup 5 | import platform 6 | from circuitbreaker import circuit 7 | import json 8 | import xmltodict 9 | import xml.etree.ElementTree as ET 10 | import fnmatch 11 | import subprocess 12 | 13 | 14 | MAX_RETRIES = 5 15 | 16 | def download_files_cve(import_path): 17 | url = 'https://nvd.nist.gov/vuln/data-feeds' 18 | root = 'https://nvd.nist.gov/' 19 | r = requests.get(url) 20 | soup = BeautifulSoup(r.text, 'html.parser') 21 | all_hrfs = soup.find_all('a') 22 | all_links = [ 23 | link.get('href') for link in all_hrfs 24 | ] 25 | zip_files = [ 26 | dl for dl in all_links if dl and '.json.zip' in dl and 'nvdcve' in dl 27 | ] 28 | download_folder = import_path + "nist/cve/" 29 | extract_dir = import_path + "nist/cve/" 30 | 31 | # Download and Unzip the files 32 | print('\nUpdating the Database with the latest CVE Files...') 33 | for zip_file in zip_files: 34 | print("Zip file: ", zip_file) 35 | full_url = root + zip_file 36 | zip_file_name = os.path.basename(zip_file) 37 | download_file_to_path(full_url, download_folder, zip_file_name) 38 | unzip_files_to_directory(download_folder, extract_dir, zip_file_name) 39 | 40 | transform_xml_files_to_json(extract_dir) 41 | transform_big_json_files_to_multiple_json_files(extract_dir, 'cve','CVE_Items') 42 | 43 | def download_files_cpe(import_path): 44 | url = 'https://nvd.nist.gov/vuln/data-feeds' 45 | root = 'https://nvd.nist.gov/' 46 | r = requests.get(url) 47 | soup = BeautifulSoup(r.text, 'html.parser') 48 | all_hrfs = soup.find_all('a') 49 | all_links = [ 50 | link.get('href') for link in all_hrfs 51 | ] 52 | zip_files = [ 53 | dl for dl in all_links if dl and '.json.zip' in dl and 'nvdcpematch' in dl 54 | ] 55 | download_folder = import_path + "nist/cpe/" 56 | extract_dir = import_path + "nist/cpe/" 57 | # 58 | # Download and Unzip the files 59 | print('\nUpdating the Database with the latest CVE Files...') 60 | for zip_file in zip_files: 61 | full_url = root + zip_file 62 | zip_file_name = os.path.basename(zip_file) 63 | # 5 attempts to download and unzip the file correctly 64 | download_file_to_path(full_url, download_folder, zip_file_name) 65 | unzip_files_to_directory(download_folder, extract_dir, zip_file_name) 66 | # 67 | transform_xml_files_to_json(extract_dir) 68 | transform_big_json_files_to_multiple_json_files(extract_dir, 'cpe','matches') 69 | 70 | def download_files_cwe(import_path): 71 | url = 'https://cwe.mitre.org/data/archive.html' 72 | root = 'https://cwe.mitre.org/' 73 | r = requests.get(url) 74 | soup = BeautifulSoup(r.text, 'html.parser') 75 | all_hrfs = soup.find_all('a') 76 | all_links = [ 77 | link.get('href') for link in all_hrfs 78 | ] 79 | zip_files = [ 80 | dl for dl in all_links if dl and '.xml.zip' in dl 81 | ] 82 | zip_file = zip_files[0] 83 | download_folder = import_path + "mitre_cwe/" 84 | extract_dir = import_path + "mitre_cwe/" 85 | 86 | # Download and Unzip the files 87 | print('\nUpdating the Database with the latest CWE Files...') 88 | full_url = root + zip_file 89 | zip_file_name = os.path.basename(zip_file) 90 | 91 | # 5 attempts to download and unzip the file correctly 92 | download_file_to_path(full_url, download_folder, zip_file_name) 93 | unzip_files_to_directory(download_folder, extract_dir, zip_file_name) 94 | transform_xml_files_to_json(extract_dir) 
95 | replace_unwanted_string_cwe(extract_dir) 96 | transform_big_json_files_to_multiple_json_files(extract_dir, 'cwe_reference','Weakness_Catalog.External_References.External_Reference') 97 | transform_big_json_files_to_multiple_json_files(extract_dir, 'cwe_weakness','Weakness_Catalog.Weaknesses.Weakness') 98 | transform_big_json_files_to_multiple_json_files(extract_dir, 'cwe_category','Weakness_Catalog.Categories.Category') 99 | transform_big_json_files_to_multiple_json_files(extract_dir, 'cwe_view','Weakness_Catalog.Views.View') 100 | 101 | def download_files_capec(import_path): 102 | url = 'https://capec.mitre.org/data/archive.html' 103 | root = 'https://capec.mitre.org/' 104 | r = requests.get(url) 105 | soup = BeautifulSoup(r.text, 'html.parser') 106 | all_hrfs = soup.find_all('a') 107 | all_links = [ 108 | link.get('href') for link in all_hrfs 109 | ] 110 | xml_files = [ 111 | dl for dl in all_links if dl and '.xml' in dl 112 | ] 113 | xml_file = xml_files[0] 114 | 115 | download_folder = import_path + "mitre_capec/" 116 | extract_dir = import_path + "mitre_capec/" 117 | 118 | # Download xml file 119 | print('\nUpdating the Database with the latest CAPEC Files...') 120 | full_url = root + xml_file 121 | zip_file_name = os.path.basename(xml_file) 122 | 123 | download_file_to_path(full_url, download_folder, zip_file_name) 124 | current_os = platform.system() 125 | if (current_os == "Linux" or current_os == "Darwin"): 126 | run_dos2unix(os.path.join(download_folder, zip_file_name)) 127 | transform_xml_files_to_json(download_folder) 128 | replace_unwanted_string_capec(download_folder) 129 | transform_big_json_files_to_multiple_json_files(extract_dir, 'capec_reference','Attack_Pattern_Catalog.External_References.External_Reference') 130 | transform_big_json_files_to_multiple_json_files(extract_dir, 'capec_attack_pattern','Attack_Pattern_Catalog.Attack_Patterns.Attack_Pattern') 131 | transform_big_json_files_to_multiple_json_files(extract_dir, 'capec_category','Attack_Pattern_Catalog.Categories.Category') 132 | transform_big_json_files_to_multiple_json_files(extract_dir, 'capec_view','Attack_Pattern_Catalog.Views.View') 133 | 134 | 135 | def download_datasets(import_path): 136 | download_files_cve(import_path) 137 | download_files_cpe(import_path) 138 | download_files_cwe(import_path) 139 | download_files_capec(import_path) 140 | 141 | # Define the function that makes the HTTP request with retry 142 | def make_http_request_with_retry(url, retries=0): 143 | try: 144 | # Call the function that makes the HTTP request, protected by the circuit breaker 145 | return download_file_to_path(url) 146 | except circuit.BreakerOpenError: 147 | if retries < MAX_RETRIES: 148 | print(f"Circuit is open. Retrying... Attempt {retries + 1}") 149 | return make_http_request_with_retry(url, retries=retries + 1) 150 | else: 151 | raise RuntimeError("Circuit is open. Max retries reached.") 152 | except Exception as e: 153 | if retries < MAX_RETRIES: 154 | print(f"Error occurred: {e}. Retrying... Attempt {retries + 1}") 155 | return make_http_request_with_retry(url, retries=retries + 1) 156 | else: 157 | raise RuntimeError("Max retries reached. 
Last error: {}".format(e)) 158 | 159 | # Define the function that makes the HTTP request 160 | @circuit(failure_threshold=10) 161 | def download_file_to_path(url, download_path, file_name): 162 | print("Download path: ", download_path) 163 | if not os.path.exists(download_path): 164 | os.makedirs(download_path, exist_ok=True) 165 | r = requests.get(url) 166 | dl_path = os.path.join(download_path, file_name) 167 | with open(dl_path, 'wb') as file: 168 | file.write(r.content) 169 | 170 | def unzip_files_to_directory(zip_path, extract_path, zip_filename): 171 | try: 172 | if not os.path.exists(extract_path): 173 | os.makedirs(extract_path, exist_ok=True) 174 | z = zipfile.ZipFile(os.path.join(zip_path, zip_filename)) 175 | z.extractall(extract_path) 176 | print(zip_filename + ' unzipped successfully') 177 | print('---------') 178 | z.close() 179 | current_os = platform.system() 180 | if (current_os == "Linux" or current_os == "Darwin"): 181 | file_to_delete = f'{extract_path}' + f'/{zip_filename}' 182 | elif current_os == "Windows": 183 | file_to_delete = f'{extract_path}' + f'\\{zip_filename}' 184 | os.remove(file_to_delete) 185 | except zipfile.BadZipfile as e: 186 | print("Error while unzipping data" + e) 187 | 188 | def transform_xml_files_to_json(path): 189 | directory_contents = os.listdir(path) 190 | 191 | for item in directory_contents: 192 | item_path = os.path.join(path, item) 193 | if item_path.endswith(".xml") and os.path.isfile(item_path): 194 | xml_file_to_json(item_path) 195 | os.remove(item_path) 196 | 197 | def transform_big_json_files_to_multiple_json_files(path, output_prefix, json_array_path): 198 | directory_contents = os.listdir(path) 199 | 200 | for item in directory_contents: 201 | item_path = os.path.join(path, item) 202 | if item_path.endswith(".json") and os.path.isfile(item_path): 203 | slice_json_file(item_path, path, output_prefix, 200, json_array_path) 204 | 205 | 206 | # Convert XML Files to JSON Files 207 | def xml_file_to_json(xmlFile): 208 | # parse the import folder for xml files 209 | # open the input xml file and read 210 | # data in form of python dictionary 211 | # using xmltodict module 212 | print(f"Transforming file {xmlFile}") 213 | if xmlFile.endswith(".xml"): 214 | with open(xmlFile, 'r', encoding='utf-8') as xml_file: 215 | data_dict = xmltodict.parse(xml_file.read()) 216 | xml_file.close() 217 | # generate the object using json.dumps() 218 | # corresponding to json data 219 | json_data = json.dumps(data_dict) 220 | # Write the json data to output 221 | # json file 222 | xml_file.close() 223 | jsonfile = f'{xmlFile}' 224 | print(jsonfile) 225 | jsonfile = jsonfile.replace(".xml", ".json") 226 | print(jsonfile) 227 | with open(jsonfile, "w") as json_file: 228 | json_file.write(json_data) 229 | json_file.close() 230 | 231 | # Flatten CWE Dataset File 232 | def replace_unwanted_string_cwe(path): 233 | listOfFiles = os.listdir(path) 234 | pattern = "*.json" 235 | files = [] 236 | for entry in listOfFiles: 237 | if fnmatch.fnmatch(entry, pattern): 238 | if entry.startswith("cwec"): 239 | files.append(entry) 240 | break 241 | file = path + files[0] 242 | fin = open(file, "rt") 243 | flattened_cwe = path + "cwe.json" 244 | fout = open(flattened_cwe, "wt") 245 | for line in fin: 246 | fout.write(line.replace('"@', '"')) 247 | fin.close() 248 | os.remove(file) 249 | fout.close() 250 | 251 | # Flatten CAPEC Dataset File 252 | def replace_unwanted_string_capec(path): 253 | listOfFiles = os.listdir(path) 254 | pattern = "*.json" 255 | files = [] 256 | for 
entry in listOfFiles: 257 | if fnmatch.fnmatch(entry, pattern): 258 | if entry.startswith("capec"): 259 | files.append(entry) 260 | break 261 | file = path + files[0] 262 | fin = open(file, "rt") 263 | flattened_capec = path + "capec.json" 264 | fout = open(flattened_capec, "wt") 265 | for line in fin: 266 | fout.write(line.replace('"@', '"').replace('#text', 'text')) 267 | fin.close() 268 | fout.close() 269 | os.remove(file) 270 | 271 | def slice_json_file(input_file, output_path, output_prefix, batch_size, json_array_path): 272 | with open(input_file, 'r') as f: 273 | data = json.load(f) 274 | 275 | data_array = select_nested_array_by_path(data, json_array_path) 276 | length = len(data_array) 277 | 278 | if not os.path.exists(os.path.join(output_path, "splitted")): 279 | os.makedirs(os.path.join(output_path, "splitted"), exist_ok=True) 280 | 281 | for i in range(0, length, batch_size): 282 | batch = data_array[i:i+batch_size] 283 | output_file = f"{output_path}/splitted/{output_prefix}_output_file_{i//batch_size + 1}.json" 284 | with open(output_file, 'w') as f_out: 285 | json.dump(batch, f_out, indent=4) 286 | 287 | def select_nested_array_by_path(json_data, path): 288 | parsed_json = json_data 289 | keys = path.split('.') 290 | 291 | for key in keys: 292 | if key in parsed_json: 293 | parsed_json = parsed_json[key] 294 | else: 295 | return None 296 | 297 | return parsed_json 298 | 299 | def run_dos2unix(file): 300 | try: 301 | dos2unix_command = f'dos2unix {file}' 302 | print("Executing command:", dos2unix_command) 303 | 304 | process = subprocess.run(['dos2unix', file], capture_output=True, text=True, check=True) 305 | if process.returncode == 0: 306 | print(f"File {file} transformed to unix format") 307 | else: 308 | raise RuntimeError(f"Error running dos2unix command: {process.stderr.strip()}") 309 | except FileNotFoundError: 310 | raise RuntimeError("dos2unix command not found. Make sure dos2unix is installed on your system.") 311 | except subprocess.CalledProcessError as e: 312 | raise RuntimeError(f"Error running dos2unix command. Make sure dos2unix is installed and check your file. Error: {e}") -------------------------------------------------------------------------------- /GraphKer.svg: -------------------------------------------------------------------------------- 1 | (SVG image: Neo4j graph visualization of the GraphKer data model, created with Neo4j. It shows the node labels Applicable_Platform, CAPEC, CAPEC_VIEW, CPE, CVE, CVSS_2, CVSS_3, CWE, CWE_VIEW, Consequence, Demonstrative_Example, Detection_Method, External_Reference_CAPEC, External_Reference_CWE, Mitigation, Reference and Stakeholder, connected by relationships such as parentOf, Problem_Type, applicableIn, referencedBy, CVSS2_Impact, CVSS3_Impact, Related_Weakness, RelatedAttackPattern, Applicable_Platform, hasMember, hasMitigation, hasConsequence, canBeDetected, hasExample, hasExternal_Reference and usefulFor.) --------------------------------------------------------------------------------
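Returning to scraper.py above, a small usage example may help tie the pieces together: it fetches and prepares only the CWE dataset. The function names and the splitted/ output layout come from scraper.py itself; the import path is the default Neo4j import folder used in the README run examples and should be adjusted to your installation.

```python
# Example usage of scraper.py (run from the repository root).
from scraper import download_files_cwe, download_datasets

import_path = "/var/lib/neo4j/import/"

# Fetch, unzip and convert only the CWE archive ...
download_files_cwe(import_path)
# ... afterwards <import_path>/mitre_cwe/splitted/ contains the batched
# cwe_reference_*, cwe_weakness_*, cwe_category_* and cwe_view_* JSON files
# that the CWE Cypher scripts load with apoc.load.json.

# Or fetch everything (CVE, CPE, CWE, CAPEC) in one go:
# download_datasets(import_path)
```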