├── .gitignore ├── .project ├── .pydevproject ├── .travis.yml ├── ExampleFiles ├── SampleInput-CFM13.xml └── SampleInput-STIX-TLP.xml ├── FlexT.py ├── FlexTBatch.py ├── FlexTransform ├── Configuration │ ├── Config.py │ ├── ConfigFunctions │ │ ├── ConfigFunctionManager.py │ │ ├── GlobalFunctions.py │ │ └── __init__.py │ └── __init__.py ├── FlexT.py ├── FlexTBatch.py ├── FlexTransform.py ├── OntologyOracle │ ├── OntologyOracle.py │ └── __init__.py ├── SchemaParser │ ├── SchemaParser.py │ ├── TransformFunctions │ │ ├── CFM13Functions.py │ │ ├── CFM20Functions.py │ │ ├── GlobalFunctions.py │ │ ├── STIXFunctions.py │ │ ├── TransformFunctionManager.py │ │ └── __init__.py │ └── __init__.py ├── SyntaxParser │ ├── CSVParser.py │ ├── DICTParsers │ │ ├── __init__.py │ │ └── iSightReport.py │ ├── DictionaryParser.py │ ├── KVParser.py │ ├── Parser.py │ ├── XMLParser.py │ ├── XMLParsers │ │ ├── .gitignore │ │ ├── CFM13.py │ │ ├── CFM20Alert.py │ │ ├── STIX.py │ │ └── __init__.py │ └── __init__.py ├── __init__.py ├── resources │ ├── cybox.zip │ ├── images │ │ ├── dev-figure1.png │ │ ├── figure1a.png │ │ ├── figure1b.png │ │ ├── figure2.png │ │ ├── figure3.png │ │ └── figure4.png │ ├── indicator-tbox.owl │ ├── ramrod.zip │ ├── sampleConfigurations │ │ ├── MBL.cfg │ │ ├── cfm13.cfg │ │ ├── cfm20alert.cfg │ │ ├── crisp_json.cfg │ │ ├── csv_example.cfg │ │ ├── doe_em.cfg │ │ ├── flextext.cfg │ │ ├── iid_combined_recent.cfg │ │ ├── iid_host_active.cfg │ │ ├── iid_host_dynamic.cfg │ │ ├── iid_ipv4_recent.cfg │ │ ├── isight.cfg │ │ ├── keyvalue.cfg │ │ ├── lqmtools.cfg │ │ ├── stix_acs30.cfg │ │ ├── stix_essa.cfg │ │ ├── stix_tlp.cfg │ │ ├── stix_tlp2.cfg │ │ └── twitter.cfg │ ├── schemaDefinitions │ │ ├── MBL.json │ │ ├── cfm-metadata.json │ │ ├── cfm13-site.json │ │ ├── cfm13.json │ │ ├── cfm20alert.json │ │ ├── crisp.json │ │ ├── doe-em.json │ │ ├── iid-combined-recent.json │ │ ├── iid-host-dynamic.json │ │ ├── iid-ipv4-recent.json │ │ ├── iid.json │ │ ├── isight.json │ │ ├── keyvalue.json │ │ ├── lqmtools.json │ │ ├── stix-acs30-broken.json │ │ ├── stix-acs30.json │ │ ├── stix-essa.json │ │ ├── stix-tlp.json │ │ ├── stix-tlp2.json │ │ ├── stix.json │ │ └── twitter.json │ ├── schemas │ │ ├── CFMAlert.xsd │ │ ├── CFMDownload.xsd │ │ ├── CFMDownloadRequest.xsd │ │ ├── CFMEnvelope.xsd │ │ └── CFMMessage13.xsd │ ├── stix.zip │ ├── test.owl │ └── transform.owl └── test │ ├── LQMTTests.py │ ├── OntologyOracle_test.py │ ├── Readme.md │ ├── SampleInputs.py │ ├── TestData │ ├── cfm13_multiple_site.cfg │ ├── csv-example-2.json │ ├── csv_example_2.cfg │ └── mult_site_config.json │ ├── ToCFM13_test.py │ ├── ToKeyValue_test.py │ ├── ToLQMT_test.py │ ├── ToStixACS30_test.py │ ├── ToStixACS_test.py │ ├── ToStixTLP_test.py │ ├── __init__.py │ ├── regression_test.py │ └── utils.py ├── ISAMarkingExtension ├── __init__.py ├── bindings │ ├── __init__.py │ ├── isamarkings.py │ └── isamarkings30.py ├── isamarkings.py └── isamarkingsacs30.py ├── LICENSE ├── MANIFEST.in ├── Utils ├── LQMTtestCFM.py ├── LQMTtestSTIX.py ├── addSchemaConfigToTBOX.py ├── iSIGHTtoCFM.py └── subjectCommentParentQuery.py ├── docs ├── contribute.md ├── examples.md ├── index.md ├── install.md └── usage.md ├── flexT_dir_input.py ├── mkdocs.yml ├── readme-dev.md ├── readme.md └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 
| build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# IDE
.idea/
catalog*.xml
.project
.pydevproject
-------------------------------------------------------------------------------- /.project: --------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
    <name>FlexTransform</name>
    <comment></comment>
    <projects>
    </projects>
    <buildSpec>
        <buildCommand>
            <name>org.python.pydev.PyDevBuilder</name>
            <arguments>
            </arguments>
        </buildCommand>
    </buildSpec>
    <natures>
        <nature>org.python.pydev.pythonNature</nature>
    </natures>
</projectDescription>
-------------------------------------------------------------------------------- /.pydevproject: --------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?><pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">python3.5</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 3.0</pydev_property>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/${PROJECT_DIR_NAME}</path>
</pydev_pathproperty>
</pydev_project>
-------------------------------------------------------------------------------- /.travis.yml: --------------------------------------------------------------------------------
language: python
python:
- '3.3'
- '3.4'
- '3.5'
- '3.6'
before_install:
- "sudo apt-get -qq update"
- "sudo apt-get install -y -qq libxml2-dev libxslt-dev python-dev"
install:
- "pip install ."
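# nosetests must pass before the deploy step below runs; deployment only happens on tagged builds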
script:
- "nosetests"
deploy:
  provider: "pypi"
  user: "anl-cyberscience"
  password:
    secure: HgtKQyGDlTHrhlaH+Cr6F56MTNJ2lwkAjKod7ifUy+dBqZ/FHHqPmLgAbGTlIG9si0dvG1IgOz8de/Vr1+AO3w/J18zmop5xPyv1PreqOCSwbRWK76dJG4vsUB7nW+eRwg29YcPVUmtw+z6qHYEAiR6/GQNKi3ubw5RRO4Oily3QxkOr48pGStiW8f8hHGI8mgLw16saVxAV+yFiaXFikmUHBro1G33W78ZOJhpgxEAMXzPCZHeIepnDZNDmCERX7YZdy9q5uV07WmRqEjLSNpfvdyJSS2T66ZKZ7mcB/2M3sFtz7t5datqAudlNuy9o5JAsPavkxJNz0u6A0V8Ee7ETeRwDKhhuhgBtVStEZHNas7XZaiJzM4TbFTJvgL7lU5mPXzo3cxjuSdlz8Px7Heq3jp5vdE94locYEjd1iQ8KFMIxjQIA15WHKYcDGy9iG4M7iQl7Up3JCCQC+Dsj5v1eHm3YITDd7P8g2c9/xeGcvpWx6zYaukSS2oBuBEuwfQz0JCuVSZkajWg9IETb6iQFml+IZg8uoLP+jOKjDTY4S4a/jiRqHa132q6+bZ5QQWCl8b+fyX+VcPLIAl8BtA/bwQ6cJW1Gni69RSjLxbGe5neUsjfZL4fKO+QnunfxDviRnt4lBqG4Cce2n2YNW40DydaGRXfDR+gUqhDO3IM=
  on:
    tags: true
  distributions: "sdist bdist_wheel"
notifications:
  slack:
    secure: i2h5/x8NkHYjIxqj1znDK9UkQpjT28aSrIG6YNInURb963+oQV27tRclDDfG+VBigRNF3TvFFApQhzDt/che1W2MsQdVcC8uAL+Lin8IetSV9A3iQL1P3g4NuG6SDPUoWydr1cY/3ONNkZsXms3pSJTrRm9kLYGPB6X7u3TvBgluzdEhxaafUfZgNcV9tPBx1wkzu4/61Qz8Ff1SftIrIitpc1Njytpibjlxl6qo4tlJRacBcu0AagVRtbi1COb5gbL4jdQFqzt1D7BP6ltBLDGEreb3Eucd/s2j8ONwKvem26g9BQCLHx/xD9hoU4rBZ1i8SomkLUZBAFl3avKq6WvYMmFaKe3pVgiFEAPwKNRxoZYLHOGTBgxyKQi1Nuv/iM4MZx15JAQERL4I+Q7BMBRbC+Om3Uv1xAcKfX5CsOyAUdWc6RjwU0XQunqxCQvO/Nvjz5ZMLhU1W7fPUYxU21ye3PgNZq9hcQRWsFDlPPmYM4JmbKgtoNo+vVGUoOvEkG0WId4bJ1W9PXkvyMa4+1c6Gs3tWq78fre8vK/LFcxbRxIufJ5rxUne6TgdQ5Y70gjJgOiqlz43XMmPlmo2/ORLYaN/IyoBZMP1UKAJRYlMEeOPmyWKOoG4gOnCW5yXKlcmuVKDoKdLybBofgRKJCRM7/QKNLhvNSVDvr/J5GU=
-------------------------------------------------------------------------------- /ExampleFiles/SampleInput-CFM13.xml: --------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<IDMEF-Message version="1.0" xmlns="http://iana.org/idmef">
    <Alert>
        <Analyzer analyzerid="Fake">
            <Node category="dns">
                <location>Fake National Lab</location>
                <name>Fake Name</name>
            </Node>
        </Analyzer>
        <AnalyzerTime>2016-02-21T22:50:02+0600</AnalyzerTime>
        <AdditionalData meaning="report schedule" type="string">5 minutes</AdditionalData>
        <AdditionalData meaning="number of alerts in this report" type="integer">2</AdditionalData>
        <AdditionalData meaning="report type" type="string">alerts</AdditionalData>
        <AdditionalData meaning="report start time" type="date-time">2016-02-21T22:45:53+0700</AdditionalData>
    </Alert>
    <Alert>
        <CreateTime>2016-02-21T22:45:53-0400</CreateTime>
        <Source>
            <Node>
                <Address category="ipv4-addr">
                    <address>10.10.10.10</address>
                </Address>
            </Node>
        </Source>
        <Target>
            <Service>
                <port>22</port>
                <protocol>TCP</protocol>
            </Service>
        </Target>
        <Classification text="SSH Attack">
            <Reference meaning="Scanning" origin="user-specific">
                <name>SSH Attack</name>
                <url> </url>
            </Reference>
        </Classification>
        <Assessment>
            <Action category="block-installed" />
        </Assessment>
        <AdditionalData meaning="restriction" type="string">private</AdditionalData>
        <AdditionalData meaning="recon" type="integer">0</AdditionalData>
        <AdditionalData meaning="prior offenses" type="integer">11</AdditionalData>
        <AdditionalData meaning="duration" type="integer">86400</AdditionalData>
        <AdditionalData meaning="alert threshold" type="integer">1</AdditionalData>
        <AdditionalData meaning="OUO" type="integer">0</AdditionalData>
        <AdditionalData meaning="top level domain owner" type="string">The Republic of Fake</AdditionalData>
    </Alert>
</IDMEF-Message>
-------------------------------------------------------------------------------- /FlexT.py: --------------------------------------------------------------------------------
'''
Created on Jun 17, 2015

@author: ahoying
'''

from FlexTransform import FlexTransform
from FlexTransform.OntologyOracle import Oracle
import logging
import rdflib

import argparse
import os
import sys
import json


# Configure logging to send INFO, DEBUG and TRACE messages to stdout and all other logs to stderr
# Based on code from http://stackoverflow.com/questions/2302315/how-can-info-and-debug-logging-message-be-sent-to-stdout-and-higher-level-messag
class LessThenFilter(logging.Filter):
    def __init__(self, level):
        self._level = level
        logging.Filter.__init__(self)

    def filter(self, rec):
        return rec.levelno < self._level


def main():
    log = logging.getLogger()
    log.setLevel(logging.NOTSET)

    sh_out = logging.StreamHandler(stream=sys.stdout)
    sh_out.setLevel(logging.DEBUG)
    sh_out.setFormatter(logging.Formatter('%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s'))
    sh_out.addFilter(LessThenFilter(logging.WARNING))
    log.addHandler(sh_out)

    sh_err = logging.StreamHandler(stream=sys.stderr)
    sh_err.setLevel(logging.WARNING)
    sh_err.setFormatter(logging.Formatter('%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s'))
    log.addHandler(sh_err)

    parser = argparse.ArgumentParser(
        description="Transform a source file's syntax and schema to the target file document type")
    parser.add_argument('--src-config',
                        type=argparse.FileType('r'),
                        help='Source file parser configuration',
                        metavar='CONFIG',
                        required=True)
    parser.add_argument('--src',
                        type=argparse.FileType('r'),
                        help='Source file',
                        required=True)
    parser.add_argument('--src-metadata',
                        type=argparse.FileType('r'),
                        help='Source Metadata file',
                        required=False)
    parser.add_argument('--dst-config',
                        type=argparse.FileType('r'),
                        help='Destination file parser configuration',
                        metavar='CONFIG',
                        required=True)
    parser.add_argument('--dst',
                        type=argparse.FileType('w'),
                        help='Destination file',
                        required=True)

    parser.add_argument('--tbox-uri',
                        type=argparse.FileType('r'),
                        help='The uri location of the tbox file to load',
                        required=False)
    parser.add_argument('--source-schema-IRI',
                        help='The ontology IRI for the source',
                        required=False)
    parser.add_argument('--destination-schema-IRI',
                        help='The ontology IRI for the destination',
                        required=False)

    args = parser.parse_args()

    try:
        Transform = FlexTransform.FlexTransform()
        Transform.AddParser('src', args.src_config)
        Transform.AddParser('dst', args.dst_config)

        metadata = None

        if args.src_metadata:
            metadata = json.load(args.src_metadata)

        kb = None

        if args.tbox_uri:
            if args.destination_schema_IRI:
                kb = Oracle(args.tbox_uri, rdflib.URIRef(args.destination_schema_IRI))
            else:
                logging.warning(
                    "Ontology file specified, but no destination schema IRI is given.
Ontology will not be used.")

        FinalizedData = Transform.TransformFile(
            sourceFileName=args.src,
            targetFileName=args.dst,
            sourceParserName='src',
            targetParserName='dst',
            sourceMetaData=metadata,
            oracle=kb)
        args.dst.close()

    except Exception as inst:
        log.error(inst)
        args.dst.close()
        os.remove(args.dst.name)
        exit(1)

    else:
        log.info("Success")
        exit(0)

if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- /FlexTBatch.py: --------------------------------------------------------------------------------
'''
Created on Apr 12, 2017

@author: taxon
'''
#
# FlexTBatch is a simple wrapper script to FlexT that allows the user to start
# FlexT and send it commands via stdin for processing. If there are many files to convert,
# the config files only need to be specified once and will also only be loaded once, saving
# a significant amount of time in processing subsequent transforms
#
# This class exists so FlexTBatch can be run within the Eclipse environment, as
# FlexTransform.FlexTBatch.py cannot be run directly within Eclipse.
from FlexTransform import FlexTBatch

def main():
    FlexTBatch.main()

if __name__ == '__main__':
    FlexTBatch.main()
-------------------------------------------------------------------------------- /FlexTransform/Configuration/ConfigFunctions/ConfigFunctionManager.py: --------------------------------------------------------------------------------
"""
Created on Jun 13, 2016

@author: cstrastburg
"""

import inspect
import logging
from collections import defaultdict

import FlexTransform.Configuration.ConfigFunctions


class ConfigFunctionManager(object):
    '''
    Registers configuration functions and dispatches execution to the classes that implement them.
    '''

    __KnownFunctions = defaultdict(dict)

    def __init__(self, trace, trace_list=[]):
        """
        Constructor
        :param trace_list: list of elements to trace
        :return:
        """
        self.logging = logging.getLogger('FlexTransform.Configuration.ConfigFunctions.ConfigFunctionManager')

        self._FunctionClasses = {}
        self.trace = trace
        self.trace_list = trace_list
        self.trace_index = {}
        if self.trace:
            # Index each traced element by its source/destination field names and IRIs for quick lookup
            for x in self.trace_list:
                for v in x["src_fields"]:
                    self.trace_index[v] = x
                for y in x["dst_fields"]:
                    self.trace_index[y] = x
                for w in x["src_IRIs"]:
                    self.trace_index[w] = x
                for z in x["dst_IRIs"]:
                    self.trace_index[z] = x
            self.logging.debug("Initialized ConfigFunctionManager with trace_list of {} elements".format(len(trace_list)))

    @classmethod
    def register_function(cls, function_name, required_args, function_class):
        cls.__KnownFunctions[function_name] = {
            'class': function_class,
            'RequiredArgs': required_args
        }

    @classmethod
    def get_function_class(cls, function_name):
        if function_name in cls.__KnownFunctions:
            class_name = cls.__KnownFunctions[function_name]['class']
        else:
            raise Exception('FunctionNotRegistered',
                            "The function %s is not registered with the ConfigFunctionManager" % function_name)

        for name, obj in inspect.getmembers(FlexTransform.Configuration.ConfigFunctions, inspect.isclass):
            if name == class_name:
                return obj()

        raise Exception('FunctionClassNotFound',
"The Class %s for function %s was not found by the ConfigFunctionManager" % (class_name, function_name)) 66 | 67 | def get_function(self, function_name): 68 | if function_name in self.__KnownFunctions: 69 | return True 70 | else: 71 | return False 72 | 73 | def execute_config_function(self, function_name, args): 74 | if function_name in self._FunctionClasses: 75 | function_class = self._FunctionClasses[function_name] 76 | else: 77 | function_class = ConfigFunctionManager.get_function_class(function_name) 78 | self._FunctionClasses[function_name] = function_class 79 | 80 | self._validate_args(function_name, args) 81 | return function_class.Execute(function_name, args) 82 | 83 | def _validate_args(self, function_name, args): 84 | """ 85 | Allowed fields for the Args dictionary: 86 | 87 | functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser. 88 | This is the value of the string between the () in the function name in the .json schema configuration files 89 | 90 | fieldName - Optional - The name of the field being processed 91 | 92 | fileName - Optional - The full path of the source file 93 | 94 | fieldDict - Optional - The dictionary associated with this field 95 | 96 | """ 97 | allowed_fields = set(['functionArg', 'fileName', 'fieldName', 'fieldDict']) 98 | RequiredFields = set([]) 99 | 100 | if isinstance(args, dict): 101 | for arg in args: 102 | if arg not in allowed_fields: 103 | self.logging.warning('A argument passed to function %s is not allowed: %s' % (function_name, arg)) 104 | else: 105 | raise Exception('InvalidArgs', 106 | 'The arguments passed to function %s are not defined or not in dictionary format' % function_name) 107 | 108 | if self.__KnownFunctions[function_name]['RequiredArgs'] is not None: 109 | RequiredFields.update(self.__KnownFunctions[function_name]['RequiredArgs']) 110 | 111 | for arg in RequiredFields: 112 | if arg not in args or args[arg] is None: 113 | raise Exception('InvalidArgs', 114 | 'Function %s args did not include the required %s field, could not process' % (function_name, arg)) 115 | -------------------------------------------------------------------------------- /FlexTransform/Configuration/ConfigFunctions/GlobalFunctions.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author: cstrasburg 3 | """ 4 | 5 | import logging 6 | import os.path 7 | import re 8 | 9 | import arrow 10 | 11 | from FlexTransform.Configuration.ConfigFunctions import ConfigFunctionManager 12 | 13 | 14 | class GlobalFunctions(object): 15 | """ 16 | Contains Configuration functions that multiple configuration files can use: 17 | """ 18 | 19 | ''' 20 | The _FunctionNames dictionary should contain each function name understood by this class. Each is 21 | mapped to a list with required fields to be passed in the args dictionary, or None if no args are required. 

    Allowed fields for the Args dictionary:

    fieldName - Optional - The name of the field being processed

    fileName - Optional - The name of the loaded file (full path)

    functionArg - Optional - The string between the '(' and ')' in the function definition

    fieldDict - Optional - The dictionary of the field where this method is defined

    '''

    __FunctionNames = {
        'getFileCreationDate': ['fileName'],
        'getFileUUID': ['fileName', 'functionArg'],
    }

    def __init__(self):
        """
        Constructor
        """
        self.logging = logging.getLogger('FlexTransform.Configuration.ConfigFunctions.GlobalFunctions')

    @classmethod
    def register_functions(cls):
        for FunctionName, RequiredArgs in cls.__FunctionNames.items():
            ConfigFunctionManager.register_function(FunctionName, RequiredArgs, 'GlobalFunctions')

    def Execute(self, function_name, args):
        value = None

        if function_name not in self.__FunctionNames:
            raise Exception('FunctionNotDefined',
                            'Function %s is not defined in GlobalFunctions' % (function_name))

        elif function_name == 'getFileCreationDate':
            if 'fileName' in args:
                try:
                    rawctime = os.path.getctime(args['fileName'])
                    # Convert to the requested time format; default to unix time when no format is given
                    if 'fieldDict' in args and 'dateTimeFormat' in args['fieldDict'] and \
                            args['fieldDict']['dateTimeFormat'] != 'unixtime':
                        value = arrow.get(rawctime).format(args['fieldDict']['dateTimeFormat'])
                    else:
                        value = str(arrow.get(rawctime).timestamp)
                except OSError as e:
                    self.logging.warning("Could not get file ctime for {}: {}".format(args['fileName'], e))

        elif function_name == 'getFileUUID':
            if 'fileName' in args and args['fileName']:
                fileName = args['fileName']
                baseName = os.path.basename(fileName)
                p = re.compile(args['functionArg'])
                result = p.match(baseName)
                if result is not None:
                    value = result.group(1)
                else:
                    self.logging.warning("getFileUUID: could not extract UUID from filename {} using regex {}".format(baseName, args['functionArg']))
            else:
                self.logging.warning("getFileUUID: no fileName provided in args; could not extract UUID.")

        return value
-------------------------------------------------------------------------------- /FlexTransform/Configuration/ConfigFunctions/__init__.py: --------------------------------------------------------------------------------
"""
Created on Jun 13, 2016

@author: cstrasburg
"""

import FlexTransform.Configuration

from .ConfigFunctionManager import ConfigFunctionManager
from .GlobalFunctions import GlobalFunctions

GlobalFunctions.register_functions()
-------------------------------------------------------------------------------- /FlexTransform/Configuration/__init__.py: --------------------------------------------------------------------------------
'''
Created on Jul 27, 2014

@author: ahoying
'''

from FlexTransform.Configuration.Config import Config
-------------------------------------------------------------------------------- /FlexTransform/FlexT.py: --------------------------------------------------------------------------------
"""
Created on Jun 17, 2015

@author: ahoying
"""

import argparse
import json
import logging
import os
import sys
import traceback

import rdflib

from FlexTransform import FlexTransform
from FlexTransform.OntologyOracle import Oracle


# Configure logging to send INFO, DEBUG and TRACE messages to stdout and all other logs to stderr
# Based on code from http://stackoverflow.com/questions/2302315/how-can-info-and-debug-logging-message-be-sent-to-stdout-and-higher-level-messag
class LessThenFilter(logging.Filter):
    def __init__(self, level):
        self._level = level
        logging.Filter.__init__(self)

    def filter(self, rec):
        return rec.levelno < self._level


def main():
    log = logging.getLogger()
    log.setLevel(logging.NOTSET)

    sh_out = logging.StreamHandler(stream=sys.stdout)
    sh_out.setLevel(logging.DEBUG)
    sh_out.setFormatter(logging.Formatter('%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s'))
    sh_out.addFilter(LessThenFilter(logging.WARNING))
    log.addHandler(sh_out)

    sh_err = logging.StreamHandler(stream=sys.stderr)
    sh_err.setLevel(logging.WARNING)
    sh_err.setFormatter(logging.Formatter('%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s'))
    log.addHandler(sh_err)

    parser = argparse.ArgumentParser(
        description="Transform a source file's syntax and schema to the target file document type")
    parser.add_argument('--src-config',
                        type=argparse.FileType('r'),
                        help='Source file parser configuration',
                        metavar='CONFIG',
                        required=True)
    parser.add_argument('--src',
                        type=argparse.FileType('r'),
                        help='Source file',
                        required=True)
    parser.add_argument('--src-metadata',
                        type=argparse.FileType('r'),
                        help='Source Metadata file',
                        required=False)
    parser.add_argument('--dst-config',
                        type=argparse.FileType('r'),
                        help='Destination file parser configuration',
                        metavar='CONFIG',
                        required=True)
    parser.add_argument('--dst',
                        type=argparse.FileType('w'),
                        help='Destination file',
                        required=True)

    parser.add_argument('--tbox-uri',
                        type=argparse.FileType('r'),
                        help='The uri location of the tbox file to load',
                        required=False)
    parser.add_argument('--source-schema-IRI',
                        help='The ontology IRI for the source',
                        required=False)
    parser.add_argument('--destination-schema-IRI',
                        help='The ontology IRI for the destination',
                        required=False)
    parser.add_argument('--trace-src-field',
                        help='Given the name of a field from the source schema, will output trace messages to log.trace() as it is processed',
                        action='append',
                        default=[],
                        required=False)
    parser.add_argument('--trace-dst-field',
                        help='Given the name of a field from the dest schema, will output trace messages to log.trace() as it is processed',
                        action='append',
                        default=[],
                        required=False)
    parser.add_argument('--trace-src-IRI',
                        help='Given the name of an IRI from the source schema, will output trace messages to log.trace() as it is processed',
                        action='append',
                        default=[],
                        required=False)
    parser.add_argument('--trace-dst-IRI',
                        help='Given the name of an IRI from the dest schema, will output trace messages to log.trace() as it is processed',
                        action='append',
                        default=[],
                        required=False)
    parser.add_argument('--logging-level', '-l',
                        help="Set the output level for the logger. Acceptable values: debug, info, warning, error, critical",
                        required=False)

    args = parser.parse_args()
    try:
        if args.logging_level:
            if args.logging_level.lower() == "debug":
                log.setLevel(logging.DEBUG)
            elif args.logging_level.lower() == "info":
                log.setLevel(logging.INFO)
            elif args.logging_level.lower() == "warning":
                log.setLevel(logging.WARNING)
            elif args.logging_level.lower() == "error":
                log.setLevel(logging.ERROR)
            elif args.logging_level.lower() == "critical":
                log.setLevel(logging.CRITICAL)
        transform = FlexTransform.FlexTransform(source_fields=args.trace_src_field,
                                                source_iri=args.trace_src_IRI,
                                                destination_fields=args.trace_dst_field,
                                                destination_iri=args.trace_dst_IRI,
                                                logging_level=logging.NOTSET)

        transform.add_parser('src', args.src_config)
        transform.add_parser('dst', args.dst_config)

        metadata = None

        if args.src_metadata:
            metadata = json.load(args.src_metadata)

        kb = None

        if args.tbox_uri:
            if args.destination_schema_IRI:
                kb = Oracle(args.tbox_uri, rdflib.URIRef(args.destination_schema_IRI))
            else:
                logging.warning(
                    "Ontology file specified, but no destination schema IRI is given. Ontology will not be used.")

        FinalizedData = transform.transform(
            source_file=args.src,
            target_file=args.dst,
            source_parser_name='src',
            target_parser_name='dst',
            source_meta_data=metadata,
            oracle=kb)
        args.dst.close()

    except Exception as inst:
        log.error(inst)
        ''' For debugging - capture to log.debug instead? '''
        traceback.print_exc()
        args.dst.close()
        os.remove(args.dst.name)
        exit(1)

    else:
        log.info("Success")
        exit(0)

if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- /FlexTransform/FlexTBatch.py: --------------------------------------------------------------------------------
'''
Created on Apr 12, 2017

@author: taxon
'''
#
# FlexTBatch is a simple wrapper script to FlexT that allows the user to start
# FlexT and send it commands via stdin for processing. If there are many files to convert,
# the config files only need to be specified once and will also only be loaded once, saving
# a significant amount of time in processing subsequent transforms
#
import argparse
import sys
import logging
import json
from FlexTransform import FlexTransform
#
# process a single command
# valid commands are:
#    config {config_name}={config_file}
#        loads a configuration file and stores it as config_name
#    transform src_format={src_config_name} src_file={source_file} [src_metadata={metadata_file}] dest_format={dest_config_name} dest_file={output_file}
#        Transforms the source_file from src_config_name into dest_file using dest_config_name and optionally the src_metadata.
#    list_configs
#        Outputs a JSON list of the currently loaded config names
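#        Example session (hypothetical paths; fields are separated by the --delimiter character, tab by default):
#            config<TAB>cfm13=FlexTransform/resources/sampleConfigurations/cfm13.cfg
#            config<TAB>stix=FlexTransform/resources/sampleConfigurations/stix_tlp.cfg
#            transform<TAB>src_format=cfm13<TAB>src_file=ExampleFiles/SampleInput-CFM13.xml<TAB>dest_format=stix<TAB>dest_file=output.xml
#            quit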
#    quit
#        Terminates the program
#
def processCommand(flexT,inputData):
    cmd=inputData[0]
    if(cmd=='config'):
        try:
            # config config_id={config_filename}
            if(len(inputData)>2):
                logging.error("Invalid config inputData: "+(" ".join(inputData))+"\n")
                return False
            cvtFormat,filename=inputData[1].split("=")
            if(cvtFormat in flexT.Parsers):
                logging.error("Format already specified: "+cvtFormat+"\n")
                return False
            with(open(filename,"r")) as cfg:
                flexT.add_parser(cvtFormat,cfg)
        except Exception as e:
            logging.error("An exception has occurred while adding configuration: "+str(e))

    elif(cmd=='list_configs'):
        print(json.JSONEncoder().encode({ 'configs': list(flexT.Parsers.keys()) }))
    elif(cmd=='transform'):
        # src_format={src_format} src_file={source_filename} src_metadata={source_metadata_filename} dest_format={dest_format} dest_file={dest_filename}
        try:
            data={}
            hasError=False
            for kv in inputData[1:]:
                key,value=kv.split("=")
                if(key in data):
                    logging.error("Invalid transform inputData - duplicate key: "+key+"\n")
                    hasError=True
                data[key]=value
            if(hasError):
                return False
            required={'src_format','src_file','dest_format','dest_file'}
            optional={'src_metadata'}
            hasAllRequired=data.keys() >= required
            extraKeys=data.keys() - required - optional
            if(len(extraKeys)>0):
                logging.error("Unsupported keys in transform: "+str(extraKeys)+"\n")
                return False
            if(not hasAllRequired):
                logging.error("Missing required keys in transform: "+str(required - data.keys())+"\n")
                return False
            # Load the optional source metadata file as JSON; FlexTransform.transform expects a dict
            metadata=None
            if('src_metadata' in data):
                with open(data['src_metadata'],"r") as metadata_file:
                    metadata=json.load(metadata_file)
            with open(data['src_file'],"r") as input_file:
                with open(data['dest_file'],"w") as output_file:
                    try:
                        flexT.transform(source_file=input_file,
                                        source_parser_name=data['src_format'],
                                        target_parser_name=data['dest_format'],
                                        target_file=output_file,
                                        source_meta_data=metadata)
                    except Exception as e:
                        logging.error("An exception has occurred while transforming file: "+str(e))
                        return False
        except Exception as e:
            logging.error("An exception has occurred while setting up for transform: "+str(e))
    elif(cmd=='quit'):
        return True
    else:
        logging.error("Unknown command: "+cmd)

class LessThanFilter(logging.Filter):
    def __init__(self, level):
        self._level = level
        logging.Filter.__init__(self)

    def filter(self, rec):
        return rec.levelno < self._level

def initializeLogging(stdout_level):
    log = logging.getLogger()
    log.setLevel(logging.NOTSET)

    sh_out = logging.StreamHandler(stream=sys.stdout)
    sh_out.setLevel(stdout_level)
    sh_out.setFormatter(logging.Formatter('%(levelname)s:%(message)s'))
    sh_out.addFilter(LessThanFilter(logging.WARNING))
    log.addHandler(sh_out)

    sh_err = logging.StreamHandler(stream=sys.stderr)
    sh_err.setLevel(logging.WARNING)
    sh_err.setFormatter(logging.Formatter('%(levelname)s:%(message)s'))
    log.addHandler(sh_err)

def main():
    parser = argparse.ArgumentParser(
        description="Transform a source file's syntax and schema to the target file document type")
    parser.add_argument('--delimiter',
                        help='Delimiter used for input lines (default \'\t\')',
                        metavar='DELIM_CHAR',
                        default='\t',
                        required=False)
    parser.add_argument('--output-done-markers',
                        dest='output_markers',
                        help='Output {err-done} and {out-done} when processing a command is complete. Useful if a program is controlling batch execution.',
                        action='store_true',
                        default=False,
                        required=False)
    parser.add_argument('--stdout-log-level',
                        dest='stdout_level',
                        help='Log level to output to stdout. (stderr will always be WARNING)',
                        choices=['NOTSET','DEBUG','INFO'],
                        default='NOTSET',
                        required=False)
    args = parser.parse_args()

    initializeLogging(args.stdout_level)
    flexT = FlexTransform.FlexTransform()
    done=False
    while(not done):
        try:
            inputData=sys.stdin.readline()
            if(not inputData):
                # EOF on stdin: stop cleanly instead of spinning on empty reads
                break
            cmd=inputData.strip().split(args.delimiter)
            done=processCommand(flexT,cmd)
        except Exception as e:
            logging.error("An exception has occurred while processing input: "+str(e))
        if(args.output_markers):
            sys.stderr.write('{err-done}\n')
            sys.stderr.flush()
            sys.stdout.write('{out-done}\n')
            sys.stdout.flush()

if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- /FlexTransform/FlexTransform.py: --------------------------------------------------------------------------------
"""
Created on Jul 27, 2014

@author: ahoying
"""

import logging
import warnings

import rdflib

from .Configuration import Config
from .OntologyOracle import Oracle


# TODO: Document in Sphinx compatible format

class FlexTransform(object):
    '''
    API for performing a Flexible Transform of source documents to target documents based on syntax and schema mappings against the ontology
    '''

    def __init__(self,
                 logging_level=logging.WARN,
                 trace=None,
                 source_fields=None,
                 destination_fields=None,
                 source_iri=None,
                 destination_iri=None):
        self.Parsers = {}
        self.logging = logging.getLogger('FlexTransform')

        if logging_level:
            self.logging.setLevel(logging_level)

        self.oracle = None

        if trace is None and (source_fields or source_iri or destination_fields or destination_iri):
            self.trace = True
        else:
            self.trace = trace

        self.trace_list = []
        if self.trace:
            self._create_trace_list(source_fields=source_fields, destination_fields=destination_fields,
                                    source_iri=source_iri, destination_iri=destination_iri)

    def add_parser(self, parser_name, config_file):
        """
        Add Parser to FlexTransform Object

        :param parser_name: String name of parser to add
        :param config_file: File of parser

        :type parser_name: String
        :type config_file: File Object
        :return:
        """

        parser_config = Config(config_file, parser_name, self.trace, trace_list=self.trace_list)

        if parser_name in self.Parsers:
            self.logging.warn('Parser %s already configured, configuration will be overwritten', parser_name)

        if parser_config:
            self.Parsers[parser_name] = parser_config

    def add_oracle(self, tbox_location, schema_iri):
        '''
        Add oracle to the FlexTransform object
        '''

        # TODO add error checking for locations
        self.oracle = Oracle(tbox_location, rdflib.URIRef(schema_iri), self.trace, trace_list=self.trace_list)

    def transform(self, source_file, source_parser_name, target_parser_name,
                  target_file=None, source_meta_data=None, oracle=None):
        """
        Transform the
data from source_file using source_parser_name as the source and target_parser_name as the destination.
        Returns transformed data to the caller.

        :param source_file: File containing information to be transformed
        :param source_parser_name: String descriptor of parser to be used for source
        :param target_parser_name: String descriptor of parser to be used for destination
        :param target_file: File to place transformed information
        :param source_meta_data: Dictionary of metadata values describing the source file
        :param oracle: An instance of the OntologyOracle, initialized with the TBOX URI. If None, will not be used.

        :type source_file: File Object
        :type source_parser_name: String
        :type target_parser_name: String
        :type target_file: File Object
        :return:
        """

        if source_file is None or source_parser_name is None or target_parser_name is None:
            raise Exception('MissingParameter', 'Required parameter is not defined')

        if source_parser_name not in self.Parsers:
            raise Exception('ParserNotFound', 'Source parser %s has not been configured' % source_parser_name)

        if target_parser_name not in self.Parsers:
            raise Exception('ParserNotFound', 'Target parser %s has not been configured' % target_parser_name)

        if source_meta_data is not None and not isinstance(source_meta_data, dict):
            raise Exception('IncorrectFormat', 'source_meta_data must be in dictionary format')

        if self.oracle:
            oracle = self.oracle

        # Parse and validate configurations
        source_config = self.Parsers[source_parser_name]
        destination_config = self.Parsers[target_parser_name]

        # Calculate "DerivedData" functions
        source_config.calculate_derived_data(source_file=source_file, dest_file=target_file)

        # Parse source file into dictionary object
        source_data = source_config.Parser.Read(source_file, source_config)

        if source_data is None:
            raise Exception('NoSourceData', 'Source data file could not be parsed, no data')

        # Map source file data to source schema
        mapped_data = source_config.SchemaParser.map_data_to_schema(source_data, oracle)

        if source_meta_data is not None:
            source_config.SchemaParser.map_metadata_to_schema(source_meta_data)

        # Map source data to destination schema
        transformed_data = destination_config.SchemaParser.TransformData(mapped_data, oracle)

        # Finalize data to be written
        finalized_data = destination_config.Parser.Finalize(transformed_data)

        if target_file is not None:
            destination_config.Parser.Write(target_file, finalized_data)

        return finalized_data

    def _create_trace_list(self, source_fields=None, destination_fields=None, source_iri=None, destination_iri=None):

        trace_list = []
        if source_fields:
            for arg in source_fields:
                trace_list.append({"src_fields": [arg], "src_IRIs": list(), "dst_fields": list(), "dst_IRIs": list()})
        if source_iri:
            for arg in source_iri:
                trace_list.append({"src_fields": list(), "src_IRIs": [arg], "dst_fields": list(), "dst_IRIs": list()})
        if destination_fields:
            for arg in destination_fields:
                trace_list.append({"src_fields": list(), "src_IRIs": list(), "dst_fields": [arg], "dst_IRIs": list()})
        if destination_iri:
            for arg in destination_iri:
                trace_list.append({"src_fields": list(), "src_IRIs": list(), "dst_fields": list(), "dst_IRIs": [arg]})
        self.trace_list = trace_list

    def AddParser(self, parserName, configFile, sourceFileName=None, destFileName=None):
        warnings.warn('"AddParser()" has been deprecated in favor of "add_parser()"', DeprecationWarning)
        self.logging.warn('"AddParser()" has been deprecated in favor of "add_parser()"')
        return self.add_parser(parserName, configFile)

    def TransformFile(self, sourceFileName, sourceParserName, targetParserName,
                      targetFileName=None, sourceMetaData=None, oracle=None):
        warnings.warn('"TransformFile()" has been deprecated in favor of "transform()"', DeprecationWarning)
        self.logging.warn('"TransformFile()" has been deprecated in favor of "transform()"')
        return self.transform(sourceFileName, sourceParserName, targetParserName, target_file=targetFileName,
                              source_meta_data=sourceMetaData, oracle=oracle)


if __name__ == '__main__':
    raise Exception("Unsupported", "FlexTransform.py should not be called directly, use helper script FlexT.py")
-------------------------------------------------------------------------------- /FlexTransform/OntologyOracle/__init__.py: --------------------------------------------------------------------------------
from .OntologyOracle import Oracle
-------------------------------------------------------------------------------- /FlexTransform/SchemaParser/TransformFunctions/CFM13Functions.py: --------------------------------------------------------------------------------
"""
Created on Mar 13, 2015

@author: ahoying
"""

import logging
import pprint

import arrow

from FlexTransform.SchemaParser.TransformFunctions import TransformFunctionManager


class CFM13Functions(object):
    """
    Contains Transform functions that multiple schemas utilize
    """

    """
    The _FunctionNames dictionary should contain each function name understood by this class, with a scope of
    indicator data or header data, mapped to a list of required fields to be passed in the args dictionary,
    or None if no args are required.

    Allowed fields for the Args dictionary:

    functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser.
28 | This is the value of the string between the () in the function name in the .json 29 | schema configuration files 30 | 31 | fieldName - Required - The name of the current field 32 | 33 | fieldDict - Required - The field dictionary for the current field getting transformed 34 | 35 | currentRow - Optional - The transformed data and associated field dictionaries for the currently processed row 36 | 37 | indicatorType - Optional - The indicator type for the current row 38 | 39 | transformedData - Optional - The dictionary of all current transformed data 40 | 41 | """ 42 | 43 | __FunctionNames = { 44 | 'DocumentHeaderData': { 45 | 'CFM13_determineTLP': ['transformedData'], 46 | 'CFM13_determineReportOUO': ['transformedData'], 47 | 'CFM13_earliestIndicatorTime': ['transformedData'] 48 | }, 49 | 'IndicatorData': { 50 | 'CFM13_GenerateRestrictionsDescription': ['currentRow'], 51 | 'CFM13_SightingsCount': ['functionArg', 'currentRow'] 52 | } 53 | } 54 | 55 | def __init__(self): 56 | """ 57 | Constructor 58 | """ 59 | self.logging = logging.getLogger('FlexTransform.SchemaParser.CFM13Functions') 60 | self.pprint = pprint.PrettyPrinter() 61 | 62 | @classmethod 63 | def RegisterFunctions(cls): 64 | for Scope, Functions in cls.__FunctionNames.items(): 65 | for FunctionName, RequiredArgs in Functions.items(): 66 | TransformFunctionManager.register_function(Scope, FunctionName, RequiredArgs, 'CFM13Functions') 67 | 68 | def Execute(self, Scope, FunctionName, args): 69 | """ 70 | Execute the specific called function with the supplied args 71 | """ 72 | 73 | value = None 74 | 75 | if FunctionName not in self.__FunctionNames[Scope]: 76 | raise Exception('FunctionNotDefined', 77 | 'Function %s is not defined in CFM13Functions for document scope %s' % ( 78 | FunctionName, Scope)) 79 | 80 | if FunctionName == 'CFM13_GenerateRestrictionsDescription': 81 | value = '' 82 | if 'ouo' in args['currentRow'] and 'Value' in args['currentRow']['ouo']: 83 | value += "OUO=" 84 | if args['currentRow']['ouo']['Value'] == '1': 85 | value += "True" 86 | else: 87 | value += "False" 88 | if 'recon' in args['currentRow'] and 'Value' in args['currentRow']['recon']: 89 | if value != '': 90 | value += ", " 91 | value += "ReconAllowed=" 92 | if args['currentRow']['recon']['Value'] == '0': 93 | value += "True" 94 | else: 95 | value += "False" 96 | if 'restriction' in args['currentRow'] and 'Value' in args['currentRow']['restriction']: 97 | if value != '': 98 | value += ", " 99 | value += "SharingRestrictions=%s" % args['currentRow']['restriction']['Value'] 100 | 101 | elif FunctionName == 'CFM13_determineTLP': 102 | valuemap = {"WHITE": 1, "GREEN": 2, "AMBER": 3, "RED": 4} 103 | value = 'WHITE' 104 | for subrow in args['transformedData']['IndicatorData']: 105 | if 'restriction' in subrow: 106 | if subrow['restriction']['Value'] == 'private': 107 | if valuemap['AMBER'] > valuemap[value]: 108 | value = 'AMBER' 109 | if subrow['restriction']['Value'] == 'need-to-know': 110 | if valuemap['GREEN'] > valuemap[value]: 111 | value = 'GREEN' 112 | if 'ouo' in subrow: 113 | if subrow['ouo']['Value'] == '1': 114 | if valuemap['GREEN'] > valuemap[value]: 115 | value = 'GREEN' 116 | 117 | elif FunctionName == 'CFM13_earliestIndicatorTime': 118 | # For now this function is specific to CFM13, it could be made generic if needed in other Schemas 119 | mintime = None 120 | for subrow in args['transformedData']['IndicatorData']: 121 | if 'create_time' in subrow: 122 | indicatorTime = arrow.get(subrow['create_time']['Value'], 'YYYY-MM-DDTHH:mm:ssZ') 
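                    # Keep the earliest (minimum) creation time seen across all indicators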
123 | if mintime is None or mintime > indicatorTime: 124 | mintime = indicatorTime 125 | 126 | if mintime is not None: 127 | value = mintime.format('YYYY-MM-DDTHH:mm:ssZZ') 128 | else: 129 | value = args['currentRow']['analyzer_time']['Value'] 130 | 131 | elif FunctionName == 'CFM13_SightingsCount': 132 | sightings = 1 133 | if args['functionArg'] in args['currentRow'] and 'Value' in args['currentRow'][args['functionArg']]: 134 | sightings += int(args['currentRow'][args['functionArg']]['Value']) 135 | 136 | value = str(sightings) 137 | 138 | elif FunctionName == 'CFM13_determineReportOUO': 139 | ''' 140 | This function determines the OUO level of the overall report by assuming that if any included indicator is OUO, 141 | then the entire report is OUO. 142 | ''' 143 | value = "0" 144 | self.logging.debug("Evaluating report OUO status based on {} indicators.".format(len(args['transformedData']['IndicatorData']))) 145 | for indicator in args['transformedData']['IndicatorData']: 146 | self.logging.debug("Checking indicator OUO value: {}".format(indicator['ouo']['Value'])) 147 | if indicator['ouo']['Value'] == "1": 148 | value = "1" 149 | break 150 | self.logging.debug("Returning value {}".format(value)) 151 | 152 | return value 153 | -------------------------------------------------------------------------------- /FlexTransform/SchemaParser/TransformFunctions/CFM20Functions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Mar 13, 2015 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | import re 8 | import logging 9 | 10 | from FlexTransform.SchemaParser.TransformFunctions import TransformFunctionManager 11 | 12 | class CFM20Functions(object): 13 | ''' 14 | Contains Transform functions that multiple schemas utilize 15 | ''' 16 | 17 | ''' 18 | The _FunctionNames dictionary should contain each function name understood by this class for with a scope of indicator data or header data 19 | mapped to a list with required fields to be passed in the args dictionary, or None if no args are required. 20 | 21 | Allowed fields for the Args dictionary: 22 | 23 | functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser. 
24 | This is the value of the string between the () in the function name in the .json schema configuration files 25 | 26 | fieldName - Required - The name of the current field 27 | 28 | fieldDict - Required - The field dictionary for the current field getting transformed 29 | 30 | currentRow - Optional - The transformed data and associated field dictionaries for the currently processed row 31 | 32 | indicatorType - Optional - The indicator type for the current row 33 | 34 | transformedData - Optional - The dictionary of all current transformed data 35 | 36 | ''' 37 | 38 | __FunctionNames = { 39 | 'IndicatorData': { 40 | 'CFM20_determineIndicatorConstraint': ['functionArg', 'currentRow'] 41 | } 42 | } 43 | 44 | def __init__(self): 45 | ''' 46 | Constructor 47 | ''' 48 | self.logging = logging.getLogger('FlexTransform.SchemaParser.CFM20Functions') 49 | 50 | @classmethod 51 | def RegisterFunctions(cls): 52 | for Scope, Functions in cls.__FunctionNames.items() : 53 | for FunctionName, RequiredArgs in Functions.items() : 54 | TransformFunctionManager.register_function(Scope, FunctionName, RequiredArgs, 'CFM20Functions') 55 | 56 | def Execute(self, Scope, FunctionName, args): 57 | ''' 58 | Execute the specific called function with the supplied args 59 | ''' 60 | 61 | Value = None 62 | 63 | if (FunctionName not in self.__FunctionNames[Scope]) : 64 | raise Exception('FunctionNotDefined','Function %s is not defined in CFM20Functions for document scope %s' % (FunctionName, Scope)) 65 | 66 | if (FunctionName == 'CFM20_determineIndicatorConstraint') : 67 | # TODO: It would be great if somehow we could query the ontology to get this. Complete for all indicator constraints. 68 | 69 | if (args['functionArg'] in args['currentRow'] and 'Value' in args['currentRow'][args['functionArg']]) : 70 | indicatorValue = args['currentRow'][args['functionArg']]['Value'] 71 | indicatorOntology = args['currentRow'][args['functionArg']]['matchedOntology'] 72 | 73 | if (indicatorOntology == 'http://www.anl.gov/cfm/transform.owl#FilenameIndicatorValueSemanticConcept') : 74 | Value = 'http://www.anl.gov/cfm/2.0/current/#StringValueMatch' 75 | elif (re.match(r'^((\d){1,3}\.){3}(\d){1,3}$', indicatorValue)) : 76 | Value = 'http://www.anl.gov/cfm/2.0/current/#IPv4DottedDecimalEquality' 77 | elif (re.match(r'^[a-fA-F0-9]+:+[a-fA-F0-9:]+$', indicatorValue)) : 78 | Value = 'http://www.anl.gov/cfm/2.0/current/#IPv6ColonHexEquality' 79 | elif (re.match(r'^([a-z0-9][^./]+\.)+[a-z]+$', indicatorValue)) : 80 | Value = 'http://www.anl.gov/cfm/2.0/current/#DNSDomainNameMatch' 81 | elif (re.match(r'^((ft|htt)ps?://)?([a-z][^./]+\.)+[a-z]+/.*$', indicatorValue)) : 82 | Value = 'http://www.anl.gov/cfm/2.0/current/#URLMatch' 83 | elif (re.match(r'^[a-fA-F0-9]{32}$', indicatorValue)) : 84 | Value = 'http://www.anl.gov/cfm/2.0/current/#MD5Equality' 85 | elif (re.match(r'^[a-fA-F0-9]{40}$', indicatorValue)) : 86 | Value = 'http://www.anl.gov/cfm/2.0/current/#SHA1Equality' 87 | elif (re.match(r'^\d+$', indicatorValue)) : 88 | Value = 'http://www.anl.gov/cfm/2.0/current/#IntegerEquality' 89 | 90 | if (Value is None) : 91 | # Still didn't find an indicator type, throw exception 92 | raise Exception('unknownIndicatorConstraint', 'CFM 2.0 Indicator constraint could not be determined for data: %s :: field %s' % (args['currentRow'][args['functionArg']]['Value'], indicatorOntology)) 93 | 94 | return Value -------------------------------------------------------------------------------- /FlexTransform/SchemaParser/TransformFunctions/GlobalFunctions.py: 
-------------------------------------------------------------------------------- 1 | ''' 2 | Created on Mar 13, 2015 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | import logging 8 | import uuid 9 | 10 | import arrow 11 | 12 | from FlexTransform.SchemaParser.TransformFunctions import TransformFunctionManager 13 | 14 | 15 | class GlobalFunctions(object): 16 | """ 17 | Contains Transform functions that multiple schemas utilize 18 | """ 19 | 20 | ''' 21 | The _FunctionNames dictionary should contain each function name understood by this class for with a scope of indicator data or header data 22 | mapped to a list with required fields to be passed in the args dictionary, or None if no args are required. 23 | 24 | Allowed fields for the Args dictionary: 25 | 26 | functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser. 27 | This is the value of the string between the () in the function name in the .json schema configuration files 28 | 29 | fieldName - Required - The name of the current field 30 | 31 | fieldDict - Required - The field dictionary for the current field getting transformed 32 | 33 | currentRow - Optional - The transformed data and associated field dictionaries for the currently processed row 34 | 35 | indicatorType - Optional - The indicator type for the current row 36 | 37 | transformedData - Optional - The dictionary of all current transformed data 38 | 39 | ''' 40 | 41 | __FunctionNames = { 42 | 'DocumentHeaderData': { 43 | 'countOfIndicators': ['transformedData'], 44 | 'now': ['fieldDict'], 45 | }, 46 | 'IndicatorData': { 47 | 'calculate_duration': ['currentRow'], 48 | 'now': ['fieldDict'], 49 | 'generate_uuid': None, 50 | 'mbl_sourcetype': ['indicatorType'] 51 | } 52 | } 53 | 54 | def __init__(self): 55 | """ 56 | Constructor 57 | """ 58 | self.logging = logging.getLogger('FlexTransform.SchemaParser.GlobalFunctions') 59 | 60 | @classmethod 61 | def RegisterFunctions(cls): 62 | for Scope, Functions in cls.__FunctionNames.items(): 63 | for FunctionName, RequiredArgs in Functions.items(): 64 | TransformFunctionManager.register_function(Scope, FunctionName, RequiredArgs, 'GlobalFunctions') 65 | 66 | def Execute(self, scope, function_name, args): 67 | """ 68 | Execute the specific called function with the supplied args 69 | """ 70 | 71 | value = None 72 | 73 | if function_name not in self.__FunctionNames[scope]: 74 | raise Exception('FunctionNotDefined', 75 | 'Function %s is not defined in GlobalFunctions for document scope %s' % (function_name, scope)) 76 | if function_name == 'calculate_duration': 77 | if 'functionArg' not in args or not args['functionArg']: 78 | self.logging.error('FlexT function "calculate_duration" requires the field name to base value on') 79 | elif args['functionArg'] not in args['currentRow']: 80 | self.logging.error('FlexT function "calculate_duration": {} not in {}'.format(args['functionArg'], list(args['currentRow'].keys()))) 81 | elif args['currentRow'][args['functionArg']]['ParsedValue']: 82 | duration_val = arrow.get(args['currentRow'][args['functionArg']]['ParsedValue']).timestamp - arrow.utcnow().timestamp 83 | if duration_val < 0: 84 | return "0" 85 | return str(duration_val) 86 | elif function_name == 'now': 87 | if 'dateTimeFormat' in args['fieldDict']: 88 | if args['fieldDict']['dateTimeFormat'] == 'unixtime': 89 | value = str(arrow.utcnow().timestamp) 90 | else: 91 | value = arrow.utcnow().format(args['fieldDict']['dateTimeFormat']) 92 | # TODO - Handle case of no 'dateTimeFormat' 93 | elif function_name == 
'countOfIndicators': 94 | if 'IndicatorData' in args['transformedData']: 95 | value = str(len(args['transformedData']['IndicatorData'])) 96 | else: 97 | value = '0' 98 | 99 | elif function_name == 'generate_uuid': 100 | value = str(uuid.uuid4()) 101 | 102 | elif function_name == 'mbl_sourcetype': 103 | if ('ipv4' in args['currentRow'] and args['currentRow']['ipv4']['Value']) or \ 104 | ('ipv6' in args['currentRow'] and args['currentRow']['ipv6']['Value']) or \ 105 | ('fqdn' in args['currentRow'] and args['currentRow']['fqdn']['Value']): 106 | value = 'block' 107 | elif ('envelopeaddress' in args['currentRow'] and args['currentRow']['envelopeaddress']['Value']) or \ 108 | ('subject' in args['currentRow'] and args['currentRow']['subject']['Value']) or \ 109 | ('xheader' in args['currentRow'] and args['currentRow']['xheader']['Value']): 110 | value = 'spearphish' 111 | elif ('sizeOnDisk' in args['currentRow'] and args['currentRow']['sizeOnDisk']['Value']) or \ 112 | ('compileTime' in args['currentRow'] and args['currentRow']['compileTime']['Value']) or \ 113 | ('md5hash' in args['currentRow'] and args['currentRow']['md5hash']['Value']) or \ 114 | ('sha1hash' in args['currentRow'] and args['currentRow']['sha1hash']['Value']) or \ 115 | ('sha256hash' in args['currentRow'] and args['currentRow']['sha256hash']['Value']): 116 | value = 'malware' 117 | return value 118 | -------------------------------------------------------------------------------- /FlexTransform/SchemaParser/TransformFunctions/STIXFunctions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Mar 13, 2015 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | import logging 8 | 9 | import arrow 10 | 11 | from FlexTransform.SchemaParser.TransformFunctions import TransformFunctionManager 12 | 13 | 14 | class STIXFunctions(object): 15 | ''' 16 | Contains Transform functions that multiple schemas utilize 17 | ''' 18 | 19 | ''' 20 | The _FunctionNames dictionary should contain each function name understood by this class for with a scope of indicator data or header data 21 | mapped to a list with required fields to be passed in the args dictionary, or None if no args are required. 22 | 23 | Allowed fields for the Args dictionary: 24 | 25 | functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser. 
26 | This is the value of the string between the () in the function name in the .json schema configuration files 27 | 28 | fieldName - Required - The name of the current field 29 | 30 | fieldDict - Required - The field dictionary for the current field getting transformed 31 | 32 | currentRow - Optional - The transformed data and associated field dictionaries for the currently processed row 33 | 34 | indicatorType - Optional - The indicator type for the current row 35 | 36 | transformedData - Optional - The dictionary of all current transformed data 37 | 38 | ''' 39 | 40 | __FunctionNames = { 41 | 'DocumentHeaderData': { 42 | 'stix_now': ['fieldDict'] 43 | }, 44 | 'IndicatorData': { 45 | 'stix_now': ['fieldDict'] 46 | } 47 | } 48 | 49 | def __init__(self): 50 | ''' 51 | Constructor 52 | ''' 53 | self.logging = logging.getLogger('FlexTransform.SchemaParser.STIXFunctions') 54 | 55 | @classmethod 56 | def RegisterFunctions(cls): 57 | for Scope, Functions in cls.__FunctionNames.items() : 58 | for FunctionName, RequiredArgs in Functions.items() : 59 | TransformFunctionManager.register_function(Scope, FunctionName, RequiredArgs, 'STIXFunctions') 60 | 61 | def Execute(self, Scope, function_name, args): 62 | ''' 63 | Execute the specific called function with the supplied args 64 | ''' 65 | 66 | value = None 67 | 68 | if function_name not in self.__FunctionNames[Scope] : 69 | raise Exception('FunctionNotDefined', 70 | 'Function %s is not defined in STIXFunctions for document scope %s' % (function_name, Scope)) 71 | if function_name == 'stix_now': 72 | if 'dateTimeFormat' in args['fieldDict']: 73 | value = arrow.utcnow().format(args['fieldDict']['dateTimeFormat']) 74 | # self.logging.info("Called stix now") 75 | # match = re.match(r"(.*)([+-]\d\d)(\d\d)$", value) 76 | # if match: 77 | # value = match.group(1) + match.group(2) + ":" + match.group(3) 78 | return value 79 | -------------------------------------------------------------------------------- /FlexTransform/SchemaParser/TransformFunctions/TransformFunctionManager.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Mar 13, 2015 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | import inspect 8 | import logging 9 | from collections import defaultdict 10 | 11 | import FlexTransform.SchemaParser.TransformFunctions 12 | 13 | 14 | class TransformFunctionManager(object): 15 | 16 | __KnownFunctions = defaultdict(dict) 17 | 18 | def __init__(self, trace, trace_list=[]): 19 | self.logging = logging.getLogger('FlexTransform.SchemaParser.TransformFunctionManager') 20 | self.trace = trace 21 | self.trace_list = trace_list 22 | if self.trace: 23 | self.logging.debug("Initialized DictionaryParser with tracelist of {} elements.".format(len(trace_list))) 24 | 25 | self._FunctionClasses = {} 26 | 27 | @classmethod 28 | def register_function(cls, scope, function_name, required_args, function_class): 29 | cls.__KnownFunctions[scope][function_name] = {'class': function_class, 'RequiredArgs': required_args} 30 | 31 | @classmethod 32 | def get_function_class(cls, scope, function_name): 33 | if scope in cls.__KnownFunctions and function_name in cls.__KnownFunctions[scope]: 34 | class_name = cls.__KnownFunctions[scope][function_name]['class'] 35 | else: 36 | raise Exception( 37 | 'FunctionNotRegistered', 38 | "The function %s is not registered with the TransformFunctionManager for scope %s" % (function_name, 39 | scope)) 40 | 41 | for name, obj in inspect.getmembers(FlexTransform.SchemaParser.TransformFunctions, 
-------------------------------------------------------------------------------- /FlexTransform/SchemaParser/TransformFunctions/TransformFunctionManager.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Mar 13, 2015 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | import inspect 8 | import logging 9 | from collections import defaultdict 10 | 11 | import FlexTransform.SchemaParser.TransformFunctions 12 | 13 | 14 | class TransformFunctionManager(object): 15 | 16 | __KnownFunctions = defaultdict(dict) 17 | 18 | def __init__(self, trace, trace_list=[]): 19 | self.logging = logging.getLogger('FlexTransform.SchemaParser.TransformFunctionManager') 20 | self.trace = trace 21 | self.trace_list = trace_list 22 | if self.trace: 23 | self.logging.debug("Initialized TransformFunctionManager with tracelist of {} elements.".format(len(trace_list))) 24 | 25 | self._FunctionClasses = {} 26 | 27 | @classmethod 28 | def register_function(cls, scope, function_name, required_args, function_class): 29 | cls.__KnownFunctions[scope][function_name] = {'class': function_class, 'RequiredArgs': required_args} 30 | 31 | @classmethod 32 | def get_function_class(cls, scope, function_name): 33 | if scope in cls.__KnownFunctions and function_name in cls.__KnownFunctions[scope]: 34 | class_name = cls.__KnownFunctions[scope][function_name]['class'] 35 | else: 36 | raise Exception( 37 | 'FunctionNotRegistered', 38 | "The function %s is not registered with the TransformFunctionManager for scope %s" % (function_name, 39 | scope)) 40 | 41 | for name, obj in inspect.getmembers(FlexTransform.SchemaParser.TransformFunctions, inspect.isclass): 42 | if name == class_name: 43 | return obj() 44 | 45 | raise Exception( 46 | 'FunctionClassNotFound', 47 | "The Class %s for function %s was not found by the TransformFunctionManager" % (class_name, function_name)) 48 | 49 | def get_function_scope(self, scope, function_name): 50 | if scope in self.__KnownFunctions and function_name in self.__KnownFunctions[scope]: 51 | return True 52 | else: 53 | return False 54 | 55 | def execute_transform_function(self, scope, function_name, args): 56 | if function_name in self._FunctionClasses: 57 | function_class = self._FunctionClasses[function_name] 58 | else: 59 | function_class = TransformFunctionManager.get_function_class(scope, function_name) 60 | self._FunctionClasses[function_name] = function_class 61 | 62 | self._validate_args(scope, function_name, args) 63 | return function_class.Execute(scope, function_name, args) 64 | 65 | def _validate_args(self, scope, function_name, args): 66 | ''' 67 | Allowed fields for the Args dictionary: 68 | 69 | functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser. 70 | This is the value of the string between the () in the function name in the 71 | .json schema configuration files 72 | 73 | fieldName - Required - The name of the current field 74 | 75 | fieldDict - Required - The field dictionary for the current field getting transformed 76 | 77 | currentRow - Optional - The transformed data and associated field dictionaries for the currently processed row 78 | 79 | indicatorType - Optional - The indicator type for the current row 80 | 81 | transformedData - Optional - The dictionary of all current transformed data 82 | 83 | ''' 84 | allowed_fields = set(['functionArg', 'fieldName', 'fieldDict', 'currentRow', 'indicatorType', 'transformedData']) 85 | required_fields = set(['fieldName', 'fieldDict']) 86 | 87 | if isinstance(args, dict): 88 | for arg in args: 89 | if arg not in allowed_fields: 90 | self.logging.warning('An argument passed to function %s is not allowed: %s' % (function_name, arg)) 91 | else: 92 | raise Exception( 93 | 'InvalidArgs', 94 | 'The arguments passed to function %s are not defined or not in dictionary format' % function_name) 95 | 96 | if self.__KnownFunctions[scope][function_name]['RequiredArgs'] is not None: 97 | required_fields.update(self.__KnownFunctions[scope][function_name]['RequiredArgs']) 98 | 99 | for arg in required_fields: 100 | if arg not in args or args[arg] is None: 101 | raise Exception( 102 | 'InvalidArgs', 103 | 'Function %s args did not include the required %s field, could not process' % (function_name, arg)) 104 | -------------------------------------------------------------------------------- /FlexTransform/SchemaParser/TransformFunctions/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Mar 13, 2015 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | import FlexTransform.SchemaParser 8 | 9 | from .TransformFunctionManager import TransformFunctionManager 10 | from .GlobalFunctions import GlobalFunctions 11 | from .CFM13Functions import CFM13Functions 12 | from .CFM20Functions import CFM20Functions 13 | from .STIXFunctions import STIXFunctions 14 | 15 | GlobalFunctions.RegisterFunctions() 16 | CFM13Functions.RegisterFunctions() 17 | CFM20Functions.RegisterFunctions() 18 | STIXFunctions.RegisterFunctions()
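A sketch of the registration contract implemented above; SiteFunctions and site_tag are hypothetical, and a real class would also have to be importable from the FlexTransform.SchemaParser.TransformFunctions package, since get_function_class() locates classes by name inside that package:

from FlexTransform.SchemaParser.TransformFunctions import TransformFunctionManager

class SiteFunctions(object):
    # Hypothetical site-specific functions, mirroring the shipped classes
    __FunctionNames = {'IndicatorData': {'site_tag': None}}

    @classmethod
    def RegisterFunctions(cls):
        for scope, functions in cls.__FunctionNames.items():
            for function_name, required_args in functions.items():
                TransformFunctionManager.register_function(scope, function_name, required_args, 'SiteFunctions')

    def Execute(self, scope, function_name, args):
        return 'example-site' if function_name == 'site_tag' else None

SiteFunctions.RegisterFunctions()
# execute_transform_function('IndicatorData', 'site_tag', args) would now
# validate the args dictionary and dispatch to SiteFunctions.Execute().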
-------------------------------------------------------------------------------- /FlexTransform/SchemaParser/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Oct 13, 2014 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | from .SchemaParser import SchemaParser -------------------------------------------------------------------------------- /FlexTransform/SyntaxParser/CSVParser.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Aug 13, 2015 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | import logging 8 | import csv 9 | import os 10 | from builtins import str 11 | from FlexTransform.SyntaxParser.Parser import Parser 12 | 13 | class CSVParser(Parser): 14 | ''' 15 | CSV Syntax Parser 16 | ''' 17 | 18 | def __init__(self, trace, tracelist=[]): 19 | ''' 20 | Constructor 21 | ''' 22 | super(CSVParser, self).__init__(trace, tracelist) 23 | 24 | self.ParsedData = {} 25 | self.logging = logging.getLogger('FlexTransform.CSVParser') 26 | self.trace = trace 27 | self.tracelist = tracelist 28 | if self.trace: 29 | self.logging.debug("Initialized CSVParser with tracelist of {} elements.".format(len(tracelist))) 30 | 31 | self.Fields = [] 32 | self.Delimiter = ',' 33 | self.QuoteChar = '"' 34 | self.EscapeChar = None 35 | self.HeaderLine = False 36 | self.DoubleQuote = True 37 | self.QuoteStyle = csv.QUOTE_MINIMAL 38 | self.LineTerminator = '\r\n' 39 | 40 | def ValidateConfig(self, config): 41 | ''' 42 | Validate CSV Parser specific configuration options 43 | 44 | The required Fields option lists, in order, the names of the columns in the csv file 45 | ''' 46 | if config.has_section('CSV'): 47 | if config.has_option('CSV', 'Fields'): 48 | FieldsList = config['CSV']['Fields'] 49 | for field in FieldsList.split(','): 50 | self.Fields.append(field.strip()) 51 | else: 52 | raise Exception("ConfigError", "CSV Configuration does not include the required Fields key") 53 | 54 | if config.has_option('CSV', 'Delimiter'): 55 | self.Delimiter = bytes(config['CSV']['Delimiter'], "utf-8").decode("unicode_escape").strip("\"'") 56 | 57 | if config.has_option('CSV', 'QuoteChar'): 58 | self.QuoteChar = bytes(config['CSV']['QuoteChar'], "utf-8").decode("unicode_escape") 59 | 60 | if config.has_option('CSV', 'EscapeChar'): 61 | self.EscapeChar = bytes(config['CSV']['EscapeChar'], "utf-8").decode("unicode_escape") 62 | 63 | if config.has_option('CSV', 'HeaderLine'): 64 | self.HeaderLine = config.getboolean('CSV', 'HeaderLine', fallback=False) 65 | 66 | if config.has_option('CSV', 'DoubleQuote'): 67 | self.DoubleQuote = config.getboolean('CSV', 'DoubleQuote', fallback=True) 68 | 69 | if config.has_option('CSV', 'QuoteStyle'): 70 | if config['CSV']['QuoteStyle'].lower() == 'none': 71 | self.QuoteStyle = csv.QUOTE_NONE 72 | elif config['CSV']['QuoteStyle'].lower() == 'nonnumeric': 73 | self.QuoteStyle = csv.QUOTE_NONNUMERIC 74 | elif config['CSV']['QuoteStyle'].lower() == 'all': 75 | self.QuoteStyle = csv.QUOTE_ALL 76 | elif config['CSV']['QuoteStyle'].lower() == 'minimal': 77 | self.QuoteStyle = csv.QUOTE_MINIMAL 78 | else: 79 | raise Exception("ConfigError", "Unknown option for CSV QuoteStyle: " + config['CSV']['QuoteStyle']) 80 | 81 | if config.has_option('CSV', 'LineTerminator'): 82 | self.LineTerminator = bytes(config['CSV']['LineTerminator'], "utf-8").decode("unicode_escape") 83 | 84 | def Read(self, file, config): 85 | ''' 86 | Read file and parse into Transform object 87 | ''' 88 | 89 | super(CSVParser, self).Read(file, config) 90 | 91 | self.ParsedData = { 92 | "IndicatorData": [] 93 | } 94 | position = {} 95 | 96 | for idx, field in enumerate(self.Fields): 97 | position[idx] = field 98 | 99 | content = csv.reader(file,
delimiter=self.Delimiter) 100 | for line in content: 101 | to_add = {} 102 | for idx, record in enumerate(line): 103 | record = record.strip("\"'" + self.LineTerminator) 104 | if record: 105 | to_add.update({position[idx]: record}) 106 | self.ParsedData["IndicatorData"].append(to_add) 107 | 108 | return self.ParsedData 109 | 110 | def Finalize(self, MappedData): 111 | ''' 112 | Finalize the formatting of the data before being returned to the caller 113 | ''' 114 | 115 | if 'IndicatorData' not in MappedData or MappedData['IndicatorData'].__len__() == 0: 116 | raise Exception('NoIndicatorData', 'Transformed data has no indicators, nothing to write') 117 | 118 | FinalizedData = [] 119 | for indicator in MappedData['IndicatorData']: 120 | DataRow = {} 121 | # Keep passing the IndicatorType forward with the data. This is somewhat messy, but that way we can use it on write 122 | # DataRow['IndicatorType'] = indicator['IndicatorType'] 123 | 124 | for field in self.Fields: 125 | if field not in indicator: 126 | self.logging.warning("Field %s does not exist in IndicatorData", field) 127 | DataRow[field] = '' 128 | elif 'Value' in indicator[field]: 129 | DataRow[field] = indicator[field]['Value'] 130 | else: 131 | self.logging.warning("Field %s does not contain a Value entry", field) 132 | DataRow[field] = '' 133 | 134 | FinalizedData.append(DataRow) 135 | 136 | return FinalizedData 137 | 138 | def Write(self, file, FinalizedData): 139 | ''' 140 | Write the data as csv to the file. 141 | ''' 142 | if isinstance(file, str): 143 | if os.path.exists(file): 144 | file = open(file, "w") 145 | else: 146 | self.logging.error("%s is not a valid filepath", file) 147 | 148 | csv.register_dialect('flext', 149 | delimiter=self.Delimiter, 150 | quotechar=self.QuoteChar, 151 | escapechar=self.EscapeChar, 152 | doublequote=self.DoubleQuote, 153 | lineterminator=self.LineTerminator, 154 | quoting=self.QuoteStyle) 155 | 156 | writer = csv.DictWriter(file, fieldnames=self.Fields, dialect='flext') 157 | 158 | if self.HeaderLine: 159 | writer.writeheader() 160 | 161 | writer.writerows(FinalizedData) 162 | 163 | -------------------------------------------------------------------------------- /FlexTransform/SyntaxParser/DICTParsers/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Aug 26, 2015 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | from .iSightReport import iSightReport 8 | -------------------------------------------------------------------------------- /FlexTransform/SyntaxParser/DICTParsers/iSightReport.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Aug 26, 2015 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | import json 8 | import logging 9 | 10 | 11 | class iSightReport(object): 12 | ''' 13 | Parser for iSight JSON Reports 14 | ''' 15 | 16 | def __init__(self, trace, tracelist): 17 | ''' 18 | Constructor 19 | ''' 20 | self.logging = logging.getLogger('FlexTransform.DICTParser.iSightReports') 21 | self.trace = trace 22 | self.tracelist = tracelist 23 | 24 | def Read(self, reportFile): 25 | ''' 26 | Read in JSON report file and process into indicators and header data 27 | ''' 28 | 29 | jsondoc = json.load(reportFile) 30 | 31 | if "success" in jsondoc and jsondoc["success"] == True: 32 | 33 | if "message" in jsondoc and "report" in jsondoc["message"]: 34 | Report = jsondoc["message"]["report"] 35 | 36 | indicators = [] 37 | 38 | if "tagSection" in Report: 39 | indicators = 
self._extractIndicators(Report.pop("tagSection")) 40 | 41 | if len(indicators) == 0: 42 | raise Exception("NoData", "iSight JSON document did not contain any indicators") 43 | 44 | ParsedData = {} 45 | ParsedData['IndicatorData'] = indicators 46 | ParsedData['DocumentHeaderData'] = Report 47 | 48 | else: 49 | raise Exception("NoData", "iSight JSON document did not contain a report") 50 | 51 | else: 52 | raise Exception("Unparsable", "iSight JSON document could not be parsed, success field not defined or not True") 53 | 54 | return ParsedData 55 | 56 | def Write(self, reportFile, FinalizedData): 57 | raise Exception("MethodNotDefined", "Write") 58 | 59 | def _extractIndicators(self, tagSection): 60 | 61 | indicators = [] 62 | 63 | for indicatorType in tagSection: 64 | if indicatorType == "main": 65 | # TODO: extract TTP and other targeting data from the main tag 66 | continue 67 | if indicatorType == "networks": 68 | networkList = tagSection["networks"].pop("network") 69 | 70 | if isinstance(networkList, list): 71 | for network in networkList: 72 | # Fix for error in iSight JSON generation that appends a .0 to the end of the asn numbers 73 | if "asn" in network and network["asn"].endswith(".0"): 74 | network["asn"] = network["asn"].replace(".0", "") 75 | indicators.append(network) 76 | else: 77 | indicators.append(networkList) 78 | if indicatorType == "emails": 79 | emailList = tagSection["emails"].pop("email") 80 | 81 | if isinstance(emailList, list): 82 | for email in emailList: 83 | indicators.append(email) 84 | else: 85 | indicators.append(emailList) 86 | if indicatorType == "files": 87 | fileList = tagSection["files"].pop("file") 88 | 89 | if isinstance(fileList, list): 90 | for file in fileList: 91 | indicators.append(file) 92 | else: 93 | indicators.append(fileList) 94 | 95 | return indicators 96 |
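A small sketch of the tagSection structure _extractIndicators() above consumes and the flattened list it returns; the field names are illustrative, not taken from iSight documentation:

tagSection = {
    'networks': {'network': [{'ip': '10.11.12.13', 'asn': '64496.0'}]},
    'emails': {'email': {'senderAddress': 'spearphish@example.com'}}
}

# Each nested list (or bare dict) is flattened into a single indicator list,
# and the spurious '.0' suffix iSight appends to asn values is repaired:
#   [{'ip': '10.11.12.13', 'asn': '64496'},
#    {'senderAddress': 'spearphish@example.com'}]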
-------------------------------------------------------------------------------- /FlexTransform/SyntaxParser/DictionaryParser.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Nov 17, 2014 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | import inspect 8 | import json 9 | import logging 10 | 11 | import FlexTransform.SyntaxParser.DICTParsers 12 | from FlexTransform.SyntaxParser.Parser import Parser 13 | 14 | 15 | class DictionaryParser(Parser): 16 | ''' 17 | Dictionary Syntax Parser 18 | ''' 19 | 20 | def __init__(self, trace, tracelist=[]): 21 | ''' 22 | Constructor 23 | ''' 24 | 25 | super(DictionaryParser, self).__init__(trace, tracelist) 26 | self.ParsedData = {} 27 | self.logging = logging.getLogger('FlexTransform.DictionaryParser') 28 | self.trace = trace 29 | self.tracelist = tracelist 30 | if self.trace: 31 | self.logging.debug("Initialized DictionaryParser with tracelist of {} elements.".format(len(tracelist))) 32 | 33 | self.indicatorsKey = "" 34 | self.AdvancedParser = None 35 | 36 | def LoadAdvancedParser(self, CustomParser): 37 | ''' 38 | Returns the Custom Parser Class from the configuration file if it exists 39 | ''' 40 | for name, obj in inspect.getmembers(FlexTransform.SyntaxParser.DICTParsers, inspect.isclass): 41 | if name == CustomParser: 42 | return obj() 43 | 44 | def ValidateConfig(self, config): 45 | ''' 46 | Validate Dictionary Parser specific configuration options 47 | 48 | The indicatorsKey sets the key in the json document that contains the indicators, or "" if the root of the document contains the indicators 49 | ''' 50 | if config.has_section('DICT'): 51 | if config.has_option('DICT', 'IndicatorsKey'): 52 | self.indicatorsKey = config['DICT']['IndicatorsKey'] 53 | 54 | if config.has_option('DICT', 'CustomParser'): 55 | CustomParser = config['DICT']['CustomParser'] 56 | self.AdvancedParser = self.LoadAdvancedParser(CustomParser) 57 | if self.AdvancedParser is None: 58 | raise Exception('CustomParserNotDefined', 'DICT: ' + CustomParser) 59 | 60 | if config.has_section(CustomParser): 61 | self.AdvancedParser.ValidateConfig(config) 62 | 63 | def Read(self, file, config): 64 | ''' 65 | Read file and parse into Transform object 66 | ''' 67 | 68 | self.ParsedData = {} 69 | 70 | super(DictionaryParser, self).Read(file, config) 71 | 72 | if self.AdvancedParser: 73 | self.ParsedData = self.AdvancedParser.Read(file) 74 | else: 75 | jsondoc = json.load(file) 76 | 77 | if self.indicatorsKey != "": 78 | if self.indicatorsKey in jsondoc: 79 | indicators = jsondoc.pop(self.indicatorsKey) 80 | self.ParsedData['IndicatorData'] = [] 81 | 82 | if isinstance(indicators, list): 83 | for indicator in indicators: 84 | if isinstance(indicator, dict): 85 | self.ParsedData['IndicatorData'].append(indicator) 86 | else: 87 | raise Exception('WrongType', 'Indicator type is not a dictionary: ' + str(indicator)) 88 | 89 | elif isinstance(indicators, dict): 90 | self.ParsedData['IndicatorData'].append(indicators) 91 | 92 | else: 93 | raise Exception('WrongType', 'Indicator type is not a list or dictionary: ' + str(indicators)) 94 | 95 | # Everything else in the document is considered to be header data 96 | if len(jsondoc): 97 | self.ParsedData['DocumentHeaderData'] = jsondoc 98 | 99 | else: 100 | raise Exception('NoIndicatorData', 'Defined indicator key, ' + self.indicatorsKey + ', does not exist in source file') 101 | 102 | else: 103 | raise Exception('NotYetImplemented', 'Parsing json dictionaries without an indicatorsKey is not currently supported') 104 | 105 | return self.ParsedData 106 | 107 | def Finalize(self, MappedData): 108 | ''' 109 | Finalize the formatting of the data before being returned to the caller 110 | ''' 111 | 112 | if 'IndicatorData' not in MappedData or MappedData['IndicatorData'].__len__() == 0: 113 | raise Exception('NoIndicatorData', 'Transformed data has no indicators, nothing to write') 114 | 115 | return self._MappedDataToDict(MappedData) 116 |
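# Sketch (not part of the repository) of the shapes this parser moves between;
# the field names and the valuemap are illustrative. With IndicatorsKey =
# indicators, Read() above splits
#   {"reportID": "abc-123", "indicators": [{"ipv4": "10.11.12.13"}]}
# into
#   {'DocumentHeaderData': {'reportID': 'abc-123'},
#    'IndicatorData': [{'ipv4': '10.11.12.13'}]}
# while Finalize() collapses each transformed row back to plain values via
# _BuildDictRow() below, honoring any valuemap override:
#   {'ipv4': {'Value': '10.11.12.13'},
#    'comment': {'Value': 'bad host', 'valuemap': 'notes;text'}}
#   -> [{'ipv4': '10.11.12.13', 'notes;text': 'bad host'}]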
117 | def Write(self, file, FinalizedData): 118 | ''' 119 | Write the data as json to the file. 120 | ''' 121 | 122 | if self.AdvancedParser: 123 | self.AdvancedParser.Write(file, FinalizedData) 124 | else: 125 | json.dump(FinalizedData, file, sort_keys=True, indent=4) 126 | 127 | def _MappedDataToDict(self, MappedData): 128 | ''' 129 | Take the Transformed data object, and rebuild the dictionary for the JSON output from the schema data 130 | ''' 131 | ParsedData = [] 132 | 133 | for rowType in MappedData: 134 | if isinstance(MappedData[rowType], list): 135 | for row in MappedData[rowType]: 136 | if isinstance(row, dict): 137 | DataRow = self._BuildDictRow(row) 138 | ParsedData.append(DataRow) 139 | else: 140 | raise Exception('NoParsableDataFound', "Data isn't in a parsable dictionary format") 141 | elif isinstance(MappedData[rowType], dict): 142 | DataRow = self._BuildDictRow(MappedData[rowType]) 143 | ParsedData.append(DataRow) 144 | else: 145 | raise Exception('NoParsableDataFound', "Data isn't in a parsable dictionary format") 146 | 147 | return ParsedData 148 | 149 | def _BuildDictRow(self, row): 150 | ''' 151 | Take a row from the MappedData object and return a plain dictionary of the row's field values 152 | ''' 153 | DataRow = {} 154 | 155 | for k, v in row.items(): 156 | if k == 'IndicatorType': 157 | # Keep passing the IndicatorType forward with the data. This is somewhat messy, but that way we can use it on write 158 | pass 159 | elif 'Value' in v: 160 | if 'valuemap' in v: 161 | DataRow[v['valuemap']] = v['Value'] 162 | else: 163 | DataRow[k] = v['Value'] 164 | else: 165 | self.logging.warning("Field %s does not contain a Value entry", k) 166 | 167 | return DataRow 168 | -------------------------------------------------------------------------------- /FlexTransform/SyntaxParser/KVParser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Oct 15, 2014 3 | 4 | @author: ahoying 5 | """ 6 | 7 | import re 8 | import logging 9 | import os 10 | from FlexTransform.SyntaxParser.Parser import Parser 11 | 12 | 13 | class KVParser(Parser): 14 | """ 15 | Key/Value Syntax Parser 16 | """ 17 | 18 | def __init__(self, trace, tracelist=[]): 19 | """ 20 | Constructor 21 | """ 22 | super(KVParser, self).__init__(trace, tracelist) 23 | self.SeparatorChar = r"\s" 24 | self.QuoteChar = r"[']" 25 | self.KVSeparator = r"[=]" 26 | 27 | self.ParsedData = {} 28 | 29 | self.logging = logging.getLogger('FlexTransform.KVParser') 30 | self.trace = trace 31 | self.tracelist = tracelist 32 | if self.trace: 33 | self.logging.debug("Initialized KVParser with tracelist of {} elements.".format(len(tracelist))) 34 | 35 | def ValidateConfig(self, config): 36 | """ 37 | Validate KV Parser specific configuration options 38 | """ 39 | if config.has_section('KEYVALUE'): 40 | if config.has_option('KEYVALUE', 'SeparatorChar'): 41 | self.SeparatorChar = config['KEYVALUE']['SeparatorChar'] 42 | if config.has_option('KEYVALUE', 'QuoteChar'): 43 | self.QuoteChar = config['KEYVALUE']['QuoteChar'] 44 | if config.has_option('KEYVALUE', 'KVSeparator'): 45 | self.KVSeparator = config['KEYVALUE']['KVSeparator'] 46 | 47 | def Read(self, file, config): 48 | """ 49 | Read file and parse into Transform object 50 | """ 51 | self.ParsedData = {} 52 | 53 | super(KVParser, self).Read(file, config) 54 | 55 | # Build a (key, value) pair regex from the configured separators: the value group matches either a quoted string or a bare token running to the next separator, so with the defaults a line like ipv4=10.11.12.13 comment='bad host' parses to {'ipv4': '10.11.12.13', 'comment': 'bad host'} once the quotes are stripped below 56 | KVRegex = re.compile( 57 | "([^"+self.KVSeparator.strip("[]")+"]+)"+ 58 | self.KVSeparator+"("+self.QuoteChar+"[^"+self.QuoteChar.strip("[]")+"]+"+self.QuoteChar+ 59 | "|[^"+self.SeparatorChar.strip("[]")+"]+)(?:"+self.SeparatorChar+"|$)") 60 | 61 | self.ParsedData['IndicatorData'] = [] 62 | 63 | for line in file: 64 | try: 65 | if isinstance(line, bytes): 66 | line = line.decode('UTF-8') 67 | 68 | match = KVRegex.findall(line) 69 | DataRow = dict(match) 70 | 71 | if self.QuoteChar: 72 | for k, v in DataRow.items(): 73 | DataRow[k] = v.strip(self.QuoteChar.strip("[]")) 74 | 75 | self.ParsedData['IndicatorData'].append(DataRow) 76 | except Exception: 77 | self.logging.warning("Line could not be parsed: %s", line) 78 | 79 | return self.ParsedData 80 | 81 | def Finalize(self, MappedData): 82 | """ 83 | Finalize the formatting of the data before being returned to the caller 84 | """ 85 | 86 | if 'IndicatorData' not in MappedData or MappedData['IndicatorData'].__len__() == 0: 87 | raise Exception('NoIndicatorData', 'Transformed data has no indicators, nothing to write') 88 | 89 | FinalizedData = [] 90 | for row in MappedData['IndicatorData']: 91 | 92 | indicatorRow = [] 93 | # Keep passing the IndicatorType forward with the data. This is somewhat messy, 94 | # but that way we can use it on write 95 | # DataRow['IndicatorType'] = indicator['IndicatorType'] 96 | 97 | for field in row: 98 | DataRow = {} 99 | if 'Value' in row[field]: 100 | if 'datatype' in row[field]: 101 | if row[field]['datatype'] == 'enum' or row[field]['datatype'] == 'string': 102 | DataRow[field] = self.QuoteChar.strip("[]") + row[field]['Value'] + self.QuoteChar.strip("[]") 103 | else: 104 | DataRow[field] = row[field]['Value'] 105 | else: 106 | DataRow[field] = row[field]['Value'] 107 | indicatorRow.append(DataRow) 108 | else: 109 | if field == 'IndicatorType': 110 | self.logging.info("Field IndicatorType does not contain a Value entry") 111 | else: 112 | self.logging.warning("Field %s does not contain a Value entry", field) 113 | 114 | FinalizedData.append(indicatorRow) 115 | return FinalizedData 116 | 117 | def Write(self, file, FinalizedData): 118 | """ 119 | Write the data as key/value pairs to the file.
120 | """ 121 | if isinstance(file, str): 122 | if os.path.exists(file): 123 | file = open(file, "w") 124 | else: 125 | self.logging.error("%s is not a valid filepath", file) 126 | 127 | if self.SeparatorChar == r"\s": 128 | separator = " " 129 | else: 130 | separator = self.SeparatorChar 131 | 132 | toWrite = "" 133 | for indicator in FinalizedData: 134 | for row in indicator: 135 | for key, value in row.items(): 136 | if value: 137 | toWrite += key + self.KVSeparator.strip("[]") + value + separator 138 | toWrite = toWrite[:-1] 139 | toWrite += '\n' 140 | file.write(toWrite) 141 | -------------------------------------------------------------------------------- /FlexTransform/SyntaxParser/Parser.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Jul 28, 2014 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | import inspect 8 | import logging 9 | 10 | import FlexTransform.SyntaxParser 11 | 12 | ''' Debugging only ''' 13 | 14 | 15 | class Parser(object): 16 | ''' 17 | Base class for Syntax Parsers 18 | 19 | Implements class methods for finding and loading the appropriate parser based on the configuration file 20 | ''' 21 | 22 | # Dictionary of loaded Parser classes 23 | __KnownParsers = {} 24 | 25 | def __init__(self, trace, tracelist=[]): 26 | ''' 27 | Constructor 28 | ''' 29 | self.logging = logging.getLogger('FlexTransform.Parser') 30 | self.trace = trace 31 | self.tracelist = tracelist 32 | self.traceindex = {} 33 | if self.trace: 34 | for x in self.tracelist: 35 | for v in x["src_fields"]: 36 | self.traceindex[v] = x 37 | for y in x["dst_fields"]: 38 | self.traceindex[y] = x 39 | for w in x["src_IRIs"]: 40 | self.traceindex[w] = x 41 | for z in x["dst_IRIs"]: 42 | self.traceindex[z] = x 43 | self.logging.debug("Initialized Parser with tracelist of {} elements.".format(len(tracelist))) 44 | 45 | @classmethod 46 | def UpdateKnownParsers(cls, ParserName, ParserClass): 47 | cls.__KnownParsers[ParserName] = ParserClass 48 | 49 | @classmethod 50 | def GetParsers(cls): 51 | return cls.__KnownParsers 52 | 53 | @classmethod 54 | def GetParser(cls, ParserName, trace, tracelist=[]): 55 | for name, obj in inspect.getmembers(FlexTransform.SyntaxParser, inspect.isclass): 56 | if name == ParserName: 57 | return obj(trace, tracelist=tracelist) 58 | 59 | # Virtual methods that must be implemented in child classes 60 | 61 | def ValidateConfig(self, config): 62 | ''' 63 | Base validation method, must be implemented in subclasses 64 | ''' 65 | raise Exception("MethodNotDefined", "ValidateConfig") 66 | 67 | def Read(self, file, configurationfile): 68 | ''' 69 | Base document read method, must be implemented in subclasses 70 | TODO: need proper subclassing: All subclasses should call this Read method as well, as it contains 71 | code common to all parsers. 72 | ''' 73 | 74 | ''' Ensure the derived data is available to all parsers, e.g.
to extract information from the file 75 | name or metadata 76 | ''' 77 | self.ParsedData = {} 78 | if 'DerivedData' in configurationfile.SchemaConfig: 79 | self.ParsedData['DerivedData'] = {} 80 | for field in configurationfile.SchemaConfig['DerivedData']['fields']: 81 | if 'value' in configurationfile.SchemaConfig['DerivedData']['fields'][field] and configurationfile.SchemaConfig['DerivedData']['fields'][field]['value']: 82 | self.ParsedData['DerivedData'][field] = configurationfile.SchemaConfig['DerivedData']['fields'][field]['value'] 83 | if self.trace and field in self.traceindex: 84 | self.logging.debug("[TRACE {}]: Read: value {} copied to ParsedData['DerivedData'] from SchemaConfig".format(field, self.ParsedData['DerivedData'][field])) 85 | 86 | def Write(self, file, FinalizedData): 87 | ''' 88 | Base document write method, must be implemented in subclasses 89 | ''' 90 | raise Exception("MethodNotDefined", "Write") 91 | 92 | def Finalize(self,data): 93 | ''' 94 | Base document finalize method, must be implemented in subclasses 95 | ''' 96 | raise Exception("MethodNotDefined", "Finalize") -------------------------------------------------------------------------------- /FlexTransform/SyntaxParser/XMLParsers/.gitignore: -------------------------------------------------------------------------------- 1 | /__pycache__/ 2 | -------------------------------------------------------------------------------- /FlexTransform/SyntaxParser/XMLParsers/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Jul 27, 2014 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | #from ..XMLParser import XMLParser 8 | from .CFM13 import CFM13 9 | from .CFM20Alert import CFM20Alert 10 | from .STIX import STIX 11 | -------------------------------------------------------------------------------- /FlexTransform/SyntaxParser/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Jul 27, 2014 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | from .CSVParser import CSVParser 8 | from .DictionaryParser import DictionaryParser 9 | from .KVParser import KVParser 10 | from .Parser import Parser 11 | from .XMLParser import XMLParser 12 | 13 | # Map Parser types to Parser class names 14 | Parser.UpdateKnownParsers('XML', 'XMLParser') 15 | Parser.UpdateKnownParsers('KEYVALUE', 'KVParser') 16 | Parser.UpdateKnownParsers('DICT', 'DictionaryParser') 17 | Parser.UpdateKnownParsers('CSV', 'CSVParser') 18 | -------------------------------------------------------------------------------- /FlexTransform/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Jul 27, 2014 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | from FlexTransform import FlexTransform -------------------------------------------------------------------------------- /FlexTransform/resources/cybox.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/cybox.zip -------------------------------------------------------------------------------- /FlexTransform/resources/images/dev-figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/dev-figure1.png 
-------------------------------------------------------------------------------- /FlexTransform/resources/images/figure1a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/figure1a.png -------------------------------------------------------------------------------- /FlexTransform/resources/images/figure1b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/figure1b.png -------------------------------------------------------------------------------- /FlexTransform/resources/images/figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/figure2.png -------------------------------------------------------------------------------- /FlexTransform/resources/images/figure3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/figure3.png -------------------------------------------------------------------------------- /FlexTransform/resources/images/figure4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/figure4.png -------------------------------------------------------------------------------- /FlexTransform/resources/ramrod.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/ramrod.zip -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/MBL.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE 3 | FileParser = DICT 4 | 5 | # KEYVALUE Options 6 | [KEYVALUE] 7 | # SeparatorChar = ' ' 8 | QuoteChar = ['] 9 | KVSeparator = [=] 10 | 11 | # DICT Options 12 | [DICT] 13 | IndicatorsKey = indicators 14 | 15 | # SCHEMA Definition 16 | 17 | [SCHEMA] 18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 19 | PrimarySchemaConfiguration = resources/schemaDefinitions/MBL.json 20 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition 21 | # If multiple files are required, separate them with ; 22 | #SiteSchemaConfiguration = 23 | -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/cfm13.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE, DICT 3 | FileParser = XML 4 | 5 | # XML Options 6 | [XML] 7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema 8 | ValidateSchema = no 9 | # 
SchemaFile must be set if ValidateSchema is yes 10 | # SchemaFile = resources/schemas/CFMMessage13.xsd 11 | # CustomParser is used to load custom XML parsing classes for complex XML documents 12 | CustomParser = CFM13 13 | 14 | # JSON Options (none currently defined) 15 | [JSON] 16 | 17 | # CSV Options 18 | [CSV] 19 | # HeaderLine defines if the CSV file has the field names in the first non-commented row of the file 20 | # HeaderLine defaults to no 21 | HeaderLine = no 22 | # QuotesOptional defines if every field in the CSV file has to be enclosed in quotes. 23 | # QuotesOptional defaults to yes 24 | QuotesOptional = no 25 | # SeparatorChar defines the character or characters that separate the fields in the file. 26 | # SeparatorChar defaults to , 27 | SeparatorChar = , 28 | # StripSpaces defines if spaces before or after the separator should be stripped 29 | # StripSpaces defaults to yes 30 | StripSpaces = yes 31 | # FieldNames have to be defined if HeaderLine is set to no. The FieldNames map to the SCHEMA definition below 32 | FieldNames = ip,host,etc 33 | 34 | # TSV Options 35 | [TSV] 36 | HeaderLine = no 37 | FieldNames = ip,host,etc 38 | 39 | # REGEX Options 40 | [REGEX] 41 | Regex = (\S*)\s+(\S*)\s+\d+\s+([0-9.]+) 42 | FieldNames = host,service,ip 43 | 44 | # KEYVALUE Options 45 | [KEYVALUE] 46 | SeparatorChar = \s 47 | QuoteChar = ['] 48 | KVSeparator = [=] 49 | 50 | # SCHEMA Definition 51 | 52 | [SCHEMA] 53 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 54 | PrimarySchemaConfiguration = resources/schemaDefinitions/cfm13.json 55 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition 56 | # If multiple files are required, separate them with ; 57 | SiteSchemaConfiguration = resources/schemaDefinitions/cfm13-site.json 58 | 59 | # Metadata can be included with certain files. Set MetadataSchemaConfiguration to the json file that defines the metadata schema 60 | MetadataSchemaConfiguration = resources/schemaDefinitions/cfm-metadata.json 61 | -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/cfm20alert.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE 3 | FileParser = XML 4 | 5 | # XML Options 6 | [XML] 7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema 8 | ValidateSchema = yes 9 | # SchemaFile must be set if ValidateSchema is yes 10 | SchemaFile = resources/schemas/CFMAlert.xsd 11 | # CustomParser is used to load custom XML parsing classes for complex XML documents 12 | CustomParser = CFM20Alert 13 | 14 | # SCHEMA Definition 15 | 16 | [SCHEMA] 17 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 18 | PrimarySchemaConfiguration = resources/schemaDefinitions/cfm20alert.json 19 | 20 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition 21 | # If multiple files are required, separate them with ; 22 | # SiteSchemaConfiguration = 23 | 24 | # Metadata can be included with certain files.
Set MetadataSchemaConfiguration to the json file that defines the metadata schema 25 | MetadataSchemaConfiguration = resources/schemaDefinitions/cfm-metadata.json 26 | -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/crisp_json.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | FileParser = DICT 3 | 4 | [DICT] 5 | IndicatorsKey = indicators 6 | 7 | # SCHEMA Definition 8 | [SCHEMA] 9 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 10 | PrimarySchemaConfiguration = resources/schemaDefinitions/crisp.json 11 | -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/csv_example.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | FileParser = CSV 3 | 4 | # CSV Options 5 | [CSV] 6 | Fields = action1,indicator,reportedTime,duration1,firewalls,origin,directSource,majorTags,sensitivity,reconAllowed,restriction 7 | Delimiter = "," 8 | QuoteChar = " 9 | EscapeChar = \\ 10 | HeaderLine = false 11 | DoubleQuote = false 12 | QuoteStyle = Minimal 13 | 14 | # SCHEMA Definition 15 | [SCHEMA] 16 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 17 | PrimarySchemaConfiguration = resources/schemaDefinitions/lqmtools.json -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/doe_em.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | FileParser = CSV 3 | 4 | # CSV Options 5 | [CSV] 6 | Fields = indicatorType,indicator,reason,detectedTime 7 | Delimiter = "," 8 | QuoteChar = " 9 | EscapeChar = \\ 10 | HeaderLine = false 11 | DoubleQuote = false 12 | QuoteStyle = Minimal 13 | 14 | # SCHEMA Definition 15 | [SCHEMA] 16 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 17 | PrimarySchemaConfiguration = resources/schemaDefinitions/doe-em.json 18 | -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/flextext.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | FileParser = CSV 3 | 4 | # CSV Options 5 | [CSV] 6 | Fields = Indicator,EmailSenderAddress 7 | Delimiter = "|" 8 | RecordDelim = "\r\n" 9 | QuoteChar = " 10 | EscapeChar = \\ 11 | HeaderLine = false 12 | DoubleQuote = false 13 | QuoteStyle = Minimal 14 | 15 | # SCHEMA Definition 16 | [SCHEMA] 17 | SchemaConfigurationType = Inline 18 | SupportedIndicatorTypes = IPv4-Address-Block 19 | TypeMappings = { "IPv4-Address-Block": [ { "Indicator": "*" } ]} 20 | Indicator_OntologyMapping = IPv4AddressIndicatorValueSemanticComponent 21 | EmailSenderAddress_OntologyMapping = EmailSenderAddressSemanticCocept 22 | 23 | -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/iid_combined_recent.cfg: -------------------------------------------------------------------------------- 1 | # IID Combined Detected - Recent, standard 2 | 3 | [SYNTAX] 4 | FileParser = CSV 5 | 6 | # CSV Options 7 | [CSV] 8 | Fields = sid,uri,target,time,baddom,domain,description1 9 | Delimiter = "," 10 | QuoteChar = " 11 | 
EscapeChar = \\ 12 | HeaderLine = false 13 | DoubleQuote = false 14 | QuoteStyle = Minimal 15 | 16 | # SCHEMA Definition 17 | [SCHEMA] 18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 19 | PrimarySchemaConfiguration = resources/schemaDefinitions/iid.json 20 | SiteSchemaConfiguration = resources/schemaDefinitions/iid-combined-recent.json -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/iid_host_active.cfg: -------------------------------------------------------------------------------- 1 | # IID Bad Hostname - Active, standard 2 | 3 | [SYNTAX] 4 | FileParser = CSV 5 | 6 | # CSV Options 7 | [CSV] 8 | Fields = domain,time,description1,description2 9 | Delimiter = "," 10 | QuoteChar = " 11 | EscapeChar = \\ 12 | HeaderLine = false 13 | DoubleQuote = false 14 | QuoteStyle = Minimal 15 | 16 | # SCHEMA Definition 17 | [SCHEMA] 18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 19 | PrimarySchemaConfiguration = resources/schemaDefinitions/iid.json -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/iid_host_dynamic.cfg: -------------------------------------------------------------------------------- 1 | # IID Bad Hostname - Dynamic, standard 2 | 3 | [SYNTAX] 4 | FileParser = CSV 5 | 6 | # CSV Options 7 | [CSV] 8 | Fields = domain,time,description1,description2 9 | Delimiter = "," 10 | QuoteChar = " 11 | EscapeChar = \\ 12 | HeaderLine = false 13 | DoubleQuote = false 14 | QuoteStyle = Minimal 15 | 16 | # SCHEMA Definition 17 | [SCHEMA] 18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 19 | PrimarySchemaConfiguration = resources/schemaDefinitions/iid.json 20 | SiteSchemaConfiguration = resources/schemaDefinitions/iid-host-dynamic.json -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/iid_ipv4_recent.cfg: -------------------------------------------------------------------------------- 1 | # IID Bad IP - Recent, standard 2 | 3 | [SYNTAX] 4 | FileParser = CSV 5 | 6 | # CSV Options 7 | [CSV] 8 | Fields = ipv4,time,description1,description2 9 | Delimiter = "," 10 | QuoteChar = " 11 | EscapeChar = \\ 12 | HeaderLine = false 13 | DoubleQuote = false 14 | QuoteStyle = Minimal 15 | 16 | # SCHEMA Definition 17 | [SCHEMA] 18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 19 | PrimarySchemaConfiguration = resources/schemaDefinitions/iid.json 20 | SiteSchemaConfiguration = resources/schemaDefinitions/iid-ipv4-recent.json -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/isight.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | FileParser = DICT 3 | 4 | [DICT] 5 | IndicatorsKey = indicators 6 | 7 | # SCHEMA Definition 8 | [SCHEMA] 9 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 10 | PrimarySchemaConfiguration = resources/schemaDefinitions/isight.json 11 | -------------------------------------------------------------------------------- 
/FlexTransform/resources/sampleConfigurations/keyvalue.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE 3 | FileParser = KEYVALUE 4 | 5 | # KEYVALUE Options 6 | [KEYVALUE] 7 | SeparatorChar = & 8 | QuoteChar = ['] 9 | KVSeparator = [=] 10 | 11 | # SCHEMA Definition 12 | 13 | [SCHEMA] 14 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 15 | PrimarySchemaConfiguration = resources/schemaDefinitions/keyvalue.json 16 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition 17 | # If multiple files are required, separate them with ; 18 | #SiteSchemaConfiguration = 19 | -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/lqmtools.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | FileParser = DICT 3 | 4 | # DICT Options 5 | [DICT] 6 | IndicatorsKey = indicators 7 | # SCHEMA Definition 8 | 9 | [SCHEMA] 10 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 11 | PrimarySchemaConfiguration = resources/schemaDefinitions/lqmtools.json 12 | 13 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition 14 | # If multiple files are required, separate them with ; 15 | #SiteSchemaConfiguration = -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/stix_acs30.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE 3 | FileParser = XML 4 | 5 | # FilenameExtraction can either be None or a dictionary of regular expression, name pairs that will be referenced 6 | # by the schema parser. 7 | # In the case of CFM1.3, we expect the UUID to use for the overall document to be in the filename, which 8 | # has the structure: _.Alert.C 9 | FilenameExtraction = None 10 | # FiledataExtraction can either be None or a dictionary mapping names to python functions: 11 | FiledataExtraction = None 12 | 13 | # XML Options 14 | [XML] 15 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema 16 | ValidateSchema = no 17 | # CustomParser is used to load custom XML parsing classes for complex XML documents 18 | CustomParser = STIX 19 | 20 | [STIX] 21 | STIXNamespace = http://www.us-cert.gov/essa 22 | STIXAlias = isa 23 | # Change depending on whether this is testing or production. Testing prefix is guide.999191., production prefix is guide.19001. 24 | # STIXIDPrefix = guide.999191. 25 | STIXIDPrefix = guide.19001. 26 | # The version of ACS to use for the marking structure.
Choices are: None (TLP), 2.1 or 3.0 27 | # ACSVersion = 3.0 28 | 29 | [SCHEMA] 30 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 31 | PrimarySchemaConfiguration = resources/schemaDefinitions/stix.json 32 | SiteSchemaConfiguration = resources/schemaDefinitions/stix-acs30.json -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/stix_essa.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE 3 | FileParser = XML 4 | 5 | # XML Options 6 | [XML] 7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema 8 | ValidateSchema = no 9 | # CustomParser is used to load custom XML parsing classes for complex XML documents 10 | CustomParser = STIX 11 | 12 | [STIX] 13 | STIXNamespace = http://www.us-cert.gov/essa 14 | STIXAlias = isa 15 | # Change depending on if this is testing or production. Testing prefix is guide.999191., production prefix is guide.19001. 16 | # STIXIDPrefix = guide.999191. 17 | STIXIDPrefix = guide.19001. 18 | 19 | [SCHEMA] 20 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 21 | PrimarySchemaConfiguration = resources/schemaDefinitions/stix.json 22 | SiteSchemaConfiguration = resources/schemaDefinitions/stix-essa.json -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/stix_tlp.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE 3 | FileParser = XML 4 | 5 | # XML Options 6 | [XML] 7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema 8 | ValidateSchema = no 9 | # CustomParser is used to load custom XML parsing classes for complex XML documents 10 | CustomParser = STIX 11 | 12 | [STIX] 13 | STIXNamespace = http://www.anl.gov/cfm/stix 14 | STIXAlias = CFM 15 | 16 | [SCHEMA] 17 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 18 | PrimarySchemaConfiguration = resources/schemaDefinitions/stix.json 19 | SiteSchemaConfiguration = resources/schemaDefinitions/stix-tlp.json -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/stix_tlp2.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE 3 | FileParser = XML 4 | 5 | # XML Options 6 | [XML] 7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema 8 | ValidateSchema = no 9 | # CustomParser is used to load custom XML parsing classes for complex XML documents 10 | CustomParser = STIX 11 | 12 | [STIX] 13 | STIXNamespace = http://www.us-cert.gov/essa 14 | STIXAlias = isa 15 | STIXIDPrefix = guide.19001. 
16 | 17 | [SCHEMA] 18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 19 | PrimarySchemaConfiguration = resources/schemaDefinitions/stix.json 20 | SiteSchemaConfiguration = resources/schemaDefinitions/stix-tlp2.json -------------------------------------------------------------------------------- /FlexTransform/resources/sampleConfigurations/twitter.cfg: -------------------------------------------------------------------------------- 1 | # Twitter 2 | 3 | [SYNTAX] 4 | FileParser = DICT 5 | 6 | [DICT] 7 | IndicatorsKey = indicators 8 | 9 | [SCHEMA] 10 | PrimarySchemaConfiguration = resources/schemaDefinitions/twitter.json -------------------------------------------------------------------------------- /FlexTransform/resources/schemaDefinitions/cfm-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "DocumentMetaData": { 3 | "fields": { 4 | "DataSensitivity": { 5 | "description": "OUO marking", 6 | "datatype": "enum", 7 | "required": true, 8 | "defaultValue": "noSensitivity", 9 | "ontologyMappingType": "enum", 10 | "enumValues": { 11 | "ouo": { 12 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#OUOSemanticConcept" 13 | }, 14 | "noSensitivity": { 15 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#NotOUOSemanticConcept" 16 | } 17 | } 18 | }, 19 | "FileName": { 20 | "description": "File name the metadata is attached to", 21 | "datatype": "string", 22 | "required": true, 23 | "ontologyMappingType": "simple", 24 | "ontologyMapping": "" 25 | }, 26 | "PayloadFormat": { 27 | "description": "Schema format of the data", 28 | "datatype": "enum", 29 | "required": true, 30 | "ontologyMappingType": "enum", 31 | "enumValues": { 32 | "STIX": { 33 | "ontologyMapping": "" 34 | }, 35 | "Cfm13Alert": { 36 | "ontologyMapping": "" 37 | }, 38 | "Cfm20Alert": { 39 | "ontologyMapping": "" 40 | } 41 | } 42 | }, 43 | "PayloadType": { 44 | "description": "CFM Payload type marking", 45 | "datatype": "enum", 46 | "required": true, 47 | "defaultValue": "Alert", 48 | "ontologyMappingType": "enum", 49 | "enumValues": { 50 | "Alert": { 51 | "ontologyMapping": "" 52 | } 53 | } 54 | }, 55 | "ReconPolicy": { 56 | "description": "Is additional recon allowed on the indicator", 57 | "datatype": "enum", 58 | "required": true, 59 | "defaultValue": "Touch", 60 | "ontologyMappingType": "enum", 61 | "enumValues": { 62 | "Touch": { 63 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#ReconAllowedSemanticConcept" 64 | }, 65 | "NoTouch": { 66 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#ReconNotAllowedSemanticConcept" 67 | } 68 | } 69 | }, 70 | "SendingSite": { 71 | "description": "Site name that submitted the report", 72 | "datatype": "string", 73 | "required": true, 74 | "ontologyMappingType": "simple", 75 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#SiteAbbreviationSemanticConcept" 76 | }, 77 | "SentTimestamp": { 78 | "description": "The timestamp when the file was uploaded", 79 | "datatype": "datetime", 80 | "dateTimeFormat": "unixtime", 81 | "required": true, 82 | "ontologyMappingType": "simple", 83 | "ontologyMapping": "" 84 | }, 85 | "UploadID": { 86 | "description": "The UUID for the uploaded document", 87 | "datatype": "string", 88 | "required": true, 89 | "ontologyMappingType": "simple", 90 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#UniqueFileIdentifierSemanticConcept" 91 | }, 92 | "DownloadElementExtendedAttribute_Field": { 93 
| "description": "Extended information type", 94 | "valuemap": "DownloadElementExtendedAttribute;Field", 95 | "datatype": "enum", 96 | "required": false, 97 | "ontologyMappingType": "enum", 98 | "enumValues": { 99 | "origFileName": { 100 | "ontologyMapping": "" 101 | }, 102 | "orig1.3Filename": { 103 | "ontologyMapping": "" 104 | }, 105 | "comment": { 106 | "ontologyMapping": "" 107 | } 108 | } 109 | }, 110 | "DownloadElementExtendedAttribute_Value": { 111 | "description": "The value for the extended data", 112 | "valuemap": "DownloadElementExtendedAttribute;Value", 113 | "datatype": "string", 114 | "defaultValue": "NoValue", 115 | "requiredIfReferenceField": "DownloadElementExtendedAttribute_Field", 116 | "requiredIfReferenceValuesMatch": [ "*" ], 117 | "ontologyMappingType": "referencedEnum", 118 | "ontologyEnumField": "DownloadElementExtendedAttribute_Field", 119 | "ontologyMappingEnumValues": { 120 | "origFileName": { 121 | "ontologyMapping": "" 122 | }, 123 | "orig1.3Filename": { 124 | "ontologyMapping": "" 125 | }, 126 | "comment": { 127 | "ontologyMapping": "" 128 | } 129 | } 130 | } 131 | } 132 | } 133 | } -------------------------------------------------------------------------------- /FlexTransform/resources/schemaDefinitions/cfm13-site.json: -------------------------------------------------------------------------------- 1 | { 2 | "DocumentHeaderData": { 3 | "fields": { 4 | "analyzerid": { 5 | "defaultValue": "TEST" 6 | }, 7 | "location": { 8 | "defaultValue": "TEST" 9 | }, 10 | "contact_name": { 11 | "defaultValue": "Test User" 12 | }, 13 | "contact_phone": { 14 | "defaultValue": "555-555-1212", 15 | "required": true 16 | }, 17 | "contact_email": { 18 | "defaultValue": "test@test.int", 19 | "required": true 20 | } 21 | } 22 | } 23 | } -------------------------------------------------------------------------------- /FlexTransform/resources/schemaDefinitions/doe-em.json: -------------------------------------------------------------------------------- 1 | { 2 | "IndicatorData": { 3 | "types": { 4 | "IPv4-Address-Block": [ {"indicatorType": "IPv4 Address"} ], 5 | "IPv6-Address-Block": [ {"indicatorType": "IPv6 Address"} ], 6 | "DNS-Hostname-Block": [ {"indicatorType": "DNSHostName"}, {"indicatorType": "Domain"} , {"indicatorType": "DNSDomainName"} ], 7 | "URL-Block": [ {"indicatorType": "URL"}, {"indicatorType": "URI"} ], 8 | "Malicious-File-Hash": [ {"indicatorType": "FileMD5Hash"}, {"indicatorType": "FileSHA1Hash"}] 9 | }, 10 | "fields": { 11 | "indicator": { 12 | "description": "The value to be acted upon (e.g. ip, domain name, URL)", 13 | "datatype": "string", 14 | "required": true, 15 | "ontologyMappingType": "simple", 16 | "ontologyMapping": "" 17 | }, 18 | "indicatorType": { 19 | "description": "A type name that informs how to interpret the indicator (e.g. 
ipv4, emailAddress) (enum)", 20 | "datatype": "enum", 21 | "dependsOn": "indicator", 22 | "mapOntologyToElement": "indicator", 23 | "required": false, 24 | "ontologyMappingType": "enum", 25 | "enumValues": { 26 | "IPv6 Address": { 27 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv6AddressIndicatorValueSemanticComponent" 28 | }, 29 | "IPv4 Address": { 30 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv4AddressIndicatorValueSemanticComponent" 31 | }, 32 | "URL": { 33 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent" 34 | }, 35 | "URI": { 36 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent" 37 | }, 38 | "DNSDomainName": { 39 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent" 40 | }, 41 | "Domain": { 42 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent" 43 | }, 44 | "DNSHostName": { 45 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent" 46 | }, 47 | "FileMD5Hash": { 48 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#MD5FilehashIndicatorValueSemanticComponent" 49 | }, 50 | "FileSHA1Hash": { 51 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#SHA1FilehashIndicatorValueSemanticComponent" 52 | } 53 | } 54 | }, 55 | "reason": { 56 | "description": "Description associated with indicator", 57 | "datatype": "string", 58 | "required": false, 59 | "defaultValue": "noValue", 60 | "ontologyMappingType": "simple", 61 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IndicatorDescriptionSemanticConcept" 62 | }, 63 | "detectedTime": { 64 | "description": "Time the report was generated", 65 | "datatype": "datetime", 66 | "dateTimeFormat": " YYYY-MM-DD HH:mm:ss", 67 | "dateTimeFormatAlternate": ["YYYY-MM-DDTHH:mm:ss","YYYY-MM-DDTHH:mm:ssZ", "YYYY-MM-DD"], 68 | "dateTimezoneDefault": "US/Pacific", 69 | "required": false, 70 | "defaultValue": "&now()", 71 | "ontologyMappingType": "simple", 72 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IndicatorGeneratedTimeSemanticConcept" 73 | } 74 | } 75 | } 76 | } -------------------------------------------------------------------------------- /FlexTransform/resources/schemaDefinitions/iid-combined-recent.json: -------------------------------------------------------------------------------- 1 | { 2 | "IndicatorData": { 3 | "types": { 4 | "DNS-Hostname-Block": null 5 | }, 6 | "fields": { 7 | "domain": { 8 | "required": false, 9 | "ontologyMapping": "" 10 | }, 11 | "uri": { 12 | "required": true 13 | }, 14 | "combined_description_comment": { 15 | "outputFormat": "[description1], [target]" 16 | }, 17 | "target": { 18 | "required": true 19 | } 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /FlexTransform/resources/schemaDefinitions/iid-host-dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "IndicatorData": { 3 | "fields": { 4 | "time": { 5 | "ontologyMapping": "" 6 | }, 7 | "durationCalculated": { 8 | "description": "how long the action is supposed to be left in place", 9 | "datatype": "int", 10 | "defaultValue": "&calculate_duration(time)", 11 | "required": false, 12 | "requiredIfReferenceField": "time", 13 | "requiredIfReferenceValuesMatch": ["*"], 14 | "ontologyMappingType": "simple", 15 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#ActionDurationSemanticConcept" 16 
| } 17 | } 18 | } 19 | } -------------------------------------------------------------------------------- /FlexTransform/resources/schemaDefinitions/iid-ipv4-recent.json: -------------------------------------------------------------------------------- 1 | { 2 | "IndicatorData": { 3 | "fields": { 4 | "domain": { 5 | "required": false 6 | }, 7 | "ipv4": { 8 | "required": true 9 | } 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /FlexTransform/resources/schemaDefinitions/iid.json: -------------------------------------------------------------------------------- 1 | { 2 | "IndicatorData": { 3 | "types": { 4 | "DNS-Hostname-Block": [ {"domain": "*"}], 5 | "IPv4-Address-Block": [ {"ipv4": "*"} ], 6 | "URL-Block": [ {"uri": "*"} ] 7 | }, 8 | "fields": { 9 | "domain": { 10 | "description": "The domain to be acted upon", 11 | "datatype": "string", 12 | "required": true, 13 | "ontologyMappingType": "simple", 14 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent" 15 | }, 16 | "ipv4": { 17 | "description": "The IPv4 to be acted upon", 18 | "datatype": "string", 19 | "required": false, 20 | "ontologyMappingType": "simple", 21 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv4AddressIndicatorValueSemanticComponent" 22 | }, 23 | "uri": { 24 | "description": "The URL to be acted upon", 25 | "datatype": "string", 26 | "required": false, 27 | "ontologyMappingType": "simple", 28 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent" 29 | }, 30 | "time": { 31 | "description": "Time value associated with indicator", 32 | "datatype": "datetime", 33 | "dateTimeFormat": "YYYYMMDDTHHmmss", 34 | "dateTimeFormatAlternate": [" YYYYMMDDTHHmmss","YYYYMMDDTHHmmss ", "YYYYMMDDTHHmmssZ"], 35 | "required": false, 36 | "ontologyMappingType": "simple", 37 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#TimeOfDetectionSemanticConcept" 38 | }, 39 | "description1": { 40 | "description": "Description associated with indicator", 41 | "datatype": "string", 42 | "required": true, 43 | "defaultValue": "noValue", 44 | "ontologyMappingType": "simple", 45 | "ontologyMapping": "" 46 | }, 47 | "description2": { 48 | "description": "Description associated with indicator", 49 | "datatype": "string", 50 | "required": true, 51 | "defaultValue": "noValue", 52 | "ontologyMappingType": "simple", 53 | "ontologyMapping": "" 54 | }, 55 | "combined_description_comment": { 56 | "description": "Combined [description1] & [description2] fields", 57 | "datatype": "string", 58 | "required": true, 59 | "outputFormat": "[description1], [description2]", 60 | "ontologyMappingType": "simple", 61 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IndicatorDescriptionSemanticConcept" 62 | }, 63 | "sid": { 64 | "description": "ID field from IID", 65 | "datatype": "string", 66 | "required": false, 67 | "ontologyMappingType": "simple", 68 | "ontologyMapping": "" 69 | }, 70 | "target": { 71 | "description": "Target field from IID", 72 | "datatype": "string", 73 | "required": false, 74 | "defaultValue": "noValue", 75 | "ontologyMappingType": "simple", 76 | "ontologyMapping": "" 77 | }, 78 | "baddom": { 79 | "description": "Bad Dom, usually empty", 80 | "datatype": "string", 81 | "required": false, 82 | "defaultValue": "noValue", 83 | "ontologyMappingType": "simple", 84 | "ontologyMapping": "" 85 | } 86 | } 87 | } 88 | } -------------------------------------------------------------------------------- 
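[Editor's note: each of these schema-definition files follows the same shape: a top-level data block (`IndicatorData` here, `DocumentHeaderData` in others) containing an optional `types` map and a `fields` map whose entries carry keys such as `datatype`, `required`, `defaultValue`, and `ontologyMapping`. As a quick sanity check, that structure can be inspected programmatically — a minimal sketch, assuming the repository layout shown in this listing:]

```python
import json

# Load one schema definition and summarize its field -> ontology mappings.
# (The path assumes the repository layout shown in this listing.)
with open('FlexTransform/resources/schemaDefinitions/iid.json') as schema_file:
    schema = json.load(schema_file)

for block_name, block in schema.items():  # e.g. 'IndicatorData'
    for field_name, field in block.get('fields', {}).items():
        mapping = field.get('ontologyMapping') or '(no ontology mapping)'
        required = field.get('required', False)
        print('{0}.{1}: required={2} -> {3}'.format(block_name, field_name, required, mapping))
```

--------------------------------------------------------------------------------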
/FlexTransform/resources/schemaDefinitions/stix-essa.json: -------------------------------------------------------------------------------- 1 | { 2 | "DocumentHeaderData": { 3 | "fields": { 4 | "handling_markingstructures": { 5 | "required": false, 6 | "multiple": true, 7 | "ontologyMappingType": "none", 8 | "subfields": { 9 | "handling_markingstructures_xsitype": {"required": true, "primaryKey": true}, 10 | "handling_markingstructures_identifier": {"required": false, "primaryKeyMatch": "edh2cyberMarking:ISAMarkingsType"}, 11 | "handling_markingstructures_createdatetime": {"required": false, "primaryKeyMatch": "edh2cyberMarking:ISAMarkingsType"}, 12 | "handling_markingstructures_responsibleentity": {"required": false, "primaryKeyMatch": "edh2cyberMarking:ISAMarkingsType"}, 13 | "handling_markingstructures_isamversion": {"required": false, "primaryKeyMatch": "edh2cyberMarking:ISAMarkingsType"}, 14 | "handling_markingstructures_isamversion2": {"required": false, "primaryKeyMatch": "edh2cyberMarkingAssert:ISAMarkingsAssertionType"}, 15 | "handling_markingstructures_mostrestrictive": {"required": false, "primaryKeyMatch": "edh2cyberMarkingAssert:ISAMarkingsAssertionType"}, 16 | "handling_markingstructures_policyref": {"required": false, "primaryKeyMatch": "edh2cyberMarkingAssert:ISAMarkingsAssertionType"}, 17 | "handling_markingstructures_controlset": {"required": false, "primaryKeyMatch": "edh2cyberMarkingAssert:ISAMarkingsAssertionType"} 18 | }, 19 | "defaultFields": { 20 | "handling_markingstructures_xsitype": [ "edh2cyberMarking:ISAMarkingsType", "edh2cyberMarkingAssert:ISAMarkingsAssertionType" ], 21 | "handling_markingstructures_identifier": "", 22 | "handling_markingstructures_responsibleentity": "CUST:USA.DOE", 23 | "handling_markingstructures_createdatetime": "&stix_now()", 24 | "handling_markingstructures_isamversion": "1.0", 25 | "handling_markingstructures_isamversion2": "1.0", 26 | "handling_markingstructures_mostrestrictive": "true", 27 | "handling_markingstructures_policyref": "urn:isa:policy:acs:ns:v2.0?privdefault=permit", 28 | "handling_markingstructures_controlset": "CLS:U CUI:FOUO" 29 | } 30 | }, 31 | "handling_markingstructures_identifier": { 32 | "description": "Single unique identifier associated with the resource.", 33 | "valuemap": "handling;marking_structures;identifier", 34 | "datatype": "string", 35 | "defaultValue": "", 36 | "required": "false", 37 | "memberof": "handling_markingstructures", 38 | "ontologyMappingType": "simple", 39 | "ontologyMapping": "" 40 | }, 41 | "handling_markingstructures_createdatetime": { 42 | "description": "The creation date and time of the associated resource.", 43 | "valuemap": "handling;marking_structures;createdatetime", 44 | "datatype": "datetime", 45 | "dateTimeFormat": "YYYY-MM-DDTHH:mm:ssZZ", 46 | "dateTimeFormatAlternate": ["YYYY-MM-DDTHH:mm:ss", "YYYY-MM-DDTHH:mm:ssZ"], 47 | "defaultValue": "&stix_now()", 48 | "required": "false", 49 | "memberof": "handling_markingstructures", 50 | "ontologyMappingType": "none" 51 | }, 52 | "handling_markingstructures_responsibleentity": { 53 | "description": "Contains a single mandatory CUST: token with an optional single ORIG: token", 54 | "valuemap": "handling;marking_structures;responsibleentity", 55 | "datatype": "string", 56 | "defaultValue": "", 57 | "required": "false", 58 | "memberof": "handling_markingstructures", 59 | "ontologyMappingType": "simple", 60 | "ontologyMapping": "" 61 | }, 62 | "handling_markingstructures_isamversion": { 63 | "description": "ISA Marking Version", 64 | 
"valuemap": "handling;marking_structures;isam_version", 65 | "datatype": "enum", 66 | "defaultValue": "1.0", 67 | "required": "false", 68 | "memberof": "handling_markingstructures", 69 | "ontologyMappingType": "enum", 70 | "enumValues": { 71 | "1.0": { 72 | "ontologyMapping": "" 73 | } 74 | } 75 | }, 76 | "handling_markingstructures_isamversion2": { 77 | "description": "ISA Marking Version", 78 | "valuemap": "handling;marking_structures;isam_version", 79 | "datatype": "enum", 80 | "defaultValue": "1.0", 81 | "required": "false", 82 | "memberof": "handling_markingstructures", 83 | "ontologyMappingType": "enum", 84 | "enumValues": { 85 | "1.0": { 86 | "ontologyMapping": "" 87 | } 88 | } 89 | }, 90 | "handling_markingstructures_policyref": { 91 | "description": "If multiple policy refs apply, they are provided as space delimited URNs.", 92 | "valuemap": "handling;marking_structures;policyref", 93 | "datatype": "string", 94 | "defaultValue": "", 95 | "required": "false", 96 | "memberof": "handling_markingstructures", 97 | "ontologyMappingType": "simple", 98 | "ontologyMapping": "" 99 | }, 100 | "handling_markingstructures_controlset": { 101 | "description": "Group of data tags that are used to inform automated access control decisions.", 102 | "valuemap": "handling;marking_structures;controlset", 103 | "datatype": "string", 104 | "defaultValue": "", 105 | "required": "false", 106 | "memberof": "handling_markingstructures", 107 | "ontologyMappingType": "simple", 108 | "ontologyMapping": "" 109 | }, 110 | "handling_markingstructures_mostrestrictive": { 111 | "description": "Indicates whether or not this marking structure denotes the most restrictive applied to this structure. Only used in STIX header. Can only be used if the Controlled_Structure is set to //node()", 112 | "valuemap": "handling;marking_structures;most_restrictive", 113 | "datatype": "string", 114 | "defaultValue": "true", 115 | "required": "false", 116 | "memberof": "handling_markingstructures", 117 | "ontologyMappingType": "simple", 118 | "ontologyMapping": "" 119 | }, 120 | "handling_markingstructures_xsitype": { 121 | "defaultValue": "edh2cyberMarkingAssert:ISAMarkingsAssertionType", 122 | "enumValues": { 123 | "edh2cyberMarking:ISAMarkingsType": { 124 | "ontologyMapping": "" 125 | }, 126 | "edh2cyberMarkingAssert:ISAMarkingsAssertionType": { 127 | "ontologyMapping": "" 128 | } 129 | } 130 | }, 131 | "produced_time": { 132 | "dateTimeFormat": "YYYY-MM-DDTHH:mm:ssZ", 133 | "required": true, 134 | "defaultValue": "&stix_now()" 135 | }, 136 | "information_source_name": { 137 | "description": "The source of the report", 138 | "valuemap": "information_source;identity;name", 139 | "datatype": "string", 140 | "required": true, 141 | "defaultValue": "DOE", 142 | "ontologyMappingType": "none", 143 | "ontologyMapping": "" 144 | }, 145 | "information_source_description": { 146 | "description": "The description of the source of the report", 147 | "valuemap": "information_source;description", 148 | "datatype": "string", 149 | "required": true, 150 | "defaultValue": "U.S. 
Department of Energy", 151 | "ontologyMappingType": "none", 152 | "ontologyMapping": "" 153 | }, 154 | "profiles": { 155 | "required": true 156 | }, 157 | "profiles_profile": { 158 | "defaultValue": "ISA Profile v1.0" 159 | } 160 | } 161 | }, 162 | "IndicatorData": { 163 | "fields": { 164 | "indicator_types": { 165 | "required": false 166 | } 167 | } 168 | } 169 | } -------------------------------------------------------------------------------- /FlexTransform/resources/schemaDefinitions/stix-tlp.json: -------------------------------------------------------------------------------- 1 | { 2 | "DocumentHeaderData": { 3 | "fields": { 4 | "handling_markingstructures": { 5 | "subfields": { 6 | "handling_markingstructures_color": {"required": true, "primaryKeyMatch": "tlpMarking:TLPMarkingStructureType"}, 7 | "handling_markingstructures_xsitype": {"required": true, "primaryKey": true}, 8 | "handling_markingstructures_statement": {"required": false, "primaryKeyMatch": "simpleMarking:SimpleMarkingStructureType"} 9 | }, 10 | "defaultFields": { 11 | "handling_markingstructures_xsitype": "tlpMarking:TLPMarkingStructureType" 12 | } 13 | }, 14 | "handling_markingstructures_color": { 15 | "defaultValue": "GREEN" 16 | } 17 | } 18 | } 19 | } -------------------------------------------------------------------------------- /FlexTransform/resources/schemaDefinitions/stix-tlp2.json: -------------------------------------------------------------------------------- 1 | { 2 | "DocumentHeaderData": { 3 | "fields": { 4 | "handling_markingstructures": { 5 | "subfields": { 6 | "handling_markingstructures_color": {"required": true, "primaryKeyMatch": "tlpMarking:TLPMarkingStructureType"}, 7 | "handling_markingstructures_xsitype": {"required": true, "primaryKey": true}, 8 | "handling_markingstructures_statement": {"required": false, "primaryKeyMatch": "simpleMarking:SimpleMarkingStructureType"} 9 | }, 10 | "defaultFields": { 11 | "handling_markingstructures_xsitype": "tlpMarking:TLPMarkingStructureType" 12 | } 13 | }, 14 | "handling_markingstructures_color": { 15 | "defaultValue": "GREEN" 16 | } 17 | , 18 | "produced_time": { 19 | "dateTimeFormat": "YYYY-MM-DDTHH:mm:ssZ", 20 | "required": true, 21 | "defaultValue": "&now()" 22 | }, 23 | "profiles": { 24 | "required": true 25 | }, 26 | "profiles_profile": { 27 | "defaultValue": "ISA Profile v1.0" 28 | } 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /FlexTransform/resources/schemaDefinitions/twitter.json: -------------------------------------------------------------------------------- 1 | { 2 | "IndicatorsData": { 3 | "types": { 4 | "IPv4-Address-Block": [{ "ipv4-addr[*]": "*" }], 5 | "DNS-Hostname-Block": [{ "domain[*]": "*" }], 6 | "URL-Block": [{ "url[*]": "*" }] 7 | }, 8 | "fields": { 9 | "ipv4-addr[*]": { 10 | "description": "The malicious ip address", 11 | "datatype": "string", 12 | "defaultValue": "NoValue", 13 | "required": false, 14 | "ontologyMappingType": "simple", 15 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv4AddressIndicatorValueSemanticComponent" 16 | }, 17 | "domain[*]": { 18 | "description": "The malicious domain", 19 | "datatype": "string", 20 | "defaultValue": "NoValue", 21 | "required": false, 22 | "ontologyMappingType": "simple", 23 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent" 24 | }, 25 | "url[*]": { 26 | "description": "The malicious url", 27 | "datatype": "string", 28 | "defaultValue": "NoValue", 29 | "required": false, 
30 | "ontologyMappingType": "simple", 31 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent" 32 | } 33 | } 34 | } 35 | } -------------------------------------------------------------------------------- /FlexTransform/resources/schemas/CFMAlert.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 11 | 12 | 13 | The version of the CFMAlert schema being 14 | used. 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 27 | 28 | Should be set to the IRI of the SignalDomain referenced from the KIDS ontology. 29 | 30 | 31 | 33 | 34 | Should be set to the IRI of the SignalCanonicalRepresentation referenced from the KIDS ontology. 35 | 36 | 37 | 39 | 40 | The value of the signal 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 89 | 90 | 91 | 93 | 94 | 96 | 97 | 99 | 100 | 102 | 103 | 105 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 125 | 126 | 127 | The category of action which was 128 | taken, e.g. "block-installed". 129 | 130 | 131 | 132 | 134 | 135 | 136 | A description of the action which was taken - intended to be human readable. 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 154 | 155 | 156 | A general category of malicious 157 | behavior, e.g. scanning. 158 | 159 | 160 | 161 | 163 | 164 | 165 | A specific description of the 166 | behavior which prompted the alert, 167 | e.g. "Excessive requests for 168 | non-existent web pages". 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | -------------------------------------------------------------------------------- /FlexTransform/resources/schemas/CFMDownload.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 14 | 15 | 17 | 18 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 38 | 39 | 40 | 41 | 43 | 44 | 46 | 47 | 49 | 50 | 52 | 53 | 55 | 56 | 58 | 59 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /FlexTransform/resources/schemas/CFMDownloadRequest.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | A CFM request is an encapsulated prompt for a client 10 | for information from the repository. 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /FlexTransform/resources/schemas/CFMEnvelope.xsd: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | The CFM envelope comprises both the header information and any embedded 10 | message. 
11 | 12 | 13 | 14 | The CFMEnvelopeType is comprised of the following elements: 15 | * SubmittingSite - the site idenifier of the submitter 16 | * CFMSchemaVersion - A number indicating the version of the schema used for the envelope 17 | * Authentication Credential - The GPG-signed UUID of the submitting site 18 | * Message - An embedded message - may be another XML document, plain text, or binary data 19 | 20 | 21 | 23 | 24 | 25 | The version identifier for the envelope 26 | schema version. The current value is "2.0" 27 | 28 | 29 | 30 | 32 | 33 | 34 | The sending site is the site shortname as 35 | provided to CFM. E.g., for Argonne, this 36 | would be ANL; for Ames Laboratory, it would 37 | be AMES. Case does not matter. 38 | 39 | 40 | 41 | 43 | 44 | 45 | The timestamp, set by the sender, indicating 46 | when the message was sent. (unix epoch time seconds) 47 | 48 | 49 | 50 | 52 | 53 | 54 | 56 | 57 | 58 | The type of embedded message. Predefined 59 | types are Alert, Report, and Other. * Alert 60 | corresponds to a notification of observed 61 | malicious activity. * Report indicates 62 | informational content such as those provided 63 | by CPP for use with CASA. * Other indicates 64 | that the message is neither an Alert nor a 65 | Report, and normal processing of these types 66 | should not be attempted on this message. 67 | 68 | 69 | 70 | 72 | 73 | 75 | 76 | 77 | An indication of the sensitivity of the 78 | enclosed message. The URI should be a 79 | reference to a sensitivity definition, e.g. 80 | 'http://www.anl.gov/cfm/2.0#OUO' 81 | 82 | An optional element, if it is absent it is 83 | assumed to be non-sensitive. 84 | 85 | OUO is DOE's Official Use Only designation, 86 | indicating that data should be protected at 87 | a Moderate level, and only shared within 88 | DOE. 89 | 90 | 91 | 92 | 94 | 95 | 97 | 98 | 100 | 101 | 103 | 104 | 105 | The message is the content indended for the 106 | recipients. It may be arbitrary, however, 107 | CFM does define some message types/formats 108 | in the schema (e.g. OpenIOC alert). 
109 | 110 | 111 | 112 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /FlexTransform/resources/schemas/CFMMessage13.xsd: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 25 | 26 | 28 | 29 | 30 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 73 | 74 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | -------------------------------------------------------------------------------- /FlexTransform/resources/stix.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/stix.zip -------------------------------------------------------------------------------- /FlexTransform/test/LQMTTests.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import unittest 4 | from lxml import etree 5 | 6 | from FlexTransform.test.SampleInputs import CFM13ALERT2 7 | from FlexTransform import FlexTransform 8 | 9 | class TestCFM13AlertToLQMT(unittest.TestCase): 10 | output1 = None 11 | namespace = { 12 | 'cybox' : "http://cybox.mitre.org/cybox-2", 13 | 'indicator' : "http://stix.mitre.org/Indicator-2", 14 | 'marking' : "http://data-marking.mitre.org/Marking-1", 15 | 'PortObj' : "http://cybox.mitre.org/objects#PortObject-2", 16 | 'stix' : "http://stix.mitre.org/stix-1", 17 | 'stixCommon' : "http://stix.mitre.org/common-1", 18 | 'stixVocabs' : "http://stix.mitre.org/default_vocabularies-1", 19 | 'xsi' : "http://www.w3.org/2001/XMLSchema-instance", 20 | 'cyboxVocabs' : "http://cybox.mitre.org/default_vocabularies-2", 21 | 'AddressObj' : "http://cybox.mitre.org/objects#AddressObject-2", 22 | 'ArtifactObj' : "http://cybox.mitre.org/objects#ArtifactObject-2", 23 | 'FileObj' : "http://cybox.mitre.org/objects#FileObject-2", 24 | 'URIObj' : "http://cybox.mitre.org/objects#URIObject-2", 25 | 'tlpMarking' : "http://data-marking.mitre.org/extensions/MarkingStructure#TLP-1", 26 | 'CFM' : "http://www.anl.gov/cfm/stix", 27 | 'xmlns' : "http://www.anl.gov/cfm/1.3/IDMEF-Message" 28 | } 29 | 30 | @classmethod 31 | def setUpClass(cls): 32 | current_dir = os.path.dirname(__file__) 33 | transform = FlexTransform.FlexTransform() 34 | 35 | with open(os.path.join(current_dir, 
'../resources/sampleConfigurations/cfm13.cfg'), 'r') as input_file: 36 | transform.add_parser('cfm13alert', input_file) 37 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/lqmtools.cfg'), 'r') as input_file: 38 | transform.add_parser('lqmtools', input_file) 39 | output1_object = io.StringIO() 40 | 41 | transform.transform(io.StringIO(CFM13ALERT2), 'cfm13alert', 'lqmtools', target_file=output1_object) 42 | output1_object.seek(0) 43 | output1_object.readline() 44 | cls.output1 = etree.parse(output1_object)  # parse the transformed output so the XPath assertions below can query it 45 | 46 | 47 | def test_alert_analyzerid(self): 48 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/@analyzerid", namespaces=self.namespace)[0], "Fake") 49 | 50 | def test_alert_analyzer_node_location(self): 51 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/xmlns:Node/xmlns:location/text()", namespaces=self.namespace)[0], "1600 Pennslyvania Ave, Washington DC 20005") 52 | 53 | def test_alert_analyzer_node_name(self): 54 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/xmlns:Node/xmlns:name/text()", namespaces=self.namespace)[0], "Nicholas Hendersen, 555-867-5309, nietzsche@doe.gov") 55 | 56 | def test_alert_analyzer_time(self): 57 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AnalyzerTime/text()", namespaces=self.namespace)[0], "2016-03-23T16:45:05+0000") 58 | 59 | def test_alert_AD_number_alerts(self): 60 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AdditionalData[@meaning='number of alerts in this report']/text()", namespaces=self.namespace)[0], "7") 61 | 62 | def test_alert_AD_report_schedule(self): 63 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AdditionalData[@meaning='report schedule']/text()", namespaces=self.namespace)[0], "NoValue") 64 | 65 | def test_alert_AD_report_type(self): 66 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AdditionalData[@meaning='report type']/text()", namespaces=self.namespace)[0], "alerts") 67 | 68 | def test_alert_AD_start_time(self): 69 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AdditionalData[@meaning='report start time']/text()", namespaces=self.namespace)[0], "2016-03-23T16:45:05+0000") 70 | 71 | def test_source_node_address_ipv4(self): 72 | self.assertEqual(set(self.output1.xpath("//xmlns:Address[@category='ipv4-addr']/xmlns:address/text()", namespaces=self.namespace)), set(["10.10.10.10", "11.11.11.11", "12.12.12.12", "13.13.13.13", "14.14.14.14"])) 73 | 74 | def test_source_node_address_url(self): 75 | self.assertEqual(set(self.output1.xpath("//xmlns:Address[not(@category='ipv4-addr')]/xmlns:address/text()", namespaces=self.namespace)), set(["fake.site.com/malicious.js", "bad.domain.be/poor/path"])) 76 | 77 | def test_alert_AD_OUO(self): 78 | self.assertEqual(set(self.output1.xpath("//xmlns:AdditionalData[@meaning='OUO']/text()", namespaces=self.namespace)), set(['0'])) 79 | 80 | def test_alert_AD_restriction(self): 81 | self.assertEqual(set(self.output1.xpath("//xmlns:AdditionalData[@meaning='restriction']/text()", namespaces=self.namespace)),set(['public'])) 82 | 83 | def test_alert_AD_duration(self): 84 | self.assertEqual(set(self.output1.xpath("//xmlns:AdditionalData[@meaning='duration']/text()", namespaces=self.namespace)), set(['0'])) 85 | 86 | def test_alert_AD_recon(self): 87 |
self.assertEqual(set(self.output1.xpath("//xmlns:AdditionalData[@meaning='recon']/text()", namespaces=self.namespace)), set(['0'])) 88 | 89 | def test_alert_assessment_action(self): 90 | self.assertEqual(set(self.output1.xpath("//xmlns:Action/@category", namespaces=self.namespace)), set(["block-installed"])) 91 | 92 | def test_alert_classification_reference_name(self): 93 | self.assertEqual(set(self.output1.xpath("//xmlns:Reference/xmlns:name/text()", namespaces=self.namespace)), set(["unknown"])) 94 | 95 | def test_alert_classification_reference_url_false(self): 96 | self.assertEqual(set(self.output1.xpath("//xmlns:url/text()", namespaces=self.namespace)), set([" "])) 97 | 98 | if __name__ == '__main__': 99 | unittest.main() -------------------------------------------------------------------------------- /FlexTransform/test/Readme.md: -------------------------------------------------------------------------------- 1 | - [X] to CFM13Alert 2 | - [X] from STIX 3 | - [X] TLP 4 | - [X] ACS 5 | - [X] from Key/Value Pairs *IP 6 | 7 | - [X] STIX 8 | - [X] to TLP 9 | - [X] from CFM13Alert 10 | - [X] from ACS 11 | - [X] from ACS30 12 | - [X] from Key/Value Pairs *IP 13 | - [X] to ACS 14 | - [X] from CFM13Alert 15 | - [X] from TLP 16 | - [X] from ACS30 17 | - [X] from Key/Value Pairs *IP 18 | - [X] to ACS30 19 | - [X] from CFM13Alert 20 | - [X] from TLP 21 | - [X] from ACS 22 | - [X] from Key/Value Pairs *IP 23 | 24 | - [X] to Key/Value Pairs 25 | - [X] from STIX 26 | - [X] from TLP 27 | - [X] from ACS 28 | - [X] from ACS30 29 | - [X] from CFM13Alert 30 | - [X] from CFM20Alert 31 | 32 | - [X] to LQMT 33 | - [X] from CFM13Alert 34 | - [X] from STIX 35 | - [X] from TLP 36 | - [X] from ACS 37 | - [X] from ACS30 38 | - [X] from Key/Value Pairs *IP 39 | -------------------------------------------------------------------------------- /FlexTransform/test/TestData/cfm13_multiple_site.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE, DICT 3 | FileParser = XML 4 | 5 | # XML Options 6 | [XML] 7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema 8 | ValidateSchema = no 9 | # SchemaFile must be set if ValidateSchema is yes 10 | # SchemaFile = resources/schemas/CFMMessage13.xsd 11 | # CustomParser is used to load custom XML parsing classes for complex XML documents 12 | CustomParser = CFM13 13 | 14 | # JSON Options (none currently defined) 15 | [JSON] 16 | 17 | # CSV Options 18 | [CSV] 19 | # HeaderLine defines if the CSV file has the field names in the first non-commented row of the file 20 | # HeaderLine defaults to no 21 | HeaderLine = no 22 | # QuotesOptional defines if every field in the CSV file has to be enclosed in quotes. 23 | # QuotesOptional defaults to yes 24 | QuotesOptional = no 25 | # SeparatorChar defines the charactor or charactors that seperate the fields in the file. 26 | # SeparatorChar defaults to , 27 | SeparatorChar = , 28 | # StripSpaces defines if spaces before or after the separator should be striped 29 | # StripSpaces defaults to yes 30 | StripSpaces = yes 31 | # FieldNames have to be defined if HeaderLine is set to no. 
The FieldNames map to the SCHEMA definition below 32 | FieldNames = ip,host,etc 33 | 34 | # TSV Options 35 | [TSV] 36 | HeaderLine = no 37 | FieldNames = ip,host,etc 38 | 39 | # REGEX Options 40 | [REGEX] 41 | Regex = (\S*)\s+(\S*)\s+\d+\s+([0-9.]+) 42 | FieldNames = host,service,ip 43 | 44 | # KEYVALUE Options 45 | [KEYVALUE] 46 | SeparatorChar = \s 47 | QuoteChar = ['] 48 | KVSeparator = [=] 49 | 50 | # SCHEMA Definition 51 | 52 | [SCHEMA] 53 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 54 | PrimarySchemaConfiguration = resources/schemaDefinitions/cfm13.json 55 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition 56 | # If multiple files are required, separate them with ; 57 | SiteSchemaConfiguration = resources/schemaDefinitions/cfm13-site.json;test/TestData/mult_site_config.json 58 | 59 | # Metadata can be included with certain files. Set MetadataSchemaConfiguration to the json file that defines the metadata schema 60 | MetadataSchemaConfiguration = resources/schemaDefinitions/cfm-metadata.json 61 | -------------------------------------------------------------------------------- /FlexTransform/test/TestData/csv-example-2.json: -------------------------------------------------------------------------------- 1 | { 2 | "IndicatorData": { 3 | "types": { 4 | "IPv4-Address-Block": [ {"indicatorType": "IPv4 Address"} ], 5 | "IPv6-Address-Block": [ {"indicatorType": "IPv6 Address"} ], 6 | "DNS-Hostname-Block": [ {"indicatorType": "DNSHostName"}], 7 | "URL-Block": [ {"indicatorType": "URL"}, {"indicatorType": "URI"} ], 8 | "Malicious-File-Hash": [ {"indicatorType": "FileMD5Hash"}, {"indicatorType": "FileSHA1Hash"}] 9 | }, 10 | "fields": { 11 | "indicator": { 12 | "description": "The value to be acted upon (e.g. ip, domain name, URL)", 13 | "datatype": "string", 14 | "required": true, 15 | "ontologyEnumField": "", 16 | "ontologyMappingType": "simple", 17 | "ontologyMapping": "" 18 | }, 19 | "indicatorType": { 20 | "description": "A type name that informs how to interpret the indicator (e.g. 
ipv4, emailAddress) (enum)", 21 | "datatype": "enum", 22 | "dependsOn": "indicator", 23 | "mapOntologyToElement": "indicator", 24 | "required": false, 25 | "ontologyMappingType": "enum", 26 | "enumValues": { 27 | "IPv6 Address": { 28 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv6AddressIndicatorValueSemanticComponent" 29 | }, 30 | "IPv4 Address": { 31 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv4AddressIndicatorValueSemanticComponent" 32 | }, 33 | "URL": { 34 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent" 35 | }, 36 | "URI": { 37 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent" 38 | }, 39 | "DNSDomainName": { 40 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent" 41 | }, 42 | "DNSHostName": { 43 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent" 44 | }, 45 | "FileMD5Hash": { 46 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#MD5FilehashIndicatorValueSemanticComponent" 47 | }, 48 | "FileSHA1Hash": { 49 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#SHA1FilehashIndicatorValueSemanticComponent" 50 | } 51 | } 52 | }, 53 | "reason": { 54 | "description": "Description associated with indicator", 55 | "datatype": "string", 56 | "required": false, 57 | "defaultValue": "noValue", 58 | "ontologyMappingType": "simple", 59 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IndicatorDescriptionSemanticConcept" 60 | }, 61 | "detectedTime": { 62 | "description": "Time the report was generated", 63 | "datatype": "datetime", 64 | "dateTimeFormat": " YYYY-MM-DD HH:mm:ss", 65 | "required": true, 66 | "ontologyMappingType": "simple", 67 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#TimeAlertProcessedOnClientSemanticConcept" 68 | } 69 | } 70 | } 71 | } -------------------------------------------------------------------------------- /FlexTransform/test/TestData/csv_example_2.cfg: -------------------------------------------------------------------------------- 1 | [SYNTAX] 2 | FileParser = CSV 3 | 4 | # CSV Options 5 | [CSV] 6 | Fields = indicator,indicatorType,reason,detectedTime 7 | Delimiter = "," 8 | QuoteChar = " 9 | EscapeChar = \\ 10 | HeaderLine = false 11 | DoubleQuote = false 12 | QuoteStyle = Minimal 13 | 14 | # SCHEMA Definition 15 | [SCHEMA] 16 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform 17 | PrimarySchemaConfiguration = test/testData/csv-example-2.json -------------------------------------------------------------------------------- /FlexTransform/test/TestData/mult_site_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "DocumentHeaderData": { 3 | "fields": { 4 | "analyzerid": { 5 | "defaultValue": "LeoAtriedes" 6 | }, 7 | "location": { 8 | "defaultValue": "Sand Worm Dave, Arrakeen, Dune 54321" 9 | }, 10 | "contact_phone": { 11 | "defaultValue": "555-867-5309", 12 | "required": true 13 | }, 14 | "report_schedule": { 15 | "defaultValue": "5 minutes" 16 | } 17 | } 18 | }, 19 | "IndicatorData": { 20 | "fields": { 21 | "reference_origin": { 22 | "defaultValue": "user-specific" 23 | }, 24 | "action_duration": { 25 | "defaultValue": "86400" 26 | } 27 | } 28 | } 29 | } -------------------------------------------------------------------------------- /FlexTransform/test/ToKeyValue_test.py: 
-------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import unittest 4 | import arrow 5 | 6 | from FlexTransform import FlexTransform 7 | from FlexTransform.test.SampleInputs import STIXTLP, STIXACS, CFM13ALERT 8 | 9 | 10 | class TestCFM13AlertToKeyValue(unittest.TestCase): 11 | output1 = None 12 | 13 | @classmethod 14 | def setUpClass(cls): 15 | current_dir = os.path.dirname(__file__) 16 | transform = FlexTransform.FlexTransform() 17 | 18 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/cfm13.cfg'), 'r') as input_file: 19 | transform.add_parser('cfm13alert', input_file) 20 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/keyvalue.cfg'), 'r') as input_file: 21 | transform.add_parser('keyvalue', input_file) 22 | output1_object = io.StringIO() 23 | 24 | transform.transform(io.StringIO(CFM13ALERT), 'cfm13alert', 'keyvalue', target_file=output1_object) 25 | 26 | cls.output1 = [] 27 | output1_object.seek(0) 28 | for line in output1_object.read().splitlines(): 29 | cls.output1.append(line.split('&')) 30 | 31 | def test_duration(self): 32 | self.assertIn('duration=86400', self.output1[0]) 33 | 34 | def test_serviceport(self): 35 | self.assertIn('service_port=22', self.output1[0]) 36 | 37 | def test_category_name(self): 38 | self.assertIn("category_name='SSH Attack'", self.output1[0]) 39 | 40 | def test_category(self): 41 | self.assertIn("category='Scanning'", self.output1[0]) 42 | 43 | def test_severity(self): 44 | self.assertIn("severity='unknown'", self.output1[0]) 45 | 46 | def test_prior_offenses(self): 47 | self.assertIn('prior_offenses=11', self.output1[0]) 48 | 49 | def test_category_description(self): 50 | self.assertIn("category_description='SSH Attack'", self.output1[0]) 51 | 52 | def test_serviceprotocol(self): 53 | self.assertIn("service_protocol='TCP'", self.output1[0]) 54 | 55 | def test_comment(self): 56 | self.assertIn("comment='No Comment'", self.output1[0]) 57 | 58 | def test_confidence(self): 59 | self.assertIn('confidence=0', self.output1[0]) 60 | 61 | def test_direction(self): 62 | self.assertIn("direction='unknown'", self.output1[0]) 63 | 64 | def test_ipv4(self): 65 | self.assertIn('ipv4=10.10.10.10', self.output1[0]) 66 | 67 | def test_combined_comment(self): 68 | self.assertIn( 69 | "combined_comment='SSH scans against multiple hosts, direction:ingress, confidence:87, severity:high'", 70 | self.output1[0]) 71 | 72 | 73 | class TestSTIXTLPToKeyValue(unittest.TestCase): 74 | output1 = None 75 | 76 | @classmethod 77 | def setUpClass(cls): 78 | current_dir = os.path.dirname(__file__) 79 | transform = FlexTransform.FlexTransform() 80 | 81 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/stix_tlp.cfg'), 'r') as input_file: 82 | transform.add_parser('stix_tlp', input_file) 83 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/keyvalue.cfg'), 'r') as input_file: 84 | transform.add_parser('keyvalue', input_file) 85 | output1_object = io.StringIO() 86 | 87 | transform.transform(io.StringIO(STIXTLP), 'stix_tlp', 'keyvalue', target_file=output1_object) 88 | 89 | cls.output1 = [] 90 | output1_object.seek(0) 91 | for line in output1_object.read().splitlines(): 92 | cls.output1 += line.split('&') 93 | 94 | def test_category(self): 95 | self.assertIs(5, self.output1.count("category='Unspecified'")) 96 | 97 | def test_category_name(self): 98 | self.assertIs(5, self.output1.count("category_name='Unspecified'")) 99 | 100 | def 
test_severity(self): 101 | self.assertIs(5, self.output1.count("severity='unknown'")) 102 | 103 | def test_comment(self): 104 | self.assertIs(5, self.output1.count("comment='No Comment'")) 105 | 106 | def test_confidence(self): 107 | self.assertIs(5, self.output1.count('confidence=0')) 108 | 109 | def test_direction(self): 110 | self.assertIs(5, self.output1.count("direction='unknown'")) 111 | 112 | def test_ipv4(self): 113 | self.assertIn('ipv4=10.10.10.10', self.output1) 114 | self.assertIn('ipv4=11.11.11.11', self.output1) 115 | self.assertIn('ipv4=12.12.12.12', self.output1) 116 | self.assertIn('ipv4=13.13.13.13', self.output1) 117 | self.assertIn('ipv4=14.14.14.14', self.output1) 118 | 119 | def test_combined_comment(self): 120 | self.assertIs(5, self.output1.count("combined_comment='Energy Sector Indicator'")) 121 | 122 | 123 | class TestSTIXACSToKeyValue(unittest.TestCase): 124 | output1 = None 125 | 126 | @classmethod 127 | def setUpClass(cls): 128 | current_dir = os.path.dirname(__file__) 129 | transform = FlexTransform.FlexTransform() 130 | 131 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/stix_essa.cfg'), 'r') as input_file: 132 | transform.add_parser('stix_acs2', input_file) 133 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/keyvalue.cfg'), 'r') as input_file: 134 | transform.add_parser('keyvalue', input_file) 135 | output1_object = io.StringIO() 136 | 137 | transform.transform(io.StringIO(STIXACS), 'stix_acs2', 'keyvalue', target_file=output1_object) 138 | 139 | cls.output1 = [] 140 | output1_object.seek(0) 141 | for line in output1_object.read().splitlines(): 142 | cls.output1 += line.split('&') 143 | 144 | def test_category(self): 145 | self.assertIs(3, self.output1.count("category='Unspecified'")) 146 | 147 | def test_category_name(self): 148 | self.assertIs(3, self.output1.count("category_name='Unspecified'")) 149 | 150 | def test_severity(self): 151 | self.assertIs(3, self.output1.count("severity='unknown'")) 152 | 153 | def test_comment(self): 154 | self.assertIs(3, self.output1.count("comment='No Comment'")) 155 | 156 | def test_confidence(self): 157 | self.assertIs(3, self.output1.count('confidence=0')) 158 | 159 | def test_direction(self): 160 | self.assertIs(3, self.output1.count("direction='unknown'")) 161 | 162 | def test_fqdn(self): 163 | self.assertIn("fqdn='blog.website.net'", self.output1) 164 | self.assertIn("fqdn='fake.com'", self.output1) 165 | self.assertIn("fqdn='goo.gl/peter'", self.output1) 166 | 167 | def test_combined_comment(self): 168 | self.assertIn("combined_comment='AAA Report Indicator'", self.output1) 169 | self.assertIn("combined_comment='Domain Indicator'", self.output1) 170 | self.assertIn("combined_comment='Just Another Indicator'", self.output1) 171 | 172 | 173 | class TestSTIXACS30ToKeyValue(unittest.TestCase): 174 | output1 = None 175 | 176 | @classmethod 177 | def setUpClass(cls): 178 | current_dir = os.path.dirname(__file__) 179 | transform = FlexTransform.FlexTransform() 180 | 181 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/stix_acs30.cfg'), 'r') as input_file: 182 | transform.add_parser('stix_acs30', input_file) 183 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/keyvalue.cfg'), 'r') as input_file: 184 | transform.add_parser('keyvalue', input_file) 185 | output1_object = io.StringIO() 186 | 187 | transform.transform(io.StringIO(STIXACS), 'stix_acs30', 'keyvalue', target_file=output1_object) 188 | 189 | cls.output1 = [] 190 | 
output1_object.seek(0) 191 | for line in output1_object.read().splitlines(): 192 | cls.output1 += line.split('&') 193 | 194 | def test_category(self): 195 | self.assertIs(3, self.output1.count("category='Unspecified'")) 196 | 197 | def test_category_name(self): 198 | self.assertIs(3, self.output1.count("category_name='Unspecified'")) 199 | 200 | def test_severity(self): 201 | self.assertIs(3, self.output1.count("severity='unknown'")) 202 | 203 | def test_comment(self): 204 | self.assertIs(3, self.output1.count("comment='No Comment'")) 205 | 206 | def test_confidence(self): 207 | self.assertIs(3, self.output1.count('confidence=0')) 208 | 209 | def test_direction(self): 210 | self.assertIs(3, self.output1.count("direction='unknown'")) 211 | 212 | def test_fqdn(self): 213 | self.assertIn("fqdn='blog.website.net'", self.output1) 214 | self.assertIn("fqdn='fake.com'", self.output1) 215 | self.assertIn("fqdn='goo.gl/peter'", self.output1) 216 | 217 | def test_combined_comment(self): 218 | self.assertIn("combined_comment='AAA Report Indicator'", self.output1) 219 | self.assertIn("combined_comment='Domain Indicator'", self.output1) 220 | self.assertIn("combined_comment='Just Another Indicator'", self.output1) 221 | 222 | 223 | if __name__ == '__main__': 224 | unittest.main() 225 | -------------------------------------------------------------------------------- /FlexTransform/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/test/__init__.py -------------------------------------------------------------------------------- /FlexTransform/test/regression_test.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import unittest 4 | from lxml import etree 5 | 6 | from FlexTransform.test.SampleInputs import STIXTLP 7 | from FlexTransform import FlexTransform 8 | 9 | class regression_tests(unittest.TestCase): 10 | output1 = None 11 | namespace = { 12 | 'cybox' : "http://cybox.mitre.org/cybox-2", 13 | 'indicator' : "http://stix.mitre.org/Indicator-2", 14 | 'marking' : "http://data-marking.mitre.org/Marking-1", 15 | 'PortObj' : "http://cybox.mitre.org/objects#PortObject-2", 16 | 'stix' : "http://stix.mitre.org/stix-1", 17 | 'stixCommon' : "http://stix.mitre.org/common-1", 18 | 'stixVocabs' : "http://stix.mitre.org/default_vocabularies-1", 19 | 'xsi' : "http://www.w3.org/2001/XMLSchema-instance", 20 | 'cyboxVocabs' : "http://cybox.mitre.org/default_vocabularies-2", 21 | 'AddressObj' : "http://cybox.mitre.org/objects#AddressObject-2", 22 | 'ArtifactObj' : "http://cybox.mitre.org/objects#ArtifactObject-2", 23 | 'FileObj' : "http://cybox.mitre.org/objects#FileObject-2", 24 | 'URIObj' : "http://cybox.mitre.org/objects#URIObject-2", 25 | 'tlpMarking' : "http://data-marking.mitre.org/extensions/MarkingStructure#TLP-1", 26 | 'CFM' : "http://www.anl.gov/cfm/stix", 27 | 'xmlns' : "http://www.anl.gov/cfm/1.3/IDMEF-Message" 28 | } 29 | 30 | @classmethod 31 | def setUpClass(cls): 32 | current_dir = os.path.dirname(__file__) 33 | transform = FlexTransform.FlexTransform() 34 | 35 | with open(os.path.join(current_dir, './TestData/cfm13_multiple_site.cfg'), 'r') as input_file: 36 | transform.add_parser('cfm13alert', input_file) 37 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/stix_tlp.cfg'), 'r') as input_file: 38 | transform.add_parser('stix', input_file) 39 | 
output1_object = io.StringIO() 40 | 41 | transform.transform(io.StringIO(STIXTLP), 'stix', 'cfm13alert', target_file=output1_object) 42 | output1_object.seek(0) 43 | output1_object.readline() 44 | cls.output1 = etree.parse(output1_object) 45 | print(output1_object.getvalue()) 46 | 47 | def test_alert_analyzerid(self): 48 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/@analyzerid", namespaces=self.namespace)[0], "Fake") 49 | 50 | def test_alert_analyzer_node_location(self): 51 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/xmlns:Node/xmlns:location/text()", namespaces=self.namespace)[0], "Sand Worm Dave, Arrakeen, Dune 54321") 52 | 53 | def test_alert_analyzer_node_name(self): 54 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/xmlns:Node/xmlns:name/text()", namespaces=self.namespace)[0], "Test User, 555-867-5309, test@test.int") 55 | 56 | def test_alert_AD_report_schedule(self): 57 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AdditionalData[@meaning='report schedule']/text()", namespaces=self.namespace)[0], "5 minutes") 58 | 59 | def test_alert_AD_duration(self): 60 | self.assertEqual(set(self.output1.xpath("//xmlns:AdditionalData[@meaning='duration']/text()", namespaces=self.namespace)), set(['86400'])) 61 | 62 | def test_reference_origin(self): 63 | self.assertEqual(set(self.output1.xpath("//xmlns:Reference/@origin", namespaces=self.namespace)), set(['user-specific'])) -------------------------------------------------------------------------------- /FlexTransform/test/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Nov 9, 2015 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | ''' 8 | Test module utilities 9 | ''' 10 | 11 | import json 12 | import arrow 13 | import csv 14 | 15 | def deep_sort(obj): 16 | """ 17 | Recursively sort nested lists within a list or dict 18 | Based on code from http://stackoverflow.com/questions/18464095/how-to-achieve-assertdictequal-with-assertsequenceequal-applied-to-values 19 | """ 20 | 21 | if isinstance(obj, dict): 22 | _sorted = {} 23 | for key in sorted(obj): 24 | _sorted[key] = deep_sort(obj[key]) 25 | 26 | elif isinstance(obj, list): 27 | new_list = [] 28 | isdict = False 29 | for val in obj: 30 | if (not isdict and isinstance(val, dict)) : 31 | isdict = True 32 | 33 | new_list.append(deep_sort(val)) 34 | 35 | if (isdict) : 36 | # Sort lists of dictionaries by the hash value of the data in the dictionary 37 | _sorted = sorted(new_list, key=lambda d: hash(json.dumps(d, ensure_ascii = True, sort_keys = True))) 38 | else : 39 | _sorted = sorted(new_list) 40 | 41 | else: 42 | _sorted = obj 43 | 44 | return _sorted 45 | 46 | # Used for test cases where the time is based on the current time, 47 | # so that test cases don't fail every time due to the constant change 48 | # in the value between the current time and the values stored in the data.
49 | def dynamic_time_change(data): 50 | index = 0 51 | newData = """""" 52 | reader = csv.reader(data.split(), delimiter=',', quotechar='"') 53 | for row in reader: 54 | for x in range(len(row)): 55 | row[x] = '\"' + row[x] + '\"' 56 | if (index < 7): 57 | newData += ','.join(row) + '\n' 58 | elif (index == 7): 59 | row[1] = arrow.utcnow().replace(hours=1).format('YYYYMMDDTHHmmss') + 'Z' 60 | newData += ','.join(row) + '\n' 61 | else: 62 | row[1] = arrow.utcnow().replace(days=4).format('YYYYMMDDTHHmmss') + 'Z' 63 | newData += ','.join(row) + '\n' 64 | index += 1 65 | 66 | return newData 67 | -------------------------------------------------------------------------------- /ISAMarkingExtension/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/ISAMarkingExtension/__init__.py -------------------------------------------------------------------------------- /ISAMarkingExtension/bindings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/ISAMarkingExtension/bindings/__init__.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2015, UChicago Argonne, LLC 2 | All Rights Reserved 3 | 4 | FLEXIBLE TRANSFORM (ANL-SF-15-020) Christopher Strasburg: Argonne National Laboratory 5 | OPEN SOURCE LICENSE 6 | 7 | Under the terms of Contract No. DE-AC02-06CH11357 with UChicago Argonne, LLC, the U.S. Government retains certain rights in this software. 8 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 9 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 11 | 3. Neither the names of UChicago Argonne, LLC or the Department of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 12 | 13 | ********************************************************************************* 14 | DISCLAIMER 15 | THE SOFTWARE IS SUPPLIED “AS IS” WITHOUT WARRANTY OF ANY KIND. 16 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
17 | ********************************************************************************* 18 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include FlexTransform/resources/*.xml 2 | include FlexTransform/resources/*.owl 3 | include FlexTransform/resources/*.zip 4 | include FlexTransform/resources/*.rdf 5 | include FlexTransform/resources/sampleConfigurations/* 6 | include FlexTransform/resources/schemaDefinitions/* 7 | include FlexTransform/resources/schemas/* 8 | include FlexTransform/test/TestData/* -------------------------------------------------------------------------------- /Utils/LQMTtestCFM.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Nov 18, 2014 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | from FlexTransform import FlexTransform 8 | import os 9 | import json 10 | import logging 11 | ''' 12 | # To enable profiling, remove comments below 13 | import cProfile, pstats, io 14 | ''' 15 | 16 | if __name__ == '__main__': 17 | 18 | ''' 19 | # Profiling 20 | pr = cProfile.Profile() 21 | ''' 22 | 23 | currentdir = os.path.dirname(__file__) 24 | logging.basicConfig(format='%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s', level=logging.DEBUG) 25 | 26 | TestDir = os.path.join(currentdir, 'resources/sampleMessages/cfm13Uploads/WithMetadata') 27 | 28 | Transform = FlexTransform.FlexTransform() 29 | Cfm13AlertConfig = open(os.path.join(currentdir,'resources/sampleConfigurations/cfm13.cfg'), 'r') 30 | Transform.add_parser('Cfm13Alert', Cfm13AlertConfig) 31 | 32 | LQMToolsConfig = open(os.path.join(currentdir,'resources/sampleConfigurations/lqmtools.cfg'), 'r') 33 | Transform.add_parser('LQMTools', LQMToolsConfig) 34 | 35 | TransformedData = [] 36 | 37 | for file in os.listdir(TestDir): 38 | if file.startswith('.'): 39 | f = open(os.path.join(TestDir, file), 'r') 40 | metadata = json.load(f) 41 | f.close() 42 | 43 | sourceFile = os.path.join(TestDir, metadata['FileName']) 44 | logging.info(sourceFile) 45 | 46 | ''' 47 | # Profiling 48 | pr.enable() 49 | ''' 50 | 51 | try: 52 | Data = Transform.transform(source_file=sourceFile, source_parser_name=metadata['PayloadFormat'], target_parser_name='LQMTools', source_meta_data=metadata) 53 | except Exception as inst : 54 | logging.exception(inst) 55 | else: 56 | if Data: 57 | TransformedData.extend(Data) 58 | 59 | ''' 60 | # Profiling 61 | pr.disable() 62 | ''' 63 | 64 | ''' 65 | # Profiling 66 | s = io.StringIO() 67 | sortby = 'cumulative' 68 | ps = pstats.Stats(pr, stream=s).sort_stats(sortby) 69 | ps.print_stats() 70 | print(s.getvalue()) 71 | ''' 72 | 73 | out = open(os.path.join(currentdir,'resources/testing/lqmtools-test.json'), 'w') 74 | json.dump(TransformedData, out, sort_keys=True, indent=4) 75 | -------------------------------------------------------------------------------- /Utils/LQMTtestSTIX.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Nov 18, 2014 3 | 4 | @author: ahoying 5 | ''' 6 | 7 | from FlexTransform import FlexTransform 8 | import os 9 | import json 10 | import logging 11 | 12 | if __name__ == '__main__': 13 | 14 | currentdir = os.path.dirname(__file__) 15 | logging.basicConfig(format='%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s', level=logging.DEBUG) 16 | 17 | TestDir = os.path.join(currentdir, 'resources/sampleMessages/stix') 18 | 19 | Transform = 
FlexTransform.FlexTransform() 20 | StixConfig = open(os.path.join(currentdir,'resources/sampleConfigurations/stix_ciscp.cfg'), 'r') 21 | Transform.add_parser('STIX', StixConfig) 22 | 23 | LQMToolsConfig = open(os.path.join(currentdir,'resources/sampleConfigurations/lqmtools.cfg'), 'r') 24 | Transform.add_parser('LQMTools', LQMToolsConfig) 25 | 26 | TransformedData = [] 27 | 28 | for file in os.listdir(TestDir) : 29 | if (file.startswith('CISCP_INDICATOR.')) : 30 | sourceFile = os.path.join(TestDir, file) 31 | 32 | logging.info(sourceFile) 33 | 34 | try : 35 | Data = Transform.transform(source_file=sourceFile, source_parser_name='STIX', target_parser_name='LQMTools') 36 | except Exception as inst : 37 | logging.exception(inst) 38 | else : 39 | if (Data) : 40 | TransformedData.extend(Data) 41 | 42 | out = open(os.path.join(currentdir,'resources/testing/lqmtools-stix-test.json'), 'w') 43 | json.dump(TransformedData, out, sort_keys=True, indent=4) -------------------------------------------------------------------------------- /Utils/subjectCommentParentQuery.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import sys 4 | 5 | import argparse 6 | import rdflib 7 | 8 | # the default ':' prefix is the CFM transform.owl ontology namespace 9 | DEFAULT_QUERY = '''PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 10 | PREFIX owl: <http://www.w3.org/2002/07/owl#> 11 | PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> 12 | PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 13 | PREFIX : <http://www.anl.gov/cfm/transform.owl#> 14 | SELECT DISTINCT ?subject ?comment ?parent 15 | WHERE { ?parent rdfs:subClassOf* :SemanticComponent . 16 | ?subject rdfs:subClassOf ?parent . 17 | OPTIONAL { ?subject rdfs:comment ?comment . } } 18 | ORDER BY ?parent''' 19 | 20 | 21 | def buildAndParseGraph(rdfFile): 22 | '''Instantiate a graph, parse `rdfFile`, and return graph after 23 | parsing. 24 | 25 | :param rdfFile: rdf filename to parse 26 | :type rdfFile: str 27 | :returns: rdflib.Graph 28 | ''' 29 | g = rdflib.Graph() 30 | g.parse(rdfFile) 31 | return g 32 | 33 | 34 | def queryGraph(graph, query=DEFAULT_QUERY): 35 | '''Return the result of `query` on the rdflib.Graph object `graph`. 36 | 37 | Convenience helper function to use DEFAULT_QUERY if no query is 38 | provided. 39 | 40 | :param graph: graph to query 41 | :type graph: rdflib.Graph 42 | :param query: SPARQL query to run on `graph` 43 | :type query: str 44 | :returns: rdflib.query.Result 45 | ''' 46 | return graph.query(query) 47 | 48 | 49 | def writeSubjectAndCommentToCSV(queryRes, outFile=None): 50 | '''Write the `subject`, `comment` and `parent` fields of each row in 51 | `queryRes` to a CSV file `outFile`. 52 | 53 | If outfile is not supplied, results are written to stdout. If a row does 54 | not have a comment, 'None' is printed. 55 | 56 | :param queryRes: queryResults to print 57 | :type queryRes: rdflib.query.Result 58 | :param outFile: CSV output filename 59 | :type outFile: string 60 | ''' 61 | f = open(outFile, 'w') if outFile else sys.stdout 62 | # add a comment at the top of the file describing fields 63 | f.write('#subject,comment,parent\n') 64 | for row in queryRes: 65 | f.write('{0},{1},{2}\n'.format(row.subject, row.comment, row.parent)) 66 | if f is not sys.stdout: 67 | f.close() 68 | 69 | 70 | if __name__ == '__main__': 71 | 72 | parser = argparse.ArgumentParser() 73 | parser.add_argument('-i', '--input-file', action='store', required=True, 74 | help='RDF input filename') 75 | parser.add_argument('-o', '--output-file', action='store', required=False, 76 | help='CSV output filename.
76 | help='CSV output filename. If absent, use stdout.')
77 | 
78 | args = parser.parse_args()
79 | 
80 | g = buildAndParseGraph(args.input_file)
81 | q = queryGraph(g)
82 | writeSubjectAndCommentToCSV(q, args.output_file)
83 | 
--------------------------------------------------------------------------------
/docs/contribute.md:
--------------------------------------------------------------------------------
1 | ##Contributing
2 | ###GitHub
3 | FlexTransform can be found on GitHub using the following link: [*https://github.com/anl-cyberscience/FlexTransform*](https://github.com/anl-cyberscience/FlexTransform).
4 | 
5 | Users can fork the project to make their own changes; if any bugs or errors are found, a pull request can be opened and the
6 | issue will be addressed.
7 | 
8 | ###Contact
9 | FlexTransform is a tool developed and supported by the CFM team at Argonne National Lab. Any questions about FlexT can be
10 | directed to the CFM team at cfmteam@anl.gov. For general information about the CFM project, visit our [*website*](https://cfm.gss.anl.gov/).
11 | 
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # Flexible Transform
2 | [![Build Status](https://travis-ci.org/anl-cyberscience/FlexTransform.svg?branch=master)](https://travis-ci.org/anl-cyberscience/FlexTransform)
3 | [![PyPI version](https://badge.fury.io/py/FlexTransform.svg)](https://badge.fury.io/py/FlexTransform)
4 | [![PyPI](https://img.shields.io/pypi/pyversions/FlexTransform.svg)](https://github.com/anl-cyberscience/FlexTransform)
5 | 
6 | Flexible Transform (FlexT) enables dynamic translation between Cyber Threat Intelligence (CTI) reports, accomplishing this by digesting CTI data down to its semantic roots (meaning and context).
7 | 
8 | ###Overview
9 | ####The Problem
10 | Most cyber defense systems incorporate some form of cyber threat intelligence (CTI) collection and analysis. However, different
11 | systems and CTI sharing communities have implemented a variety of representations to transmit these data (e.g., STIX, OpenIOC, custom CSV).
12 | This diversity of formats presents a challenge when an organization using one format has the opportunity to join sharing
13 | communities where the members share data in different formats. Similarly, merging communities with different CTI formats
14 | can seem a nearly insurmountable challenge, and proceeds at the pace of the slowest member in each community to adopt
15 | a different format.
16 | 
17 | Although simple translators can be written to convert data from one format to another, challenges to this approach include the following:
18 | 
19 | + An exponential increase in the effort required to support new formats.
20 | + Potential loss of meaning and context (semantics) between formats.
21 | 
22 | The obstacles posed by these challenges lead to the formation of “islands of sharing” defined not by the communities themselves
23 | but by the sharing formats. This pattern leaves smaller organizations, which often cannot participate at all, isolated and defenseless.
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | ####The Solution
32 | FlexT is a tool that enables dynamic translation between formats. FlexT accomplishes this translation by “digesting” CTI
33 | data down to its semantic roots (meaning and context). As Figure 1 shows, making this objective the core of the translation
34 | effort simplifies the process. This approach allows the use of new formats with improved scalability and ensures that the
35 | original meaning and context of CTI data are preserved.
36 | 
37 | A “format” in FlexT is broken down into three components, illustrated in the example below:
38 | 
39 | + `Syntax` – A specification of valid document characters and their composition (e.g., CSV, XML, JSON).
40 | + `Schema` – A specification of the valid terms, the data they can convey, and restrictions on their use (e.g., STIX, OpenIOC, IODEF).
41 | + `Semantics` – A definition of the meaning of terms (e.g., SourceIPAddress is the session originating IPv4 address).
42 | 
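To make these distinctions concrete, the two hypothetical records below use different syntaxes (CSV vs. JSON) and different schemas (positional columns vs. named fields), yet carry identical semantics: each states that the session originating IPv4 address is 10.11.12.13.

```
"2016-02-01T00:00:00Z","scanning","10.11.12.13"

{"category": "scanning", "SourceIPAddress": "10.11.12.13"}
```

Because both schemas map onto the same semantic concept, FlexT can translate either record into the other format without losing its meaning.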
43 | Using FlexT, organizations are empowered to participate in sharing communities using any type of CTI, in any format. When
44 | coupled with a toolset such as Cyber Fed Model’s (CFM’s) Last Quarter Mile Toolset (LQMToolset), participants can not only
45 | share and process CTI but also take automated action based on that intelligence with an array of security endpoint devices.
46 | 
47 | ####Features
48 | Feature | Enabling users to
49 | :-----: | :-------:
50 | Multiple Interfaces | Use FlexT from the command line or as a Python library, with a RESTful API planned for future development.
51 | Accurate translation | Convert CTI between formats while preserving its original meaning and context (semantics).
52 | Easy extensibility | When supporting a new schema, simply define a mapping JSON file and immediately convert to/from any other supported format.
53 | 
--------------------------------------------------------------------------------
/docs/install.md:
--------------------------------------------------------------------------------
1 | # Dependencies
2 | ##Install
3 | FlexTransform (FlexT) runs on **Python3**, so Python3 must be installed before FlexT can be run.
4 | Once Python3 is installed, FlexT can be installed via *pip3*, but it requires the Python package *lxml*, which itself has
5 | UNIX dependencies. The required dependencies for *lxml* are **libxml2** and **libxslt**, as well as their associated
6 | development packages. For Debian-based systems, the following command can be used:
7 | 
8 | ```bash
9 | $ sudo apt-get install libxml2-dev libxslt-dev python-dev
10 | ```
11 | *pip* command:
12 | ```shell
13 | $ pip install FlexTransform
14 | ```
15 | 
16 | ##Getting Started
17 | When using FlexT from the command line, certain arguments are required to perform the
18 | conversion.
19 | ```shell
20 | --src-config CONFIG
21 | ```
22 | This argument passes in the parser configuration file for the source file.
23 | ```shell
24 | --src SRC
25 | ```
26 | This argument is the source file that will be transformed.
27 | ```shell
28 | --dst-config CONFIG
29 | ```
30 | This argument passes in the parser configuration file for the destination file.
31 | ```shell
32 | --dst DST
33 | ```
34 | This argument is the path where the transformed file will be stored.
35 | 
36 | These arguments are all you need to get started with FlexT when using one of the supported schemas. If an unsupported
37 | schema is going to be used, users can pass in arguments for either the source schema, the destination schema, or both.
38 | 
39 | ```shell
40 | --source-schema-IRI
41 | ```
42 | Used to pass in the ontology IRI file for the source file.
43 | 
44 | ```shell
45 | --destination-schema-IRI
46 | ```
47 | Used to pass in the ontology IRI file for the destination file.
48 | 
49 | ```shell
50 | flext --src /Path/to/file --src-config /Path/to/file --dst /Path/to/file --dst-config /Path/to/file
51 | ```
52 | This is the most basic invocation currently supported for transforming a file.
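For an unsupported schema, the IRI arguments are simply added to the same invocation. As a sketch (all paths are placeholders):

```shell
flext --src /Path/to/file --src-config /Path/to/file --dst /Path/to/file --dst-config /Path/to/file --source-schema-IRI /Path/to/ontology/IRI/file
```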
--------------------------------------------------------------------------------
/docs/usage.md:
--------------------------------------------------------------------------------
1 | ## Usage
2 | Currently, FlexT supports Command-Line access as well as functioning as a Python Library, while future development will add a RESTful API with a local web server.
3 | ### Python Library
4 | FlexT accepts file-like objects, so in addition to file handles returned by ```open```, you can also use in-memory Python objects like ```StringIO```, as the sketch after this example shows.
5 | ```python
6 | from FlexTransform import FlexTransform
7 | flexT = FlexTransform.FlexTransform()
8 | 
9 | with open("/Users/cfm/FlexT/FlexTransform/resources/sampleConfigurations/cfm13.cfg", "r") as input_cfg:
10 | flexT.AddParser("cfm13", input_cfg)
11 | with open("/Users/cfm/FlexT/FlexTransform/resources/sampleConfigurations/stix_tlp.cfg", "r") as output_cfg:
12 | flexT.AddParser("stix", output_cfg)
13 | 
14 | with open("/Users/cfm/input.xml", "r") as input_file:
15 | with open("/Users/cfm/output.xml", "w") as output_file:
16 | flexT.TransformFile(input_file, "cfm13", "stix", targetFileName=output_file)
17 | ```
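The same transform can also be driven entirely from memory. Below is a minimal sketch, assuming the variable `cfm13_document` already holds a complete CFM 1.3 XML report as a string; the configuration paths are the same placeholders used above.

```python
import io

from FlexTransform import FlexTransform

flexT = FlexTransform.FlexTransform()

# Register the source and destination parsers, as in the example above.
with open("/Users/cfm/FlexT/FlexTransform/resources/sampleConfigurations/cfm13.cfg", "r") as input_cfg:
    flexT.AddParser("cfm13", input_cfg)
with open("/Users/cfm/FlexT/FlexTransform/resources/sampleConfigurations/stix_tlp.cfg", "r") as output_cfg:
    flexT.AddParser("stix", output_cfg)

# cfm13_document is assumed to hold a CFM 1.3 XML document as a string.
source = io.StringIO(cfm13_document)
destination = io.StringIO()

flexT.TransformFile(source, "cfm13", "stix", targetFileName=destination)
print(destination.getvalue())  # the transformed STIX TLP document
```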
18 | ### Command Line
19 | ```shell
20 | $ flext --src inputFile.txt --src-config srcConfig.cfg --dst outputFile.xml --dst-config dstConfig.cfg
21 | ```
22 | + Required arguments
23 |     + `src` - Source file
24 |     + `src-config` - Source file parser configuration
25 |     + `dst` - Destination file
26 |     + `dst-config` - Destination file parser configuration
27 | + Optional arguments
28 |     + `src-metadata` - Source metadata file
29 |     + `tbox-uri` - The URI location of the TBOX file
30 |     + `source-schema-IRI` - Ontological IRI for the source
31 |     + `destination-schema-IRI` - Ontological IRI for the destination
32 | 
--------------------------------------------------------------------------------
/flexT_dir_input.py:
--------------------------------------------------------------------------------
1 | import os
2 | from FlexTransform import FlexTransform
3 | 
4 | if __name__ == '__main__':
5 | 
6 | # THESE LOCATIONS MATTER!!
7 | dir_location = "/Users/mhend/Downloads/test/"
8 | src_config_location = "/Users/mhend/git/FlexTransform/FlexTransform/resources/sampleConfigurations/cfm13.cfg"
9 | dst_config_location = "/Users/mhend/git/FlexTransform/FlexTransform/resources/sampleConfigurations/stix_tlp.cfg"
10 | # if output path is absolute, follows that path and creates if necessary.
11 | # If it's relative, then uses the dir containing files as root, creates if necessary
12 | output_folder_path = "FlexT output"
13 | 
14 | if not os.path.isdir(dir_location):
15 | print("Input path either doesn't exist or isn't a directory, exiting")
16 | exit(1)
17 | 
18 | for root, dirs, files in os.walk(dir_location):
19 | # Are there files in directory?
20 | if not files:
21 | print("Directory is empty, exiting")
22 | exit(1)
23 | # Is output path abs or relative?
24 | if not os.path.isabs(output_folder_path):
25 | output_folder_path = os.path.join(root, output_folder_path)
26 | if not os.path.isdir(output_folder_path):
27 | os.makedirs(output_folder_path)
28 | 
29 | flexT = FlexTransform.FlexTransform()
30 | with open(src_config_location, 'r') as input_file:
31 | flexT.AddParser("src", input_file)
32 | with open(dst_config_location, 'r') as input_file:
33 | flexT.AddParser("dst", input_file)
34 | 
35 | for name in files:
36 | if name.startswith("."):
37 | continue
38 | iname = os.path.join(root, name)
39 | oname = os.path.join(output_folder_path, name)
40 | print(iname, oname)
41 | with open(iname, "r") as input_file:
42 | with open(oname, "w") as output_file:
43 | try:
44 | print("Starting processing file: {}".format(iname))
45 | flexT.TransformFile(input_file, "src", "dst", targetFileName=output_file)
46 | except Exception as e:
47 | print(e)
48 | print("Exception found in file, skipping it: {}".format(iname))
49 | continue  # skip this file and move on to the next
50 | 
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: FlexTransform
2 | pages:
3 | - index.md
4 | - install.md
5 | - usage.md
6 | - examples.md
7 | - contribute.md
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | setup(
4 | name='FlexTransform',
5 | version='1.2.1',
6 | description='Flexible Transform is a tool that enables dynamic translation between formats',
7 | long_description='Flexible Transform (FlexT) enables dynamic translation between formats, accomplishing this by digesting CTI data down to its semantic roots (meaning and context)',
8 | url='https://github.com/anl-cyberscience/FlexTransform/',
9 | author='The CFM Team',
10 | author_email='fedhelp@anl.gov',
11 | classifiers=[
12 | # See: https://pypi.python.org/pypi?%3Aaction=list_classifiers
13 | 
14 | # How mature is this project? Common values are
15 | # Development Status :: 1 - Planning
16 | # Development Status :: 2 - Pre-Alpha
17 | # Development Status :: 3 - Alpha
18 | # Development Status :: 4 - Beta
19 | # Development Status :: 5 - Production/Stable
20 | # Development Status :: 6 - Mature
21 | # Development Status :: 7 - Inactive
22 | 'Development Status :: 5 - Production/Stable',
23 | 
24 | # Indicate who your project is intended for
25 | 'Intended Audience :: Information Technology',
26 | 'Topic :: Security',
27 | 
28 | # Pick your license as you wish (should match 'license' above)
29 | 'License :: Other/Proprietary License',
30 | 
31 | # Specify the Python versions you support here. In particular, ensure
32 | # that you indicate whether you support Python 2, Python 3 or both.
33 | 'Programming Language :: Python :: 3', 34 | 'Programming Language :: Python :: 3.3', 35 | 'Programming Language :: Python :: 3.4', 36 | 'Programming Language :: Python :: 3.5', 37 | 'Programming Language :: Python :: 3.6', 38 | ], 39 | keywords='', 40 | packages=find_packages(exclude=['contrib', 'doc', 'tests*']), 41 | install_requires=[ 42 | 'arrow', 43 | 'python-dateutil', 44 | 'lxml', 45 | 'pytz', 46 | 'dumper', 47 | 'rdflib', 48 | ], 49 | entry_points={ 50 | 'console_scripts': [ 51 | 'flext = FlexTransform.FlexT:main', 52 | 'flextbatch = FlexTransform.FlexTBatch:main' 53 | ] 54 | }, 55 | test_suite='nose.collector', 56 | tests_require=['nose'], 57 | include_package_data=True, 58 | package_data={ 59 | 'FlexTransform': [ 60 | 'resources/*.xml', 61 | 'resources/*.owl', 62 | 'resources/*.zip', 63 | 'resources/*.rdf', 64 | 'resources/sampleConfigurations/*', 65 | 'resources/schemaDefinitions/*', 66 | 'resources/schemas/*' 67 | ] 68 | } 69 | ) 70 | --------------------------------------------------------------------------------