├── .gitignore
├── .project
├── .pydevproject
├── .travis.yml
├── ExampleFiles
│   ├── SampleInput-CFM13.xml
│   └── SampleInput-STIX-TLP.xml
├── FlexT.py
├── FlexTBatch.py
├── FlexTransform
│   ├── Configuration
│   │   ├── Config.py
│   │   ├── ConfigFunctions
│   │   │   ├── ConfigFunctionManager.py
│   │   │   ├── GlobalFunctions.py
│   │   │   └── __init__.py
│   │   └── __init__.py
│   ├── FlexT.py
│   ├── FlexTBatch.py
│   ├── FlexTransform.py
│   ├── OntologyOracle
│   │   ├── OntologyOracle.py
│   │   └── __init__.py
│   ├── SchemaParser
│   │   ├── SchemaParser.py
│   │   ├── TransformFunctions
│   │   │   ├── CFM13Functions.py
│   │   │   ├── CFM20Functions.py
│   │   │   ├── GlobalFunctions.py
│   │   │   ├── STIXFunctions.py
│   │   │   ├── TransformFunctionManager.py
│   │   │   └── __init__.py
│   │   └── __init__.py
│   ├── SyntaxParser
│   │   ├── CSVParser.py
│   │   ├── DICTParsers
│   │   │   ├── __init__.py
│   │   │   └── iSightReport.py
│   │   ├── DictionaryParser.py
│   │   ├── KVParser.py
│   │   ├── Parser.py
│   │   ├── XMLParser.py
│   │   ├── XMLParsers
│   │   │   ├── .gitignore
│   │   │   ├── CFM13.py
│   │   │   ├── CFM20Alert.py
│   │   │   ├── STIX.py
│   │   │   └── __init__.py
│   │   └── __init__.py
│   ├── __init__.py
│   ├── resources
│   │   ├── cybox.zip
│   │   ├── images
│   │   │   ├── dev-figure1.png
│   │   │   ├── figure1a.png
│   │   │   ├── figure1b.png
│   │   │   ├── figure2.png
│   │   │   ├── figure3.png
│   │   │   └── figure4.png
│   │   ├── indicator-tbox.owl
│   │   ├── ramrod.zip
│   │   ├── sampleConfigurations
│   │   │   ├── MBL.cfg
│   │   │   ├── cfm13.cfg
│   │   │   ├── cfm20alert.cfg
│   │   │   ├── crisp_json.cfg
│   │   │   ├── csv_example.cfg
│   │   │   ├── doe_em.cfg
│   │   │   ├── flextext.cfg
│   │   │   ├── iid_combined_recent.cfg
│   │   │   ├── iid_host_active.cfg
│   │   │   ├── iid_host_dynamic.cfg
│   │   │   ├── iid_ipv4_recent.cfg
│   │   │   ├── isight.cfg
│   │   │   ├── keyvalue.cfg
│   │   │   ├── lqmtools.cfg
│   │   │   ├── stix_acs30.cfg
│   │   │   ├── stix_essa.cfg
│   │   │   ├── stix_tlp.cfg
│   │   │   ├── stix_tlp2.cfg
│   │   │   └── twitter.cfg
│   │   ├── schemaDefinitions
│   │   │   ├── MBL.json
│   │   │   ├── cfm-metadata.json
│   │   │   ├── cfm13-site.json
│   │   │   ├── cfm13.json
│   │   │   ├── cfm20alert.json
│   │   │   ├── crisp.json
│   │   │   ├── doe-em.json
│   │   │   ├── iid-combined-recent.json
│   │   │   ├── iid-host-dynamic.json
│   │   │   ├── iid-ipv4-recent.json
│   │   │   ├── iid.json
│   │   │   ├── isight.json
│   │   │   ├── keyvalue.json
│   │   │   ├── lqmtools.json
│   │   │   ├── stix-acs30-broken.json
│   │   │   ├── stix-acs30.json
│   │   │   ├── stix-essa.json
│   │   │   ├── stix-tlp.json
│   │   │   ├── stix-tlp2.json
│   │   │   ├── stix.json
│   │   │   └── twitter.json
│   │   ├── schemas
│   │   │   ├── CFMAlert.xsd
│   │   │   ├── CFMDownload.xsd
│   │   │   ├── CFMDownloadRequest.xsd
│   │   │   ├── CFMEnvelope.xsd
│   │   │   └── CFMMessage13.xsd
│   │   ├── stix.zip
│   │   ├── test.owl
│   │   └── transform.owl
│   └── test
│       ├── LQMTTests.py
│       ├── OntologyOracle_test.py
│       ├── Readme.md
│       ├── SampleInputs.py
│       ├── TestData
│       │   ├── cfm13_multiple_site.cfg
│       │   ├── csv-example-2.json
│       │   ├── csv_example_2.cfg
│       │   └── mult_site_config.json
│       ├── ToCFM13_test.py
│       ├── ToKeyValue_test.py
│       ├── ToLQMT_test.py
│       ├── ToStixACS30_test.py
│       ├── ToStixACS_test.py
│       ├── ToStixTLP_test.py
│       ├── __init__.py
│       ├── regression_test.py
│       └── utils.py
├── ISAMarkingExtension
│   ├── __init__.py
│   ├── bindings
│   │   ├── __init__.py
│   │   ├── isamarkings.py
│   │   └── isamarkings30.py
│   ├── isamarkings.py
│   └── isamarkingsacs30.py
├── LICENSE
├── MANIFEST.in
├── Utils
│   ├── LQMTtestCFM.py
│   ├── LQMTtestSTIX.py
│   ├── addSchemaConfigToTBOX.py
│   ├── iSIGHTtoCFM.py
│   └── subjectCommentParentQuery.py
├── docs
│   ├── contribute.md
│   ├── examples.md
│   ├── index.md
│   ├── install.md
│   └── usage.md
├── flexT_dir_input.py
├── mkdocs.yml
├── readme-dev.md
├── readme.md
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # IDE
86 | .idea/
87 | catalog*.xml
88 | .project
89 | .pydevproject
90 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>FlexTransform</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.python.pydev.PyDevBuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>org.python.pydev.pythonNature</nature>
16 | 	</natures>
17 | </projectDescription>
18 |
--------------------------------------------------------------------------------
/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?><pydev_project>
3 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">python3.5</pydev_property>
4 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 3.0</pydev_property>
5 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
6 | <path>/${PROJECT_DIR_NAME}</path>
7 | </pydev_pathproperty>
8 | </pydev_project>
9 |
10 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - '3.3'
4 | - '3.4'
5 | - '3.5'
6 | - '3.6'
7 | before_install:
8 | - "sudo apt-get -qq update"
9 | - "sudo apt-get install -y -qq libxml2-dev libxslt-dev python-dev"
10 | install:
11 | - "pip install ."
12 | script:
13 | - "nosetests"
14 | deploy:
15 | provider: "pypi"
16 | user: "anl-cyberscience"
17 | password:
18 | secure: HgtKQyGDlTHrhlaH+Cr6F56MTNJ2lwkAjKod7ifUy+dBqZ/FHHqPmLgAbGTlIG9si0dvG1IgOz8de/Vr1+AO3w/J18zmop5xPyv1PreqOCSwbRWK76dJG4vsUB7nW+eRwg29YcPVUmtw+z6qHYEAiR6/GQNKi3ubw5RRO4Oily3QxkOr48pGStiW8f8hHGI8mgLw16saVxAV+yFiaXFikmUHBro1G33W78ZOJhpgxEAMXzPCZHeIepnDZNDmCERX7YZdy9q5uV07WmRqEjLSNpfvdyJSS2T66ZKZ7mcB/2M3sFtz7t5datqAudlNuy9o5JAsPavkxJNz0u6A0V8Ee7ETeRwDKhhuhgBtVStEZHNas7XZaiJzM4TbFTJvgL7lU5mPXzo3cxjuSdlz8Px7Heq3jp5vdE94locYEjd1iQ8KFMIxjQIA15WHKYcDGy9iG4M7iQl7Up3JCCQC+Dsj5v1eHm3YITDd7P8g2c9/xeGcvpWx6zYaukSS2oBuBEuwfQz0JCuVSZkajWg9IETb6iQFml+IZg8uoLP+jOKjDTY4S4a/jiRqHa132q6+bZ5QQWCl8b+fyX+VcPLIAl8BtA/bwQ6cJW1Gni69RSjLxbGe5neUsjfZL4fKO+QnunfxDviRnt4lBqG4Cce2n2YNW40DydaGRXfDR+gUqhDO3IM=
19 | on:
20 | tags: true
21 | distributions: "sdist bdist_wheel"
22 | notifications:
23 | slack:
24 | secure: i2h5/x8NkHYjIxqj1znDK9UkQpjT28aSrIG6YNInURb963+oQV27tRclDDfG+VBigRNF3TvFFApQhzDt/che1W2MsQdVcC8uAL+Lin8IetSV9A3iQL1P3g4NuG6SDPUoWydr1cY/3ONNkZsXms3pSJTrRm9kLYGPB6X7u3TvBgluzdEhxaafUfZgNcV9tPBx1wkzu4/61Qz8Ff1SftIrIitpc1Njytpibjlxl6qo4tlJRacBcu0AagVRtbi1COb5gbL4jdQFqzt1D7BP6ltBLDGEreb3Eucd/s2j8ONwKvem26g9BQCLHx/xD9hoU4rBZ1i8SomkLUZBAFl3avKq6WvYMmFaKe3pVgiFEAPwKNRxoZYLHOGTBgxyKQi1Nuv/iM4MZx15JAQERL4I+Q7BMBRbC+Om3Uv1xAcKfX5CsOyAUdWc6RjwU0XQunqxCQvO/Nvjz5ZMLhU1W7fPUYxU21ye3PgNZq9hcQRWsFDlPPmYM4JmbKgtoNo+vVGUoOvEkG0WId4bJ1W9PXkvyMa4+1c6Gs3tWq78fre8vK/LFcxbRxIufJ5rxUne6TgdQ5Y70gjJgOiqlz43XMmPlmo2/ORLYaN/IyoBZMP1UKAJRYlMEeOPmyWKOoG4gOnCW5yXKlcmuVKDoKdLybBofgRKJCRM7/QKNLhvNSVDvr/J5GU=
25 |
--------------------------------------------------------------------------------
/ExampleFiles/SampleInput-CFM13.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE IDMEF-Message PUBLIC "-//IETF//DTD RFC XXXX IDMEF v1.0//EN" "idmef-message.dtd">
3 | <IDMEF-Message xmlns="http://iana.org/idmef" version="1.0">
4 |     <Alert>
5 |         <Analyzer analyzerid="Fake"><Node>
6 |             <location>Fake National Lab</location>
7 |             <name>Fake Name</name>
8 |         </Node></Analyzer>
9 |
10 |         <AnalyzerTime>2016-02-21T22:50:02+0600</AnalyzerTime>
11 |         <AdditionalData meaning="report schedule" type="string">5 minutes</AdditionalData>
12 |         <AdditionalData meaning="number of alerts in this report" type="integer">2</AdditionalData>
13 |         <AdditionalData meaning="report type" type="string">alerts</AdditionalData>
14 |         <AdditionalData meaning="report start time" type="date-time">2016-02-21T22:45:53+0700</AdditionalData>
15 |     </Alert>
16 |     <Alert>
17 |         <CreateTime>2016-02-21T22:45:53-0400</CreateTime>
18 |         <Source>
19 |             <Node>
20 |                 <Address category="ipv4-addr">
21 |                     <address>10.10.10.10</address>
22 |                 </Address>
23 |             </Node>
24 |         </Source>
25 |         <Target>
26 |             <Service>
27 |                 <port>22</port>
28 |                 <protocol>TCP</protocol>
29 |             </Service>
30 |         </Target>
31 |         <Classification text="SSH Attack">
32 |             <Reference origin="user-specific">
33 |                 <name>SSH Attack</name>
34 |                 <url> </url>
35 |             </Reference>
36 |         </Classification>
37 |         <Assessment>
38 |             <Action category="block-installed"/>
39 |         </Assessment>
40 |         <AdditionalData meaning="restriction" type="string">private</AdditionalData>
41 |         <AdditionalData meaning="recon" type="integer">0</AdditionalData>
42 |         <AdditionalData meaning="prior offenses" type="integer">11</AdditionalData>
43 |         <AdditionalData meaning="duration" type="integer">86400</AdditionalData>
44 |         <AdditionalData meaning="alert threshold" type="integer">1</AdditionalData>
45 |         <AdditionalData meaning="OUO" type="integer">0</AdditionalData>
46 |         <AdditionalData meaning="top level domain owner" type="string">The Republic of Fake</AdditionalData>
47 |     </Alert>
48 | </IDMEF-Message>
--------------------------------------------------------------------------------
/FlexT.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jun 17, 2015
3 |
4 | @author: ahoying
5 | '''
6 |
7 | from FlexTransform import FlexTransform
8 | from FlexTransform.OntologyOracle import Oracle
9 | import logging
10 | import rdflib
11 |
12 | import argparse
13 | import os
14 | import sys
15 | import json
16 |
17 |
18 | # Configure logging to send INFO, DEBUG and TRACE messages to stdout and all other logs to stderr
19 | # Based on code from http://stackoverflow.com/questions/2302315/how-can-info-and-debug-logging-message-be-sent-to-stdout-and-higher-level-messag
20 | class LessThanFilter(logging.Filter):
21 | def __init__(self, level):
22 | self._level = level
23 | logging.Filter.__init__(self)
24 |
25 | def filter(self, rec):
26 | return rec.levelno < self._level
27 |
28 |
29 | def main():
30 | log = logging.getLogger()
31 | log.setLevel(logging.NOTSET)
32 |
33 | sh_out = logging.StreamHandler(stream=sys.stdout)
34 | sh_out.setLevel(logging.DEBUG)
35 | sh_out.setFormatter(logging.Formatter('%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s'))
36 |     sh_out.addFilter(LessThanFilter(logging.WARNING))
37 | log.addHandler(sh_out)
38 |
39 | sh_err = logging.StreamHandler(stream=sys.stderr)
40 | sh_err.setLevel(logging.WARNING)
41 | sh_err.setFormatter(logging.Formatter('%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s'))
42 | log.addHandler(sh_err)
43 |
44 | parser = argparse.ArgumentParser(
45 | description="Transform a source file's syntax and schema to the target file document type")
46 | parser.add_argument('--src-config',
47 | type=argparse.FileType('r'),
48 | help='Source file parser configuration',
49 | metavar='CONFIG',
50 | required=True)
51 | parser.add_argument('--src',
52 | type=argparse.FileType('r'),
53 | help='Source file',
54 | required=True)
55 | parser.add_argument('--src-metadata',
56 | type=argparse.FileType('r'),
57 | help='Source Metadata file',
58 | required=False)
59 | parser.add_argument('--dst-config',
60 | type=argparse.FileType('r'),
61 | help='Destination file parser configuration',
62 | metavar='CONFIG',
63 | required=True)
64 | parser.add_argument('--dst',
65 | type=argparse.FileType('w'),
66 | help='Destination file',
67 | required=True)
68 |
69 | parser.add_argument('--tbox-uri',
70 | type=argparse.FileType('r'),
71 | help='The uri location of the tbox file to load',
72 | required=False)
73 | parser.add_argument('--source-schema-IRI',
74 |                         help='The ontology IRI for the source',
75 | required=False)
76 | parser.add_argument('--destination-schema-IRI',
77 | help='The ontology IRI for the destination',
78 | required=False)
79 |
80 | args = parser.parse_args()
81 |
82 | try:
83 | Transform = FlexTransform.FlexTransform()
84 | Transform.AddParser('src', args.src_config)
85 | Transform.AddParser('dst', args.dst_config)
86 |
87 | metadata = None
88 |
89 | if args.src_metadata:
90 | metadata = json.load(args.src_metadata)
91 |
92 | kb = None
93 |
94 | if args.tbox_uri:
95 | if args.destination_schema_IRI:
96 | kb = Oracle(args.tbox_uri, rdflib.URIRef(args.destination_schema_IRI))
97 | else:
98 | logging.warning(
99 | "Ontology file specified, but no destination schema IRI is given. Ontology will not be used.")
100 |
101 | FinalizedData = Transform.TransformFile(
102 | sourceFileName=args.src,
103 | targetFileName=args.dst,
104 | sourceParserName='src',
105 | targetParserName='dst',
106 | sourceMetaData=metadata,
107 | oracle=kb)
108 | args.dst.close()
109 |
110 | except Exception as inst:
111 | log.error(inst)
112 | args.dst.close()
113 | os.remove(args.dst.name)
114 | exit(1)
115 |
116 | else:
117 | log.info("Success")
118 | exit(0)
119 |
120 | if __name__ == '__main__':
121 | main()
122 |
--------------------------------------------------------------------------------
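
Example (illustrative sketch): the CLI above maps directly onto the library API documented in FlexTransform/FlexTransform.py; the configuration and sample paths below are picks from the repository tree and the output path is hypothetical.

    from FlexTransform import FlexTransform

    transform = FlexTransform.FlexTransform()
    with open('FlexTransform/resources/sampleConfigurations/cfm13.cfg', 'r') as cfg:
        transform.add_parser('src', cfg)
    with open('FlexTransform/resources/sampleConfigurations/stix_tlp.cfg', 'r') as cfg:
        transform.add_parser('dst', cfg)

    # Transform the bundled CFM 1.3 sample into STIX TLP
    with open('ExampleFiles/SampleInput-CFM13.xml', 'r') as src, open('out.xml', 'w') as dst:
        transform.transform(source_file=src, source_parser_name='src',
                            target_parser_name='dst', target_file=dst)
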
/FlexTBatch.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Apr 12, 2017
3 |
4 | @author: taxon
5 | '''
6 | #
7 | # FlexTBatch is a simple wrapper script around FlexT that allows the user to start
8 | # FlexT and send it commands via stdin for processing. If there are many files to convert,
9 | # the config files only need to be specified once and will also only be loaded once, saving
10 | # a significant amount of time in processing subsequent transforms
11 | #
12 | # This module exists so FlexTBatch can be run from within the Eclipse environment, since
13 | # FlexTransform.FlexTBatch.py cannot be run directly in Eclipse.
14 | from FlexTransform import FlexTBatch
15 |
16 | def main():
17 | FlexTBatch.main()
18 |
19 | if __name__ == '__main__':
20 |     main()
21 |
--------------------------------------------------------------------------------
/FlexTransform/Configuration/ConfigFunctions/ConfigFunctionManager.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Jun 13, 2016
3 |
4 | @author: cstrasburg
5 | """
6 |
7 | import inspect
8 | import logging
9 | from collections import defaultdict
10 |
11 | import FlexTransform.Configuration.ConfigFunctions
12 |
13 |
14 | class ConfigFunctionManager(object):
15 |     '''
16 |     Manages registration, lookup, and execution of configuration functions referenced from parser configuration files.
17 |     '''
18 |
19 | __KnownFunctions = defaultdict(dict)
20 |
21 | def __init__(self, trace, trace_list=[]):
22 | """
23 | Constructor
24 | :param trace_list: list of elements to trace
25 | :return:
26 | """
27 | self.logging = logging.getLogger('FlexTransform.Configuration.ConfigFunctions.ConfigFunctionManager')
28 |
29 | self._FunctionClasses = {}
30 | self.trace = trace
31 | self.trace_list = trace_list
32 | self.trace_index = {}
33 | if self.trace:
34 | for x in self.trace_list:
35 | for v in x["src_fields"]:
36 | self.trace_index[v] = x
37 | for y in x["dst_fields"]:
38 | self.trace_index[y] = x
39 | for w in x["src_IRIs"]:
40 | self.trace_index[w] = x
41 | for z in x["dst_IRIs"]:
42 | self.trace_index[z] = x
43 | self.logging.debug("Initialized ConfigFunctionManager with trace_list of {} elements".format(len(trace_list)))
44 |
45 | @classmethod
46 | def register_function(cls, function_name, required_args, function_class):
47 | cls.__KnownFunctions[function_name] = {
48 | 'class': function_class,
49 | 'RequiredArgs': required_args
50 | }
51 |
52 | @classmethod
53 | def get_function_class(cls, function_name):
54 | if function_name in cls.__KnownFunctions:
55 | class_name = cls.__KnownFunctions[function_name]['class']
56 | else:
57 | raise Exception('FunctionNotRegistered',
58 | "The function %s is not registered with the ConfigFunctionManager" % function_name)
59 |
60 | for name, obj in inspect.getmembers(FlexTransform.Configuration.ConfigFunctions, inspect.isclass):
61 | if name == class_name:
62 | return obj()
63 |
64 | raise Exception('FunctionClassNotFound',
65 | "The Class %s for function %s was not found by the ConfigFunctionManager" % (class_name, function_name))
66 |
67 | def get_function(self, function_name):
68 | if function_name in self.__KnownFunctions:
69 | return True
70 | else:
71 | return False
72 |
73 | def execute_config_function(self, function_name, args):
74 | if function_name in self._FunctionClasses:
75 | function_class = self._FunctionClasses[function_name]
76 | else:
77 | function_class = ConfigFunctionManager.get_function_class(function_name)
78 | self._FunctionClasses[function_name] = function_class
79 |
80 | self._validate_args(function_name, args)
81 | return function_class.Execute(function_name, args)
82 |
83 | def _validate_args(self, function_name, args):
84 | """
85 | Allowed fields for the Args dictionary:
86 |
87 | functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser.
88 | This is the value of the string between the () in the function name in the .json schema configuration files
89 |
90 | fieldName - Optional - The name of the field being processed
91 |
92 | fileName - Optional - The full path of the source file
93 |
94 | fieldDict - Optional - The dictionary associated with this field
95 |
96 | """
97 | allowed_fields = set(['functionArg', 'fileName', 'fieldName', 'fieldDict'])
98 | RequiredFields = set([])
99 |
100 | if isinstance(args, dict):
101 | for arg in args:
102 | if arg not in allowed_fields:
103 |                     self.logging.warning('An argument passed to function %s is not allowed: %s' % (function_name, arg))
104 | else:
105 | raise Exception('InvalidArgs',
106 | 'The arguments passed to function %s are not defined or not in dictionary format' % function_name)
107 |
108 | if self.__KnownFunctions[function_name]['RequiredArgs'] is not None:
109 | RequiredFields.update(self.__KnownFunctions[function_name]['RequiredArgs'])
110 |
111 | for arg in RequiredFields:
112 | if arg not in args or args[arg] is None:
113 | raise Exception('InvalidArgs',
114 | 'Function %s args did not include the required %s field, could not process' % (function_name, arg))
115 |
--------------------------------------------------------------------------------
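
Example (illustrative sketch): the lookup and validation flow above, driven directly. The file name and regex are hypothetical; getFileUUID and its required args are registered by GlobalFunctions below.

    from FlexTransform.Configuration.ConfigFunctions import ConfigFunctionManager

    manager = ConfigFunctionManager(trace=False)
    if manager.get_function('getFileUUID'):
        value = manager.execute_config_function(
            'getFileUUID',
            {'fileName': '/tmp/1df8ab4e-6f3a-4bb2-9b92-bd6d2dbd5f0f.Alert.xml',
             'functionArg': r'^(.*)\.Alert\.xml$'})
        # value == '1df8ab4e-6f3a-4bb2-9b92-bd6d2dbd5f0f'
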
/FlexTransform/Configuration/ConfigFunctions/GlobalFunctions.py:
--------------------------------------------------------------------------------
1 | """
2 | @author: cstrasburg
3 | """
4 |
5 | import logging
6 | import os.path
7 | import re
8 |
9 | import arrow
10 |
11 | from FlexTransform.Configuration.ConfigFunctions import ConfigFunctionManager
12 |
13 |
14 | class GlobalFunctions(object):
15 | """
16 | Contains Configuration functions that multiple configuration files can use:
17 | """
18 |
19 | '''
20 | The _FunctionNames dictionary should contain each function name understood by this class. Each is
21 | mapped to a list with required fields to be passed in the args dictionary, or None if no args are required.
22 |
23 | Allowed fields for the Args dictionary:
24 |
25 | fieldName - Optional - The name of the field being processed
26 |
27 | fileName - Optional - The name of the loaded file (full path)
28 |
29 | functionArg - Optional - The string between the '(' and ')' in the function definition
30 |
31 | fieldDict - Optional - The dictionary of the field where this method is defined
32 |
33 | '''
34 |
35 | __FunctionNames = {
36 | 'getFileCreationDate': ['fileName'],
37 | 'getFileUUID': ['fileName', 'functionArg'],
38 | }
39 |
40 | def __init__(self):
41 | """
42 | Constructor
43 | """
44 | self.logging = logging.getLogger('FlexTransform.Configuration.ConfigFunctions.GlobalFunctions')
45 |
46 | @classmethod
47 | def register_functions(cls):
48 | for FunctionName, RequiredArgs in cls.__FunctionNames.items():
49 | ConfigFunctionManager.register_function(FunctionName, RequiredArgs, 'GlobalFunctions')
50 |
51 | def Execute(self, function_name, args):
52 | value = None
53 |
54 | if function_name not in self.__FunctionNames:
55 | raise Exception('FunctionNotDefined',
56 | 'Function %s is not defined in GlobalFunctions' % (function_name))
57 |
58 | elif function_name == 'getFileCreationDate':
59 | if 'fileName' in args:
60 | try:
61 | rawctime = os.path.getctime(args['fileName'])
62 | ''' Convert to given time format '''
63 | if 'fieldDict' in args and 'dateTimeFormat' in args['fieldDict'] and \
64 | args['fieldDict']['dateTimeFormat'] == 'unixtime':
65 | value = str(arrow.get(rawctime).timestamp)
66 | else:
67 | value = arrow.get(rawctime).format(args['fieldDict']['dateTimeFormat'])
68 | except OSError as e:
69 | self.logging.warning("Could not get file ctime for {}: {}".format(args['fileName'], e))
70 |
71 |         elif function_name == 'getFileUUID':
72 |             if 'fileName' in args and args['fileName']:
73 |                 fileName = args['fileName']
74 |                 baseName = os.path.basename(fileName)
75 |                 p = re.compile(args['functionArg'])
76 |                 result = p.match(baseName)
77 |                 if result is not None:
78 |                     value = result.group(1)
79 |                 else:
80 |                     value = None
81 |                     self.logging.warning("getFileUUID: could not extract UUID from filename {} using regex {}".format(baseName, args['functionArg']))
82 |             else:
83 |                 self.logging.warning("getFileUUID: no fileName provided in args; could not extract UUID.")
84 |
85 | return value
86 |
--------------------------------------------------------------------------------
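
Example (illustrative sketch): resolving getFileCreationDate for a schema field that asks for unix time. The fieldDict here is a minimal stand-in for a real schema entry, and the file path is one of the bundled samples.

    from FlexTransform.Configuration.ConfigFunctions import ConfigFunctionManager

    manager = ConfigFunctionManager(trace=False)
    ctime = manager.execute_config_function(
        'getFileCreationDate',
        {'fileName': 'ExampleFiles/SampleInput-CFM13.xml',
         'fieldDict': {'dateTimeFormat': 'unixtime'}})
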
/FlexTransform/Configuration/ConfigFunctions/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Jun 13, 2016
3 |
4 | @author: cstrasburg
5 | """
6 |
7 | import FlexTransform.Configuration
8 |
9 | from .ConfigFunctionManager import ConfigFunctionManager
10 | from .GlobalFunctions import GlobalFunctions
11 |
12 | GlobalFunctions.register_functions()
13 |
--------------------------------------------------------------------------------
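
Illustrative sketch (hypothetical class and function names): a site-local function class could hook into the same registration pattern GlobalFunctions uses above. Note that get_function_class() locates classes among this package's members, so a real implementation would also need to be imported in this __init__.py.

    from FlexTransform.Configuration.ConfigFunctions import ConfigFunctionManager

    class SiteFunctions(object):
        # Hypothetical function with no required args
        __FunctionNames = {'getSiteName': None}

        @classmethod
        def register_functions(cls):
            for function_name, required_args in cls.__FunctionNames.items():
                ConfigFunctionManager.register_function(function_name, required_args, 'SiteFunctions')

        def Execute(self, function_name, args):
            # Constant value for illustration only
            return 'Fake National Lab'

    SiteFunctions.register_functions()
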
/FlexTransform/Configuration/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jul 27, 2014
3 |
4 | @author: ahoying
5 | '''
6 |
7 | from FlexTransform.Configuration.Config import Config
--------------------------------------------------------------------------------
/FlexTransform/FlexT.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Jun 17, 2015
3 |
4 | @author: ahoying
5 | """
6 |
7 | import argparse
8 | import json
9 | import logging
10 | import os
11 | import sys
12 | import traceback
13 |
14 | import rdflib
15 |
16 | from FlexTransform import FlexTransform
17 | from FlexTransform.OntologyOracle import Oracle
18 |
19 |
20 | # Configure logging to send INFO, DEBUG and TRACE messages to stdout and all other logs to stderr
21 | # Based on code from http://stackoverflow.com/questions/2302315/how-can-info-and-debug-logging-message-be-sent-to-stdout-and-higher-level-messag
22 | class LessThanFilter(logging.Filter):
23 | def __init__(self, level):
24 | self._level = level
25 | logging.Filter.__init__(self)
26 |
27 | def filter(self, rec):
28 | return rec.levelno < self._level
29 |
30 |
31 | def main():
32 | log = logging.getLogger()
33 | log.setLevel(logging.NOTSET)
34 |
35 | sh_out = logging.StreamHandler(stream=sys.stdout)
36 | sh_out.setLevel(logging.DEBUG)
37 | sh_out.setFormatter(logging.Formatter('%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s'))
38 |     sh_out.addFilter(LessThanFilter(logging.WARNING))
39 | log.addHandler(sh_out)
40 |
41 | sh_err = logging.StreamHandler(stream=sys.stderr)
42 | sh_err.setLevel(logging.WARNING)
43 | sh_err.setFormatter(logging.Formatter('%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s'))
44 | log.addHandler(sh_err)
45 |
46 | parser = argparse.ArgumentParser(
47 | description="Transform a source file's syntax and schema to the target file document type")
48 | parser.add_argument('--src-config',
49 | type=argparse.FileType('r'),
50 | help='Source file parser configuration',
51 | metavar='CONFIG',
52 | required=True)
53 | parser.add_argument('--src',
54 | type=argparse.FileType('r'),
55 | help='Source file',
56 | required=True)
57 | parser.add_argument('--src-metadata',
58 | type=argparse.FileType('r'),
59 | help='Source Metadata file',
60 | required=False)
61 | parser.add_argument('--dst-config',
62 | type=argparse.FileType('r'),
63 | help='Destination file parser configuration',
64 | metavar='CONFIG',
65 | required=True)
66 | parser.add_argument('--dst',
67 | type=argparse.FileType('w'),
68 | help='Destination file',
69 | required=True)
70 |
71 | parser.add_argument('--tbox-uri',
72 | type=argparse.FileType('r'),
73 | help='The uri location of the tbox file to load',
74 | required=False)
75 | parser.add_argument('--source-schema-IRI',
76 |                         help='The ontology IRI for the source',
77 | required=False)
78 | parser.add_argument('--destination-schema-IRI',
79 | help='The ontology IRI for the destination',
80 | required=False)
81 | parser.add_argument('--trace-src-field',
82 | help='Given the name of a field from the source schema, will output trace messages to log.trace() as it is processed',
83 | action='append',
84 | default=[],
85 | required=False)
86 | parser.add_argument('--trace-dst-field',
87 | help='Given the name of a field from the dest schema, will output trace messages to log.trace() as it is processed',
88 | action='append',
89 | default=[],
90 | required=False)
91 | parser.add_argument('--trace-src-IRI',
92 | help='Given the name of an IRI from the source schema, will output trace messages to log.trace() as it is processed',
93 | action='append',
94 | default=[],
95 | required=False)
96 | parser.add_argument('--trace-dst-IRI',
97 | help='Given the name of an IRI from the dest schema, will output trace messages to log.trace() as it is processed',
98 | action='append',
99 | default=[],
100 | required=False)
101 | parser.add_argument('--logging-level', '-l',
102 | help="Set the output level for the logger. Acceptable values: debug, info, warning, error, critical",
103 | required=False)
104 |
105 | args = parser.parse_args()
106 | try:
107 | if args.logging_level:
108 | if args.logging_level.lower() == "debug":
109 | log.setLevel(logging.DEBUG)
110 | elif args.logging_level.lower() == "info":
111 | log.setLevel(logging.INFO)
112 | elif args.logging_level.lower() == "warning":
113 | log.setLevel(logging.WARNING)
114 | elif args.logging_level.lower() == "error":
115 | log.setLevel(logging.ERROR)
116 | elif args.logging_level.lower() == "critical":
117 | log.setLevel(logging.CRITICAL)
118 | transform = FlexTransform.FlexTransform(source_fields=args.trace_src_field,
119 | source_iri=args.trace_src_IRI,
120 | destination_fields=args.trace_dst_field,
121 | destination_iri=args.trace_dst_IRI,
122 | logging_level=logging.NOTSET)
123 |
124 | transform.add_parser('src', args.src_config)
125 | transform.add_parser('dst', args.dst_config)
126 |
127 | metadata = None
128 |
129 | if args.src_metadata:
130 | metadata = json.load(args.src_metadata)
131 |
132 | kb = None
133 |
134 | if args.tbox_uri:
135 | if args.destination_schema_IRI:
136 | kb = Oracle(args.tbox_uri, rdflib.URIRef(args.destination_schema_IRI))
137 | else:
138 | logging.warning(
139 | "Ontology file specified, but no destination schema IRI is given. Ontology will not be used.")
140 |
141 | FinalizedData = transform.transform(
142 | source_file=args.src,
143 | target_file=args.dst,
144 | source_parser_name='src',
145 | target_parser_name='dst',
146 | source_meta_data=metadata,
147 | oracle=kb)
148 | args.dst.close()
149 |
150 | except Exception as inst:
151 | log.error(inst)
152 | ''' For debugging - capture to log.debug instead? '''
153 | traceback.print_exc()
154 | args.dst.close()
155 | os.remove(args.dst.name)
156 | exit(1)
157 |
158 | else:
159 | log.info("Success")
160 | exit(0)
161 |
162 | if __name__ == '__main__':
163 | main()
164 |
--------------------------------------------------------------------------------
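
Example (illustrative sketch): the tracing hooks the --trace-* options above expose, set programmatically; 'ipv4' and 'restriction' are illustrative schema field names, not a fixed vocabulary.

    import logging
    from FlexTransform import FlexTransform

    # Passing trace fields implicitly enables tracing in the constructor
    transform = FlexTransform.FlexTransform(source_fields=['ipv4'],
                                            destination_fields=['restriction'],
                                            logging_level=logging.DEBUG)
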
/FlexTransform/FlexTBatch.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Apr 12, 2017
3 |
4 | @author: taxon
5 | '''
6 | #
7 | # FlexTBatch is a simple wrapper script around FlexT that allows the user to start
8 | # FlexT and send it commands via stdin for processing. If there are many files to convert,
9 | # the config files only need to be specified once and will also only be loaded once, saving
10 | # a significant amount of time in processing subsequent transforms
11 | #
12 | import argparse
13 | import sys
14 | import logging
15 | import json
16 | from FlexTransform import FlexTransform
17 | #
18 | # process a single command
19 | # valid commands are:
20 | # config {config_name}={config_file}
21 | # loads a configuration file and stores it as config_name
22 | # transform src_format={src_config_name} src_file={source_file} [src_metadata={metadata_file}] dest_format={dest_config_name} dest_file={output_file}
23 | # Transforms the source_file from src_config_name into dest_file using dest_config_name and optionally the src_metadata.
24 | # quit
25 | # Terminates the program
26 | #
27 | def processCommand(flexT,inputData):
28 | cmd=inputData[0]
29 | if(cmd=='config'):
30 | try:
31 | # config config_id={config_filename}
32 | if(len(inputData)>2):
33 | logging.error("Invalid config inputData: "+(" ".join(inputData))+"\n")
34 | return False
35 |             cvtFormat, filename = inputData[1].split("=")
36 |             if cvtFormat in flexT.Parsers:
37 |                 logging.error("Format already specified: " + cvtFormat + "\n")
38 |                 return False
39 | with(open(filename,"r")) as cfg:
40 | flexT.add_parser(cvtFormat,cfg)
41 | except Exception as e:
42 | logging.error("An exception has occurred while adding configuration: "+str(e))
43 |
44 | elif(cmd=='list_configs'):
45 | print(json.JSONEncoder().encode({ 'configs': list(flexT.Parsers.keys()) }))
46 | elif(cmd=='transform'):
47 |         # src_format={format} src_file={source_filename} src_metadata={source_metadata_filename} dest_format={dest_format} dest_file={dest_filename}
48 | try:
49 | data={}
50 | hasError=False
51 | for kv in inputData[1:]:
52 | key,value=kv.split("=")
53 | if(key in data):
54 | logging.error("Invalid transform inputData - duplicate key: "+key+"\n")
55 | hasError=True
56 | data[key]=value
57 | if(hasError):
58 | return False
59 | required={'src_format','src_file','dest_format','dest_file'}
60 | optional={'src_metadata'}
61 | hasAllRequired=data.keys() >= required
62 | extraKeys=data.keys() - required -optional
63 | if(len(extraKeys)>0):
64 |                 logging.error("Unsupported keys in transform: " + str(extraKeys) + "\n")
65 | return False
66 | if(not hasAllRequired):
67 |                 logging.error("Missing required keys in transform: " + str(required - data.keys()) + "\n")
68 | return False
69 | with open(data['src_file'],"r") as input_file:
70 | with open(data['dest_file'],"w") as output_file:
71 | try:
72 | flexT.transform(source_file=input_file,
73 | source_parser_name=data['src_format'],
74 | target_parser_name=data['dest_format'],
75 | target_file=output_file,
76 |                                         source_meta_data=json.load(open(data['src_metadata'], "r")) if 'src_metadata' in data else None)
77 | except Exception as e:
78 | logging.error("An exception has occurred while transforming file: "+str(e))
79 | return False
80 | except Exception as e:
81 | logging.error("An exception has occurred while setting up for transform: "+str(e))
82 | elif(cmd=='quit'):
83 | return True
84 | else:
85 | logging.error("Unknown command: "+cmd)
86 |
87 | class LessThanFilter(logging.Filter):
88 | def __init__(self, level):
89 | self._level = level
90 | logging.Filter.__init__(self)
91 |
92 | def filter(self, rec):
93 | return rec.levelno < self._level
94 |
95 | def initializeLogging(stdout_level):
96 | log = logging.getLogger()
97 | log.setLevel(logging.NOTSET)
98 |
99 | sh_out = logging.StreamHandler(stream=sys.stdout)
100 | sh_out.setLevel(stdout_level)
101 | sh_out.setFormatter(logging.Formatter('%(levelname)s:%(message)s'))
102 | sh_out.addFilter(LessThanFilter(logging.WARNING))
103 | log.addHandler(sh_out)
104 |
105 | sh_err = logging.StreamHandler(stream=sys.stderr)
106 | sh_err.setLevel(logging.WARNING)
107 | sh_err.setFormatter(logging.Formatter('%(levelname)s:%(message)s'))
108 | log.addHandler(sh_err)
109 |
110 | def main():
111 | parser = argparse.ArgumentParser(
112 | description="Transform a source file's syntax and schema to the target file document type")
113 | parser.add_argument('--delimiter',
114 | help='Delimiter used for input lines (default \'\t\')',
115 | metavar='DELIM_CHAR',
116 | default='\t',
117 | required=False)
118 | parser.add_argument('--output-done-markers',
119 | dest='output_markers',
120 | help='Output {err-done} and {out-done} when processing a command is complete. Useful if a program is controlling batch execution.',
121 | action='store_true',
122 | default=False,
123 | required=False)
124 | parser.add_argument('--stdout-log-level',
125 | dest='stdout_level',
126 | help='Log level to output to stdout. (stderr will always be WARNING)',
127 | choices=['NOTSET','DEBUG','INFO'],
128 | default='NOTSET',
129 | required=False)
130 | args = parser.parse_args()
131 |
132 | initializeLogging(args.stdout_level)
133 | flexT = FlexTransform.FlexTransform()
134 | done=False
135 | while(not done):
136 | try:
137 | inputData=sys.stdin.readline()
138 | cmd=inputData.strip().split(args.delimiter)
139 | done=processCommand(flexT,cmd)
140 | except Exception as e:
141 | logging.error("An exception has occurred while processing input: "+str(e))
142 | if(args.output_markers):
143 | sys.stderr.write('{err-done}\n')
144 | sys.stderr.flush()
145 | sys.stdout.write('{out-done}\n')
146 | sys.stdout.flush()
147 |
148 | if __name__ == '__main__':
149 | main()
150 |
--------------------------------------------------------------------------------
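
Example (illustrative sketch): a stdin session driving FlexTBatch, assuming the default tab delimiter (shown here as <TAB>); the configuration names, sample input, and output path are illustrative.

    config<TAB>cfm13=FlexTransform/resources/sampleConfigurations/cfm13.cfg
    config<TAB>stix=FlexTransform/resources/sampleConfigurations/stix_tlp.cfg
    transform<TAB>src_format=cfm13<TAB>src_file=ExampleFiles/SampleInput-CFM13.xml<TAB>dest_format=stix<TAB>dest_file=out.xml
    quit
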
/FlexTransform/FlexTransform.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Jul 27, 2014
3 |
4 | @author: ahoying
5 | """
6 |
7 | import logging
8 | import warnings
9 |
10 | import rdflib
11 |
12 | from .Configuration import Config
13 | from .OntologyOracle import Oracle
14 |
15 |
16 | # TODO: Document in Sphinx compatible format
17 |
18 | class FlexTransform(object):
19 | '''
20 |     API for performing flexible transforms of source documents into target documents, based on syntax and schema mappings against the ontology
21 | '''
22 |
23 | def __init__(self,
24 | logging_level=logging.WARN,
25 | trace=None,
26 | source_fields=None,
27 | destination_fields=None,
28 | source_iri=None,
29 | destination_iri=None):
30 | self.Parsers = {}
31 | self.logging = logging.getLogger('FlexTransform')
32 |
33 | if logging_level:
34 | self.logging.setLevel(logging_level)
35 |
36 | self.oracle = None
37 |
38 | if trace is None and (source_fields or source_iri or destination_fields or destination_iri):
39 | self.trace = True
40 | else:
41 | self.trace = trace
42 |
43 | self.trace_list = []
44 | if self.trace:
45 | self._create_trace_list(source_fields=source_fields, destination_fields=destination_fields,
46 | source_iri=source_iri, destination_iri=destination_iri)
47 |
48 | def add_parser(self, parser_name, config_file):
49 | """
50 | Add Parser to FlexTransform Object
51 |
52 | :param parser_name: String name of parser to add
53 | :param config_file: File of parser
54 |
55 | :type parser_name: String
56 | :type config_file: File Object
57 | :return:
58 | """
59 |
60 | parser_config = Config(config_file, parser_name, self.trace, trace_list=self.trace_list)
61 |
62 | if parser_name in self.Parsers:
63 | self.logging.warn('Parser %s already configured, configuration will be overwritten', parser_name)
64 |
65 | if parser_config:
66 | self.Parsers[parser_name] = parser_config
67 |
68 | def add_oracle(self, tbox_location, schema_iri):
69 | '''
70 |         Add oracle to the FlexTransform object
71 | '''
72 |
73 | # TODO add error checking for locations
74 | self.oracle = Oracle(tbox_location, rdflib.URIRef(schema_iri), self.trace, trace_list=self.trace_list)
75 |
76 | def transform(self, source_file, source_parser_name, target_parser_name,
77 | target_file=None, source_meta_data=None, oracle=None):
78 | """
79 |         Transform the data from source_file using source_parser_name as the source format and target_parser_name as the destination format.
80 | Returns transformed data to the caller.
81 |
82 | :param source_file: File containing information to be transformed
83 | :param source_parser_name: String descriptor of parser to be used for source
85 |         :param target_parser_name: String descriptor of parser to be used for destination
85 | :param target_file: File to place transformed information
86 | :param source_meta_data:
87 | :param oracle: An instance of the OntologyOracle, initialized with the TBOX URI. If NONE, will not be used.
88 |
89 | :type source_file: File Object
90 | :type source_parser_name: String
91 | :type target_parser_name: String
92 | :type target_file: File Object
93 | :return:
94 | """
95 |
96 | if source_file is None or source_parser_name is None or target_parser_name is None:
97 | raise Exception('MissingParameter', 'Required parameter is not defined')
98 |
99 | if source_parser_name not in self.Parsers:
100 | raise Exception('ParserNotFound', 'Source parser %s has not been configured' % source_parser_name)
101 |
102 | if target_parser_name not in self.Parsers:
103 | raise Exception('ParserNotFound', 'Target parser %s has not been configured' % target_parser_name)
104 |
105 | if source_meta_data is not None and not isinstance(source_meta_data, dict):
106 | raise Exception('IncorrectFormat', 'sourceMetaData must be in dictionary format')
107 |
108 | if self.oracle:
109 | oracle = self.oracle
110 |
111 | # Parse and validate configurations
112 | source_config = self.Parsers[source_parser_name]
113 | destination_config = self.Parsers[target_parser_name]
114 |
115 | # Calculate "DerivedData" functions
116 | source_config.calculate_derived_data(source_file=source_file, dest_file=target_file)
117 |
118 | # Parse source file into dictionary object
119 | source_data = source_config.Parser.Read(source_file, source_config)
120 |
121 | if source_data is None:
122 | raise Exception('NoSourceData', 'Source data file could not be parsed, no data')
123 |
124 | # Map source file data to source schema
125 | mapped_data = source_config.SchemaParser.map_data_to_schema(source_data, oracle)
126 |
127 | if source_meta_data is not None:
128 | source_config.SchemaParser.map_metadata_to_schema(source_meta_data)
129 |
130 | # Map source data to destination schema
131 | transformed_data = destination_config.SchemaParser.TransformData(mapped_data, oracle)
132 |
133 | # Finalize data to be written
134 | finalized_data = destination_config.Parser.Finalize(transformed_data)
135 |
136 | if target_file is not None:
137 | destination_config.Parser.Write(target_file, finalized_data)
138 |
139 | return finalized_data
140 |
141 | def _create_trace_list(self, source_fields=None, destination_fields=None, source_iri=None, destination_iri=None):
142 |
143 | trace_list = []
144 | if source_fields:
145 | for arg in source_fields:
146 | trace_list.append({"src_fields": [arg], "src_IRIs": list(), "dst_fields": list(), "dst_IRIs": list()})
147 | if source_iri:
148 | for arg in source_iri:
149 | trace_list.append({"src_fields": list(), "src_IRIs": [arg], "dst_fields": list(), "dst_IRIs": list()})
150 | if destination_fields:
151 | for arg in destination_fields:
152 | trace_list.append({"src_fields": list(), "src_IRIs": list(), "dst_fields": [arg], "dst_IRIs": list()})
153 | if destination_iri:
154 | for arg in destination_iri:
155 | trace_list.append({"src_fields": list(), "src_IRIs": list(), "dst_fields": list(), "dst_IRIs": [arg]})
156 | self.trace_list = trace_list
157 |
158 | def AddParser(self, parserName, configFile, sourceFileName = None, destFileName = None):
159 | warnings.warn('"AddParser()" has been deprecated in favor of "add_parser()"', DeprecationWarning)
160 | self.logging.warn('"AddParser()" has been deprecated in favor of "add_parser()"')
161 | return self.add_parser(parserName, configFile)
162 |
163 | def TransformFile(self, sourceFileName, sourceParserName, targetParserName,
164 | targetFileName=None, sourceMetaData=None, oracle=None):
165 | warnings.warn('"TransformFile()" has been deprecated in favor of "transform()"', DeprecationWarning)
166 | self.logging.warn('"TransformFile()" has been deprecated in favor of "transform()"')
167 | return self.transform(sourceFileName, sourceParserName, targetParserName, target_file=targetFileName,
168 | source_meta_data=sourceMetaData, oracle=oracle)
169 | if __name__ == '__main__':
170 | raise Exception("Unsupported", "FlexTransform.py should not be called directly, use helper script FlexT.py")
171 |
172 |
--------------------------------------------------------------------------------
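
Example (illustrative sketch): wiring in the ontology oracle before a transform via add_oracle() above; the OWL path points at the transform.owl bundled in FlexTransform/resources, and the IRI is illustrative.

    from FlexTransform import FlexTransform

    transform = FlexTransform.FlexTransform()
    transform.add_oracle('FlexTransform/resources/transform.owl',
                         'http://www.anl.gov/cfm/transform.owl')
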
/FlexTransform/OntologyOracle/__init__.py:
--------------------------------------------------------------------------------
1 | from .OntologyOracle import Oracle
--------------------------------------------------------------------------------
/FlexTransform/SchemaParser/TransformFunctions/CFM13Functions.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Mar 13, 2015
3 |
4 | @author: ahoying
5 | """
6 |
7 | import logging
8 | import pprint
9 |
10 | import arrow
11 |
12 | from FlexTransform.SchemaParser.TransformFunctions import TransformFunctionManager
13 |
14 |
15 | class CFM13Functions(object):
16 | """
17 | Contains Transform functions that multiple schemas utilize
18 | """
19 |
20 | """
21 |     The _FunctionNames dictionary should contain each function name understood by this class, scoped to either
22 |     indicator data or header data, mapped to a list of required fields to be passed in the args dictionary,
23 | or None if no args are required.
24 |
25 | Allowed fields for the Args dictionary:
26 |
27 | functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser.
28 | This is the value of the string between the () in the function name in the .json
29 | schema configuration files
30 |
31 | fieldName - Required - The name of the current field
32 |
33 | fieldDict - Required - The field dictionary for the current field getting transformed
34 |
35 | currentRow - Optional - The transformed data and associated field dictionaries for the currently processed row
36 |
37 | indicatorType - Optional - The indicator type for the current row
38 |
39 | transformedData - Optional - The dictionary of all current transformed data
40 |
41 | """
42 |
43 | __FunctionNames = {
44 | 'DocumentHeaderData': {
45 | 'CFM13_determineTLP': ['transformedData'],
46 | 'CFM13_determineReportOUO': ['transformedData'],
47 | 'CFM13_earliestIndicatorTime': ['transformedData']
48 | },
49 | 'IndicatorData': {
50 | 'CFM13_GenerateRestrictionsDescription': ['currentRow'],
51 | 'CFM13_SightingsCount': ['functionArg', 'currentRow']
52 | }
53 | }
54 |
55 | def __init__(self):
56 | """
57 | Constructor
58 | """
59 | self.logging = logging.getLogger('FlexTransform.SchemaParser.CFM13Functions')
60 | self.pprint = pprint.PrettyPrinter()
61 |
62 | @classmethod
63 | def RegisterFunctions(cls):
64 | for Scope, Functions in cls.__FunctionNames.items():
65 | for FunctionName, RequiredArgs in Functions.items():
66 | TransformFunctionManager.register_function(Scope, FunctionName, RequiredArgs, 'CFM13Functions')
67 |
68 | def Execute(self, Scope, FunctionName, args):
69 | """
70 | Execute the specific called function with the supplied args
71 | """
72 |
73 | value = None
74 |
75 | if FunctionName not in self.__FunctionNames[Scope]:
76 | raise Exception('FunctionNotDefined',
77 | 'Function %s is not defined in CFM13Functions for document scope %s' % (
78 | FunctionName, Scope))
79 |
80 | if FunctionName == 'CFM13_GenerateRestrictionsDescription':
81 | value = ''
82 | if 'ouo' in args['currentRow'] and 'Value' in args['currentRow']['ouo']:
83 | value += "OUO="
84 | if args['currentRow']['ouo']['Value'] == '1':
85 | value += "True"
86 | else:
87 | value += "False"
88 | if 'recon' in args['currentRow'] and 'Value' in args['currentRow']['recon']:
89 | if value != '':
90 | value += ", "
91 | value += "ReconAllowed="
92 | if args['currentRow']['recon']['Value'] == '0':
93 | value += "True"
94 | else:
95 | value += "False"
96 | if 'restriction' in args['currentRow'] and 'Value' in args['currentRow']['restriction']:
97 | if value != '':
98 | value += ", "
99 | value += "SharingRestrictions=%s" % args['currentRow']['restriction']['Value']
100 |
101 | elif FunctionName == 'CFM13_determineTLP':
102 | valuemap = {"WHITE": 1, "GREEN": 2, "AMBER": 3, "RED": 4}
103 | value = 'WHITE'
104 | for subrow in args['transformedData']['IndicatorData']:
105 | if 'restriction' in subrow:
106 | if subrow['restriction']['Value'] == 'private':
107 | if valuemap['AMBER'] > valuemap[value]:
108 | value = 'AMBER'
109 | if subrow['restriction']['Value'] == 'need-to-know':
110 | if valuemap['GREEN'] > valuemap[value]:
111 | value = 'GREEN'
112 | if 'ouo' in subrow:
113 | if subrow['ouo']['Value'] == '1':
114 | if valuemap['GREEN'] > valuemap[value]:
115 | value = 'GREEN'
116 |
117 | elif FunctionName == 'CFM13_earliestIndicatorTime':
118 | # For now this function is specific to CFM13, it could be made generic if needed in other Schemas
119 | mintime = None
120 | for subrow in args['transformedData']['IndicatorData']:
121 | if 'create_time' in subrow:
122 | indicatorTime = arrow.get(subrow['create_time']['Value'], 'YYYY-MM-DDTHH:mm:ssZ')
123 | if mintime is None or mintime > indicatorTime:
124 | mintime = indicatorTime
125 |
126 | if mintime is not None:
127 | value = mintime.format('YYYY-MM-DDTHH:mm:ssZZ')
128 | else:
129 | value = args['currentRow']['analyzer_time']['Value']
130 |
131 | elif FunctionName == 'CFM13_SightingsCount':
132 | sightings = 1
133 | if args['functionArg'] in args['currentRow'] and 'Value' in args['currentRow'][args['functionArg']]:
134 | sightings += int(args['currentRow'][args['functionArg']]['Value'])
135 |
136 | value = str(sightings)
137 |
138 | elif FunctionName == 'CFM13_determineReportOUO':
139 | '''
140 | This function determines the OUO level of the overall report by assuming that if any included indicator is OUO,
141 | then the entire report is OUO.
142 | '''
143 | value = "0"
144 | self.logging.debug("Evaluating report OUO status based on {} indicators.".format(len(args['transformedData']['IndicatorData'])))
145 | for indicator in args['transformedData']['IndicatorData']:
146 | self.logging.debug("Checking indicator OUO value: {}".format(indicator['ouo']['Value']))
147 | if indicator['ouo']['Value'] == "1":
148 | value = "1"
149 | break
150 | self.logging.debug("Returning value {}".format(value))
151 |
152 | return value
153 |
--------------------------------------------------------------------------------
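
Example (illustrative sketch): the TLP roll-up above, invoked directly. The row dictionaries mirror the 'transformedData' shape documented in the Args description, and the direct class import assumes the TransformFunctions package exports the class (as the manager's class lookup implies).

    from FlexTransform.SchemaParser.TransformFunctions import CFM13Functions

    fn = CFM13Functions()
    tlp = fn.Execute('DocumentHeaderData', 'CFM13_determineTLP',
                     {'transformedData': {'IndicatorData': [
                         {'restriction': {'Value': 'private'}, 'ouo': {'Value': '1'}}]}})
    # tlp == 'AMBER': the 'private' restriction outranks the GREEN implied by OUO
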
/FlexTransform/SchemaParser/TransformFunctions/CFM20Functions.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Mar 13, 2015
3 |
4 | @author: ahoying
5 | '''
6 |
7 | import re
8 | import logging
9 |
10 | from FlexTransform.SchemaParser.TransformFunctions import TransformFunctionManager
11 |
12 | class CFM20Functions(object):
13 | '''
14 | Contains Transform functions that multiple schemas utilize
15 | '''
16 |
17 | '''
18 |     The _FunctionNames dictionary should contain each function name understood by this class, scoped to either indicator data or header data,
19 |     mapped to a list of required fields to be passed in the args dictionary, or None if no args are required.
20 |
21 | Allowed fields for the Args dictionary:
22 |
23 | functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser.
24 | This is the value of the string between the () in the function name in the .json schema configuration files
25 |
26 | fieldName - Required - The name of the current field
27 |
28 | fieldDict - Required - The field dictionary for the current field getting transformed
29 |
30 | currentRow - Optional - The transformed data and associated field dictionaries for the currently processed row
31 |
32 | indicatorType - Optional - The indicator type for the current row
33 |
34 | transformedData - Optional - The dictionary of all current transformed data
35 |
36 | '''
37 |
38 | __FunctionNames = {
39 | 'IndicatorData': {
40 | 'CFM20_determineIndicatorConstraint': ['functionArg', 'currentRow']
41 | }
42 | }
43 |
44 | def __init__(self):
45 | '''
46 | Constructor
47 | '''
48 | self.logging = logging.getLogger('FlexTransform.SchemaParser.CFM20Functions')
49 |
50 | @classmethod
51 | def RegisterFunctions(cls):
52 | for Scope, Functions in cls.__FunctionNames.items() :
53 | for FunctionName, RequiredArgs in Functions.items() :
54 | TransformFunctionManager.register_function(Scope, FunctionName, RequiredArgs, 'CFM20Functions')
55 |
56 | def Execute(self, Scope, FunctionName, args):
57 | '''
58 | Execute the specific called function with the supplied args
59 | '''
60 |
61 | Value = None
62 |
63 | if (FunctionName not in self.__FunctionNames[Scope]) :
64 | raise Exception('FunctionNotDefined','Function %s is not defined in CFM20Functions for document scope %s' % (FunctionName, Scope))
65 |
66 | if (FunctionName == 'CFM20_determineIndicatorConstraint') :
67 | # TODO: It would be great if somehow we could query the ontology to get this. Complete for all indicator constraints.
68 |
69 | if (args['functionArg'] in args['currentRow'] and 'Value' in args['currentRow'][args['functionArg']]) :
70 | indicatorValue = args['currentRow'][args['functionArg']]['Value']
71 | indicatorOntology = args['currentRow'][args['functionArg']]['matchedOntology']
72 |
73 | if (indicatorOntology == 'http://www.anl.gov/cfm/transform.owl#FilenameIndicatorValueSemanticConcept') :
74 | Value = 'http://www.anl.gov/cfm/2.0/current/#StringValueMatch'
75 | elif (re.match(r'^((\d){1,3}\.){3}(\d){1,3}$', indicatorValue)) :
76 | Value = 'http://www.anl.gov/cfm/2.0/current/#IPv4DottedDecimalEquality'
77 | elif (re.match(r'^[a-fA-F0-9]+:+[a-fA-F0-9:]+$', indicatorValue)) :
78 | Value = 'http://www.anl.gov/cfm/2.0/current/#IPv6ColonHexEquality'
79 | elif (re.match(r'^([a-z0-9][^./]+\.)+[a-z]+$', indicatorValue)) :
80 | Value = 'http://www.anl.gov/cfm/2.0/current/#DNSDomainNameMatch'
81 | elif (re.match(r'^((ft|htt)ps?://)?([a-z][^./]+\.)+[a-z]+/.*$', indicatorValue)) :
82 | Value = 'http://www.anl.gov/cfm/2.0/current/#URLMatch'
83 | elif (re.match(r'^[a-fA-F0-9]{32}$', indicatorValue)) :
84 | Value = 'http://www.anl.gov/cfm/2.0/current/#MD5Equality'
85 | elif (re.match(r'^[a-fA-F0-9]{40}$', indicatorValue)) :
86 | Value = 'http://www.anl.gov/cfm/2.0/current/#SHA1Equality'
87 | elif (re.match(r'^\d+$', indicatorValue)) :
88 | Value = 'http://www.anl.gov/cfm/2.0/current/#IntegerEquality'
89 |
90 | if (Value is None) :
91 | # Still didn't find an indicator type, throw exception
92 | raise Exception('unknownIndicatorConstraint', 'CFM 2.0 Indicator constraint could not be determined for data: %s :: field %s' % (args['currentRow'][args['functionArg']]['Value'], indicatorOntology))
93 |
94 | return Value
--------------------------------------------------------------------------------
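
Example (illustrative sketch): the constraint inference above; the field name 'value' and the row contents are illustrative.

    from FlexTransform.SchemaParser.TransformFunctions import CFM20Functions

    fn = CFM20Functions()
    constraint = fn.Execute('IndicatorData', 'CFM20_determineIndicatorConstraint',
                            {'functionArg': 'value',
                             'currentRow': {'value': {'Value': '10.10.10.10',
                                                      'matchedOntology': None}}})
    # constraint == 'http://www.anl.gov/cfm/2.0/current/#IPv4DottedDecimalEquality'
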
/FlexTransform/SchemaParser/TransformFunctions/GlobalFunctions.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Mar 13, 2015
3 |
4 | @author: ahoying
5 | '''
6 |
7 | import logging
8 | import uuid
9 |
10 | import arrow
11 |
12 | from FlexTransform.SchemaParser.TransformFunctions import TransformFunctionManager
13 |
14 |
15 | class GlobalFunctions(object):
16 | """
17 | Contains Transform functions that multiple schemas utilize
18 | """
19 |
20 | '''
21 |     The _FunctionNames dictionary should contain each function name understood by this class, scoped to either indicator data or header data,
22 |     mapped to a list of required fields to be passed in the args dictionary, or None if no args are required.
23 |
24 | Allowed fields for the Args dictionary:
25 |
26 | functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser.
27 | This is the value of the string between the () in the function name in the .json schema configuration files
28 |
29 | fieldName - Required - The name of the current field
30 |
31 | fieldDict - Required - The field dictionary for the current field getting transformed
32 |
33 | currentRow - Optional - The transformed data and associated field dictionaries for the currently processed row
34 |
35 | indicatorType - Optional - The indicator type for the current row
36 |
37 | transformedData - Optional - The dictionary of all current transformed data
38 |
39 | '''
40 |
41 | __FunctionNames = {
42 | 'DocumentHeaderData': {
43 | 'countOfIndicators': ['transformedData'],
44 | 'now': ['fieldDict'],
45 | },
46 | 'IndicatorData': {
47 | 'calculate_duration': ['currentRow'],
48 | 'now': ['fieldDict'],
49 | 'generate_uuid': None,
50 | 'mbl_sourcetype': ['indicatorType']
51 | }
52 | }
53 |
54 | def __init__(self):
55 | """
56 | Constructor
57 | """
58 | self.logging = logging.getLogger('FlexTransform.SchemaParser.GlobalFunctions')
59 |
60 | @classmethod
61 | def RegisterFunctions(cls):
62 | for Scope, Functions in cls.__FunctionNames.items():
63 | for FunctionName, RequiredArgs in Functions.items():
64 | TransformFunctionManager.register_function(Scope, FunctionName, RequiredArgs, 'GlobalFunctions')
65 |
66 | def Execute(self, scope, function_name, args):
67 | """
68 | Execute the specific called function with the supplied args
69 | """
70 |
71 | value = None
72 |
73 | if function_name not in self.__FunctionNames[scope]:
74 | raise Exception('FunctionNotDefined',
75 | 'Function %s is not defined in GlobalFunctions for document scope %s' % (function_name, scope))
76 | if function_name == 'calculate_duration':
77 | if 'functionArg' not in args or not args['functionArg']:
78 | self.logging.error('FlexT function "calculate_duration" requires the field name to base value on')
79 | elif args['functionArg'] not in args['currentRow']:
80 | self.logging.error('FlexT function "calculate_duration": {} not in {}'.format(args['functionArg'], list(args['currentRow'].keys())))
81 | elif args['currentRow'][args['functionArg']]['ParsedValue']:
82 | duration_val = arrow.get(args['currentRow'][args['functionArg']]['ParsedValue']).timestamp - arrow.utcnow().timestamp
83 | if duration_val < 0:
84 | return "0"
85 | return str(duration_val)
86 | elif function_name == 'now':
87 | if 'dateTimeFormat' in args['fieldDict']:
88 | if args['fieldDict']['dateTimeFormat'] == 'unixtime':
89 | value = str(arrow.utcnow().timestamp)
90 | else:
91 | value = arrow.utcnow().format(args['fieldDict']['dateTimeFormat'])
92 | # TODO - Handle case of no 'dateTimeFormat'
93 | elif function_name == 'countOfIndicators':
94 | if 'IndicatorData' in args['transformedData']:
95 | value = str(len(args['transformedData']['IndicatorData']))
96 | else:
97 | value = '0'
98 |
99 | elif function_name == 'generate_uuid':
100 | value = str(uuid.uuid4())
101 |
102 | elif function_name == 'mbl_sourcetype':
103 | if ('ipv4' in args['currentRow'] and args['currentRow']['ipv4']['Value']) or \
104 | ('ipv6' in args['currentRow'] and args['currentRow']['ipv6']['Value']) or \
105 | ('fqdn' in args['currentRow'] and args['currentRow']['fqdn']['Value']):
106 | value = 'block'
107 | elif ('envelopeaddress' in args['currentRow'] and args['currentRow']['envelopeaddress']['Value']) or \
108 | ('subject' in args['currentRow'] and args['currentRow']['subject']['Value']) or \
109 | ('xheader' in args['currentRow'] and args['currentRow']['xheader']['Value']):
110 | value = 'spearphish'
111 | elif ('sizeOnDisk' in args['currentRow'] and args['currentRow']['sizeOnDisk']['Value']) or \
112 | ('compileTime' in args['currentRow'] and args['currentRow']['compileTime']['Value']) or \
113 | ('md5hash' in args['currentRow'] and args['currentRow']['md5hash']['Value']) or \
114 | ('sha1hash' in args['currentRow'] and args['currentRow']['sha1hash']['Value']) or \
115 | ('sha256hash' in args['currentRow'] and args['currentRow']['sha256hash']['Value']):
116 | value = 'malware'
117 | return value
118 |
--------------------------------------------------------------------------------
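
Example (illustrative sketch): two of the helpers above invoked directly; the transformedData shape is the minimal form the countOfIndicators branch expects.

    from FlexTransform.SchemaParser.TransformFunctions import GlobalFunctions

    fn = GlobalFunctions()
    fn.Execute('IndicatorData', 'generate_uuid', {})              # random UUID string
    fn.Execute('DocumentHeaderData', 'countOfIndicators',
               {'transformedData': {'IndicatorData': [{}, {}]}})  # '2'
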
/FlexTransform/SchemaParser/TransformFunctions/STIXFunctions.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Mar 13, 2015
3 |
4 | @author: ahoying
5 | '''
6 |
7 | import logging
8 |
9 | import arrow
10 |
11 | from FlexTransform.SchemaParser.TransformFunctions import TransformFunctionManager
12 |
13 |
14 | class STIXFunctions(object):
15 | '''
16 | Contains Transform functions that multiple schemas utilize
17 | '''
18 |
19 | '''
20 |     The _FunctionNames dictionary should contain each function name understood by this class, scoped to either indicator data or header data,
21 |     mapped to a list of required fields to be passed in the args dictionary, or None if no args are required.
22 |
23 | Allowed fields for the Args dictionary:
24 |
25 | functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser.
26 | This is the value of the string between the () in the function name in the .json schema configuration files
27 |
28 | fieldName - Required - The name of the current field
29 |
30 | fieldDict - Required - The field dictionary for the current field getting transformed
31 |
32 | currentRow - Optional - The transformed data and associated field dictionaries for the currently processed row
33 |
34 | indicatorType - Optional - The indicator type for the current row
35 |
36 | transformedData - Optional - The dictionary of all current transformed data
37 |
38 | '''
39 |
40 | __FunctionNames = {
41 | 'DocumentHeaderData': {
42 | 'stix_now': ['fieldDict']
43 | },
44 | 'IndicatorData': {
45 | 'stix_now': ['fieldDict']
46 | }
47 | }
48 |
49 | def __init__(self):
50 | '''
51 | Constructor
52 | '''
53 | self.logging = logging.getLogger('FlexTransform.SchemaParser.STIXFunctions')
54 |
55 | @classmethod
56 | def RegisterFunctions(cls):
57 | for Scope, Functions in cls.__FunctionNames.items():
58 | for FunctionName, RequiredArgs in Functions.items():
59 | TransformFunctionManager.register_function(Scope, FunctionName, RequiredArgs, 'STIXFunctions')
60 |
61 | def Execute(self, Scope, function_name, args):
62 | '''
63 | Execute the specific called function with the supplied args
64 | '''
65 |
66 | value = None
67 |
68 | if function_name not in self.__FunctionNames[Scope]:
69 | raise Exception('FunctionNotDefined',
70 | 'Function %s is not defined in STIXFunctions for document scope %s' % (function_name, Scope))
71 | if function_name == 'stix_now':
72 | if 'dateTimeFormat' in args['fieldDict']:
73 | value = arrow.utcnow().format(args['fieldDict']['dateTimeFormat'])
74 | # self.logging.info("Called stix now")
75 | # match = re.match(r"(.*)([+-]\d\d)(\d\d)$", value)
76 | # if match:
77 | # value = match.group(1) + match.group(2) + ":" + match.group(3)
78 | return value
79 |
--------------------------------------------------------------------------------
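
A sketch of registering and calling stix_now directly; in normal operation the package __init__ performs registration and SchemaParser assembles the args dictionary. The field name here is hypothetical, and the format string is a standard arrow token pattern:

    from FlexTransform.SchemaParser.TransformFunctions import STIXFunctions

    STIXFunctions.RegisterFunctions()
    stix = STIXFunctions()
    args = {
        'fieldName': 'produced_time',  # hypothetical field name
        'fieldDict': {'dateTimeFormat': 'YYYY-MM-DDTHH:mm:ssZZ'},
    }
    print(stix.Execute('DocumentHeaderData', 'stix_now', args))
    # e.g. '2015-03-13T18:04:22+00:00'
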
/FlexTransform/SchemaParser/TransformFunctions/TransformFunctionManager.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Mar 13, 2015
3 |
4 | @author: ahoying
5 | '''
6 |
7 | import inspect
8 | import logging
9 | from collections import defaultdict
10 |
11 | import FlexTransform.SchemaParser.TransformFunctions
12 |
13 |
14 | class TransformFunctionManager(object):
15 |
16 | __KnownFunctions = defaultdict(dict)
17 |
18 | def __init__(self, trace, trace_list=[]):
19 | self.logging = logging.getLogger('FlexTransform.SchemaParser.TransformFunctionManager')
20 | self.trace = trace
21 | self.trace_list = trace_list
22 | if self.trace:
23 | self.logging.debug("Initialized TransformFunctionManager with tracelist of {} elements.".format(len(trace_list)))
24 |
25 | self._FunctionClasses = {}
26 |
27 | @classmethod
28 | def register_function(cls, scope, function_name, required_args, function_class):
29 | cls.__KnownFunctions[scope][function_name] = {'class': function_class, 'RequiredArgs': required_args}
30 |
31 | @classmethod
32 | def get_function_class(cls, scope, function_name):
33 | if scope in cls.__KnownFunctions and function_name in cls.__KnownFunctions[scope]:
34 | class_name = cls.__KnownFunctions[scope][function_name]['class']
35 | else:
36 | raise Exception(
37 | 'FunctionNotRegistered',
38 | "The function %s is not registered with the TransformFunctionManager for scope %s" % (function_name,
39 | scope))
40 |
41 | for name, obj in inspect.getmembers(FlexTransform.SchemaParser.TransformFunctions, inspect.isclass):
42 | if name == class_name:
43 | return obj()
44 |
45 | raise Exception(
46 | 'FunctionClassNotFound',
47 | "The Class %s for function %s was not found by the TransformFunctionManager" % (class_name, function_name))
48 |
49 | def get_function_scope(self, scope, function_name):
50 | if scope in self.__KnownFunctions and function_name in self.__KnownFunctions[scope]:
51 | return True
52 | else:
53 | return False
54 |
55 | def execute_transform_function(self, scope, function_name, args):
56 | if function_name in self._FunctionClasses:
57 | function_class = self._FunctionClasses[function_name]
58 | else:
59 | function_class = TransformFunctionManager.get_function_class(scope, function_name)
60 | self._FunctionClasses[function_name] = function_class
61 |
62 | self._validate_args(scope, function_name, args)
63 | return function_class.Execute(scope, function_name, args)
64 |
65 | def _validate_args(self, scope, function_name, args):
66 | '''
67 | Allowed fields for the Args dictionary:
68 |
69 | functionArg - Optional - Any arguments passed to the function when it is called from SchemaParser.
70 | This is the value of the string between the () in the function name in the
71 | .json schema configuration files
72 |
73 | fieldName - Required - The name of the current field
74 |
75 | fieldDict - Required - The field dictionary for the current field getting transformed
76 |
77 | currentRow - Optional - The transformed data and associated field dictionaries for the currently processed row
78 |
79 | indicatorType - Optional - The indicator type for the current row
80 |
81 | transformedData - Optional - The dictionary of all current transformed data
82 |
83 | '''
84 | allowed_fields = {'functionArg', 'fieldName', 'fieldDict', 'currentRow', 'indicatorType', 'transformedData'}
85 | required_fields = {'fieldName', 'fieldDict'}
86 |
87 | if isinstance(args, dict):
88 | for arg in args:
89 | if arg not in allowed_fields:
90 | self.logging.warning('An argument passed to function %s is not allowed: %s' % (function_name, arg))
91 | else:
92 | raise Exception(
93 | 'InvalidArgs',
94 | 'The arguments passed to function %s are not defined or not in dictionary format' % function_name)
95 |
96 | if self.__KnownFunctions[scope][function_name]['RequiredArgs'] is not None:
97 | required_fields.update(self.__KnownFunctions[scope][function_name]['RequiredArgs'])
98 |
99 | for arg in required_fields:
100 | if arg not in args or args[arg] is None:
101 | raise Exception(
102 | 'InvalidArgs',
103 | 'Function %s args did not include the required %s field, could not process' % (function_name, arg))
104 |
--------------------------------------------------------------------------------
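
The manager's round trip is: resolve the registered class, validate the args dictionary against the allowed and required fields, then dispatch to Execute. A sketch, assuming generate_uuid is registered under the IndicatorData scope with no extra required args (its handler in GlobalFunctions suggests as much):

    from FlexTransform.SchemaParser.TransformFunctions import (
        GlobalFunctions, TransformFunctionManager)

    GlobalFunctions.RegisterFunctions()
    manager = TransformFunctionManager(trace=False)
    args = {'fieldName': 'uuid', 'fieldDict': {}}  # the two always-required fields
    value = manager.execute_transform_function('IndicatorData', 'generate_uuid', args)
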
/FlexTransform/SchemaParser/TransformFunctions/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Mar 13, 2015
3 |
4 | @author: ahoying
5 | '''
6 |
7 | import FlexTransform.SchemaParser
8 |
9 | from .TransformFunctionManager import TransformFunctionManager
10 | from .GlobalFunctions import GlobalFunctions
11 | from .CFM13Functions import CFM13Functions
12 | from .CFM20Functions import CFM20Functions
13 | from .STIXFunctions import STIXFunctions
14 |
15 | GlobalFunctions.RegisterFunctions()
16 | CFM13Functions.RegisterFunctions()
17 | CFM20Functions.RegisterFunctions()
18 | STIXFunctions.RegisterFunctions()
--------------------------------------------------------------------------------
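
Assuming each function class registers itself at import time as above, callers can test for a function by scope before dispatching. A quick sketch:

    from FlexTransform.SchemaParser.TransformFunctions import TransformFunctionManager

    manager = TransformFunctionManager(trace=False)
    assert manager.get_function_scope('DocumentHeaderData', 'stix_now')
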
/FlexTransform/SchemaParser/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Oct 13, 2014
3 |
4 | @author: ahoying
5 | '''
6 |
7 | from .SchemaParser import SchemaParser
--------------------------------------------------------------------------------
/FlexTransform/SyntaxParser/CSVParser.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Aug 13, 2015
3 |
4 | @author: ahoying
5 | '''
6 |
7 | import logging
8 | import csv
9 | import os
10 | from builtins import str
11 | from FlexTransform.SyntaxParser.Parser import Parser
12 |
13 | class CSVParser(Parser):
14 | '''
15 | CSV Syntax Parser
16 | '''
17 |
18 | def __init__(self, trace, tracelist=[]):
19 | '''
20 | Constructor
21 | '''
22 | super(CSVParser, self).__init__(trace, tracelist)
23 |
24 | self.ParsedData = {}
25 | self.logging = logging.getLogger('FlexTransform.CSVParser')
26 | self.trace = trace
27 | self.tracelist = tracelist
28 | if self.trace:
29 | self.logging.debug("Initialized CSVParser with tracelist of {} elements.".format(len(tracelist)))
30 |
31 | self.Fields = []
32 | self.Delimiter = ','
33 | self.QuoteChar = '"'
34 | self.EscapeChar = None
35 | self.HeaderLine = False
36 | self.DoubleQuote = True
37 | self.QuoteStyle = csv.QUOTE_MINIMAL
38 | self.LineTerminator = '\r\n'
39 |
40 | def ValidateConfig(self, config):
41 | '''
42 | Validate CSV Parser specific configuration options
43 |
44 | The Fields option is required and lists, in order, the column names that map to the SCHEMA definition
45 | '''
46 | if config.has_section('CSV'):
47 | if config.has_option('CSV', 'Fields'):
48 | FieldsList = config['CSV']['Fields']
49 | for field in FieldsList.split(','):
50 | self.Fields.append(field.strip())
51 | else:
52 | raise Exception("ConfigError", "CSV Configuration does not include the required Fields key")
53 |
54 | if config.has_option('CSV', 'Delimiter'):
55 | self.Delimiter = bytes(config['CSV']['Delimiter'], "utf-8").decode("unicode_escape").strip("\"'")
56 |
57 | if config.has_option('CSV', 'QuoteChar'):
58 | self.QuoteChar = bytes(config['CSV']['QuoteChar'], "utf-8").decode("unicode_escape")
59 |
60 | if config.has_option('CSV', 'EscapeChar'):
61 | self.EscapeChar = bytes(config['CSV']['EscapeChar'], "utf-8").decode("unicode_escape")
62 |
63 | if config.has_option('CSV', 'HeaderLine'):
64 | self.HeaderLine = config.getboolean('CSV', 'HeaderLine', fallback=False)
65 |
66 | if config.has_option('CSV', 'DoubleQuote'):
67 | self.DoubleQuote = config.getboolean('CSV', 'DoubleQuote', fallback=True)
68 |
69 | if config.has_option('CSV', 'QuoteStyle'):
70 | if config['CSV']['QuoteStyle'].lower() == 'none':
71 | self.QuoteStyle = csv.QUOTE_NONE
72 | elif config['CSV']['QuoteStyle'].lower() == 'nonnumeric':
73 | self.QuoteStyle = csv.QUOTE_NONNUMERIC
74 | elif config['CSV']['QuoteStyle'].lower() == 'all':
75 | self.QuoteStyle = csv.QUOTE_ALL
76 | elif config['CSV']['QuoteStyle'].lower() == 'minimal':
77 | self.QuoteStyle = csv.QUOTE_MINIMAL
78 | else:
79 | raise Exception("ConfigError", "Unknown option for CSV QuoteStyle: " + config['CSV']['QuoteStyle'])
80 |
81 | if config.has_option('CSV', 'LineTerminator'):
82 | self.LineTerminator = bytes(config['CSV']['LineTerminator'], "utf-8").decode("unicode_escape")
83 |
84 | def Read(self, file, config):
85 | '''
86 | Read file and parse into Transform object
87 | '''
88 |
89 | super(CSVParser, self).Read(file, config)
90 |
91 | self.ParsedData = {
92 | "IndicatorData": []
93 | }
94 | position = {}
95 |
96 | for idx, field in enumerate(self.Fields):
97 | position[idx] = field
98 |
99 | content = csv.reader(file, delimiter=self.Delimiter)
100 | for line in content:
101 | to_add = {}
102 | for idx, record in enumerate(line):
103 | record = record.strip("\"'" + self.LineTerminator)
104 | if record:
105 | to_add.update({position[idx]: record})
106 | self.ParsedData["IndicatorData"].append(to_add)
107 |
108 | return self.ParsedData
109 |
110 | def Finalize(self, MappedData):
111 | '''
112 | Finalize the formatting of the data before being returned to the caller
113 | '''
114 |
115 | if 'IndicatorData' not in MappedData or len(MappedData['IndicatorData']) == 0:
116 | raise Exception('NoIndicatorData', 'Transformed data has no indicators, nothing to write')
117 |
118 | FinalizedData = []
119 | for indicator in MappedData['IndicatorData']:
120 | DataRow = {}
121 | # Keep passing the IndicatorType forward with the data. This is somewhat messy, but that way we can use it on write
122 | # DataRow['IndicatorType'] = indicator['IndicatorType']
123 |
124 | for field in self.Fields:
125 | if field not in indicator:
126 | self.logging.warning("Field %s does not exist in IndicatorData", field)
127 | DataRow[field] = ''
128 | elif 'Value' in indicator[field]:
129 | DataRow[field] = indicator[field]['Value']
130 | else:
131 | self.logging.warning("Field %s does not contain a Value entry", field)
132 | DataRow[field] = ''
133 |
134 | FinalizedData.append(DataRow)
135 |
136 | return FinalizedData
137 |
138 | def Write(self, file, FinalizedData):
139 | '''
140 | Write the data as csv to the file.
141 | '''
142 | if isinstance(file, str):
143 | if os.path.exists(file):
144 | file = open(file, "w")
145 | else:
146 | self.logging.error("%s is not a valid filepath", file)
147 | return
148 | csv.register_dialect('flext',
149 | delimiter=self.Delimiter,
150 | quotechar=self.QuoteChar,
151 | escapechar=self.EscapeChar,
152 | doublequote=self.DoubleQuote,
153 | lineterminator=self.LineTerminator,
154 | quoting=self.QuoteStyle)
155 |
156 | writer = csv.DictWriter(file, fieldnames=self.Fields, dialect='flext')
157 |
158 | if self.HeaderLine:
159 | writer.writeheader()
160 |
161 | writer.writerows(FinalizedData)
162 |
163 |
--------------------------------------------------------------------------------
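
A sketch of driving CSVParser directly; the real flow goes through FlexTransform with a full .cfg, but ValidateConfig only consumes the [CSV] section, and Write accepts any open file object. The field names here are illustrative:

    import configparser
    from FlexTransform.SyntaxParser import CSVParser

    config = configparser.ConfigParser()
    config.read_string(
        "[CSV]\n"
        "Fields = indicator,reportedTime\n"
        "Delimiter = \",\"\n"
        "HeaderLine = true\n")

    parser = CSVParser(trace=False)
    parser.ValidateConfig(config)

    rows = [{'indicator': '10.0.0.1', 'reportedTime': '1433000000'}]
    with open('out.csv', 'w', newline='') as f:
        parser.Write(f, rows)   # emits the header line, then one data row
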
/FlexTransform/SyntaxParser/DICTParsers/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Aug 26, 2015
3 |
4 | @author: ahoying
5 | '''
6 |
7 | from .iSightReport import iSightReport
8 |
--------------------------------------------------------------------------------
/FlexTransform/SyntaxParser/DICTParsers/iSightReport.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Aug 26, 2015
3 |
4 | @author: ahoying
5 | '''
6 |
7 | import json
8 | import logging
9 |
10 |
11 | class iSightReport(object):
12 | '''
13 | Parser for iSight JSON Reports
14 | '''
15 |
16 | def __init__(self, trace, tracelist):
17 | '''
18 | Constructor
19 | '''
20 | self.logging = logging.getLogger('FlexTransform.DICTParser.iSightReports')
21 | self.trace = trace
22 | self.tracelist = tracelist
23 |
24 | def Read(self, reportFile):
25 | '''
26 | Read in JSON report file and process into indicators and header data
27 | '''
28 |
29 | jsondoc = json.load(reportFile)
30 |
31 | if "success" in jsondoc and jsondoc["success"] == True:
32 |
33 | if "message" in jsondoc and "report" in jsondoc["message"]:
34 | Report = jsondoc["message"]["report"]
35 |
36 | indicators = []
37 |
38 | if "tagSection" in Report:
39 | indicators = self._extractIndicators(Report.pop("tagSection"))
40 |
41 | if len(indicators) == 0:
42 | raise Exception("NoData", "iSight JSON document did not contain any indicators")
43 |
44 | ParsedData = {}
45 | ParsedData['IndicatorData'] = indicators
46 | ParsedData['DocumentHeaderData'] = Report
47 |
48 | else:
49 | raise Exception("NoData", "iSight JSON document did not contain a report")
50 |
51 | else:
52 | raise Exception("Unparsable", "iSight JSON document could not be parsed, success field not defined or not True")
53 |
54 | return ParsedData
55 |
56 | def Write(self, reportFile, FinalizedData):
57 | raise Exception("MethodNotDefined", "Write")
58 |
59 | def _extractIndicators(self,tagSection):
60 |
61 | indicators = []
62 |
63 | for indicatorType in tagSection:
64 | if indicatorType == "main":
65 | # TODO, extract TTP and other targeting data from the main tag
66 | continue
67 | if indicatorType == "networks":
68 | networkList = tagSection["networks"].pop("network")
69 |
70 | if isinstance(networkList, list):
71 | for network in networkList:
72 | # Fix for error in iSight JSON generation that appends a .0 to the end of the asn numbers
73 | if "asn" in network and network["asn"].endswith(".0"):
74 | network["asn"] = network["asn"].replace(".0", "")
75 | indicators.append(network)
76 | else:
77 | indicators.append(networkList)
78 | if indicatorType == "emails":
79 | emailList = tagSection["emails"].pop("email")
80 |
81 | if isinstance(emailList, list):
82 | for email in emailList:
83 | indicators.append(email)
84 | else:
85 | indicators.append(emailList)
86 | if indicatorType == "files":
87 | fileList = tagSection["files"].pop("file")
88 |
89 | if isinstance(fileList, list):
90 | for file in fileList:
91 | indicators.append(file)
92 | else:
93 | indicators.append(fileList)
94 |
95 | return indicators
96 |
--------------------------------------------------------------------------------
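
The document shape Read() expects can be reconstructed from its checks: a top-level success flag, a report under message, and indicators grouped inside tagSection by networks, emails, and files. A sketch with a hypothetical report; note the .0 suffix on asn is stripped by the fix above:

    import io
    import json
    from FlexTransform.SyntaxParser.DICTParsers import iSightReport

    doc = {
        "success": True,
        "message": {
            "report": {
                "title": "Example report",  # hypothetical header field
                "tagSection": {
                    "networks": {"network": [{"ip": "10.0.0.1", "asn": "64496.0"}]}
                }
            }
        }
    }

    parser = iSightReport(trace=False, tracelist=[])
    parsed = parser.Read(io.StringIO(json.dumps(doc)))
    # parsed['IndicatorData']      == [{'ip': '10.0.0.1', 'asn': '64496'}]
    # parsed['DocumentHeaderData'] == {'title': 'Example report'}
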
/FlexTransform/SyntaxParser/DictionaryParser.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Nov 17, 2014
3 |
4 | @author: ahoying
5 | '''
6 |
7 | import inspect
8 | import json
9 | import logging
10 |
11 | import FlexTransform.SyntaxParser.DICTParsers
12 | from FlexTransform.SyntaxParser.Parser import Parser
13 |
14 |
15 | class DictionaryParser(Parser):
16 | '''
17 | Dictionary (JSON) Syntax Parser
18 | '''
19 |
20 | def __init__(self, trace, tracelist=[]):
21 | '''
22 | Constructor
23 | '''
24 |
25 | super(DictionaryParser, self).__init__(trace, tracelist)
26 | self.ParsedData = {}
27 | self.logging = logging.getLogger('FlexTransform.DictionaryParser')
28 | self.trace = trace
29 | self.tracelist = tracelist
30 | if self.trace:
31 | self.logging.debug("Initialized DictionaryParser with tracelist of {} elements.".format(len(tracelist)))
32 |
33 | self.indicatorsKey = ""
34 | self.AdvancedParser = None
35 |
36 | def LoadAdvancedParser(self, CustomParser):
37 | '''
38 | Returns the Custom Parser Class from the configuration file if it exists
39 | '''
40 | for name, obj in inspect.getmembers(FlexTransform.SyntaxParser.DICTParsers, inspect.isclass):
41 | if name == CustomParser:
42 | return obj()
43 |
44 | def ValidateConfig(self, config):
45 | '''
46 | Validate Dictionary Parser specific configuration options
47 |
48 | The indicatorsKey sets the key in the json document that contains the indicators, or "" if the root of the document contains the indicators
49 | '''
50 | if config.has_section('DICT'):
51 | if config.has_option('DICT', 'IndicatorsKey'):
52 | self.indicatorsKey = config['DICT']['IndicatorsKey']
53 |
54 | if config.has_option('DICT', 'CustomParser'):
55 | CustomParser = config['DICT']['CustomParser']
56 | self.AdvancedParser = self.LoadAdvancedParser(CustomParser)
57 | if self.AdvancedParser is None:
58 | raise Exception('CustomParserNotDefined', 'DICT: ' + CustomParser)
59 |
60 | if config.has_section(CustomParser):
61 | self.AdvancedParser.ValidateConfig(config)
62 |
63 | def Read(self,file,config):
64 | '''
65 | Read file and parse into Transform object
66 | '''
67 |
68 | self.ParsedData = {}
69 |
70 | super(DictionaryParser, self).Read(file, config)
71 |
72 | if self.AdvancedParser:
73 | self.ParsedData = self.AdvancedParser.Read(file)
74 | else:
75 | jsondoc = json.load(file)
76 |
77 | if self.indicatorsKey != "":
78 | if self.indicatorsKey in jsondoc:
79 | indicators = jsondoc.pop(self.indicatorsKey)
80 | self.ParsedData['IndicatorData'] = []
81 |
82 | if isinstance(indicators, list):
83 | for indicator in indicators:
84 | if isinstance(indicator, dict):
85 | self.ParsedData['IndicatorData'].append(indicator)
86 | else:
87 | raise Exception('WrongType', 'Indicator type is not a dictionary: ' + str(indicator))
88 |
89 | elif isinstance(indicators, dict):
90 | self.ParsedData['IndicatorData'].append(indicators)
91 |
92 | else:
93 | raise Exception('WrongType', 'Indicator type is not a list or dictionary: ' + str(indicators))
94 |
95 | # Everything else in the document is considered to be header data
96 | if len(jsondoc):
97 | self.ParsedData['DocumentHeaderData'] = jsondoc
98 |
99 | else:
100 | raise Exception('NoIndicatorData', 'Defined indicator key, ' + self.indicatorsKey + ', does not exist in source file')
101 |
102 | else:
103 | raise Exception('NotYetImplemented', 'Parsing json dictionaries without an indicatorsKey is not currently supported')
104 |
105 | return self.ParsedData
106 |
107 | def Finalize(self, MappedData):
108 | '''
109 | Finalize the formatting of the data before being returned to the caller
110 | '''
111 |
112 | if 'IndicatorData' not in MappedData or len(MappedData['IndicatorData']) == 0:
113 | raise Exception('NoIndicatorData', 'Transformed data has no indicators, nothing to write')
114 |
115 | return self._MappedDataToDict(MappedData)
116 |
117 | def Write(self, file, FinalizedData):
118 | '''
119 | Write the data as json to the file.
120 | '''
121 |
122 | if self.AdvancedParser:
123 | self.AdvancedParser.Write(file, FinalizedData)
124 | else:
125 | json.dump(FinalizedData, file, sort_keys=True, indent=4)
126 |
127 | def _MappedDataToDict(self, MappedData):
128 | '''
129 | Take the Transformed data object, and rebuild the dictionary for the XML parser from the schema data
130 | '''
131 | ParsedData = []
132 |
133 | for rowType in MappedData:
134 | if isinstance(MappedData[rowType],list):
135 | for row in MappedData[rowType]:
136 | if isinstance(row,dict):
137 | DataRow = self._BuildDictRow(row)
138 | ParsedData.append(DataRow)
139 | else:
140 | raise Exception('NoParsableDataFound', "Data isn't in a parsable dictionary format")
141 | elif isinstance(MappedData[rowType],dict):
142 | DataRow = self._BuildDictRow(MappedData[rowType])
143 | ParsedData.append(DataRow)
144 | else:
145 | raise Exception('NoParsableDataFound', "Data isn't in a parsable dictionary format")
146 |
147 | return ParsedData
148 |
149 | def _BuildDictRow(self, row):
150 | '''
151 | Take a row from the MappedData object and return an unflattened dictionary for passing to dict_to_etree
152 | '''
153 | DataRow = {}
154 |
155 | for k, v in row.items():
156 | if k == 'IndicatorType':
157 | # Keep passing the IndicatorType forward with the data. This is somewhat messy, but that way we can use it on write
158 | pass
159 | elif 'Value' in v:
160 | if 'valuemap' in v:
161 | DataRow[v['valuemap']] = v['Value']
162 | else:
163 | DataRow[k] = v['Value']
164 | else:
165 | self.logging.warning("Field %s does not contain a Value entry", k)
166 |
167 | return DataRow
168 |
--------------------------------------------------------------------------------
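
A compact restatement of the IndicatorsKey split performed in Read() (Read itself needs the full FlexTransform configuration object, so this mirrors the logic rather than calling it): the keyed list becomes IndicatorData and everything left over becomes DocumentHeaderData.

    doc = {
        "indicators": [{"ipv4": "10.0.0.1"}, {"fqdn": "bad.example.com"}],
        "reportedTime": "1433000000",
    }

    indicators = doc.pop("indicators")   # the configured IndicatorsKey
    parsed = {"IndicatorData": indicators}
    if doc:                              # remaining keys are header data
        parsed["DocumentHeaderData"] = doc
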
/FlexTransform/SyntaxParser/KVParser.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Oct 15, 2014
3 |
4 | @author: ahoying
5 | """
6 |
7 | import re
8 | import logging
9 | import os
10 | from FlexTransform.SyntaxParser.Parser import Parser
11 |
12 |
13 | class KVParser(Parser):
14 | """
15 | Key/Value Syntax Parser
16 | """
17 |
18 | def __init__(self, trace, tracelist=[]):
19 | """
20 | Constructor
21 | """
22 | super(KVParser, self).__init__(trace, tracelist)
23 | self.SeparatorChar = r"\s"
24 | self.QuoteChar = r"[']"
25 | self.KVSeparator = r"[=]"
26 |
27 | self.ParsedData = {}
28 |
29 | self.logging = logging.getLogger('FlexTransform.KVParser')
30 | self.trace = trace
31 | self.tracelist = tracelist
32 | if self.trace:
33 | self.logging.debug("Initialized KVParser with tracelist of {} elements.".format(len(tracelist)))
34 |
35 | def ValidateConfig(self,config):
36 | """
37 | Validate KV Parser specific configuration options
38 | """
39 | if config.has_section('KEYVALUE'):
40 | if config.has_option('KEYVALUE', 'SeparatorChar'):
41 | self.SeparatorChar = config['KEYVALUE']['SeparatorChar']
42 | if config.has_option('KEYVALUE', 'QuoteChar'):
43 | self.QuoteChar = config['KEYVALUE']['QuoteChar']
44 | if config.has_option('KEYVALUE', 'KVSeparator'):
45 | self.KVSeparator = config['KEYVALUE']['KVSeparator']
46 |
47 | def Read(self, file, config):
48 | """
49 | Read file and parse into Transform object
50 | """
51 | self.ParsedData = {}
52 |
53 | super(KVParser, self).Read(file, config)
54 |
55 | # TODO: Make it clearer what I'm doing here
56 | KVRegex = re.compile(
57 | "([^"+self.KVSeparator.strip("[]")+"]+)"+
58 | self.KVSeparator+"("+self.QuoteChar+"[^"+self.QuoteChar.strip("[]")+"]+"+self.QuoteChar+
59 | "|[^"+self.SeparatorChar.strip("[]")+"]+)(?:"+self.SeparatorChar+"|$)")
60 |
61 | self.ParsedData['IndicatorData'] = []
62 |
63 | for line in file:
64 | try:
65 | if isinstance(line, bytes):
66 | line = line.decode('UTF-8')
67 |
68 | match = KVRegex.findall(line)
69 | DataRow = dict(match)
70 |
71 | if self.QuoteChar:
72 | for k, v in DataRow.items():
73 | DataRow[k] = v.strip(self.QuoteChar.strip("[]"))
74 |
75 | self.ParsedData['IndicatorData'].append(DataRow)
76 | except Exception:
77 | self.logging.warning("Line could not be parsed: %s", line)
78 |
79 | return self.ParsedData
80 |
81 | def Finalize(self, MappedData):
82 | """
83 | Finalize the formatting of the data before being returned to the caller
84 | """
85 |
86 | if 'IndicatorData' not in MappedData or len(MappedData['IndicatorData']) == 0:
87 | raise Exception('NoIndicatorData', 'Transformed data has no indicators, nothing to write')
88 |
89 | FinalizedData = []
90 | for row in MappedData['IndicatorData']:
91 |
92 | indicatorRow = []
93 | # Keep passing the IndicatorType forward with the data. This is somewhat messy,
94 | # but that way we can use it on write
95 | # DataRow['IndicatorType'] = indicator['IndicatorType']
96 |
97 | for field in row:
98 | DataRow = {}
99 | if 'Value' in row[field]:
100 | if 'datatype' in row[field]:
101 | if row[field]['datatype'] == 'enum' or row[field]['datatype'] == 'string':
102 | DataRow[field] = self.QuoteChar.strip("[]") + row[field]['Value'] + self.QuoteChar.strip("[]")
103 | else:
104 | DataRow[field] = row[field]['Value']
105 | else:
106 | DataRow[field] = row[field]['Value']
107 | indicatorRow.append(DataRow)
108 | else:
109 | if field == 'IndicatorType':
110 | self.logging.info("Field IndicatorType does not contain a Value entry")
111 | else:
112 | self.logging.warning("Field %s does not contain a Value entry", field)
113 |
114 | FinalizedData.append(indicatorRow)
115 | return FinalizedData
116 |
117 | def Write(self, file, FinalizedData):
118 | """
119 | Write the data as key/value pairs to the file.
120 | """
121 | if isinstance(file, str):
122 | if os.path.exists(file):
123 | file = open(file, "w")
124 | else:
125 | self.logging.error("%s is not a valid filepath", file)
126 | return
127 | if self.SeparatorChar == r"\s":
128 | separator = " "
129 | else:
130 | separator = self.SeparatorChar
131 |
132 | toWrite = ""
133 | for indicator in FinalizedData:
134 | for row in indicator:
135 | for key, value in row.items():
136 | if value:
137 | toWrite += key + self.KVSeparator.strip("[]") + value + separator
138 | toWrite = toWrite[:-1]
139 | toWrite += '\n'
140 | file.write(toWrite)
141 |
--------------------------------------------------------------------------------
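
A sketch of the key/value regex Read() assembles from the default SeparatorChar, QuoteChar, and KVSeparator; quoted values may contain the separator, unquoted values end at it:

    import re

    separator, quote, kvsep = r"\s", r"[']", r"[=]"
    kv_regex = re.compile(
        "([^" + kvsep.strip("[]") + "]+)" + kvsep +
        "(" + quote + "[^" + quote.strip("[]") + "]+" + quote +
        "|[^" + separator.strip("[]") + "]+)(?:" + separator + "|$)")

    line = "direction='inbound traffic' ipv4=10.0.0.1"
    row = {k: v.strip("'") for k, v in kv_regex.findall(line)}
    # {'direction': 'inbound traffic', 'ipv4': '10.0.0.1'}
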
/FlexTransform/SyntaxParser/Parser.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jul 28, 2014
3 |
4 | @author: ahoying
5 | '''
6 |
7 | import inspect
8 | import logging
9 |
10 | import FlexTransform.SyntaxParser
11 |
12 | ''' Debugging only '''
13 |
14 |
15 | class Parser(object):
16 | '''
17 | Base class for Syntax Parsers
18 |
19 | Implements class methods for finding and loading the appropriate parser based on the configuration file
20 | '''
21 |
22 | # Dictionary of loaded Parser classes
23 | __KnownParsers = {}
24 |
25 | def __init__(self, trace, tracelist=[]):
26 | '''
27 | Constructor
28 | '''
29 | self.logging = logging.getLogger('FlexTransform.Parser')
30 | self.trace = trace
31 | self.tracelist = tracelist
32 | self.traceindex = {}
33 | if self.trace:
34 | for x in self.tracelist:
35 | for v in x["src_fields"]:
36 | self.traceindex[v] = x
37 | for y in x["dst_fields"]:
38 | self.traceindex[y] = x
39 | for w in x["src_IRIs"]:
40 | self.traceindex[w] = x
41 | for z in x["dst_IRIs"]:
42 | self.traceindex[z] = x
43 | self.logging.debug("Initialized Parser with tracelist of {} elements.".format(len(tracelist)))
44 |
45 | @classmethod
46 | def UpdateKnownParsers(cls, ParserName, ParserClass):
47 | cls.__KnownParsers[ParserName] = ParserClass
48 |
49 | @classmethod
50 | def GetParsers(cls):
51 | return cls.__KnownParsers
52 |
53 | @classmethod
54 | def GetParser(cls, ParserName, trace, tracelist=[]):
55 | for name, obj in inspect.getmembers(FlexTransform.SyntaxParser, inspect.isclass):
56 | if name == ParserName:
57 | return obj(trace, tracelist=tracelist)
58 |
59 | # Virtual methods that must be implemented in child classes
60 |
61 | def ValidateConfig(self,config):
62 | '''
63 | Base validation method, must be implemented in subclasses
64 | '''
65 | raise Exception("MethodNotDefined","ValidateConfig")
66 |
67 | def Read(self,file,configurationfile):
68 | '''
69 | Base document read method, must be implemented in subclasses
70 | TODO: need proper subclassing: All subclasses should call this Read method as well, as it contains
71 | code common to all parsers.
72 | '''
73 |
74 | ''' Ensure the derived data is available to all parsers, e.g. to extract information from the file
75 | name or metadata
76 | '''
77 | self.ParsedData = {}
78 | if 'DerivedData' in configurationfile.SchemaConfig:
79 | self.ParsedData['DerivedData'] = {}
80 | for field in configurationfile.SchemaConfig['DerivedData']['fields']:
81 | if 'value' in configurationfile.SchemaConfig['DerivedData']['fields'][field] and configurationfile.SchemaConfig['DerivedData']['fields'][field]['value']:
82 | self.ParsedData['DerivedData'][field] = configurationfile.SchemaConfig['DerivedData']['fields'][field]['value']
83 | if self.trace and field in self.traceindex:
84 | self.logging.debug("[TRACE {}]: Read: value {} copied to ParsedData['DerivedData'] from SchemaConfig".format(field, self.ParsedData['DerivedData'][field]))
85 |
86 | def Write(self, file, FinalizedData):
87 | '''
88 | Base document write method, must be implemented in subclasses
89 | '''
90 | raise Exception("MethodNotDefined", "Write")
91 |
92 | def Finalize(self,data):
93 | '''
94 | Base document finalize method, must be implemented in subclasses
95 | '''
96 | raise Exception("MethodNotDefined", "Finalize")
--------------------------------------------------------------------------------
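
The trace plumbing in the constructor expects each tracelist element to carry four keys, and indexes the element under every field and IRI it names. A sketch using an IRI drawn from the schema definitions below:

    from FlexTransform.SyntaxParser import KVParser

    entry = {
        "src_fields": ["ipv4"],
        "dst_fields": ["indicator"],
        "src_IRIs": ["http://www.anl.gov/cfm/transform.owl#IPv4AddressIndicatorValueSemanticComponent"],
        "dst_IRIs": [],
    }
    parser = KVParser(trace=True, tracelist=[entry])
    # parser.traceindex maps 'ipv4', 'indicator', and the IRI back to entry
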
/FlexTransform/SyntaxParser/XMLParsers/.gitignore:
--------------------------------------------------------------------------------
1 | /__pycache__/
2 |
--------------------------------------------------------------------------------
/FlexTransform/SyntaxParser/XMLParsers/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jul 27, 2014
3 |
4 | @author: ahoying
5 | '''
6 |
7 | #from ..XMLParser import XMLParser
8 | from .CFM13 import CFM13
9 | from .CFM20Alert import CFM20Alert
10 | from .STIX import STIX
11 |
--------------------------------------------------------------------------------
/FlexTransform/SyntaxParser/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jul 27, 2014
3 |
4 | @author: ahoying
5 | '''
6 |
7 | from .CSVParser import CSVParser
8 | from .DictionaryParser import DictionaryParser
9 | from .KVParser import KVParser
10 | from .Parser import Parser
11 | from .XMLParser import XMLParser
12 |
13 | # Map Parser types to Parser class names
14 | Parser.UpdateKnownParsers('XML', 'XMLParser')
15 | Parser.UpdateKnownParsers('KEYVALUE', 'KVParser')
16 | Parser.UpdateKnownParsers('DICT', 'DictionaryParser')
17 | Parser.UpdateKnownParsers('CSV', 'CSVParser')
18 |
--------------------------------------------------------------------------------
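
The two classmethods compose: UpdateKnownParsers maps a SYNTAX FileParser value to a class name, and GetParser instantiates that class by inspecting the package. A sketch:

    from FlexTransform.SyntaxParser import Parser

    class_name = Parser.GetParsers()['CSV']              # -> 'CSVParser'
    parser = Parser.GetParser(class_name, trace=False)   # -> CSVParser instance
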
/FlexTransform/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jul 27, 2014
3 |
4 | @author: ahoying
5 | '''
6 |
7 | from FlexTransform import FlexTransform
--------------------------------------------------------------------------------
/FlexTransform/resources/cybox.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/cybox.zip
--------------------------------------------------------------------------------
/FlexTransform/resources/images/dev-figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/dev-figure1.png
--------------------------------------------------------------------------------
/FlexTransform/resources/images/figure1a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/figure1a.png
--------------------------------------------------------------------------------
/FlexTransform/resources/images/figure1b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/figure1b.png
--------------------------------------------------------------------------------
/FlexTransform/resources/images/figure2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/figure2.png
--------------------------------------------------------------------------------
/FlexTransform/resources/images/figure3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/figure3.png
--------------------------------------------------------------------------------
/FlexTransform/resources/images/figure4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/images/figure4.png
--------------------------------------------------------------------------------
/FlexTransform/resources/ramrod.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/ramrod.zip
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/MBL.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE, DICT
3 | FileParser = DICT
4 |
5 | # KEYVALUE Options
6 | [KEYVALUE]
7 | # SeparatorChar = ' '
8 | QuoteChar = [']
9 | KVSeparator = [=]
10 |
11 | # DICT Options
12 | [DICT]
13 | IndicatorsKey = indicators
14 |
15 | # SCHEMA Definition
16 |
17 | [SCHEMA]
18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
19 | PrimarySchemaConfiguration = resources/schemaDefinitions/MBL.json
20 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition
21 | # If multiple files are required, separate them with ;
22 | #SiteSchemaConfiguration =
23 |
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/cfm13.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE, DICT
3 | FileParser = XML
4 |
5 | # XML Options
6 | [XML]
7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema
8 | ValidateSchema = no
9 | # SchemaFile must be set if ValidateSchema is yes
10 | # SchemaFile = resources/schemas/CFMMessage13.xsd
11 | # CustomParser is used to load custom XML parsing classes for complex XML documents
12 | CustomParser = CFM13
13 |
14 | # JSON Options (none currently defined)
15 | [JSON]
16 |
17 | # CSV Options
18 | [CSV]
19 | # HeaderLine defines if the CSV file has the field names in the first non-commented row of the file
20 | # HeaderLine defaults to no
21 | HeaderLine = no
22 | # QuotesOptional defines if every field in the CSV file has to be enclosed in quotes.
23 | # QuotesOptional defaults to yes
24 | QuotesOptional = no
25 | # SeparatorChar defines the character or characters that separate the fields in the file.
26 | # SeparatorChar defaults to ,
27 | SeparatorChar = ,
28 | # StripSpaces defines if spaces before or after the separator should be stripped
29 | # StripSpaces defaults to yes
30 | StripSpaces = yes
31 | # FieldNames have to be defined if HeaderLine is set to no. The FieldNames map to the SCHEMA definition below
32 | FieldNames = ip,host,etc
33 |
34 | # TSV Options
35 | [TSV]
36 | HeaderLine = no
37 | FieldNames = ip,host,etc
38 |
39 | # REGEX Options
40 | [REGEX]
41 | Regex = (\S*)\s+(\S*)\s+\d+\s+([0-9.]+)
42 | FieldNames = host,service,ip
43 |
44 | # KEYVALUE Options
45 | [KEYVALUE]
46 | SeparatorChar = \s
47 | QuoteChar = [']
48 | KVSeparator = [=]
49 |
50 | # SCHEMA Definition
51 |
52 | [SCHEMA]
53 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
54 | PrimarySchemaConfiguration = resources/schemaDefinitions/cfm13.json
55 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition
56 | # If multiple files are required, separate them with ;
57 | SiteSchemaConfiguration = resources/schemaDefinitions/cfm13-site.json
58 |
59 | # Metadata can be included with certain files. Set MetadataSchemaConfiguration to the json file that defines the metadata schema
60 | MetadataSchemaConfiguration = resources/schemaDefinitions/cfm-metadata.json
61 |
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/cfm20alert.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE
3 | FileParser = XML
4 |
5 | # XML Options
6 | [XML]
7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema
8 | ValidateSchema = yes
9 | # SchemaFile must be set if ValidateSchema is yes
10 | SchemaFile = resources/schemas/CFMAlert.xsd
11 | # CustomParser is used to load custom XML parsing classes for complex XML documents
12 | CustomParser = CFM20Alert
13 |
14 | # SCHEMA Definition
15 |
16 | [SCHEMA]
17 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
18 | PrimarySchemaConfiguration = resources/schemaDefinitions/cfm20alert.json
19 |
20 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition
21 | # If multiple files are required, separate them with ;
22 | # SiteSchemaConfiguration =
23 |
24 | # Metadata can be included with certain files. Set MetadataSchemaConfiguration to the json file that defines the metadata schema
25 | MetadataSchemaConfiguration = resources/schemaDefinitions/cfm-metadata.json
26 |
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/crisp_json.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | FileParser = DICT
3 |
4 | [DICT]
5 | IndicatorsKey = indicators
6 |
7 | # SCHEMA Definition
8 | [SCHEMA]
9 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
10 | PrimarySchemaConfiguration = resources/schemaDefinitions/crisp.json
11 |
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/csv_example.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | FileParser = CSV
3 |
4 | # CSV Options
5 | [CSV]
6 | Fields = action1,indicator,reportedTime,duration1,firewalls,origin,directSource,majorTags,sensitivity,reconAllowed,restriction
7 | Delimiter = ","
8 | QuoteChar = "
9 | EscapeChar = \\
10 | HeaderLine = false
11 | DoubleQuote = false
12 | QuoteStyle = Minimal
13 |
14 | # SCHEMA Definition
15 | [SCHEMA]
16 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
17 | PrimarySchemaConfiguration = resources/schemaDefinitions/lqmtools.json
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/doe_em.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | FileParser = CSV
3 |
4 | # CSV Options
5 | [CSV]
6 | Fields = indicatorType,indicator,reason,detectedTime
7 | Delimiter = ","
8 | QuoteChar = "
9 | EscapeChar = \\
10 | HeaderLine = false
11 | DoubleQuote = false
12 | QuoteStyle = Minimal
13 |
14 | # SCHEMA Definition
15 | [SCHEMA]
16 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
17 | PrimarySchemaConfiguration = resources/schemaDefinitions/doe-em.json
18 |
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/flextext.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | FileParser = CSV
3 |
4 | # CSV Options
5 | [CSV]
6 | Fields = Indicator,EmailSenderAddress
7 | Delimiter = "|"
8 | RecordDelim = "\r\n"
9 | QuoteChar = "
10 | EscapeChar = \\
11 | HeaderLine = false
12 | DoubleQuote = false
13 | QuoteStyle = Minimal
14 |
15 | # SCHEMA Definition
16 | [SCHEMA]
17 | SchemaConfigurationType = Inline
18 | SupportedIndicatorTypes = IPv4-Address-Block
19 | TypeMappings = { "IPv4-Address-Block": [ { "Indicator": "*" } ]}
20 | Indicator_OntologyMapping = IPv4AddressIndicatorValueSemanticComponent
21 | EmailSenderAddress_OntologyMapping = EmailSenderAddressSemanticConcept
22 |
23 |
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/iid_combined_recent.cfg:
--------------------------------------------------------------------------------
1 | # IID Combined Detected - Recent, standard
2 |
3 | [SYNTAX]
4 | FileParser = CSV
5 |
6 | # CSV Options
7 | [CSV]
8 | Fields = sid,uri,target,time,baddom,domain,description1
9 | Delimiter = ","
10 | QuoteChar = "
11 | EscapeChar = \\
12 | HeaderLine = false
13 | DoubleQuote = false
14 | QuoteStyle = Minimal
15 |
16 | # SCHEMA Definition
17 | [SCHEMA]
18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
19 | PrimarySchemaConfiguration = resources/schemaDefinitions/iid.json
20 | SiteSchemaConfiguration = resources/schemaDefinitions/iid-combined-recent.json
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/iid_host_active.cfg:
--------------------------------------------------------------------------------
1 | # IID Bad Hostname - Active, standard
2 |
3 | [SYNTAX]
4 | FileParser = CSV
5 |
6 | # CSV Options
7 | [CSV]
8 | Fields = domain,time,description1,description2
9 | Delimiter = ","
10 | QuoteChar = "
11 | EscapeChar = \\
12 | HeaderLine = false
13 | DoubleQuote = false
14 | QuoteStyle = Minimal
15 |
16 | # SCHEMA Definition
17 | [SCHEMA]
18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
19 | PrimarySchemaConfiguration = resources/schemaDefinitions/iid.json
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/iid_host_dynamic.cfg:
--------------------------------------------------------------------------------
1 | # IID Bad Hostname - Dynamic, standard
2 |
3 | [SYNTAX]
4 | FileParser = CSV
5 |
6 | # CSV Options
7 | [CSV]
8 | Fields = domain,time,description1,description2
9 | Delimiter = ","
10 | QuoteChar = "
11 | EscapeChar = \\
12 | HeaderLine = false
13 | DoubleQuote = false
14 | QuoteStyle = Minimal
15 |
16 | # SCHEMA Definition
17 | [SCHEMA]
18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
19 | PrimarySchemaConfiguration = resources/schemaDefinitions/iid.json
20 | SiteSchemaConfiguration = resources/schemaDefinitions/iid-host-dynamic.json
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/iid_ipv4_recent.cfg:
--------------------------------------------------------------------------------
1 | # IID Bad IP - Recent, standard
2 |
3 | [SYNTAX]
4 | FileParser = CSV
5 |
6 | # CSV Options
7 | [CSV]
8 | Fields = ipv4,time,description1,description2
9 | Delimiter = ","
10 | QuoteChar = "
11 | EscapeChar = \\
12 | HeaderLine = false
13 | DoubleQuote = false
14 | QuoteStyle = Minimal
15 |
16 | # SCHEMA Definition
17 | [SCHEMA]
18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
19 | PrimarySchemaConfiguration = resources/schemaDefinitions/iid.json
20 | SiteSchemaConfiguration = resources/schemaDefinitions/iid-ipv4-recent.json
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/isight.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | FileParser = DICT
3 |
4 | [DICT]
5 | IndicatorsKey = indicators
6 |
7 | # SCHEMA Definition
8 | [SCHEMA]
9 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
10 | PrimarySchemaConfiguration = resources/schemaDefinitions/isight.json
11 |
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/keyvalue.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE
3 | FileParser = KEYVALUE
4 |
5 | # KEYVALUE Options
6 | [KEYVALUE]
7 | SeparatorChar = &
8 | QuoteChar = [']
9 | KVSeparator = [=]
10 |
11 | # SCHEMA Definition
12 |
13 | [SCHEMA]
14 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
15 | PrimarySchemaConfiguration = resources/schemaDefinitions/keyvalue.json
16 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition
17 | # If multiple files are required, separate them with ;
18 | #SiteSchemaConfiguration =
19 |
--------------------------------------------------------------------------------
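
A sketch of wiring a configuration like the one above to its parser: the SYNTAX FileParser value keys into the Parser registry, and KVParser.ValidateConfig then consumes the [KEYVALUE] section (run from the FlexTransform package directory so the relative path resolves):

    import configparser
    from FlexTransform.SyntaxParser import Parser

    config = configparser.ConfigParser()
    config.read('resources/sampleConfigurations/keyvalue.cfg')

    name = Parser.GetParsers()[config['SYNTAX']['FileParser']]  # 'KVParser'
    parser = Parser.GetParser(name, trace=False)
    parser.ValidateConfig(config)
    # parser.SeparatorChar == '&', parser.KVSeparator == '[=]'
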
/FlexTransform/resources/sampleConfigurations/lqmtools.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | FileParser = DICT
3 |
4 | # DICT Options (none currently defined)
5 | [DICT]
6 | IndicatorsKey = indicators
7 | # SCHEMA Definition
8 |
9 | [SCHEMA]
10 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
11 | PrimarySchemaConfiguration = resources/schemaDefinitions/lqmtools.json
12 |
13 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition
14 | # If multiple files are required, separate them with ;
15 | #SiteSchemaConfiguration =
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/stix_acs30.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE
3 | FileParser = XML
4 |
5 | # FilenameExtraction can either be None or a dictionary of regular expression, name pairs that will be referenced
6 | # by the schema parser.
7 | # In the case of CFM1.3, we expect the UUID to use for the overall document to be in the filename, which
8 | # has the structure: _.Alert.C
9 | FilenameExtraction = None
10 | # FileMetadataExtraction can either be None or a dictionary mapping names to python functions:
11 | FiledataExtraction = None
12 |
13 | # XML Options
14 | [XML]
15 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema
16 | ValidateSchema = no
17 | # CustomParser is used to load custom XML parsing classes for complex XML documents
18 | CustomParser = STIX
19 |
20 | [STIX]
21 | STIXNamespace = http://www.us-cert.gov/essa
22 | STIXAlias = isa
23 | # Change depending on if this is testing or production. Testing prefix is guide.999191., production prefix is guide.19001.
24 | # STIXIDPrefix = guide.999191.
25 | STIXIDPrefix = guide.19001.
26 | # The version of ACS to use for the marking structure. Choices are: None (TLP), 2.1 or 3.0
27 | # ACSVersion = 3.0
28 |
29 | [SCHEMA]
30 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
31 | PrimarySchemaConfiguration = resources/schemaDefinitions/stix.json
32 | SiteSchemaConfiguration = resources/schemaDefinitions/stix-acs30.json
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/stix_essa.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE
3 | FileParser = XML
4 |
5 | # XML Options
6 | [XML]
7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema
8 | ValidateSchema = no
9 | # CustomParser is used to load custom XML parsing classes for complex XML documents
10 | CustomParser = STIX
11 |
12 | [STIX]
13 | STIXNamespace = http://www.us-cert.gov/essa
14 | STIXAlias = isa
15 | # Change depending on if this is testing or production. Testing prefix is guide.999191., production prefix is guide.19001.
16 | # STIXIDPrefix = guide.999191.
17 | STIXIDPrefix = guide.19001.
18 |
19 | [SCHEMA]
20 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
21 | PrimarySchemaConfiguration = resources/schemaDefinitions/stix.json
22 | SiteSchemaConfiguration = resources/schemaDefinitions/stix-essa.json
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/stix_tlp.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE
3 | FileParser = XML
4 |
5 | # XML Options
6 | [XML]
7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema
8 | ValidateSchema = no
9 | # CustomParser is used to load custom XML parsing classes for complex XML documents
10 | CustomParser = STIX
11 |
12 | [STIX]
13 | STIXNamespace = http://www.anl.gov/cfm/stix
14 | STIXAlias = CFM
15 |
16 | [SCHEMA]
17 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
18 | PrimarySchemaConfiguration = resources/schemaDefinitions/stix.json
19 | SiteSchemaConfiguration = resources/schemaDefinitions/stix-tlp.json
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/stix_tlp2.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE
3 | FileParser = XML
4 |
5 | # XML Options
6 | [XML]
7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema
8 | ValidateSchema = no
9 | # CustomParser is used to load custom XML parsing classes for complex XML documents
10 | CustomParser = STIX
11 |
12 | [STIX]
13 | STIXNamespace = http://www.us-cert.gov/essa
14 | STIXAlias = isa
15 | STIXIDPrefix = guide.19001.
16 |
17 | [SCHEMA]
18 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
19 | PrimarySchemaConfiguration = resources/schemaDefinitions/stix.json
20 | SiteSchemaConfiguration = resources/schemaDefinitions/stix-tlp2.json
--------------------------------------------------------------------------------
/FlexTransform/resources/sampleConfigurations/twitter.cfg:
--------------------------------------------------------------------------------
1 | # Twitter
2 |
3 | [SYNTAX]
4 | FileParser = DICT
5 |
6 | [DICT]
7 | IndicatorsKey = indicators
8 |
9 | [SCHEMA]
10 | PrimarySchemaConfiguration = resources/schemaDefinitions/twitter.json
--------------------------------------------------------------------------------
/FlexTransform/resources/schemaDefinitions/cfm-metadata.json:
--------------------------------------------------------------------------------
1 | {
2 | "DocumentMetaData": {
3 | "fields": {
4 | "DataSensitivity": {
5 | "description": "OUO marking",
6 | "datatype": "enum",
7 | "required": true,
8 | "defaultValue": "noSensitivity",
9 | "ontologyMappingType": "enum",
10 | "enumValues": {
11 | "ouo": {
12 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#OUOSemanticConcept"
13 | },
14 | "noSensitivity": {
15 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#NotOUOSemanticConcept"
16 | }
17 | }
18 | },
19 | "FileName": {
20 | "description": "File name the metadata is attached to",
21 | "datatype": "string",
22 | "required": true,
23 | "ontologyMappingType": "simple",
24 | "ontologyMapping": ""
25 | },
26 | "PayloadFormat": {
27 | "description": "Schema format of the data",
28 | "datatype": "enum",
29 | "required": true,
30 | "ontologyMappingType": "enum",
31 | "enumValues": {
32 | "STIX": {
33 | "ontologyMapping": ""
34 | },
35 | "Cfm13Alert": {
36 | "ontologyMapping": ""
37 | },
38 | "Cfm20Alert": {
39 | "ontologyMapping": ""
40 | }
41 | }
42 | },
43 | "PayloadType": {
44 | "description": "CFM Payload type marking",
45 | "datatype": "enum",
46 | "required": true,
47 | "defaultValue": "Alert",
48 | "ontologyMappingType": "enum",
49 | "enumValues": {
50 | "Alert": {
51 | "ontologyMapping": ""
52 | }
53 | }
54 | },
55 | "ReconPolicy": {
56 | "description": "Is additional recon allowed on the indicator",
57 | "datatype": "enum",
58 | "required": true,
59 | "defaultValue": "Touch",
60 | "ontologyMappingType": "enum",
61 | "enumValues": {
62 | "Touch": {
63 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#ReconAllowedSemanticConcept"
64 | },
65 | "NoTouch": {
66 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#ReconNotAllowedSemanticConcept"
67 | }
68 | }
69 | },
70 | "SendingSite": {
71 | "description": "Site name that submitted the report",
72 | "datatype": "string",
73 | "required": true,
74 | "ontologyMappingType": "simple",
75 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#SiteAbbreviationSemanticConcept"
76 | },
77 | "SentTimestamp": {
78 | "description": "The timestamp when the file was uploaded",
79 | "datatype": "datetime",
80 | "dateTimeFormat": "unixtime",
81 | "required": true,
82 | "ontologyMappingType": "simple",
83 | "ontologyMapping": ""
84 | },
85 | "UploadID": {
86 | "description": "The UUID for the uploaded document",
87 | "datatype": "string",
88 | "required": true,
89 | "ontologyMappingType": "simple",
90 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#UniqueFileIdentifierSemanticConcept"
91 | },
92 | "DownloadElementExtendedAttribute_Field": {
93 | "description": "Extended information type",
94 | "valuemap": "DownloadElementExtendedAttribute;Field",
95 | "datatype": "enum",
96 | "required": false,
97 | "ontologyMappingType": "enum",
98 | "enumValues": {
99 | "origFileName": {
100 | "ontologyMapping": ""
101 | },
102 | "orig1.3Filename": {
103 | "ontologyMapping": ""
104 | },
105 | "comment": {
106 | "ontologyMapping": ""
107 | }
108 | }
109 | },
110 | "DownloadElementExtendedAttribute_Value": {
111 | "description": "The value for the extended data",
112 | "valuemap": "DownloadElementExtendedAttribute;Value",
113 | "datatype": "string",
114 | "defaultValue": "NoValue",
115 | "requiredIfReferenceField": "DownloadElementExtendedAttribute_Field",
116 | "requiredIfReferenceValuesMatch": [ "*" ],
117 | "ontologyMappingType": "referencedEnum",
118 | "ontologyEnumField": "DownloadElementExtendedAttribute_Field",
119 | "ontologyMappingEnumValues": {
120 | "origFileName": {
121 | "ontologyMapping": ""
122 | },
123 | "orig1.3Filename": {
124 | "ontologyMapping": ""
125 | },
126 | "comment": {
127 | "ontologyMapping": ""
128 | }
129 | }
130 | }
131 | }
132 | }
133 | }
--------------------------------------------------------------------------------
/FlexTransform/resources/schemaDefinitions/cfm13-site.json:
--------------------------------------------------------------------------------
1 | {
2 | "DocumentHeaderData": {
3 | "fields": {
4 | "analyzerid": {
5 | "defaultValue": "TEST"
6 | },
7 | "location": {
8 | "defaultValue": "TEST"
9 | },
10 | "contact_name": {
11 | "defaultValue": "Test User"
12 | },
13 | "contact_phone": {
14 | "defaultValue": "555-555-1212",
15 | "required": true
16 | },
17 | "contact_email": {
18 | "defaultValue": "test@test.int",
19 | "required": true
20 | }
21 | }
22 | }
23 | }
--------------------------------------------------------------------------------
/FlexTransform/resources/schemaDefinitions/doe-em.json:
--------------------------------------------------------------------------------
1 | {
2 | "IndicatorData": {
3 | "types": {
4 | "IPv4-Address-Block": [ {"indicatorType": "IPv4 Address"} ],
5 | "IPv6-Address-Block": [ {"indicatorType": "IPv6 Address"} ],
6 | "DNS-Hostname-Block": [ {"indicatorType": "DNSHostName"}, {"indicatorType": "Domain"} , {"indicatorType": "DNSDomainName"} ],
7 | "URL-Block": [ {"indicatorType": "URL"}, {"indicatorType": "URI"} ],
8 | "Malicious-File-Hash": [ {"indicatorType": "FileMD5Hash"}, {"indicatorType": "FileSHA1Hash"}]
9 | },
10 | "fields": {
11 | "indicator": {
12 | "description": "The value to be acted upon (e.g. ip, domain name, URL)",
13 | "datatype": "string",
14 | "required": true,
15 | "ontologyMappingType": "simple",
16 | "ontologyMapping": ""
17 | },
18 | "indicatorType": {
19 | "description": "A type name that informs how to interpret the indicator (e.g. ipv4, emailAddress) (enum)",
20 | "datatype": "enum",
21 | "dependsOn": "indicator",
22 | "mapOntologyToElement": "indicator",
23 | "required": false,
24 | "ontologyMappingType": "enum",
25 | "enumValues": {
26 | "IPv6 Address": {
27 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv6AddressIndicatorValueSemanticComponent"
28 | },
29 | "IPv4 Address": {
30 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv4AddressIndicatorValueSemanticComponent"
31 | },
32 | "URL": {
33 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent"
34 | },
35 | "URI": {
36 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent"
37 | },
38 | "DNSDomainName": {
39 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent"
40 | },
41 | "Domain": {
42 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent"
43 | },
44 | "DNSHostName": {
45 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent"
46 | },
47 | "FileMD5Hash": {
48 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#MD5FilehashIndicatorValueSemanticComponent"
49 | },
50 | "FileSHA1Hash": {
51 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#SHA1FilehashIndicatorValueSemanticComponent"
52 | }
53 | }
54 | },
55 | "reason": {
56 | "description": "Description associated with indicator",
57 | "datatype": "string",
58 | "required": false,
59 | "defaultValue": "noValue",
60 | "ontologyMappingType": "simple",
61 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IndicatorDescriptionSemanticConcept"
62 | },
63 | "detectedTime": {
64 | "description": "Time the report was generated",
65 | "datatype": "datetime",
66 | "dateTimeFormat": " YYYY-MM-DD HH:mm:ss",
67 | "dateTimeFormatAlternate": ["YYYY-MM-DDTHH:mm:ss","YYYY-MM-DDTHH:mm:ssZ", "YYYY-MM-DD"],
68 | "dateTimezoneDefault": "US/Pacific",
69 | "required": false,
70 | "defaultValue": "&now()",
71 | "ontologyMappingType": "simple",
72 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IndicatorGeneratedTimeSemanticConcept"
73 | }
74 | }
75 | }
76 | }
--------------------------------------------------------------------------------
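
The detectedTime field above chains a primary dateTimeFormat, several dateTimeFormatAlternate patterns, a dateTimezoneDefault, and a "&now()" fallback. A sketch of that resolution order using arrow (already a dependency of the test suite); the function name and the fallback-to-now behaviour are illustrative assumptions, not the parser's own code:

import arrow

# Primary dateTimeFormat first, then each dateTimeFormatAlternate in order.
FORMATS = ['YYYY-MM-DD HH:mm:ss', 'YYYY-MM-DDTHH:mm:ss',
           'YYYY-MM-DDTHH:mm:ssZ', 'YYYY-MM-DD']

def parse_detected_time(value, default_tz='US/Pacific'):
    for fmt in FORMATS:
        try:
            if 'Z' in fmt:
                # The pattern carries its own UTC offset; trust it.
                return arrow.get(value.strip(), fmt)
            # Naive patterns are pinned to dateTimezoneDefault.
            return arrow.get(value.strip(), fmt, tzinfo=default_tz)
        except (arrow.parser.ParserError, ValueError):
            continue
    # Stands in for the "&now()" defaultValue when nothing parses.
    return arrow.now(default_tz)

print(parse_detected_time('2016-03-23 16:45:05'))
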
/FlexTransform/resources/schemaDefinitions/iid-combined-recent.json:
--------------------------------------------------------------------------------
1 | {
2 | "IndicatorData": {
3 | "types": {
4 | "DNS-Hostname-Block": null
5 | },
6 | "fields": {
7 | "domain": {
8 | "required": false,
9 | "ontologyMapping": ""
10 | },
11 | "uri": {
12 | "required": true
13 | },
14 | "combined_description_comment": {
15 | "outputFormat": "[description1], [target]"
16 | },
17 | "target": {
18 | "required": true
19 | }
20 | }
21 | }
22 | }
--------------------------------------------------------------------------------
/FlexTransform/resources/schemaDefinitions/iid-host-dynamic.json:
--------------------------------------------------------------------------------
1 | {
2 | "IndicatorData": {
3 | "fields": {
4 | "time": {
5 | "ontologyMapping": ""
6 | },
7 | "durationCalculated": {
8 | "description": "how long the action is supposed to be left in place",
9 | "datatype": "int",
10 | "defaultValue": "&calculate_duration(time)",
11 | "required": false,
12 | "requiredIfReferenceField": "time",
13 | "requiredIfReferenceValuesMatch": ["*"],
14 | "ontologyMappingType": "simple",
15 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#ActionDurationSemanticConcept"
16 | }
17 | }
18 | }
19 | }
--------------------------------------------------------------------------------
/FlexTransform/resources/schemaDefinitions/iid-ipv4-recent.json:
--------------------------------------------------------------------------------
1 | {
2 | "IndicatorData": {
3 | "fields": {
4 | "domain": {
5 | "required": false
6 | },
7 | "ipv4": {
8 | "required": true
9 | }
10 | }
11 | }
12 | }
--------------------------------------------------------------------------------
/FlexTransform/resources/schemaDefinitions/iid.json:
--------------------------------------------------------------------------------
1 | {
2 | "IndicatorData": {
3 | "types": {
4 | "DNS-Hostname-Block": [ {"domain": "*"}],
5 | "IPv4-Address-Block": [ {"ipv4": "*"} ],
6 | "URL-Block": [ {"uri": "*"} ]
7 | },
8 | "fields": {
9 | "domain": {
10 | "description": "The domain to be acted upon",
11 | "datatype": "string",
12 | "required": true,
13 | "ontologyMappingType": "simple",
14 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent"
15 | },
16 | "ipv4": {
17 | "description": "The IPv4 to be acted upon",
18 | "datatype": "string",
19 | "required": false,
20 | "ontologyMappingType": "simple",
21 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv4AddressIndicatorValueSemanticComponent"
22 | },
23 | "uri": {
24 | "description": "The URL to be acted upon",
25 | "datatype": "string",
26 | "required": false,
27 | "ontologyMappingType": "simple",
28 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent"
29 | },
30 | "time": {
31 | "description": "Time value associated with indicator",
32 | "datatype": "datetime",
33 | "dateTimeFormat": "YYYYMMDDTHHmmss",
34 | "dateTimeFormatAlternate": [" YYYYMMDDTHHmmss","YYYYMMDDTHHmmss ", "YYYYMMDDTHHmmssZ"],
35 | "required": false,
36 | "ontologyMappingType": "simple",
37 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#TimeOfDetectionSemanticConcept"
38 | },
39 | "description1": {
40 | "description": "Description associated with indicator",
41 | "datatype": "string",
42 | "required": true,
43 | "defaultValue": "noValue",
44 | "ontologyMappingType": "simple",
45 | "ontologyMapping": ""
46 | },
47 | "description2": {
48 | "description": "Description associated with indicator",
49 | "datatype": "string",
50 | "required": true,
51 | "defaultValue": "noValue",
52 | "ontologyMappingType": "simple",
53 | "ontologyMapping": ""
54 | },
55 | "combined_description_comment": {
56 | "description": "Combined [description1] & [description2] fields",
57 | "datatype": "string",
58 | "required": true,
59 | "outputFormat": "[description1], [description2]",
60 | "ontologyMappingType": "simple",
61 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IndicatorDescriptionSemanticConcept"
62 | },
63 | "sid": {
64 | "description": "ID field from IID",
65 | "datatype": "string",
66 | "required": false,
67 | "ontologyMappingType": "simple",
68 | "ontologyMapping": ""
69 | },
70 | "target": {
71 | "description": "Target field from IID",
72 | "datatype": "string",
73 | "required": false,
74 | "defaultValue": "noValue",
75 | "ontologyMappingType": "simple",
76 | "ontologyMapping": ""
77 | },
78 | "baddom": {
79 | "description": "Bad Dom, usually empty",
80 | "datatype": "string",
81 | "required": false,
82 | "defaultValue": "noValue",
83 | "ontologyMappingType": "simple",
84 | "ontologyMapping": ""
85 | }
86 | }
87 | }
88 | }
--------------------------------------------------------------------------------
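
The combined_description_comment field above is synthesized rather than parsed: its outputFormat string splices other fields together. A small sketch of that bracket substitution, standing in for the real expansion inside FlexTransform's schema parser:

import re

def render_output_format(template, fields):
    # Replace [name] references with parsed field values; unknown names
    # are left in place so problems stay visible.
    return re.sub(r'\[(\w+)\]',
                  lambda m: str(fields.get(m.group(1), m.group(0))),
                  template)

row = {'description1': 'MALWARE_C2', 'description2': 'observed beaconing'}
print(render_output_format('[description1], [description2]', row))
# -> MALWARE_C2, observed beaconing
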
/FlexTransform/resources/schemaDefinitions/stix-essa.json:
--------------------------------------------------------------------------------
1 | {
2 | "DocumentHeaderData": {
3 | "fields": {
4 | "handling_markingstructures": {
5 | "required": false,
6 | "multiple": true,
7 | "ontologyMappingType": "none",
8 | "subfields": {
9 | "handling_markingstructures_xsitype": {"required": true, "primaryKey": true},
10 | "handling_markingstructures_identifier": {"required": false, "primaryKeyMatch": "edh2cyberMarking:ISAMarkingsType"},
11 | "handling_markingstructures_createdatetime": {"required": false, "primaryKeyMatch": "edh2cyberMarking:ISAMarkingsType"},
12 | "handling_markingstructures_responsibleentity": {"required": false, "primaryKeyMatch": "edh2cyberMarking:ISAMarkingsType"},
13 | "handling_markingstructures_isamversion": {"required": false, "primaryKeyMatch": "edh2cyberMarking:ISAMarkingsType"},
14 | "handling_markingstructures_isamversion2": {"required": false, "primaryKeyMatch": "edh2cyberMarkingAssert:ISAMarkingsAssertionType"},
15 | "handling_markingstructures_mostrestrictive": {"required": false, "primaryKeyMatch": "edh2cyberMarkingAssert:ISAMarkingsAssertionType"},
16 | "handling_markingstructures_policyref": {"required": false, "primaryKeyMatch": "edh2cyberMarkingAssert:ISAMarkingsAssertionType"},
17 | "handling_markingstructures_controlset": {"required": false, "primaryKeyMatch": "edh2cyberMarkingAssert:ISAMarkingsAssertionType"}
18 | },
19 | "defaultFields": {
20 | "handling_markingstructures_xsitype": [ "edh2cyberMarking:ISAMarkingsType", "edh2cyberMarkingAssert:ISAMarkingsAssertionType" ],
21 | "handling_markingstructures_identifier": "",
22 | "handling_markingstructures_responsibleentity": "CUST:USA.DOE",
23 | "handling_markingstructures_createdatetime": "&stix_now()",
24 | "handling_markingstructures_isamversion": "1.0",
25 | "handling_markingstructures_isamversion2": "1.0",
26 | "handling_markingstructures_mostrestrictive": "true",
27 | "handling_markingstructures_policyref": "urn:isa:policy:acs:ns:v2.0?privdefault=permit",
28 | "handling_markingstructures_controlset": "CLS:U CUI:FOUO"
29 | }
30 | },
31 | "handling_markingstructures_identifier": {
32 | "description": "Single unique identifier associated with the resource.",
33 | "valuemap": "handling;marking_structures;identifier",
34 | "datatype": "string",
35 | "defaultValue": "",
36 | "required": false,
37 | "memberof": "handling_markingstructures",
38 | "ontologyMappingType": "simple",
39 | "ontologyMapping": ""
40 | },
41 | "handling_markingstructures_createdatetime": {
42 | "description": "The creation date and time of the associated resource.",
43 | "valuemap": "handling;marking_structures;createdatetime",
44 | "datatype": "datetime",
45 | "dateTimeFormat": "YYYY-MM-DDTHH:mm:ssZZ",
46 | "dateTimeFormatAlternate": ["YYYY-MM-DDTHH:mm:ss", "YYYY-MM-DDTHH:mm:ssZ"],
47 | "defaultValue": "&stix_now()",
48 | "required": false,
49 | "memberof": "handling_markingstructures",
50 | "ontologyMappingType": "none"
51 | },
52 | "handling_markingstructures_responsibleentity": {
53 | "description": "Contains a single mandatory CUST: token with an optional single ORIG: token",
54 | "valuemap": "handling;marking_structures;responsibleentity",
55 | "datatype": "string",
56 | "defaultValue": "",
57 | "required": false,
58 | "memberof": "handling_markingstructures",
59 | "ontologyMappingType": "simple",
60 | "ontologyMapping": ""
61 | },
62 | "handling_markingstructures_isamversion": {
63 | "description": "ISA Marking Version",
64 | "valuemap": "handling;marking_structures;isam_version",
65 | "datatype": "enum",
66 | "defaultValue": "1.0",
67 | "required": false,
68 | "memberof": "handling_markingstructures",
69 | "ontologyMappingType": "enum",
70 | "enumValues": {
71 | "1.0": {
72 | "ontologyMapping": ""
73 | }
74 | }
75 | },
76 | "handling_markingstructures_isamversion2": {
77 | "description": "ISA Marking Version",
78 | "valuemap": "handling;marking_structures;isam_version",
79 | "datatype": "enum",
80 | "defaultValue": "1.0",
81 | "required": false,
82 | "memberof": "handling_markingstructures",
83 | "ontologyMappingType": "enum",
84 | "enumValues": {
85 | "1.0": {
86 | "ontologyMapping": ""
87 | }
88 | }
89 | },
90 | "handling_markingstructures_policyref": {
91 | "description": "If multiple policy refs apply, they are provided as space delimited URNs.",
92 | "valuemap": "handling;marking_structures;policyref",
93 | "datatype": "string",
94 | "defaultValue": "",
95 | "required": false,
96 | "memberof": "handling_markingstructures",
97 | "ontologyMappingType": "simple",
98 | "ontologyMapping": ""
99 | },
100 | "handling_markingstructures_controlset": {
101 | "description": "Group of data tags that are used to inform automated access control decisions.",
102 | "valuemap": "handling;marking_structures;controlset",
103 | "datatype": "string",
104 | "defaultValue": "",
105 | "required": false,
106 | "memberof": "handling_markingstructures",
107 | "ontologyMappingType": "simple",
108 | "ontologyMapping": ""
109 | },
110 | "handling_markingstructures_mostrestrictive": {
111 | "description": "Indicates whether or not this marking structure denotes the most restrictive applied to this structure. Only used in STIX header. Can only be used if the Controlled_Structure is set to //node()",
112 | "valuemap": "handling;marking_structures;most_restrictive",
113 | "datatype": "string",
114 | "defaultValue": "true",
115 | "required": false,
116 | "memberof": "handling_markingstructures",
117 | "ontologyMappingType": "simple",
118 | "ontologyMapping": ""
119 | },
120 | "handling_markingstructures_xsitype": {
121 | "defaultValue": "edh2cyberMarkingAssert:ISAMarkingsAssertionType",
122 | "enumValues": {
123 | "edh2cyberMarking:ISAMarkingsType": {
124 | "ontologyMapping": ""
125 | },
126 | "edh2cyberMarkingAssert:ISAMarkingsAssertionType": {
127 | "ontologyMapping": ""
128 | }
129 | }
130 | },
131 | "produced_time": {
132 | "dateTimeFormat": "YYYY-MM-DDTHH:mm:ssZ",
133 | "required": true,
134 | "defaultValue": "&stix_now()"
135 | },
136 | "information_source_name": {
137 | "description": "The source of the report",
138 | "valuemap": "information_source;identity;name",
139 | "datatype": "string",
140 | "required": true,
141 | "defaultValue": "DOE",
142 | "ontologyMappingType": "none",
143 | "ontologyMapping": ""
144 | },
145 | "information_source_description": {
146 | "description": "The description of the source of the report",
147 | "valuemap": "information_source;description",
148 | "datatype": "string",
149 | "required": true,
150 | "defaultValue": "U.S. Department of Energy",
151 | "ontologyMappingType": "none",
152 | "ontologyMapping": ""
153 | },
154 | "profiles": {
155 | "required": true
156 | },
157 | "profiles_profile": {
158 | "defaultValue": "ISA Profile v1.0"
159 | }
160 | }
161 | },
162 | "IndicatorData": {
163 | "fields": {
164 | "indicator_types": {
165 | "required": false
166 | }
167 | }
168 | }
169 | }
--------------------------------------------------------------------------------
/FlexTransform/resources/schemaDefinitions/stix-tlp.json:
--------------------------------------------------------------------------------
1 | {
2 | "DocumentHeaderData": {
3 | "fields": {
4 | "handling_markingstructures": {
5 | "subfields": {
6 | "handling_markingstructures_color": {"required": true, "primaryKeyMatch": "tlpMarking:TLPMarkingStructureType"},
7 | "handling_markingstructures_xsitype": {"required": true, "primaryKey": true},
8 | "handling_markingstructures_statement": {"required": false, "primaryKeyMatch": "simpleMarking:SimpleMarkingStructureType"}
9 | },
10 | "defaultFields": {
11 | "handling_markingstructures_xsitype": "tlpMarking:TLPMarkingStructureType"
12 | }
13 | },
14 | "handling_markingstructures_color": {
15 | "defaultValue": "GREEN"
16 | }
17 | }
18 | }
19 | }
--------------------------------------------------------------------------------
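
In the subfields block above, handling_markingstructures_xsitype is the primaryKey, and the other subfields attach to a marking structure only when their primaryKeyMatch equals that xsi:type. A small sketch of the selection rule as read from this file (the function is illustrative, not the parser's implementation):

SUBFIELDS = {
    'handling_markingstructures_color':
        {'required': True, 'primaryKeyMatch': 'tlpMarking:TLPMarkingStructureType'},
    'handling_markingstructures_xsitype':
        {'required': True, 'primaryKey': True},
    'handling_markingstructures_statement':
        {'required': False, 'primaryKeyMatch': 'simpleMarking:SimpleMarkingStructureType'},
}

def subfields_for(xsi_type):
    # The primaryKey subfield (the xsi:type itself) always applies; the rest
    # apply only when their primaryKeyMatch equals that xsi:type.
    return [name for name, spec in SUBFIELDS.items()
            if spec.get('primaryKey') or spec.get('primaryKeyMatch') == xsi_type]

print(subfields_for('tlpMarking:TLPMarkingStructureType'))
# -> ['handling_markingstructures_color', 'handling_markingstructures_xsitype']
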
/FlexTransform/resources/schemaDefinitions/stix-tlp2.json:
--------------------------------------------------------------------------------
1 | {
2 | "DocumentHeaderData": {
3 | "fields": {
4 | "handling_markingstructures": {
5 | "subfields": {
6 | "handling_markingstructures_color": {"required": true, "primaryKeyMatch": "tlpMarking:TLPMarkingStructureType"},
7 | "handling_markingstructures_xsitype": {"required": true, "primaryKey": true},
8 | "handling_markingstructures_statement": {"required": false, "primaryKeyMatch": "simpleMarking:SimpleMarkingStructureType"}
9 | },
10 | "defaultFields": {
11 | "handling_markingstructures_xsitype": "tlpMarking:TLPMarkingStructureType"
12 | }
13 | },
14 | "handling_markingstructures_color": {
15 | "defaultValue": "GREEN"
16 | },
17 |
18 | "produced_time": {
19 | "dateTimeFormat": "YYYY-MM-DDTHH:mm:ssZ",
20 | "required": true,
21 | "defaultValue": "&now()"
22 | },
23 | "profiles": {
24 | "required": true
25 | },
26 | "profiles_profile": {
27 | "defaultValue": "ISA Profile v1.0"
28 | }
29 | }
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/FlexTransform/resources/schemaDefinitions/twitter.json:
--------------------------------------------------------------------------------
1 | {
2 | "IndicatorData": {
3 | "types": {
4 | "IPv4-Address-Block": [{ "ipv4-addr[*]": "*" }],
5 | "DNS-Hostname-Block": [{ "domain[*]": "*" }],
6 | "URL-Block": [{ "url[*]": "*" }]
7 | },
8 | "fields": {
9 | "ipv4-addr[*]": {
10 | "description": "The malicious ip address",
11 | "datatype": "string",
12 | "defaultValue": "NoValue",
13 | "required": false,
14 | "ontologyMappingType": "simple",
15 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv4AddressIndicatorValueSemanticComponent"
16 | },
17 | "domain[*]": {
18 | "description": "The malicious domain",
19 | "datatype": "string",
20 | "defaultValue": "NoValue",
21 | "required": false,
22 | "ontologyMappingType": "simple",
23 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent"
24 | },
25 | "url[*]": {
26 | "description": "The malicious url",
27 | "datatype": "string",
28 | "defaultValue": "NoValue",
29 | "required": false,
30 | "ontologyMappingType": "simple",
31 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent"
32 | }
33 | }
34 | }
35 | }
--------------------------------------------------------------------------------
/FlexTransform/resources/schemas/CFMAlert.xsd:
--------------------------------------------------------------------------------
[XSD content not recoverable: the XML markup was stripped during extraction, leaving only the xs:documentation strings. Surviving annotations, in order:
- "The version of the CFMAlert schema being used."
- "Should be set to the IRI of the SignalDomain referenced from the KIDS ontology."
- "Should be set to the IRI of the SignalCanonicalRepresentation referenced from the KIDS ontology."
- "The value of the signal"
- "The category of action which was taken, e.g. 'block-installed'."
- "A description of the action which was taken - intended to be human readable."
- "A general category of malicious behavior, e.g. scanning."
- "A specific description of the behavior which prompted the alert, e.g. 'Excessive requests for non-existent web pages'."]
--------------------------------------------------------------------------------
/FlexTransform/resources/schemas/CFMDownload.xsd:
--------------------------------------------------------------------------------
[XSD content not recoverable: the XML markup was stripped during extraction and no documentation text survived.]
--------------------------------------------------------------------------------
/FlexTransform/resources/schemas/CFMDownloadRequest.xsd:
--------------------------------------------------------------------------------
[XSD content not recoverable: the XML markup was stripped during extraction. Surviving annotation: "A CFM request is an encapsulated prompt for a client for information from the repository."]
--------------------------------------------------------------------------------
/FlexTransform/resources/schemas/CFMEnvelope.xsd:
--------------------------------------------------------------------------------
[XSD content not recoverable: the XML markup was stripped during extraction, leaving only the xs:documentation strings. Surviving annotations, in order:
- "The CFM envelope comprises both the header information and any embedded message."
- "The CFMEnvelopeType is comprised of the following elements: SubmittingSite - the site identifier of the submitter; CFMSchemaVersion - a number indicating the version of the schema used for the envelope; Authentication Credential - the GPG-signed UUID of the submitting site; Message - an embedded message, which may be another XML document, plain text, or binary data."
- "The version identifier for the envelope schema version. The current value is '2.0'."
- "The sending site is the site shortname as provided to CFM. E.g., for Argonne, this would be ANL; for Ames Laboratory, it would be AMES. Case does not matter."
- "The timestamp, set by the sender, indicating when the message was sent (unix epoch time seconds)."
- "The type of embedded message. Predefined types are Alert, Report, and Other. Alert corresponds to a notification of observed malicious activity; Report indicates informational content such as those provided by CPP for use with CASA; Other indicates that the message is neither an Alert nor a Report, and normal processing of these types should not be attempted on this message."
- "An indication of the sensitivity of the enclosed message. The URI should be a reference to a sensitivity definition, e.g. 'http://www.anl.gov/cfm/2.0#OUO'. An optional element; if it is absent, the message is assumed to be non-sensitive. OUO is DOE's Official Use Only designation, indicating that data should be protected at a Moderate level and only shared within DOE."
- "The message is the content intended for the recipients. It may be arbitrary; however, CFM does define some message types/formats in the schema (e.g. OpenIOC alert)."]
--------------------------------------------------------------------------------
/FlexTransform/resources/schemas/CFMMessage13.xsd:
--------------------------------------------------------------------------------
[XSD content not recoverable: the XML markup was stripped during extraction and no documentation text survived.]
--------------------------------------------------------------------------------
/FlexTransform/resources/stix.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/resources/stix.zip
--------------------------------------------------------------------------------
/FlexTransform/test/LQMTTests.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 | import unittest
4 | from lxml import etree
5 |
6 | from FlexTransform.test.SampleInputs import CFM13ALERT2
7 | from FlexTransform import FlexTransform
8 |
9 | class TestCFM13AlertToLQMT(unittest.TestCase):
10 | output1 = None
11 | namespace = {
12 | 'cybox' : "http://cybox.mitre.org/cybox-2",
13 | 'indicator' : "http://stix.mitre.org/Indicator-2",
14 | 'marking' : "http://data-marking.mitre.org/Marking-1",
15 | 'PortObj' : "http://cybox.mitre.org/objects#PortObject-2",
16 | 'stix' : "http://stix.mitre.org/stix-1",
17 | 'stixCommon' : "http://stix.mitre.org/common-1",
18 | 'stixVocabs' : "http://stix.mitre.org/default_vocabularies-1",
19 | 'xsi' : "http://www.w3.org/2001/XMLSchema-instance",
20 | 'cyboxVocabs' : "http://cybox.mitre.org/default_vocabularies-2",
21 | 'AddressObj' : "http://cybox.mitre.org/objects#AddressObject-2",
22 | 'ArtifactObj' : "http://cybox.mitre.org/objects#ArtifactObject-2",
23 | 'FileObj' : "http://cybox.mitre.org/objects#FileObject-2",
24 | 'URIObj' : "http://cybox.mitre.org/objects#URIObject-2",
25 | 'tlpMarking' : "http://data-marking.mitre.org/extensions/MarkingStructure#TLP-1",
26 | 'CFM' : "http://www.anl.gov/cfm/stix",
27 | 'xmlns' : "http://www.anl.gov/cfm/1.3/IDMEF-Message"
28 | }
29 |
30 | @classmethod
31 | def setUpClass(cls):
32 | current_dir = os.path.dirname(__file__)
33 | transform = FlexTransform.FlexTransform()
34 |
35 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/cfm13.cfg'), 'r') as input_file:
36 | transform.add_parser('cfm13alert', input_file)
37 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/lqmtools.cfg'), 'r') as input_file:
38 | transform.add_parser('lqmtools', input_file)
39 | output1_object = io.StringIO()
40 |
41 | transform.transform(io.StringIO(CFM13ALERT2), 'cfm13alert', 'lqmtools', target_file=output1_object)
42 | output1_object.seek(0)
43 | output1_object.readline()
44 | cls.output1 = etree.parse(output1_object)  # was never assigned; every test below reads self.output1 (same pattern as regression_test.py)
45 |
46 |
47 | def test_alert_analyzerid(self):
48 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/@analyzerid", namespaces=self.namespace)[0], "Fake")
49 |
50 | def test_alert_analyzer_node_location(self):
51 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/xmlns:Node/xmlns:location/text()", namespaces=self.namespace)[0], "1600 Pennslyvania Ave, Washington DC 20005")
52 |
53 | def test_alert_analyzer_node_name(self):
54 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/xmlns:Node/xmlns:name/text()", namespaces=self.namespace)[0], "Nicholas Hendersen, 555-867-5309, nietzsche@doe.gov")
55 |
56 | def test_alert_analyzer_time(self):
57 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AnalyzerTime/text()", namespaces=self.namespace)[0], "2016-03-23T16:45:05+0000")
58 |
59 | def test_alert_AD_number_alerts(self):
60 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AdditionalData[@meaning='number of alerts in this report']/text()", namespaces=self.namespace)[0], "7")
61 |
62 | def test_alert_AD_report_schedule(self):
63 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AdditionalData[@meaning='report schedule']/text()", namespaces=self.namespace)[0], "NoValue")
64 |
65 | def test_alert_AD_report_type(self):
66 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AdditionalData[@meaning='report type']/text()", namespaces=self.namespace)[0], "alerts")
67 |
68 | def test_alert_AD_start_time(self):
69 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AdditionalData[@meaning='report start time']/text()", namespaces=self.namespace)[0], "2016-03-23T16:45:05+0000")
70 |
71 | def test_source_node_address_ipv4(self):
72 | self.assertEqual(set(self.output1.xpath("//xmlns:Address[@category='ipv4-addr']/xmlns:address/text()", namespaces=self.namespace)), set(["10.10.10.10", "11.11.11.11", "12.12.12.12", "13.13.13.13", "14.14.14.14"]))
73 |
74 | def test_source_node_address_url(self):
75 | self.assertEqual(set(self.output1.xpath("//xmlns:Address[not(@category='ipv4-addr')]/xmlns:address/text()", namespaces=self.namespace)), set(["fake.site.com/malicious.js", "bad.domain.be/poor/path"]))
76 |
77 | def test_alert_AD_OUO(self):
78 | self.assertEqual(set(self.output1.xpath("//xmlns:AdditionalData[@meaning='OUO']/text()", namespaces=self.namespace)), set(['0']))
79 |
80 | def test_alert_AD_restriction(self):
81 | self.assertEqual(set(self.output1.xpath("//xmlns:AdditionalData[@meaning='restriction']/text()", namespaces=self.namespace)),set(['public']))
82 |
83 | def test_alert_AD_duration(self):
84 | self.assertEqual(set(self.output1.xpath("//xmlns:AdditionalData[@meaning='duration']/text()", namespaces=self.namespace)), set(['0']))
85 |
86 | def test_alert_AD_recon(self):
87 | self.assertEqual(set(self.output1.xpath("//xmlns:AdditionalData[@meaning='recon']/text()", namespaces=self.namespace)), set(['0']))
88 |
89 | def test_alert_assessment_action(self):
90 | self.assertEqual(set(self.output1.xpath("//xmlns:Action/@category", namespaces=self.namespace)), set(["block-installed"]))
91 |
92 | def test_alert_classification_reference_name(self):
93 | self.assertEqual(set(self.output1.xpath("//xmlns:Reference/xmlns:name/text()", namespaces=self.namespace)), set(["unknown"]))
94 |
95 | def test_alert_classification_reference_url_false(self):
96 | self.assertEqual(set(self.output1.xpath("//xmlns:url/text()", namespaces=self.namespace)), set([" "]))
97 |
98 | if __name__ == '__main__':
99 | unittest.main()
--------------------------------------------------------------------------------
/FlexTransform/test/Readme.md:
--------------------------------------------------------------------------------
1 | - [X] to CFM13Alert
2 | - [X] from STIX
3 | - [X] TLP
4 | - [X] ACS
5 | - [X] from Key/Value Pairs *IP
6 |
7 | - [X] STIX
8 | - [X] to TLP
9 | - [X] from CFM13Alert
10 | - [X] from ACS
11 | - [X] from ACS30
12 | - [X] from Key/Value Pairs *IP
13 | - [X] to ACS
14 | - [X] from CFM13Alert
15 | - [X] from TLP
16 | - [X] from ACS30
17 | - [X] from Key/Value Pairs *IP
18 | - [X] to ACS30
19 | - [X] from CFM13Alert
20 | - [X] from TLP
21 | - [X] from ACS
22 | - [X] from Key/Value Pairs *IP
23 |
24 | - [X] to Key/Value Pairs
25 | - [X] from STIX
26 | - [X] from TLP
27 | - [X] from ACS
28 | - [X] from ACS30
29 | - [X] from CFM13Alert
30 | - [X] from CFM20Alert
31 |
32 | - [X] to LQMT
33 | - [X] from CFM13Alert
34 | - [X] from STIX
35 | - [X] from TLP
36 | - [X] from ACS
37 | - [X] from ACS30
38 | - [X] from Key/Value Pairs *IP
39 |
--------------------------------------------------------------------------------
/FlexTransform/test/TestData/cfm13_multiple_site.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | # FileParser can be XML, JSON, CSV, TSV, REGEX, KEYVALUE, DICT
3 | FileParser = XML
4 |
5 | # XML Options
6 | [XML]
7 | # ValidateSchema defaults to no, set to yes if you want the file validated against the schema
8 | ValidateSchema = no
9 | # SchemaFile must be set if ValidateSchema is yes
10 | # SchemaFile = resources/schemas/CFMMessage13.xsd
11 | # CustomParser is used to load custom XML parsing classes for complex XML documents
12 | CustomParser = CFM13
13 |
14 | # JSON Options (none currently defined)
15 | [JSON]
16 |
17 | # CSV Options
18 | [CSV]
19 | # HeaderLine defines if the CSV file has the field names in the first non-commented row of the file
20 | # HeaderLine defaults to no
21 | HeaderLine = no
22 | # QuotesOptional defines if every field in the CSV file has to be enclosed in quotes.
23 | # QuotesOptional defaults to yes
24 | QuotesOptional = no
25 | # SeparatorChar defines the character or characters that separate the fields in the file.
26 | # SeparatorChar defaults to ,
27 | SeparatorChar = ,
28 | # StripSpaces defines if spaces before or after the separator should be stripped
29 | # StripSpaces defaults to yes
30 | StripSpaces = yes
31 | # FieldNames have to be defined if HeaderLine is set to no. The FieldNames map to the SCHEMA definition below
32 | FieldNames = ip,host,etc
33 |
34 | # TSV Options
35 | [TSV]
36 | HeaderLine = no
37 | FieldNames = ip,host,etc
38 |
39 | # REGEX Options
40 | [REGEX]
41 | Regex = (\S*)\s+(\S*)\s+\d+\s+([0-9.]+)
42 | FieldNames = host,service,ip
43 |
44 | # KEYVALUE Options
45 | [KEYVALUE]
46 | SeparatorChar = \s
47 | QuoteChar = [']
48 | KVSeparator = [=]
49 |
50 | # SCHEMA Definition
51 |
52 | [SCHEMA]
53 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
54 | PrimarySchemaConfiguration = resources/schemaDefinitions/cfm13.json
55 | # SiteSchemaConfiguration is the json file that overrides specific values from the primary schema definition
56 | # If multiple files are required, separate them with ;
57 | SiteSchemaConfiguration = resources/schemaDefinitions/cfm13-site.json;test/TestData/mult_site_config.json
58 |
59 | # Metadata can be included with certain files. Set MetadataSchemaConfiguration to the json file that defines the metadata schema
60 | MetadataSchemaConfiguration = resources/schemaDefinitions/cfm-metadata.json
61 |
--------------------------------------------------------------------------------
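
The SiteSchemaConfiguration comment above says multiple override files are ';'-separated, and this test config exercises exactly that with cfm13-site.json plus mult_site_config.json. A sketch of reading that list with configparser; the function name and the later-file-wins assumption are illustrative, not FlexTransform's own loader:

import configparser
import json

def load_site_schemas(cfg_path):
    # Inline comments are disabled in configparser by default, so the ';'
    # separating the file names survives parsing intact.
    cfg = configparser.ConfigParser()
    cfg.read(cfg_path)
    raw = cfg.get('SCHEMA', 'SiteSchemaConfiguration', fallback='')
    overrides = []
    for path in (p.strip() for p in raw.split(';')):
        if not path:
            continue
        with open(path) as f:
            overrides.append(json.load(f))
    return overrides  # applied in order, so later files win on conflicts

site_configs = load_site_schemas('test/TestData/cfm13_multiple_site.cfg')
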
/FlexTransform/test/TestData/csv-example-2.json:
--------------------------------------------------------------------------------
1 | {
2 | "IndicatorData": {
3 | "types": {
4 | "IPv4-Address-Block": [ {"indicatorType": "IPv4 Address"} ],
5 | "IPv6-Address-Block": [ {"indicatorType": "IPv6 Address"} ],
6 | "DNS-Hostname-Block": [ {"indicatorType": "DNSHostName"}],
7 | "URL-Block": [ {"indicatorType": "URL"}, {"indicatorType": "URI"} ],
8 | "Malicious-File-Hash": [ {"indicatorType": "FileMD5Hash"}, {"indicatorType": "FileSHA1Hash"}]
9 | },
10 | "fields": {
11 | "indicator": {
12 | "description": "The value to be acted upon (e.g. ip, domain name, URL)",
13 | "datatype": "string",
14 | "required": true,
15 | "ontologyEnumField": "",
16 | "ontologyMappingType": "simple",
17 | "ontologyMapping": ""
18 | },
19 | "indicatorType": {
20 | "description": "A type name that informs how to interpret the indicator (e.g. ipv4, emailAddress) (enum)",
21 | "datatype": "enum",
22 | "dependsOn": "indicator",
23 | "mapOntologyToElement": "indicator",
24 | "required": false,
25 | "ontologyMappingType": "enum",
26 | "enumValues": {
27 | "IPv6 Address": {
28 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv6AddressIndicatorValueSemanticComponent"
29 | },
30 | "IPv4 Address": {
31 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IPv4AddressIndicatorValueSemanticComponent"
32 | },
33 | "URL": {
34 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent"
35 | },
36 | "URI": {
37 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#URLIndicatorValueSemanticComponent"
38 | },
39 | "DNSDomainName": {
40 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent"
41 | },
42 | "DNSHostName": {
43 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#DNSIndicatorValueSemanticComponent"
44 | },
45 | "FileMD5Hash": {
46 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#MD5FilehashIndicatorValueSemanticComponent"
47 | },
48 | "FileSHA1Hash": {
49 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#SHA1FilehashIndicatorValueSemanticComponent"
50 | }
51 | }
52 | },
53 | "reason": {
54 | "description": "Description associated with indicator",
55 | "datatype": "string",
56 | "required": false,
57 | "defaultValue": "noValue",
58 | "ontologyMappingType": "simple",
59 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#IndicatorDescriptionSemanticConcept"
60 | },
61 | "detectedTime": {
62 | "description": "Time the report was generated",
63 | "datatype": "datetime",
64 | "dateTimeFormat": " YYYY-MM-DD HH:mm:ss",
65 | "required": true,
66 | "ontologyMappingType": "simple",
67 | "ontologyMapping": "http://www.anl.gov/cfm/transform.owl#TimeAlertProcessedOnClientSemanticConcept"
68 | }
69 | }
70 | }
71 | }
--------------------------------------------------------------------------------
/FlexTransform/test/TestData/csv_example_2.cfg:
--------------------------------------------------------------------------------
1 | [SYNTAX]
2 | FileParser = CSV
3 |
4 | # CSV Options
5 | [CSV]
6 | Fields = indicator,indicatorType,reason,detectedTime
7 | Delimiter = ","
8 | QuoteChar = "
9 | EscapeChar = \\
10 | HeaderLine = false
11 | DoubleQuote = false
12 | QuoteStyle = Minimal
13 |
14 | # SCHEMA Definition
15 | [SCHEMA]
16 | # PrimarySchemaConfiguration is the json file that describes the underlying schema for the document for Flexible Transform
17 | PrimarySchemaConfiguration = test/TestData/csv-example-2.json
--------------------------------------------------------------------------------
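
A sketch of consuming a file described by the [CSV] section above with Python's csv module; the dialect settings are taken straight from the config, while the function itself is illustrative rather than FlexTransform's CSVParser:

import csv

# Field order comes from the Fields line of the [CSV] section above.
FIELDS = ['indicator', 'indicatorType', 'reason', 'detectedTime']

def read_rows(path):
    # Comma delimiter, double-quote QuoteChar, backslash EscapeChar,
    # and no header line, mirroring the configuration.
    with open(path, newline='') as f:
        reader = csv.reader(f, delimiter=',', quotechar='"', escapechar='\\')
        for row in reader:
            yield dict(zip(FIELDS, row))
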
/FlexTransform/test/TestData/mult_site_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "DocumentHeaderData": {
3 | "fields": {
4 | "analyzerid": {
5 | "defaultValue": "LeoAtriedes"
6 | },
7 | "location": {
8 | "defaultValue": "Sand Worm Dave, Arrakeen, Dune 54321"
9 | },
10 | "contact_phone": {
11 | "defaultValue": "555-867-5309",
12 | "required": true
13 | },
14 | "report_schedule": {
15 | "defaultValue": "5 minutes"
16 | }
17 | }
18 | },
19 | "IndicatorData": {
20 | "fields": {
21 | "reference_origin": {
22 | "defaultValue": "user-specific"
23 | },
24 | "action_duration": {
25 | "defaultValue": "86400"
26 | }
27 | }
28 | }
29 | }
--------------------------------------------------------------------------------
/FlexTransform/test/ToKeyValue_test.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 | import unittest
4 | import arrow
5 |
6 | from FlexTransform import FlexTransform
7 | from FlexTransform.test.SampleInputs import STIXTLP, STIXACS, CFM13ALERT
8 |
9 |
10 | class TestCFM13AlertToKeyValue(unittest.TestCase):
11 | output1 = None
12 |
13 | @classmethod
14 | def setUpClass(cls):
15 | current_dir = os.path.dirname(__file__)
16 | transform = FlexTransform.FlexTransform()
17 |
18 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/cfm13.cfg'), 'r') as input_file:
19 | transform.add_parser('cfm13alert', input_file)
20 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/keyvalue.cfg'), 'r') as input_file:
21 | transform.add_parser('keyvalue', input_file)
22 | output1_object = io.StringIO()
23 |
24 | transform.transform(io.StringIO(CFM13ALERT), 'cfm13alert', 'keyvalue', target_file=output1_object)
25 |
26 | cls.output1 = []
27 | output1_object.seek(0)
28 | for line in output1_object.read().splitlines():
29 | cls.output1.append(line.split('&'))
30 |
31 | def test_duration(self):
32 | self.assertIn('duration=86400', self.output1[0])
33 |
34 | def test_serviceport(self):
35 | self.assertIn('service_port=22', self.output1[0])
36 |
37 | def test_category_name(self):
38 | self.assertIn("category_name='SSH Attack'", self.output1[0])
39 |
40 | def test_category(self):
41 | self.assertIn("category='Scanning'", self.output1[0])
42 |
43 | def test_severity(self):
44 | self.assertIn("severity='unknown'", self.output1[0])
45 |
46 | def test_prior_offenses(self):
47 | self.assertIn('prior_offenses=11', self.output1[0])
48 |
49 | def test_category_description(self):
50 | self.assertIn("category_description='SSH Attack'", self.output1[0])
51 |
52 | def test_serviceprotocol(self):
53 | self.assertIn("service_protocol='TCP'", self.output1[0])
54 |
55 | def test_comment(self):
56 | self.assertIn("comment='No Comment'", self.output1[0])
57 |
58 | def test_confidence(self):
59 | self.assertIn('confidence=0', self.output1[0])
60 |
61 | def test_direction(self):
62 | self.assertIn("direction='unknown'", self.output1[0])
63 |
64 | def test_ipv4(self):
65 | self.assertIn('ipv4=10.10.10.10', self.output1[0])
66 |
67 | def test_combined_comment(self):
68 | self.assertIn(
69 | "combined_comment='SSH scans against multiple hosts, direction:ingress, confidence:87, severity:high'",
70 | self.output1[0])
71 |
72 |
73 | class TestSTIXTLPToKeyValue(unittest.TestCase):
74 | output1 = None
75 |
76 | @classmethod
77 | def setUpClass(cls):
78 | current_dir = os.path.dirname(__file__)
79 | transform = FlexTransform.FlexTransform()
80 |
81 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/stix_tlp.cfg'), 'r') as input_file:
82 | transform.add_parser('stix_tlp', input_file)
83 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/keyvalue.cfg'), 'r') as input_file:
84 | transform.add_parser('keyvalue', input_file)
85 | output1_object = io.StringIO()
86 |
87 | transform.transform(io.StringIO(STIXTLP), 'stix_tlp', 'keyvalue', target_file=output1_object)
88 |
89 | cls.output1 = []
90 | output1_object.seek(0)
91 | for line in output1_object.read().splitlines():
92 | cls.output1 += line.split('&')
93 |
94 | def test_category(self):
95 | self.assertEqual(5, self.output1.count("category='Unspecified'"))
96 |
97 | def test_category_name(self):
98 | self.assertEqual(5, self.output1.count("category_name='Unspecified'"))
99 |
100 | def test_severity(self):
101 | self.assertEqual(5, self.output1.count("severity='unknown'"))
102 |
103 | def test_comment(self):
104 | self.assertEqual(5, self.output1.count("comment='No Comment'"))
105 |
106 | def test_confidence(self):
107 | self.assertEqual(5, self.output1.count('confidence=0'))
108 |
109 | def test_direction(self):
110 | self.assertEqual(5, self.output1.count("direction='unknown'"))
111 |
112 | def test_ipv4(self):
113 | self.assertIn('ipv4=10.10.10.10', self.output1)
114 | self.assertIn('ipv4=11.11.11.11', self.output1)
115 | self.assertIn('ipv4=12.12.12.12', self.output1)
116 | self.assertIn('ipv4=13.13.13.13', self.output1)
117 | self.assertIn('ipv4=14.14.14.14', self.output1)
118 |
119 | def test_combined_comment(self):
120 | self.assertEqual(5, self.output1.count("combined_comment='Energy Sector Indicator'"))
121 |
122 |
123 | class TestSTIXACSToKeyValue(unittest.TestCase):
124 | output1 = None
125 |
126 | @classmethod
127 | def setUpClass(cls):
128 | current_dir = os.path.dirname(__file__)
129 | transform = FlexTransform.FlexTransform()
130 |
131 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/stix_essa.cfg'), 'r') as input_file:
132 | transform.add_parser('stix_acs2', input_file)
133 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/keyvalue.cfg'), 'r') as input_file:
134 | transform.add_parser('keyvalue', input_file)
135 | output1_object = io.StringIO()
136 |
137 | transform.transform(io.StringIO(STIXACS), 'stix_acs2', 'keyvalue', target_file=output1_object)
138 |
139 | cls.output1 = []
140 | output1_object.seek(0)
141 | for line in output1_object.read().splitlines():
142 | cls.output1 += line.split('&')
143 |
144 | def test_category(self):
145 | self.assertEqual(3, self.output1.count("category='Unspecified'"))
146 |
147 | def test_category_name(self):
148 | self.assertEqual(3, self.output1.count("category_name='Unspecified'"))
149 |
150 | def test_severity(self):
151 | self.assertEqual(3, self.output1.count("severity='unknown'"))
152 |
153 | def test_comment(self):
154 | self.assertEqual(3, self.output1.count("comment='No Comment'"))
155 |
156 | def test_confidence(self):
157 | self.assertEqual(3, self.output1.count('confidence=0'))
158 |
159 | def test_direction(self):
160 | self.assertEqual(3, self.output1.count("direction='unknown'"))
161 |
162 | def test_fqdn(self):
163 | self.assertIn("fqdn='blog.website.net'", self.output1)
164 | self.assertIn("fqdn='fake.com'", self.output1)
165 | self.assertIn("fqdn='goo.gl/peter'", self.output1)
166 |
167 | def test_combined_comment(self):
168 | self.assertIn("combined_comment='AAA Report Indicator'", self.output1)
169 | self.assertIn("combined_comment='Domain Indicator'", self.output1)
170 | self.assertIn("combined_comment='Just Another Indicator'", self.output1)
171 |
172 |
173 | class TestSTIXACS30ToKeyValue(unittest.TestCase):
174 | output1 = None
175 |
176 | @classmethod
177 | def setUpClass(cls):
178 | current_dir = os.path.dirname(__file__)
179 | transform = FlexTransform.FlexTransform()
180 |
181 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/stix_acs30.cfg'), 'r') as input_file:
182 | transform.add_parser('stix_acs30', input_file)
183 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/keyvalue.cfg'), 'r') as input_file:
184 | transform.add_parser('keyvalue', input_file)
185 | output1_object = io.StringIO()
186 |
187 | transform.transform(io.StringIO(STIXACS), 'stix_acs30', 'keyvalue', target_file=output1_object)
188 |
189 | cls.output1 = []
190 | output1_object.seek(0)
191 | for line in output1_object.read().splitlines():
192 | cls.output1 += line.split('&')
193 |
194 | def test_category(self):
195 | self.assertEqual(3, self.output1.count("category='Unspecified'"))
196 |
197 | def test_category_name(self):
198 | self.assertEqual(3, self.output1.count("category_name='Unspecified'"))
199 |
200 | def test_severity(self):
201 | self.assertEqual(3, self.output1.count("severity='unknown'"))
202 |
203 | def test_comment(self):
204 | self.assertEqual(3, self.output1.count("comment='No Comment'"))
205 |
206 | def test_confidence(self):
207 | self.assertEqual(3, self.output1.count('confidence=0'))
208 |
209 | def test_direction(self):
210 | self.assertEqual(3, self.output1.count("direction='unknown'"))
211 |
212 | def test_fqdn(self):
213 | self.assertIn("fqdn='blog.website.net'", self.output1)
214 | self.assertIn("fqdn='fake.com'", self.output1)
215 | self.assertIn("fqdn='goo.gl/peter'", self.output1)
216 |
217 | def test_combined_comment(self):
218 | self.assertIn("combined_comment='AAA Report Indicator'", self.output1)
219 | self.assertIn("combined_comment='Domain Indicator'", self.output1)
220 | self.assertIn("combined_comment='Just Another Indicator'", self.output1)
221 |
222 |
223 | if __name__ == '__main__':
224 | unittest.main()
225 |
--------------------------------------------------------------------------------
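
The assertions above all work on raw '&'-separated key=value tokens. For readers inspecting keyvalue output by hand, a small parsing helper (illustrative only; the tests deliberately stay on the raw tokens):

def parse_keyvalue_line(line):
    # Pairs are '&'-separated key=value tokens; string values are wrapped
    # in single quotes, exactly the shape the assertions above check.
    result = {}
    for token in line.split('&'):
        key, _, value = token.partition('=')
        result[key] = value.strip("'")
    return result

parsed = parse_keyvalue_line("ipv4=10.10.10.10&duration=86400&severity='unknown'")
assert parsed['severity'] == 'unknown'
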
/FlexTransform/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/FlexTransform/test/__init__.py
--------------------------------------------------------------------------------
/FlexTransform/test/regression_test.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 | import unittest
4 | from lxml import etree
5 |
6 | from FlexTransform.test.SampleInputs import STIXTLP
7 | from FlexTransform import FlexTransform
8 |
9 | class regression_tests(unittest.TestCase):
10 | output1 = None
11 | namespace = {
12 | 'cybox' : "http://cybox.mitre.org/cybox-2",
13 | 'indicator' : "http://stix.mitre.org/Indicator-2",
14 | 'marking' : "http://data-marking.mitre.org/Marking-1",
15 | 'PortObj' : "http://cybox.mitre.org/objects#PortObject-2",
16 | 'stix' : "http://stix.mitre.org/stix-1",
17 | 'stixCommon' : "http://stix.mitre.org/common-1",
18 | 'stixVocabs' : "http://stix.mitre.org/default_vocabularies-1",
19 | 'xsi' : "http://www.w3.org/2001/XMLSchema-instance",
20 | 'cyboxVocabs' : "http://cybox.mitre.org/default_vocabularies-2",
21 | 'AddressObj' : "http://cybox.mitre.org/objects#AddressObject-2",
22 | 'ArtifactObj' : "http://cybox.mitre.org/objects#ArtifactObject-2",
23 | 'FileObj' : "http://cybox.mitre.org/objects#FileObject-2",
24 | 'URIObj' : "http://cybox.mitre.org/objects#URIObject-2",
25 | 'tlpMarking' : "http://data-marking.mitre.org/extensions/MarkingStructure#TLP-1",
26 | 'CFM' : "http://www.anl.gov/cfm/stix",
27 | 'xmlns' : "http://www.anl.gov/cfm/1.3/IDMEF-Message"
28 | }
29 |
30 | @classmethod
31 | def setUpClass(cls):
32 | current_dir = os.path.dirname(__file__)
33 | transform = FlexTransform.FlexTransform()
34 |
35 | with open(os.path.join(current_dir, './TestData/cfm13_multiple_site.cfg'), 'r') as input_file:
36 | transform.add_parser('cfm13alert', input_file)
37 | with open(os.path.join(current_dir, '../resources/sampleConfigurations/stix_tlp.cfg'), 'r') as input_file:
38 | transform.add_parser('stix', input_file)
39 | output1_object = io.StringIO()
40 |
41 | transform.transform(io.StringIO(STIXTLP), 'stix', 'cfm13alert', target_file=output1_object)
42 | output1_object.seek(0)
43 | output1_object.readline()
44 | cls.output1 = etree.parse(output1_object)
45 | print(output1_object.getvalue())
46 |
47 | def test_alert_analyzerid(self):
48 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/@analyzerid", namespaces=self.namespace)[0], "Fake")
49 |
50 | def test_alert_analyzer_node_location(self):
51 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/xmlns:Node/xmlns:location/text()", namespaces=self.namespace)[0], "Sand Worm Dave, Arrakeen, Dune 54321")
52 |
53 | def test_alert_analyzer_node_name(self):
54 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:Analyzer/xmlns:Node/xmlns:name/text()", namespaces=self.namespace)[0], "Test User, 555-867-5309, test@test.int")
55 |
56 | def test_alert_AD_report_schedule(self):
57 | self.assertEqual(self.output1.xpath("/xmlns:IDMEF-Message/xmlns:Alert/xmlns:AdditionalData[@meaning='report schedule']/text()", namespaces=self.namespace)[0], "5 minutes")
58 |
59 | def test_alert_AD_duration(self):
60 | self.assertEqual(set(self.output1.xpath("//xmlns:AdditionalData[@meaning='duration']/text()", namespaces=self.namespace)), set(['86400']))
61 |
62 | def test_reference_origin(self):
63 | self.assertEqual(set(self.output1.xpath("//xmlns:Reference/@origin", namespaces=self.namespace)), set(['user-specific']))
--------------------------------------------------------------------------------
/FlexTransform/test/utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Nov 9, 2015
3 |
4 | @author: ahoying
5 | '''
6 |
7 | '''
8 | Test module utilities
9 | '''
10 |
11 | import json
12 | import arrow
13 | import csv
14 |
15 | def deep_sort(obj):
16 | """
17 | Recursively sort nested lists inside a list or dict structure
18 | Based on code from http://stackoverflow.com/questions/18464095/how-to-achieve-assertdictequal-with-assertsequenceequal-applied-to-values
19 | """
20 |
21 | if isinstance(obj, dict):
22 | _sorted = {}
23 | for key in sorted(obj):
24 | _sorted[key] = deep_sort(obj[key])
25 |
26 | elif isinstance(obj, list):
27 | new_list = []
28 | isdict = False
29 | for val in obj:
30 | if (not isdict and isinstance(val, dict)) :
31 | isdict = True
32 |
33 | new_list.append(deep_sort(val))
34 |
35 | if (isdict) :
36 | # Sort lists of dictionaries by the hash value of the data in the dictionary
37 | _sorted = sorted(new_list, key=lambda d: hash(json.dumps(d, ensure_ascii = True, sort_keys = True)))
38 | else :
39 | _sorted = sorted(new_list)
40 |
41 | else:
42 | _sorted = obj
43 |
44 | return _sorted
45 |
46 | # Used for test cases whose expected values are based on the current time,
47 | # so that they don't fail every time the gap between the current time and
48 | # the timestamps stored in the test data changes.
49 | def dynamic_time_change(data):
50 | index = 0
51 | newData = ''
52 | reader = csv.reader(data.split(), delimiter=',', quotechar='"')
53 | for row in reader:
54 | for x in range(len(row)):
55 | row[x] = '\"' + row[x] + '\"'
56 | if (index < 7):
57 | newData += ','.join(row) + '\n'
58 | elif (index == 7):
59 | row[1] = arrow.utcnow().replace(hours=1).format('YYYYMMDDTHHmmss') + 'Z'
60 | newData += ','.join(row) + '\n'
61 | else:
62 | row[1] = arrow.utcnow().replace(days=4).format('YYYYMMDDTHHmmss') + 'Z'
63 | newData += ','.join(row) + '\n'
64 | index += 1
65 |
66 | return newData
67 |
--------------------------------------------------------------------------------
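
A short usage example for deep_sort above: it gives nested structures a canonical ordering, so order-insensitive comparisons of transformed output reduce to plain equality.

from FlexTransform.test.utils import deep_sort

a = {'indicators': [{'ip': '10.10.10.10'}, {'ip': '11.11.11.11'}]}
b = {'indicators': [{'ip': '11.11.11.11'}, {'ip': '10.10.10.10'}]}

# Both structures sort to the same canonical form regardless of list order.
assert deep_sort(a) == deep_sort(b)
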
/ISAMarkingExtension/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/ISAMarkingExtension/__init__.py
--------------------------------------------------------------------------------
/ISAMarkingExtension/bindings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anl-cyberscience/FlexTransform/9e2b45e8d674023d6931e8e9c5b4f78bc269d4bd/ISAMarkingExtension/bindings/__init__.py
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright © 2015, UChicago Argonne, LLC
2 | All Rights Reserved
3 |
4 | FLEXIBLE TRANSFORM (ANL-SF-15-020) Christopher Strasburg: Argonne National Laboratory
5 | OPEN SOURCE LICENSE
6 |
7 | Under the terms of Contract No. DE-AC02-06CH11357 with UChicago Argonne, LLC, the U.S. Government retains certain rights in this software.
8 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
9 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
11 | 3. Neither the names of UChicago Argonne, LLC or the Department of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
12 |
13 | *********************************************************************************
14 | DISCLAIMER
15 | THE SOFTWARE IS SUPPLIED “AS IS” WITHOUT WARRANTY OF ANY KIND.
16 | NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
17 | *********************************************************************************
18 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include FlexTransform/resources/*.xml
2 | include FlexTransform/resources/*.owl
3 | include FlexTransform/resources/*.zip
4 | include FlexTransform/resources/*.rdf
5 | include FlexTransform/resources/sampleConfigurations/*
6 | include FlexTransform/resources/schemaDefinitions/*
7 | include FlexTransform/resources/schemas/*
8 | include FlexTransform/test/TestData/*
--------------------------------------------------------------------------------
/Utils/LQMTtestCFM.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Nov 18, 2014
3 |
4 | @author: ahoying
5 | '''
6 |
7 | from FlexTransform import FlexTransform
8 | import os
9 | import json
10 | import logging
11 | '''
12 | # To enable profiling, remove comments below
13 | import cProfile, pstats, io
14 | '''
15 |
16 | if __name__ == '__main__':
17 |
18 | '''
19 | # Profiling
20 | pr = cProfile.Profile()
21 | '''
22 |
23 | currentdir = os.path.dirname(__file__)
24 | logging.basicConfig(format='%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s', level=logging.DEBUG)
25 |
26 | TestDir = os.path.join(currentdir, 'resources/sampleMessages/cfm13Uploads/WithMetadata')
27 |
28 | Transform = FlexTransform.FlexTransform()
29 | Cfm13AlertConfig = open(os.path.join(currentdir,'resources/sampleConfigurations/cfm13.cfg'), 'r')
30 | Transform.add_parser('Cfm13Alert', Cfm13AlertConfig)
31 |
32 | LQMToolsConfig = open(os.path.join(currentdir,'resources/sampleConfigurations/lqmtools.cfg'), 'r')
33 | Transform.add_parser('LQMTools', LQMToolsConfig)
34 |
35 | TransformedData = []
36 |
37 | for file in os.listdir(TestDir):
38 | if file.startswith('.'):
39 | f = open(os.path.join(TestDir, file), 'r')
40 | metadata = json.load(f)
41 | f.close()
42 |
43 | sourceFile = os.path.join(TestDir, metadata['FileName'])
44 | logging.info(sourceFile)
45 |
46 | '''
47 | # Profiling
48 | pr.enable()
49 | '''
50 |
51 | try:
52 | Data = Transform.transform(source_file=sourceFile, source_parser_name=metadata['PayloadFormat'], target_parser_name='LQMTools', source_meta_data=metadata)
53 | except Exception as inst:
54 | logging.exception(inst)
55 | else:
56 | if Data:
57 | TransformedData.extend(Data)
58 |
59 | '''
60 | # Profiling
61 | pr.disable()
62 | '''
63 |
64 | '''
65 | # Profiling
66 | s = io.StringIO()
67 | sortby = 'cumulative'
68 | ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
69 | ps.print_stats()
70 | print(s.getvalue())
71 | '''
72 |
73 | out = open(os.path.join(currentdir,'resources/testing/lqmtools-test.json'), 'w')
74 | json.dump(TransformedData, out, sort_keys=True, indent=4)
75 | out.close()
--------------------------------------------------------------------------------
/Utils/LQMTtestSTIX.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Nov 18, 2014
3 |
4 | @author: ahoying
5 | '''
6 |
7 | from FlexTransform import FlexTransform
8 | import os
9 | import json
10 | import logging
11 |
12 | if __name__ == '__main__':
13 |
14 | currentdir = os.path.dirname(__file__)
15 | logging.basicConfig(format='%(name)s (%(pathname)s:%(lineno)d) %(levelname)s:%(message)s', level=logging.DEBUG)
16 |
17 | TestDir = os.path.join(currentdir, 'resources/sampleMessages/stix')
18 |
19 | Transform = FlexTransform.FlexTransform()
20 | StixConfig = open(os.path.join(currentdir,'resources/sampleConfigurations/stix_ciscp.cfg'), 'r')
21 | Transform.add_parser('STIX', StixConfig)
22 |
23 | LQMToolsConfig = open(os.path.join(currentdir,'resources/sampleConfigurations/lqmtools.cfg'), 'r')
24 | Transform.add_parser('LQMTools', LQMToolsConfig)
25 |
26 | TransformedData = []
27 |
28 | for file in os.listdir(TestDir):
29 | if file.startswith('CISCP_INDICATOR.'):
30 | sourceFile = os.path.join(TestDir, file)
31 |
32 | logging.info(sourceFile)
33 |
34 | try:
35 | Data = Transform.transform(source_file=sourceFile, source_parser_name='STIX', target_parser_name='LQMTools')
36 | except Exception as inst:
37 | logging.exception(inst)
38 | else:
39 | if Data:
40 | TransformedData.extend(Data)
41 |
42 | out = open(os.path.join(currentdir,'resources/testing/lqmtools-stix-test.json'), 'w')
43 | json.dump(TransformedData, out, sort_keys=True, indent=4)
44 | out.close()
--------------------------------------------------------------------------------
/Utils/subjectCommentParentQuery.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 |
3 | import sys
4 |
5 | import argparse
6 | import rdflib
7 |
8 |
9 | DEFAULT_QUERY = '''PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
10 | PREFIX owl: <http://www.w3.org/2002/07/owl#>
11 | PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
12 | PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
13 | PREFIX : <http://www.anl.gov/cfm/transform.owl#>  # default prefix: assumed FlexTransform ontology namespace
14 | SELECT DISTINCT ?subject ?comment ?parent
15 | WHERE { ?parent rdfs:subClassOf* :SemanticComponent .
16 | ?subject rdfs:subClassOf ?parent .
17 | OPTIONAL { ?subject rdfs:comment ?comment . } }
18 | ORDER BY ?parent'''
19 |
20 |
21 | def buildAndParseGraph(rdfFile):
22 | '''Instantiate a graph, parse `rdfFile`, and return graph after
23 | parsing.
24 |
25 | :param rdfFile: rdf filename to parse
26 | :type rdfFile: str
27 | :returns: rdflib.Graph
28 | '''
29 | g = rdflib.Graph()
30 | g.parse(rdfFile)
31 | return g
32 |
33 |
34 | def queryGraph(graph, query=DEFAULT_QUERY):
35 | '''Return the result of `query` on the rdflib.Graph object `graph`.
36 |
37 | Convenience helper function to use DEFAULT_QUERY if no query is
38 | provided.
39 |
40 | :param graph: graph to query
41 | :type graph: rdflib.Graph
42 | :param query: SPARQL query to run on `graph`
43 | :type query: str
44 | :returns: rdflib.query.Result
45 | '''
46 | return graph.query(query)
47 |
48 |
49 | def writeSubjectAndCommentToCSV(queryRes, outFile=None):
50 | '''Write the `subject`, `comment` and `parent` fields of each row in
51 | `queryRes` to a CSV file `outFile`.
52 |
53 | If `outFile` is not supplied, results are written to stdout. If a row does
54 | not have a comment, 'None' is printed.
55 |
56 | :param queryRes: queryResults to print
57 | :type queryRes: rdflib.query.Result
58 | :param outFile: CSV output filename
59 | :type outFile: string
60 | '''
61 | f = open(outFile, 'w') if outFile else sys.stdout
62 | # add a comment at the top of the file describing fields
63 | f.write('#subject,comment,parent\n')
64 | for row in queryRes:
65 | f.write('{0},{1},{2}\n'.format(row.subject, row.comment, row.parent))
66 | if f is not sys.stdout:
67 | f.close()
68 |
69 |
70 | if __name__ == '__main__':
71 |
72 | parser = argparse.ArgumentParser()
73 | parser.add_argument('-i', '--input-file', action='store', required=True,
74 | help='RDF input filename')
75 | parser.add_argument('-o', '--output-file', action='store', required=False,
76 | help='CSV output filename. If absent, use stdout.')
77 |
78 | args = parser.parse_args()
79 |
80 | g = buildAndParseGraph(args.input_file)
81 | q = queryGraph(g)
82 | writeSubjectAndCommentToCSV(q, args.output_file)
83 |
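84 | # Example usage from an interactive session (hypothetical file names):
85 | #   g = buildAndParseGraph('transform.owl')
86 | #   res = queryGraph(g)                        # runs DEFAULT_QUERY
87 | #   writeSubjectAndCommentToCSV(res)           # writes CSV rows to stdout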
--------------------------------------------------------------------------------
/docs/contribute.md:
--------------------------------------------------------------------------------
1 | ##Contributing
2 | ###GitHub
3 | FlexTransform can be found on GitHub at the following link: [*https://github.com/anl-cyberscience/FlexTransform*](https://github.com/anl-cyberscience/FlexTransform).
4 |
5 | Users can fork the project to make their own changes; if any bugs or errors are found, a pull request can be opened and the
6 | issue will be addressed.
7 |
8 | ###Contact
9 | FlexTransform is a tool developed and supported by the CFM team at Argonne National Laboratory. Any questions about FlexT can be
10 | directed to the CFM team at cfmteam@anl.gov. For general information about the CFM project, visit our [*website*](https://cfm.gss.anl.gov/).
11 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # Flexible Transform
2 | [](https://travis-ci.org/anl-cyberscience/FlexTransform)
3 | [](https://badge.fury.io/py/FlexTransform)
4 | [](https://github.com/anl-cyberscience/FlexTransform)
5 |
6 | Flexible Transform (FlexT) enables dynamic translation between Cyber Threat Intelligence (CTI) reports, accomplishing this by digesting CTI data down to its semantic roots (meaning and context).
7 |
8 | ###Overview
9 | ####The Problem
10 | Most cyber defense systems incorporate some form of cyber threat intelligence (CTI) collection and analysis. However, different
11 | systems and CTI sharing communities have implemented a variety of representations to transmit these data (e.g., STIX, OpenIOC, custom CSV).
12 | This diversity of formats presents a challenge when an organization using one format has the opportunity to join sharing
13 | communities where the members share data in different formats. Similarly, merging communities with different CTI formats
14 | can seem a nearly insurmountable challenge, and proceeds at the pace of the slowest member in each community to adopt
15 | a different format.
16 |
17 | Although simple translators can be written to convert data from one format to another, challenges to this approach include the following:
18 |
19 | An exponential increase in the effort required to support new formats.
20 | Potential loss of meaning and context (semantics) between formats.
21 |
22 | The obstacles posed by these challenges lead to the formation of “islands of sharing” defined not by the communities themselves
23 | but by the sharing formats. This pattern leaves smaller organizations, which tend to be unable to participate at all, isolated and defenseless.
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 | ####The Solution
32 | FlexT is a tool that enables dynamic translation between formats. FlexT accomplishes this translation by “digesting” CTI
33 | data down to its semantic roots (meaning and context). As Figure 1 shows, making this objective the core of the translation
34 | effort simplifies the process. This approach allows the use of new formats with improved scalability and ensures that the
35 | original meaning and context of CTI data are preserved.
36 |
37 | A “format” in FlexT is broken down into three components:
38 |
39 | + `Syntax` – A specification of valid document characters and their composition (e.g., CSV, XML, JSON).
40 | + `Schema` – A specification of the valid terms, the data they can convey, and restrictions on their use (e.g., STIX, OpenIOC, IODEF).
41 | + `Semantics` – A definition of the meaning of terms (e.g., SourceIPAddress is the session-originating IPv4 address). A short illustrative sketch of this breakdown appears at the end of this page.
42 |
43 | Using FlexT, organizations are empowered to participate in sharing communities using any type of CTI, in any format. When
44 | coupled with a toolset such as the Cyber Fed Model's (CFM's) Last Quarter Mile Toolset (LQMToolset), participants can not only
45 | share and process CTI but also take automated action based on that intelligence with an array of security endpoint devices.
46 |
47 | ####Features
48 | Feature | Enabling users to
49 | :-----: | :-------:
50 | Multiple Interfaces | Drive FlexT from the command line or as a Python library, with a RESTful API planned.
51 | Accurate translation | Convert CTI between formats while preserving the original meaning and context (semantics) of the data.
52 | Easy extensibility | When supporting a new schema, simply define a mapping JSON file and immediately convert to/from any other supported format.
53 |
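54 | To make the three format components concrete, here is an illustrative sketch (the records and field names below are invented for this example) of one semantic fact carried by two different syntax/schema pairs:
55 |
56 | ```python
57 | # One semantic fact: "the session-originating IPv4 address is 10.2.3.4".
58 | csv_record = "10.2.3.4,2016-02-21T14:00:00Z"   # CSV syntax; the schema fixes the column order
59 | json_record = {"SourceIPAddress": "10.2.3.4"}  # JSON syntax; "SourceIPAddress" is a schema term
60 |
61 | # FlexT maps both records onto the same semantic concept, so converting
62 | # between them preserves meaning rather than merely renaming fields.
63 | ```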
--------------------------------------------------------------------------------
/docs/install.md:
--------------------------------------------------------------------------------
1 | # Dependencies
2 | ##Install
3 | FlexTransform (FlexT) runs on Python 3, so **Python 3** must be installed before FlexT can be used.
4 | Once Python 3 is installed, FlexT can be installed via *pip3*. FlexT requires the Python package *lxml*, which itself has
5 | UNIX dependencies: *lxml* needs **libxml** and **libxslt** as well as their associated
6 | development packages. On Debian-based systems, the following command installs them:
7 |
8 | ```bash
9 | $ sudo apt-get install libxml2-dev libxslt-dev python-dev
10 | ```
11 | Then install FlexT with *pip3*:
12 | ```shell
13 | $ pip3 install FlexTransform
14 | ```
15 |
16 | ##Getting Started
17 | When using FlexT from the command line, certain arguments are required to perform the
18 | conversion.
19 | ```shell
20 | --src-config CONFIG
21 | ```
22 | This argument passes in the parser configuration file for the source file.
23 | ```shell
24 | --src SRC
25 | ```
26 | This argument is the source file that will be transformed.
27 | ```shell
28 | --dst-config CONFIG
29 | ```
30 | This argument passes in the parser configuration file for the destination file.
31 | ```shell
32 | --dst DST
33 | ```
34 | This argument passes in the path where the transformed file will be written.
35 |
36 | These arguments are all you need to get started with FlexT when using one of the supported schemas. If an unsupported
37 | schema is going to be used, users can pass in additional arguments for the source schema, the destination schema, or both.
38 |
39 | ```shell
40 | --source-schema-IRI
41 | ```
42 | Used to pass in the ontology IRI for the source schema.
43 |
44 | ```shell
45 | --destination-schema-IRI
46 | ```
47 | Used to pass in the ontology IRI for the destination schema.
48 |
49 | ```shell
50 | flext --src /Path/to/file --src-config /Path/to/file --dst /Path/to/file --dst-config /Path/to/file
51 | ```
52 | This is the most basic form of the command, transforming a file between two currently supported schemas.
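53 |
54 | Once installed, a quick way to confirm that FlexT (and its *lxml* dependency) loaded correctly is to instantiate it from Python; a minimal sketch:
55 |
56 | ```python
57 | # Sanity check: import and instantiate the transformer.
58 | from FlexTransform import FlexTransform
59 |
60 | flexT = FlexTransform.FlexTransform()
61 | print("FlexTransform loaded successfully")
62 | ```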
--------------------------------------------------------------------------------
/docs/usage.md:
--------------------------------------------------------------------------------
1 | ## Usage
2 | Currently, FlexT supports command-line access and use as a Python library; future development will add a RESTful API with a local web server.
3 | ### Python Library
4 | FlexT accepts file-like objects, so in addition to handles returned by the `open` command, you can also use in-memory Python objects like `io.StringIO` (see the example at the end of this page).
5 | ```python
6 | from FlexTransform import FlexTransform
7 | flexT = FlexTransform.FlexTransform()
8 |
9 | with open("/Users/cfm/FlexT/FlexTransform/resources/sampleConfigurations/cfm13.cfg", "r") as input_cfg:
10 | flexT.AddParser("cfm13", input_cfg)
11 | with open("/Users/cfm/FlexT/FlexTransform/resources/sampleConfigurations/stix_tlp.cfg", "r") as output_cfg:
12 | flexT.AddParser("stix", output_cfg)
13 |
14 | with open("/Users/cfm/input.xml", "r") as input_file:
15 | with open("/Users/cfm/output.xml", "w") as output_file:
16 | flexT.TransformFile(input_file, "cfm13", "stix", targetFileName=output_file)
17 | ```
18 | ### Command Line
19 | ```shell
20 | $ flext --src inputFile.txt --src-config srcConfig.cfg --dst outputFile.xml --dst-config dstConfig.cfg
21 | ```
22 | + Required arguments
23 | + `src` - Source file
24 | + `src-config` - Source file parser configuration
25 | + `dst` - Destination file
26 | + `dst-config` - Destination file parser configuration
27 | + Optional arguments
28 | + `src-metadata` - Source metadata file
29 | + `tbox-uri` - The URI location of the TBOX ontology file
30 | + `source-schema-IRI` - Ontological IRI for the source
31 | + `destination-schema-IRI` - Ontological IRI for the destination
32 |
33 |
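34 | ### In-memory objects
35 | Because FlexT only needs file-like objects, a transformation can run entirely in memory. A minimal sketch, assuming the "cfm13" and "stix" parsers were registered as in the library example above; `cfm13_xml` is a placeholder string holding a CFM 1.3 document:
36 |
37 | ```python
38 | import io
39 |
40 | input_file = io.StringIO(cfm13_xml)   # cfm13_xml: placeholder source document
41 | output_file = io.StringIO()
42 |
43 | flexT.TransformFile(input_file, "cfm13", "stix", targetFileName=output_file)
44 | print(output_file.getvalue())         # the transformed STIX document
45 | ```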
--------------------------------------------------------------------------------
/flexT_dir_input.py:
--------------------------------------------------------------------------------
1 | import os
2 | from FlexTransform import FlexTransform
3 |
4 | if __name__ == '__main__':
5 |
6 | # THESE LOCATIONS MATTER!!
7 | dir_location = "/Users/mhend/Downloads/test/"
8 | src_config_location = "/Users/mhend/git/FlexTransform/FlexTransform/resources/sampleConfigurations/cfm13.cfg"
9 | dst_config_location = "/Users/mhend/git/FlexTransform/FlexTransform/resources/sampleConfigurations/stix_tlp.cfg"
10 | # if output path is absolute, follows that path and creates if necessary.
11 | # If it's relative, then uses the dir containing files as root, creates if necessary
12 | output_folder_path = "FlexT output"
13 |
14 | if not os.path.isdir(dir_location):
15 | print("File path either doesn't exist or isn't a location, exiting")
16 | exit(1)
17 |
18 | for root, dirs, files in os.walk(dir_location):
19 | # Are there files in directory?
20 | if not files:
21 | print("Directory is empty, exiting")
22 | exit(1)
23 | # Is output path abs or relative?
24 | if not os.path.isabs(output_folder_path):
25 | output_folder_path = os.path.join(root, output_folder_path)
26 | if not os.path.isdir(output_folder_path):
27 | os.makedirs(output_folder_path)
28 |
29 | flexT = FlexTransform.FlexTransform()
30 | with open(src_config_location, 'r') as input_file:
31 | flexT.AddParser("src", input_file)
32 | with open(dst_config_location, 'r') as input_file:
33 | flexT.AddParser("dst", input_file)
34 |
35 | for name in files:
36 | if name.startswith("."):
37 | continue
38 | iname = os.path.join(root, name)
39 | oname = os.path.join(output_folder_path, name)
40 | print(iname, oname)
41 | with open(iname, "r") as input_file:
42 | with open(oname, "w") as output_file:
43 | try:
44 | print("Starting processing file: {}".format(iname))
45 | flexT.TransformFile(input_file, "src", "dst", targetFileName=output_file)
46 | except Exception as e:
47 | print(e)
48 | print("Exception in found in file, skipping it: {}".format(iname))
49 | break
50 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: FlexTransform
2 | pages:
3 | - index.md
4 | - install.md
5 | - usage.md
6 | - examples.md
7 | - contribute.md
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(
4 | name='FlexTransform',
5 | version='1.2.1',
6 | description='Flexible Transform is a tool that enables dynamic translation between formats',
7 | long_description='Flexible Transform (FlexT) enables dynamic translation between formats, accomplishing this by digesting CTI data down to its semantic roots (meaning and context)',
8 | url='https://github.com/anl-cyberscience/FlexTransform/',
9 | author='The CFM Team',
10 | author_email='fedhelp@anl.gov',
11 | classifiers=[
12 | # See: https://pypi.python.org/pypi?%3Aaction=list_classifiers
13 |
14 | # How mature is this project? Common values are
15 | # Development Status :: 1 - Planning
16 | # Development Status :: 2 - Pre-Alpha
17 | # Development Status :: 3 - Alpha
18 | # Development Status :: 4 - Beta
19 | # Development Status :: 5 - Production/Stable
20 | # Development Status :: 6 - Mature
21 | # Development Status :: 7 - Inactive
22 | 'Development Status :: 5 - Production/Stable',
23 |
24 | # Indicate who your project is intended for
25 | 'Intended Audience :: Information Technology',
26 | 'Topic :: Security',
27 |
28 | # Pick your license as you wish (should match 'license' above)
29 | 'License :: Other/Proprietary License',
30 |
31 | # Specify the Python versions you support here. In particular, ensure
32 | # that you indicate whether you support Python 2, Python 3 or both.
33 | 'Programming Language :: Python :: 3',
34 | 'Programming Language :: Python :: 3.3',
35 | 'Programming Language :: Python :: 3.4',
36 | 'Programming Language :: Python :: 3.5',
37 | 'Programming Language :: Python :: 3.6',
38 | ],
39 | keywords='',
40 | packages=find_packages(exclude=['contrib', 'doc', 'tests*']),
41 | install_requires=[
42 | 'arrow',
43 | 'python-dateutil',
44 | 'lxml',
45 | 'pytz',
46 | 'dumper',
47 | 'rdflib',
48 | ],
49 | entry_points={
50 | 'console_scripts': [
51 | 'flext = FlexTransform.FlexT:main',
52 | 'flextbatch = FlexTransform.FlexTBatch:main'
53 | ]
54 | },
55 | test_suite='nose.collector',
56 | tests_require=['nose'],
57 | include_package_data=True,
58 | package_data={
59 | 'FlexTransform': [
60 | 'resources/*.xml',
61 | 'resources/*.owl',
62 | 'resources/*.zip',
63 | 'resources/*.rdf',
64 | 'resources/sampleConfigurations/*',
65 | 'resources/schemaDefinitions/*',
66 | 'resources/schemas/*'
67 | ]
68 | }
69 | )
70 |
--------------------------------------------------------------------------------