├── example
│   ├── __init__.py
│   ├── abuse_ch
│   │   ├── __init__.py
│   │   ├── abuse.ch.jpg
│   │   ├── abuse.ch.small.jpg
│   │   └── generate_abusech_feed.py
│   ├── isight
│   │   ├── __init__.py
│   │   ├── isight.png
│   │   ├── isight.small.jpg
│   │   ├── isight.config.template
│   │   ├── isight_api.py
│   │   ├── isight_config.py
│   │   ├── isight_helpers.py
│   │   ├── importer.py
│   │   └── generate_isight_feed.py
│   ├── mdl
│   │   ├── mdl.png
│   │   ├── mdl.small.jpg
│   │   ├── __init__.py
│   │   └── generate_mdl_feed.py
│   ├── tor
│   │   ├── tor.png
│   │   ├── tor.small.jpg
│   │   ├── __init__.py
│   │   └── generate_tor_feed.py
│   ├── stix
│   │   ├── images
│   │   │   └── stix.gif
│   │   ├── sample_data
│   │   │   ├── indicator-for-c2-ip-address.xml
│   │   │   ├── STIX_Domain_Watchlist.xml
│   │   │   ├── STIX_URL_Watchlist.xml
│   │   │   ├── STIX_IP_Watchlist.xml
│   │   │   ├── STIX_FileHash_Watchlist.xml.badmd5s
│   │   │   ├── command-and-control-ip-range.xml
│   │   │   └── STIX_Phishing_Indicator.xml
│   │   ├── README.md
│   │   └── stix_to_feed.py
│   ├── README.md
│   └── raw
│       └── generate_feed_from_raw_iocs.py
├── test
│   ├── resources
│   │   ├── taxii-logov2.png
│   │   └── template.json
│   ├── __init__.py
│   ├── common.py
│   ├── test_05_validate_feed.py
│   ├── test_01_common_integrity.py
│   ├── test_02_cbfeed.py
│   └── test_03_cbfeedinfo.py
├── fslds-build.md
├── .travis.yml
├── CHANGELOG.md
├── requirements.txt
├── requirements.in
├── cbfeeds
│   ├── __init__.py
│   ├── exceptions.py
│   └── feed.py
├── .gitignore
├── test.py
├── setup.py
├── LICENSE.md
├── percent_encode_query.py
├── validate_feed.py
└── README.md
/example/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/example/abuse_ch/__init__.py:
--------------------------------------------------------------------------------
1 | from .generate_abusech_feed import create
2 |
--------------------------------------------------------------------------------
/example/isight/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'cb'
2 | from .generate_isight_feed import create
3 |
--------------------------------------------------------------------------------
/example/mdl/mdl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/mdl/mdl.png
--------------------------------------------------------------------------------
/example/tor/tor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/tor/tor.png
--------------------------------------------------------------------------------
/example/isight/isight.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/isight/isight.png
--------------------------------------------------------------------------------
/example/mdl/mdl.small.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/mdl/mdl.small.jpg
--------------------------------------------------------------------------------
/example/tor/tor.small.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/tor/tor.small.jpg
--------------------------------------------------------------------------------
/example/abuse_ch/abuse.ch.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/abuse_ch/abuse.ch.jpg
--------------------------------------------------------------------------------
/example/stix/images/stix.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/stix/images/stix.gif
--------------------------------------------------------------------------------
/example/isight/isight.small.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/isight/isight.small.jpg
--------------------------------------------------------------------------------
/test/resources/taxii-logov2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/test/resources/taxii-logov2.png
--------------------------------------------------------------------------------
/example/abuse_ch/abuse.ch.small.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/abuse_ch/abuse.ch.small.jpg
--------------------------------------------------------------------------------
/fslds-build.md:
--------------------------------------------------------------------------------
1 | [Travis CI build](https://travis-ci.org/fslds/cbfeeds)
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "3.6"
4 | install:
5 | - "pip install -r requirements.txt"
6 | - "python setup.py install"
7 | script: "python test.py"
8 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
--------------------------------------------------------------------------------
/example/mdl/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 | from .generate_mdl_feed import create
6 |
--------------------------------------------------------------------------------
/example/tor/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 | from .generate_tor_feed import create
6 |
--------------------------------------------------------------------------------
/example/README.md:
--------------------------------------------------------------------------------
1 | # Examples
2 | This folder contains examples of using CbFeeds with various external sources.
3 |
4 | > *NOTE*: At present, only the `mdl` and `tor` examples have been converted to Python 3, since they are used in `test.py`. (The `abuse_ch` example was removed from testing because its feed data is no longer returned as of 2019.)
5 |
--------------------------------------------------------------------------------
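Each converted example package re-exports a `create()` function from its `__init__.py` that returns the serialized feed. A minimal sketch of driving the converted examples from a script, assuming it runs from the repository root (mirroring how `test.py` calls them):

```python
# Minimal sketch: build the mdl and tor example feeds and write them to disk.
# Assumes the script runs from the repository root so that `example` is importable
# and that the remote data sources are reachable.
import example.mdl as mdl
import example.tor as tor

for name, module in (("mdl", mdl), ("tor", tor)):
    feed_json = module.create()              # returns the feed as a JSON string
    with open(f"{name}.feed", "w") as fp:    # *.feed files are covered by .gitignore
        fp.write(feed_json)
```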
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | CHANGELOG.md
2 | # Carbon Black EDR Alliance Feed Library Changelog
3 |
4 | ## v1.0.0
5 | #### Features
6 | * Converted to python3
7 | * Added handling of sha256, ja3, ja3s and query reports
8 | * Added unit tests
9 |
10 | > _NOTE: Not all examples have been converted at this time!_
11 |
12 | ## v0.8.0
13 | #### Features
14 | * Initial Release
15 |
16 |
--------------------------------------------------------------------------------
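For context on the v1.0.0 entry: `sha256`, `ja3` and `ja3s` are additional keys a report's `iocs` block can carry alongside the `dns`, `ipv4` and `md5` keys used by the examples in this repository. A hedged sketch of such a block; the values are illustrative placeholders, and the authoritative validation rules live in `cbfeeds/feed.py`:

```python
# Sketch only: an `iocs` mapping using the newer IOC types named in the v1.0.0 changelog.
# Values are placeholders, not real indicators; JA3/JA3S fingerprints are MD5-style hex digests.
iocs = {
    "sha256": ["e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"],
    "ja3": ["72a589da586844d7f0818ce684948eea"],
    "ja3s": ["a95ca7eab4d47d051a5cd4fb7b6005dc"],
}
```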
/requirements.txt:
--------------------------------------------------------------------------------
1 | #
2 | # This file is autogenerated by pip-compile
3 | # To update, run:
4 | #
5 | # pip-compile requirements.in
6 | #
7 |
8 | certifi==2020.6.20 # via requests
9 | chardet==3.0.4 # via requests
10 | idna==2.10 # via requests
11 | requests==2.24.0 # via -r requirements.in
12 | urllib3==1.25.11 # via requests
13 |
--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
1 | # cbfeeds requirements file
2 | # ~~~~~~~~~~~~~~~~~~~~~~~~~
3 | # If changes to this file are made, use `pip-compile -U -q requirements.in` at the
4 | # command line from within the repository root folder.
5 | #
6 | # NOTE: When compiled, you need to remove the line:
7 | # --index-url https://artifactory-pub.bit9.local/artifactory/api/pypi/pypi-virtual/simple
8 | ################################################################################
9 |
10 | requests>=1.2.3
11 |
--------------------------------------------------------------------------------
/cbfeeds/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 | __all__ = ["CbFeed", "CbFeedInfo", "CbReport", "CbIconError", "CbInvalidFeed", "CbInvalidFeedInfo", "CbInvalidReport",
6 | "CbException"]
7 |
8 | from .exceptions import CbException, CbIconError, CbInvalidFeed, CbInvalidFeedInfo, CbInvalidReport
9 | from .feed import CbFeed, CbFeedInfo, CbReport
10 |
--------------------------------------------------------------------------------
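The names exported above are the library's public API. A minimal sketch of building and serializing a feed, modeled on the tor/mdl generators later in this dump; the field values are illustrative and the optional icon fields are omitted for brevity (the examples show how icon paths are supplied):

```python
# Minimal sketch of the cbfeeds public API, mirroring the example feed generators.
import time

from cbfeeds import CbFeed, CbFeedInfo, CbReport

report = CbReport(iocs={"dns": ["malicious.example.com"]},   # illustrative IOC
                  timestamp=int(time.mktime(time.gmtime())),
                  link="http://example.com/source",
                  id="example-report-1",
                  title="Example report",
                  score=50)

feedinfo = CbFeedInfo(name="examplefeed",
                      display_name="Example Feed",
                      provider_url="http://example.com",
                      summary="A demonstration feed.",
                      tech_data="There are no requirements to share any data to receive this feed.",
                      category="Open Source")

feed = CbFeed(feedinfo, [report])
print(feed.dump())   # serialized feed JSON, as written to disk by the example generators
```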
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 |
3 | # C extensions
4 | *.so
5 |
6 | # Packages
7 | *.egg
8 | *.egg-info
9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 |
21 | # Installer logs
22 | pip-log.txt
23 |
24 | # Unit test / coverage reports
25 | .coverage
26 | .tox
27 | nosetests.xml
28 |
29 | # Translations
30 | *.mo
31 |
32 | # Mr Developer
33 | .mr.developer.cfg
34 | .project
35 | .pydevproject
36 |
37 | # PyCharm
38 | .idea/
39 |
40 |
41 | # Test Feeds
42 | *.feed
43 | .DS_Store
44 |
--------------------------------------------------------------------------------
/example/isight/isight.config.template:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Carbon Black iSIGHT Partners Feed Generator Config Template
3 | ###############################################################################
4 |
5 | # These credentials come from iSight
6 | #
7 | iSightRemoteImportUsername=
8 | iSightRemoteImportPassword=
9 |
10 | iSightRemoteImportPublicKey=
11 | iSightRemoteImportPrivateKey=
12 |
13 | # URL of iSight REST API endpoint
14 | # Effective 15-Oct-2014, mysight-api.isightpartners.com is deprecated in favor of api.isightpartners.com
15 | #
16 | iSightRemoteImportUrl=https://api.isightpartners.com/
17 |
18 | # Number of days (relative to today) to back-pull reports from
19 | #
20 | iSightRemoteImportDaysBack=80
21 |
--------------------------------------------------------------------------------
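This template is parsed by `ISightConfig` (in `example/isight/isight_config.py`, later in this dump), which reads `name=value` lines and exposes them as attributes; `ISightAPI.from_config()` then consumes the result. A hedged sketch of that wiring, noting that the isight example itself still targets Python 2 (see `example/README.md`):

```python
# Sketch of the intended wiring for the isight example (illustrative only; the isight
# modules in this repository have not been converted to Python 3).
# Assumes a filled-in copy of the template saved as `isight.config` and that these
# modules are importable from the working directory (example/isight).
from isight_config import ISightConfig
from isight_api import ISightAPI

config = ISightConfig("isight.config")
api = ISightAPI.from_config(config)
csv_data = api.get_i_and_w(config.iSightRemoteImportDaysBack)
```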
/cbfeeds/exceptions.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 | __all__ = ["CbException", "CbIconError", "CbInvalidFeed", "CbInvalidFeedInfo", "CbInvalidReport"]
6 |
7 |
8 | # CBFeeds Exception set
9 | class CbException(Exception):
10 | """CBFeeds base exception class"""
11 | pass
12 |
13 |
14 | class CbIconError(CbException):
15 | """Exception for icon related issues"""
16 | pass
17 |
18 |
19 | class CbInvalidFeed(CbException):
20 | """Exception for problems with overall feed structure"""
21 | pass
22 |
23 |
24 | class CbInvalidFeedInfo(CbException):
25 | """Exception for problems with feedinfo information"""
26 | pass
27 |
28 |
29 | class CbInvalidReport(CbException):
30 | """Exception for problems with report information"""
31 | pass
32 |
--------------------------------------------------------------------------------
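All of these exceptions derive from `CbException`, so callers can catch the base class for coarse handling or the subclasses for targeted messages, as the unit tests later in this dump do. A minimal sketch:

```python
# Minimal sketch: handling cbfeeds validation errors at different granularities.
import cbfeeds

def is_valid(feed: cbfeeds.CbFeed) -> bool:
    try:
        feed.validate()
        return True
    except cbfeeds.CbInvalidFeedInfo as err:
        print(f"feedinfo problem: {err}")
    except cbfeeds.CbInvalidReport as err:
        print(f"report problem: {err}")
    except cbfeeds.CbException as err:   # any other cbfeeds error, e.g. icon issues
        print(f"feed problem: {err}")
    return False
```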
/test.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 | import logging
6 | import sys
7 | import unittest
8 |
9 |
10 | class TestCbFeedExamples(unittest.TestCase):
11 | # NOTE: zeus tracker returns: "# ZeuS Tracker has been discontinued on Jul 8th, 2019", so
12 | # test_abusech has been removed.
13 |
14 | def test_mdl(self):
15 | import example.mdl as mdl
16 | mdl.generate_mdl_feed.DAYS_BACK = None # get all data
17 | mdl.create()
18 |
19 | def test_tor(self):
20 | import example.tor as tor
21 | tor.create()
22 |
23 |
24 | if __name__ == '__main__':
25 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='[%(filename)s:%(lineno)d] %(message)s')
26 |
27 | # run the unit tests
28 | #
29 | unittest.main()
30 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | python-cbfeeds
4 | """
5 |
6 | from setuptools import setup
7 |
8 | setup(
9 | name='cbfeeds',
10 | version='1.0.0',
11 | url='http://github.com/carbonblack/cbfeeds',
12 | license='MIT',
13 | author='Carbon Black',
14 | author_email='dev-support@carbonblack.com',
15 | description='Carbon Black Alliance Feeds',
16 | long_description=__doc__,
17 | packages=['cbfeeds', ],
18 | include_package_data=True,
19 | #package_dir = {'': 'src'},
20 | zip_safe=False,
21 | platforms='any',
22 | classifiers=[
23 | 'Environment :: Web Environment',
24 | 'Intended Audience :: Developers',
25 | 'Operating System :: OS Independent',
26 | 'Programming Language :: Python',
27 | 'Topic :: Software Development :: Libraries :: Python Modules'
28 | ],
29 | scripts=['validate_feed.py'],
30 | requires=['requests']
31 |
32 | )
33 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # License
2 |
3 | ```
4 | The MIT License (MIT)
5 |
6 | Copyright (c) 2016-2018 Carbon Black
7 |
8 | Permission is hereby granted, free of charge, to any person obtaining a copy of
9 | this software and associated documentation files (the "Software"), to deal in
10 | the Software without restriction, including without limitation the rights to
11 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
12 | the Software, and to permit persons to whom the Software is furnished to do so,
13 | subject to the following conditions:
14 |
15 | The above copyright notice and this permission notice shall be included in all
16 | copies or substantial portions of the Software.
17 |
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
20 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
21 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
22 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 | ```
25 |
--------------------------------------------------------------------------------
/test/common.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 |
6 | import json
7 | import os
8 | import unittest
9 | from typing import Any, Dict, Tuple
10 |
11 | import cbfeeds
12 |
13 | HOME = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
14 | RESOURCE_PATH_PREFIX = os.path.join(HOME, 'test', 'resources')
15 |
16 |
17 | class TestCommon(unittest.TestCase):
18 | """
19 | Common class for all tests.
20 | """
21 |
22 | # standard test feed file
23 | _test_feed = "./testfeed.json"
24 |
25 | def tearDown(self):
26 | self._clear_test_feed()
27 |
28 | def _clear_test_feed(self) -> None:
29 | """
30 | Remove any local test feeds, if they exist.
31 | """
32 | if os.path.exists(self._test_feed):
33 | os.chmod(self._test_feed, mode=0o777)
34 | os.remove(self._test_feed)
35 |
36 | def _load_feed_file(self, source: str = None) -> Tuple[Dict[str, Any], cbfeeds.CbFeed]:
37 | """
38 | Copy template feed file into memory, mangle as needed, save locally for testing.
39 |
40 | :param source: Alternate template file to read
41 | :return: Tuple of json object (to optionally mangle) and feed object
42 | """
43 | use_source = "template.json" if source is None else source
44 | with open(os.path.join(RESOURCE_PATH_PREFIX, use_source), 'r') as fp:
45 | json_obj = json.load(fp)
46 | self._save_test_feed(json_obj)
47 |
48 | feed = cbfeeds.CbFeed(json_obj["feedinfo"], json_obj["reports"])
49 | return json_obj, feed
50 |
51 | def _save_test_feed(self, json_obj: Dict[str, Any]) -> cbfeeds.CbFeed:
52 | """
53 | Save json object (potentially mangled) to test feed file.
54 |
55 | :param json_obj: source json
56 | :return: potentially mangled feed object
57 | """
58 | with open(self._test_feed, 'w') as fp:
59 | json.dump(json_obj, fp, indent=4, sort_keys=True)
60 | feed = cbfeeds.CbFeed(json_obj["feedinfo"], json_obj["reports"])
61 | return feed
62 |
--------------------------------------------------------------------------------
/example/isight/isight_api.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import hmac
3 | import logging
4 | import requests
5 |
6 | _logger = logging.getLogger(__name__)
7 |
8 | class ISightAPI(object):
9 | """
10 | Helper class for talking to iSIGHT Partners remote API.
11 | """
12 | @staticmethod
13 | def from_config(config):
14 | return ISightAPI( config.iSightRemoteImportUrl,
15 | config.iSightRemoteImportUsername,
16 | config.iSightRemoteImportPassword,
17 | config.iSightRemoteImportPublicKey,
18 | config.iSightRemoteImportPrivateKey)
19 |
20 | def __init__(self, base_url, username, password, public_key, private_key):
21 | self.base_url = base_url
22 | self.username = username
23 | self.password = password
24 | self.public_key = public_key
25 | self.private_key = private_key
26 |
27 | query = None
28 | hashed_query = hmac.new(private_key, query, hashlib.sha256).hexdigest()
29 |
30 | self.headers = {
31 | 'X-Auth' : public_key,
32 | 'X-Auth-Hash' : hashed_query,
33 | 'Authorization' : self.__encode_user_creds(username, password)
34 | }
35 |
36 | def __encode_user_creds(self, user, passw):
37 | """
38 | Private function to setup some Basic Auth stuff...
39 | """
40 | return "Basic " + (user + ":" + passw).encode("base64").rstrip()
41 |
42 | def get_i_and_w(self, days_back_to_retrieve):
43 | """
44 | Retrieve a CSV file of data of all reports from (now-days_back_to_retrieve) until now.
45 | """
46 | params = {'daysBack': days_back_to_retrieve, 'days': days_back_to_retrieve}
47 | url = "%sreport/view/i_and_w" % (self.base_url)
48 |
49 | _logger.info("Connecting to remote API '%s' using params: %s" % (url, params))
50 |
51 | resp = requests.get(url, params=params, headers=self.headers)
52 | resp.raise_for_status()
53 | return resp.content
54 |
55 | def get_report(self, report_id, format='xml'):
56 | """
57 | Download a report in a particular format.
58 | """
59 | url = "%sreport/view/docid/%s" % (self.base_url, report_id)
60 | params = {'format':format}
61 | resp = requests.get(url, params=params, headers=self.headers)
62 | resp.raise_for_status()
63 | return resp.content
64 |
--------------------------------------------------------------------------------
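As noted in `example/README.md`, the isight example has not been converted to Python 3: `__encode_user_creds` relies on the Python 2-only `str.encode("base64")`, and `hmac.new` is handed `str` keys. A hedged Python 3 sketch of the same header construction (not the shipped implementation):

```python
# Python 3 sketch of the auth headers built in ISightAPI.__init__ / __encode_user_creds.
# Illustrative only; the shipped example targets Python 2.
import base64
import hashlib
import hmac

def build_headers(username: str, password: str, public_key: str, private_key: str) -> dict:
    creds = base64.b64encode(f"{username}:{password}".encode("utf-8")).decode("ascii")
    # The original hashes an (empty) query with HMAC-SHA256 keyed by the private key.
    hashed_query = hmac.new(private_key.encode("utf-8"), b"", hashlib.sha256).hexdigest()
    return {
        "X-Auth": public_key,
        "X-Auth-Hash": hashed_query,
        "Authorization": f"Basic {creds}",
    }
```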
/test/test_05_validate_feed.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 |
6 | import os
7 |
8 | import cbfeeds
9 | import validate_feed
10 | from common import TestCommon
11 |
12 |
13 | class TestValidateFeed(TestCommon):
14 | """
15 | Verify that the validate_feed utility methods work as expected.
16 | """
17 |
18 | def test_01a_neg_file_missing(self):
19 | """
20 | Verify that a non-existent file is trapped
21 | """
22 | try:
23 | validate_feed.validate_file("./nonesuch.json")
24 | self.fail("Did not get expected exception!")
25 | except cbfeeds.CbException:
26 | pass
27 |
28 | def test_01b_neg_file_unreadable(self):
29 | """
30 | Verify that a file that cannot be read is trapped.
31 | """
32 | info, feed = self._load_feed_file()
33 | os.chmod(self._test_feed, mode=0o000)
34 |
35 | try:
36 | validate_feed.validate_file(self._test_feed)
37 | self.fail("Did not get expected exception!")
38 | except cbfeeds.CbException:
39 | pass
40 |
41 | def test_02_neg_not_json(self):
42 | """
43 | Verify that non-json file contents are trapped
44 | """
45 | try:
46 | validate_feed.validate_json("This is not JSON!")
47 | self.fail("Did not get expected exception!")
48 | except cbfeeds.CbException:
49 | pass
50 |
51 | def test_03a_neg_missing_feedinfo(self):
52 | """
53 | Verify that feed information missing a feedinfo entry is detected.
54 | """
55 | info, _ = self._load_feed_file()
56 | del info['feedinfo']
57 | try:
58 | validate_feed.validate_feed(info)
59 | self.fail("Did not get expected exception!")
60 | except cbfeeds.CbException as err:
61 | assert "No 'feedinfo' element found!" in f"{err}"
62 |
63 | def test_03b_neg_missing_reports(self):
64 | """
65 | Verify that feed information missing a reports entry is detected.
66 | """
67 | info, _ = self._load_feed_file()
68 | del info['reports']
69 | try:
70 | validate_feed.validate_feed(info)
71 | self.fail("Did not get expected exception!")
72 | except cbfeeds.CbException as err:
73 | assert "No 'reports' element found!" in f"{err}"
74 |
--------------------------------------------------------------------------------
/example/isight/isight_config.py:
--------------------------------------------------------------------------------
1 |
2 | class ISightConfig(object):
3 | """
4 | Configuration for iSight Connector.
5 |
6 | This class populates fields by reading a config file.
7 | """
8 | def __init__(self, config_filepath):
9 |
10 | self.keys = [
11 | "source_path",
12 | "iSightRemoteImportUsername",
13 | "iSightRemoteImportPassword",
14 | "iSightRemoteImportPublicKey",
15 | "iSightRemoteImportPrivateKey",
16 | "iSightRemoteImportUrl",
17 | "iSightRemoteImportDaysBack",
18 | "iSightLocalRawDataFilename",
19 | ]
20 |
21 | self.source_path = config_filepath
22 |
23 | # HARDCODED DEFAULTS
24 | self.iSightRemoteImportUsername = None
25 | self.iSightRemoteImportPassword = None
26 | self.iSightRemoteImportPublicKey = None
27 | self.iSightRemoteImportPrivateKey = None
28 | self.iSightRemoteImportUrl = "https://mysight-api.isightpartners.com/"
29 | self.iSightRemoteImportDaysBack=180
30 | self.iSightLocalRawDataFilename = None
31 |
32 | with open(config_filepath, "r") as cfg:
33 | lineno = 0
34 | for line in cfg:
35 | try:
36 | lineno += 1
37 |
38 | line = line.strip()
39 | if not line or line[0] == "#":
40 | continue
41 |
42 | name, val = line.split("=", 1)
43 |
44 | # TODO validate name is within spec
45 | # -- this will require careful re-evaluation of config params as we are
46 | # now relying on properties being listed in .conf file even
47 | # though some of those properties were never listed in this class
48 |
49 | # if we are reading a new value for an existing attribute, lets make
50 | # sure we preserve the type
51 | try:
52 | existing_attr = getattr(self, name)
53 | if existing_attr is not None:
54 | val = type(existing_attr)(val)
55 | except AttributeError:
56 | pass
57 |
58 | setattr(self, name, val)
59 |
60 | except Exception as e:
61 | pass
62 |
63 | def as_dict(self):
64 | """
65 | """
66 | res = {}
67 | for key in self.keys:
68 | res[key] = getattr(self, key)
69 |
70 | return res
71 |
--------------------------------------------------------------------------------
/percent_encode_query.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved.
4 | ################################################################################
5 |
6 | import logging
7 | import optparse
8 | import sys
9 | from urllib import parse
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | def build_cli_parser() -> optparse.OptionParser:
15 | """
16 | Generate OptionParser to handle command line switches
17 |
18 | :return: optparse.OptionParser
19 | """
20 | usage = "usage: %prog [options]"
21 | desc = "Encode, using percent encoding, a Carbon Black query"
22 |
23 | cmd_parser = optparse.OptionParser(usage=usage, description=desc)
24 |
25 | cmd_parser.add_option("-q", "--query", action="store", type="string", dest="query",
26 | help="Query to encode")
27 | cmd_parser.add_option("-n", "--no-prepend", action="store_false", default=True, dest="prepend",
28 | help=('Do NOT prepend "q=" and "cb.urlver=1" when not found '
29 | 'in the query specified with "--query"'))
30 | return cmd_parser
31 |
32 |
33 | def is_query_complete(query: str) -> bool:
34 | """
35 | Returns an indication of whether the query already includes a q=, cb.q=, or cb.fq= prefix
36 |
37 | :param query: the query string to be checked
38 | :return: True if this looks like a CBR query
39 | """
40 | # check for raw query captured from the browser
41 | if query.startswith("cb.urlver="):
42 | return True
43 |
44 | # check for simpler versions
45 | if query.startswith("q=") or query.startswith("cb.q=") or query.startswith("cb.fq="):
46 | return True
47 | return False
48 |
49 |
50 | if __name__ == "__main__":
51 | parser = build_cli_parser()
52 | options, args = parser.parse_args(sys.argv)
53 |
54 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
55 |
56 | if not options.query:
57 | logger.error("-> Must specify a query to encode; use the -q switch or --help for usage")
58 | sys.exit(0)
59 |
60 | logger.info(f"Converting `{options.query}`...")
61 |
62 | # unless overridden by the operator, prepend cb.urlver=1&q= to the query if
63 | # it does not already exist. this makes it possible for a customer to copy and
64 | # paste a query from the CB UI, pass it through this script, and add it to a feed
65 | #
66 | # see CBAPI-7
67 | #
68 | prepend = "cb.urlver=1&q=" if options.prepend and not is_query_complete(options.query) else ""
69 | print("-" * 80 + f"\n {prepend}" + parse.quote_plus(options.query) + "\n" + "-" * 80)
70 |
--------------------------------------------------------------------------------
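The script ultimately prints the optional `cb.urlver=1&q=` prefix followed by the percent-encoded query. A small sketch of the same transformation done directly; the query string here is an illustrative EDR process search, not taken from this repository:

```python
# Sketch: the core transformation performed by percent_encode_query.py.
from urllib import parse

query = "process_name:svchost.exe"        # illustrative query; has no q=/cb.q= prefix yet
encoded = "cb.urlver=1&q=" + parse.quote_plus(query)
print(encoded)   # cb.urlver=1&q=process_name%3Asvchost.exe
```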
/example/stix/sample_data/indicator-for-c2-ip-address.xml:
--------------------------------------------------------------------------------
[XML markup stripped during extraction; only element text survives]
Example watchlist that contains IP information.
Indicators - Watchlist
IP Address for known C2 channel
IP Watchlist
10.0.0.0
C2 Behavior
--------------------------------------------------------------------------------
/example/stix/sample_data/STIX_Domain_Watchlist.xml:
--------------------------------------------------------------------------------
[XML markup stripped during extraction; only element text survives]
Example watchlist that contains domain information.
Indicators - Watchlist
Domain Watchlist
Sample domain Indicator for this watchlist
malicious1.example.com##comma##malicious2.example.com##comma##malicious3.example.com
--------------------------------------------------------------------------------
/example/stix/sample_data/STIX_URL_Watchlist.xml:
--------------------------------------------------------------------------------
[XML markup stripped during extraction; only element text survives]
Example watchlist that contains URL information.
Indicators - Watchlist
URL Watchlist
Sample URL Indicator for this watchlist
http://example.com/foo/malicious1.html##comma##http://example.com/foo/malicious2.html##comma##http://example.com/foo/malicious3.html
--------------------------------------------------------------------------------
/example/stix/sample_data/STIX_IP_Watchlist.xml:
--------------------------------------------------------------------------------
[XML markup stripped during extraction; only element text survives]
Example watchlist that contains IP information.
Indicators - Watchlist
IP Watchlist
Sample IP Address Indicator for this watchlist. This contains one indicator with a set of three IP addresses in the watchlist.
10.0.0.0##comma##10.0.0.1##comma##10.0.0.2
--------------------------------------------------------------------------------
/example/stix/README.md:
--------------------------------------------------------------------------------
1 | # STIX to Cb Feed
2 |
3 | STIX is the Structured Threat Information eXpression, developed and curated by MITRE as a serialization format for sharing cyber threat intelligence. You can find more information at http://stix.mitre.org.
4 |
5 | The objectives of STIX are notably larger than the CB Feeds format, so a one-to-one translation is not possible. However, for simpler STIX Package formats, it is possible to translate the STIX Package into a Carbon Black feed.
6 |
7 | *Note*: The diversity of STIX package structures can cause the translation to have unexpected results. Send us feedback (or a pull request!) with any recommendations or improvements surfaced by your source data!
8 |
9 | # stix_to_feed.py
10 |
11 | This script requires:
12 |
13 | * cbfeeds
14 | * python-stix
15 | * Docs: http://stix.readthedocs.org/en/latest/
16 | * Github: https://github.com/STIXProject/python-stix
17 | * PyPI: https://pypi.python.org/pypi/stix/
18 |
19 | Given a STIX Package or a directory of STIX Packages, it will translate all suitable indicators into a Cb Feed Report. Example:
20 |
21 | [root@localhost stix]$ python stix_to_feed.py -i sample_data/ -o stix.feed
22 | -> Including 3 observables from sample_data/command-and-control-ip-range.xml.
23 | -> Including 1 observables from sample_data/indicator-for-c2-ip-address.xml.
24 | -> Including 3 observables from sample_data/STIX_Domain_Watchlist.xml.
25 | -> Including 3 observables from sample_data/STIX_IP_Watchlist.xml.
26 | -> No suitable observables found in sample_data/STIX_Phishing_Indicator.xml; skipping.
27 | -> No suitable observables found in sample_data/STIX_URL_Watchlist.xml; skipping.
28 |
29 | Suitable indicators are:
30 |
31 | * DomainNameObjects
32 | * AddressValueObjects
33 | * FileObjects with MD5 Hash
34 |
35 | Only these objects are translated, and only when they have no conditional or an "Any"/"Equals" condition.
36 |
37 | The sample packages in the sample\_data directory are collected from the STIX documentation. This parser was also tested against the Mandiant APT1 and FireEye Poison Ivy reports. Those results:
38 |
39 | [root@localhost other_data]$ python stix_to_feed.py -i sample_data/ -o stix.feed
40 | -> Including 2046 observables from sample_data/APT1/Appendix_D_FQDNs.xml.
41 | -> Including 1007 observables from sample_data/APT1/Appendix_E_MD5s.xml.
42 | -> No suitable observables found in sample_data/APT1/Appendix_F_SSLCertificates.xml; skipping.
43 | -> Including 1797 observables from sample_data/APT1/Appendix_G_IOCs_Full.xml.
44 | -> No suitable observables found in sample_data/APT1/Appendix_G_IOCs_No_Observables.xml; skipping.
45 | -> Including 1797 observables from sample_data/APT1/Appendix_G_IOCs_No_OpenIOC.xml.
46 | -> No suitable observables found in sample_data/APT1/Mandiant_APT1_Report.xml; skipping.
47 | -> Including 506 observables from sample_data/Poison Ivy/fireeye-pivy-indicators.xml.
48 | -> Including 506 observables from sample_data/Poison Ivy/fireeye-pivy-observables.xml.
49 | -> Including 506 observables from sample_data/Poison Ivy/fireeye-pivy-report-with-indicators.xml.
50 | -> No suitable observables found in sample_data/Poison Ivy/fireeye-pivy-report.xml; skipping.
51 |
52 | Those packages are too large to include in the sample data; they are available from the Samples page at MITRE STIX: http://stix.mitre.org/language/version1.1/samples.html.
53 |
54 | # Changelog
55 |
56 | 4 Aug 14 - 1.0 - initial cut
57 |
58 |
--------------------------------------------------------------------------------
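For orientation, each suitable STIX observable ends up inside a Cb feed report's `iocs` block. A hedged sketch of the general shape of a report built from the domain watchlist sample above; the id, title, link, timestamp and score values are placeholders, not the exact output of `stix_to_feed.py`:

```python
# Sketch only: the general shape of a Cb feed report built from STIX domain observables.
report = {
    "iocs": {"dns": ["malicious1.example.com",
                     "malicious2.example.com",
                     "malicious3.example.com"]},
    "timestamp": 1407110400,                     # placeholder epoch timestamp
    "link": "http://stix.mitre.org",             # placeholder link
    "id": "stix-domain-watchlist-example",       # placeholder report id
    "title": "STIX Domain Watchlist (sample)",   # placeholder title
    "score": 50,                                 # placeholder score
}
```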
/example/isight/isight_helpers.py:
--------------------------------------------------------------------------------
1 |
2 | import csv
3 | import time
4 |
5 | def remove_non_ascii(s): return "".join([x for x in s if ord(x)<128])
6 |
7 | def get_field(row, field_name, do_remove_non_ascii=False):
8 | val = row.get(field_name) or row.get(field_name.lower())
9 | if val:
10 | if do_remove_non_ascii:
11 | val = remove_non_ascii(val)
12 | return val.strip()
13 | return None
14 |
15 | def isight_csv_to_iocs_dict(isight_csv_entries):
16 | """
17 | Converts CSV data (with header) to dictionary of dict[tuple] = another dict,
18 | where tuple = (report_id, title, product_type, report_timestamp_in_epoch_secs)
19 |
20 | and dict[tuple] = {'md5':[...], 'ipaddr':[...], 'domain':[...]}
21 | """
22 | iocs_by_report_dict = {}
23 | if not isight_csv_entries:
24 | print("no entries provided")
25 | return iocs_by_report_dict
26 |
27 | reports = []
28 |
29 | for isight_csv in isight_csv_entries:
30 |
31 | iwcsv = csv.DictReader(isight_csv.split('\n'), delimiter=',', quotechar='"')
32 |
33 | i = 0
34 |
35 | for row in iwcsv:
36 | report_id = get_field(row, "ReportID")
37 | report_timestamp = int(get_field(row, "Publishdate_Mysql", True) or time.time())
38 | title = get_field(row, 'Title')
39 | product_type = get_field(row, 'Product_Type')
40 | ip = get_field(row, 'IPs', True)
41 | domain = get_field(row, 'Domain', True)
42 | md5 = get_field(row, 'MD5', True)
43 | attachment_md5 = get_field(row, 'Attachment_MD5', True)
44 |
45 | i = i + 1
46 |
47 | if not report_id:
48 | print(("Report did not have a report_id: %s" % title))
49 | continue
50 |
51 | # @todo consider using 'Related_Domains'
52 |
53 | network_identifier = row.get('Network_Identifier') or row.get('network_identifier')
54 | file_identifier = row.get('File_Identifier') or row.get('file_identifier')
55 |
56 | #tup = (report_id, title, product_type, report_timestamp)
57 | tup = report_id
58 |
59 | ips = set()
60 | md5s = set()
61 | domains = set()
62 |
63 | if tup in iocs_by_report_dict:
64 | ips = set(iocs_by_report_dict[tup]['ipaddr'])
65 | md5s = set(iocs_by_report_dict[tup]['md5'])
66 | domains = set(iocs_by_report_dict[tup]['domain'])
67 |
68 | else:
69 | iocs_by_report_dict[tup] = {}
70 |
71 |
72 | iocs_by_report_dict[tup]["title"] = title
73 | iocs_by_report_dict[tup]["product_type"] = product_type
74 | iocs_by_report_dict[tup]["report_timestamp"] = report_timestamp
75 |
76 | if network_identifier and network_identifier.lower() == "attacker":
77 | if ip and len(ip) > 0:
78 | ips.add(ip)
79 |
80 | if domain and len(domain) > 0:
81 | domains.add(domain)
82 |
83 | if file_identifier and file_identifier.lower() == "attacker":
84 | if md5 and len(md5) > 0:
85 | md5s.add(md5)
86 |
87 | if attachment_md5 and len(attachment_md5) > 0:
88 | md5s.add(attachment_md5)
89 |
90 | iocs_by_report_dict[tup]['ipaddr'] = list(ips)
91 | iocs_by_report_dict[tup]['domain'] = list(domains)
92 | iocs_by_report_dict[tup]['md5'] = list(md5s)
93 |
94 | return iocs_by_report_dict
95 |
--------------------------------------------------------------------------------
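The dictionary returned by `isight_csv_to_iocs_dict` keys each report id to its title, product type, timestamp and IOC lists. A hedged sketch of turning those entries into `CbReport` objects; the feed generator itself (`generate_isight_feed.py`) is not reproduced in this dump, so the link and score values below are placeholders:

```python
# Sketch: converting the helper's output into CbReport objects.
# Assumes `iocs_by_report_dict` came from isight_csv_to_iocs_dict() above.
from cbfeeds import CbReport

def reports_from_iocs(iocs_by_report_dict: dict) -> list:
    reports = []
    for report_id, entry in iocs_by_report_dict.items():
        reports.append(CbReport(
            id=report_id,
            title=entry["title"],
            timestamp=entry["report_timestamp"],
            link="https://api.isightpartners.com/",   # placeholder link
            score=100,                                 # placeholder score
            iocs={"ipv4": entry["ipaddr"],
                  "dns": entry["domain"],
                  "md5": entry["md5"]},
        ))
    return reports
```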
/example/stix/sample_data/STIX_FileHash_Watchlist.xml.badmd5s:
--------------------------------------------------------------------------------
[XML markup stripped during extraction; only element text survives]
Example file watchlist
Indicators - Watchlist
File Hash Watchlist
Indicator that contains malicious file hashes.
MD5
01234567890abcdef01234567890abcdef##comma##abcdef1234567890abcdef1234567890##comma##00112233445566778899aabbccddeeff
--------------------------------------------------------------------------------
/example/abuse_ch/generate_abusech_feed.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 | import sys
6 | import os
7 | import time
8 |
9 | # third party lib imports
10 | import requests
11 |
12 | from distutils.version import StrictVersion
13 |
14 | if StrictVersion(requests.__version__) < StrictVersion("1.2.3"):
15 | # only in 1.2.3+ did response objects support iteration
16 | raise ImportError("requires requests >= 1.2.3")
17 |
18 | # our imports
19 | sys.path.insert(0, "../../")
20 | from cbfeeds import CbReport
21 | from cbfeeds import CbFeed
22 | from cbfeeds import CbFeedInfo
23 |
24 |
25 | def get_zeus():
26 | reports = []
27 | r = requests.get("https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist")
28 | lines = r.text.split("\n")
29 | domains = []
30 | for line in lines:
31 | if len(line) < 3: continue
32 | if line[0] == "#": continue
33 |
34 | domains.append(line.strip())
35 |
36 | fields = {'iocs': {
37 | "dns": domains,
38 | },
39 | 'timestamp': int(time.mktime(time.gmtime())),
40 | 'link': "https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist",
41 | 'id': 'abusech-zeus',
42 | 'title': 'abuse.ch Zeus hit on Standard domain blocklist',
43 | 'score': 100,
44 | }
45 | reports.append(CbReport(**fields))
46 | return reports
47 |
48 |
49 | #
50 | # Gives 500 error, keeping it for historical reasons
51 | #
52 | # def get_palevo():
53 | # reports = []
54 | # r = requests.get("https://palevotracker.abuse.ch/blocklists.php?download=domainblocklist")
55 | # lines = r.text.split("\n")
56 | # domains = []
57 | # for line in lines:
58 | # if len(line) < 3: continue
59 | # if line[0] == "#": continue
60 | #
61 | # domains.append(line.strip())
62 | #
63 | # fields = {'iocs': {
64 | # "dns": domains,
65 | # },
66 | # 'timestamp': int(time.mktime(time.gmtime())),
67 | # 'link': "https://palevotracker.abuse.ch/blocklists.php?download=domainblocklist",
68 | # 'id': 'abusech-palevo',
69 | # 'title': 'abuse.ch Palevo hit on domain blocklist',
70 | # 'score': 100,
71 | # }
72 | # reports.append(CbReport(**fields))
73 | # return reports
74 |
75 |
76 | def create():
77 | reports = []
78 | reports.extend(get_zeus())
79 | #reports.extend(get_palevo())
80 |
81 | feedinfo = {'name': 'abusech',
82 | 'display_name': "abuse.ch Malware Domains",
83 | 'provider_url': "http://www.abuse.ch",
84 | 'summary': "abuse.ch tracks C&C servers for Zeus and Palevo malware. " +
85 | "This feed combines the two domain names blocklists.",
86 | 'tech_data': "There are no requirements to share any data to receive this feed.",
87 | 'icon': "abuse.ch.jpg",
88 | 'icon_small': "abuse.ch.small.jpg",
89 | 'category': "Open Source"
90 | }
91 |
92 | # the lazy way to the icon
93 | old_cwd = os.getcwd()
94 | os.chdir(os.path.dirname(os.path.realpath(__file__)))
95 |
96 | feedinfo = CbFeedInfo(**feedinfo)
97 | feed = CbFeed(feedinfo, reports)
98 | feed_bytes = feed.dump()
99 |
100 | os.chdir(old_cwd)
101 |
102 | return feed_bytes
103 |
104 |
105 | if __name__ == "__main__":
106 | if len(sys.argv) != 2:
107 | print("usage: generate_abuse.ch_feed.py [outfile]")
108 | sys.exit()
109 |
110 | feed_created = create()
111 | open(sys.argv[1], "w").write(feed_created)
112 |
--------------------------------------------------------------------------------
/example/isight/importer.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import logging
4 | import time
5 | from cbisight.isight_api import ISightAPI
6 |
7 | _logger = logging.getLogger(__name__)
8 |
9 | class ImporterDisabled(Exception):
10 | def __init__(self, *args, **kwargs):
11 | Exception.__init__(self, *args, **kwargs)
12 |
13 | class iSightLocalImporter(object):
14 | """
15 | Reads CSV files from a directory
16 | """
17 | def __init__(self, local_directory):
18 | """
19 | TODO
20 | """
21 | self.local_directory = local_directory
22 |
23 | if not self.local_directory:
24 | raise ImporterDisabled("iSightLocalImporter missing required field!")
25 |
26 | if not os.path.exists(local_directory):
27 | raise Exception("iSightLocalImporter specified directory not found!")
28 |
29 | self.processed_files = []
30 |
31 | def get_csv_data(self):
32 | """
33 | TODO
34 | """
35 | filepaths = os.listdir(self.local_directory)
36 | results = []
37 | for filepath in filepaths:
38 | if filepath.endswith('-processed'):
39 | continue
40 | try:
41 | full_filepath = os.path.join(self.local_directory, filepath)
42 | data = file(full_filepath, 'rb').read()
43 | results.append(data)
44 | self.processed_files.append(full_filepath)
45 | except:
46 | _logger.exception("Caught exception for: %s" % filepath)
47 | return results
48 |
49 | def on_processing_done(self):
50 | """
51 | We don't want to keep importing the same files (although presumably we protect
52 | against that with our database), so rename it after.
53 | """
54 | for filepath in self.processed_files:
55 | try:
56 | os.rename(filepath, filepath + "-processed")
57 | except:
58 | _logger.exception("Caught exception for: %s" % filepath)
59 |
60 |
61 | class iSightRemoteImporter(object):
62 | """
63 | Basic API for downloading IOCs and Reports from iSight Partners
64 | """
65 | def __init__(self, base_url, username, password, public_key, private_key, days_back_to_retrieve, save_responses_directory):
66 | """
67 | TODO
68 | """
69 | if not base_url or \
70 | not username or \
71 | not password or \
72 | not public_key or \
73 | not private_key or \
74 | not days_back_to_retrieve:
75 | raise ImporterDisabled("iSightRemoteImporter missing required field(s)")
76 |
77 | self.api = ISightAPI(base_url, username, password, public_key, private_key)
78 | self.days_back_to_retrieve = days_back_to_retrieve
79 | self.save_responses_directory = save_responses_directory
80 |
81 | def get_csv_data(self):
82 | """
83 | Uses the iSight API Class to download the file, optionally save the response,
84 | and return the data.
85 | """
86 | rawcsv = self.api.get_i_and_w(self.days_back_to_retrieve)
87 | if len(rawcsv) > 0:
88 | if self.save_responses_directory and os.path.exists(self.save_responses_directory):
89 | try:
90 | filename = "isight-remote-api-%s.csv" % time.strftime('%Y-%m-%d-%H_%M_%S', time.gmtime(time.time()))
91 | file(os.path.join(self.save_responses_directory, filename), 'wb').write(rawcsv)
92 | except:
93 | _logger.exception("Trying to save response!")
94 | return [rawcsv]
95 | else:
96 | _logger.error("Received blank response!")
97 | return []
98 |
99 | def on_processing_done(self):
100 | """
101 | Nothing to see here.
102 | """
103 | return
104 |
--------------------------------------------------------------------------------
/example/stix/sample_data/command-and-control-ip-range.xml:
--------------------------------------------------------------------------------
[XML markup stripped during extraction; only element text survives]
Example Command and Control IP Range
198.51.100.2
198.51.100.17
203.0.113.19
Malware C2 Channel
Malware C2
--------------------------------------------------------------------------------
/test/test_01_common_integrity.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 |
6 | import cbfeeds
7 | from common import TestCommon
8 |
9 |
10 | class TestCommonIntegrity(TestCommon):
11 | """
12 | Verify that the unit test common methods work as expected.
13 | """
14 |
15 | def test_01_neg_feedinfo_missing(self):
16 | """
17 | Verify that missing feed info is trapped.
18 | """
19 | info, _ = self._load_feed_file()
20 | del info['feedinfo']
21 | try:
22 | self._save_test_feed(info)
23 | self.fail("Did not get expected exception!")
24 | except KeyError:
25 | pass
26 |
27 | def test_02_neg_feedinfo_not_dict(self):
28 | """
29 | Verify that feed info that is not a dictionary is trapped.
30 | """
31 | info, _ = self._load_feed_file()
32 | info['feedinfo'] = "bogus"
33 | try:
34 | self._save_test_feed(info)
35 | self.fail("Did not get expected exception!")
36 | except cbfeeds.exceptions.CbInvalidFeed as err:
37 | assert "The supplied `feedinfo` parameter does not appear to be a valid dictionary" in err.args[0]
38 |
39 | def test_03_neg_feedinfo_empty_dict(self):
40 | """
41 | Verify that an empty feed info dictionary is trapped.
42 | """
43 | info, _ = self._load_feed_file()
44 | info['feedinfo'] = {}
45 | try:
46 | feed = self._save_test_feed(info)
47 | feed.validate()
48 | self.fail("Did not get expected exception!")
49 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
50 | assert "FeedInfo missing required field(s)" in err.args[0]
51 |
52 | def test_04_neg_reports_missing(self):
53 | """
54 | Verify that missing reports info is trapped.
55 | """
56 | info, _ = self._load_feed_file()
57 | del info['reports']
58 | try:
59 | self._save_test_feed(info)
60 | self.fail("Did not get expected exception!")
61 | except KeyError:
62 | pass
63 |
64 | def test_05_neg_reports_not_list(self):
65 | """
66 | Verify that invalid reports info (not list) is trapped.
67 | """
68 | info, _ = self._load_feed_file()
69 | info['reports'] = "bogus"
70 | try:
71 | self._save_test_feed(info)
72 | self.fail("Did not get expected exception!")
73 | except cbfeeds.exceptions.CbInvalidFeed as err:
74 | assert "The supplied `reports` parameter does not appear to be a valid list" in err.args[0]
75 |
76 | def test_06_neg_reports_not_list_of_dict(self):
77 | """
78 | Verify that invalid reports info (list item not dict) is trapped.
79 | """
80 | info, _ = self._load_feed_file()
81 | info['reports'] = ["bogus"]
82 | try:
83 | self._save_test_feed(info)
84 | self.fail("Did not get expected exception!")
85 | except cbfeeds.exceptions.CbInvalidFeed as err:
86 | assert "The `reports` parameter must be a list of dictionaries" in err.args[0]
87 |
88 | def test_10_cbfeed_using_cbfeedinfo_object(self):
89 | """
90 | Verify that a CbFeedInfo object can be used in creating a CbFeed object.
91 | """
92 | info, feed = self._load_feed_file()
93 | fi = cbfeeds.CbFeedInfo(**info['feedinfo'])
94 | cbf = cbfeeds.CbFeed(fi, info['reports'])
95 | assert cbf.dump() == feed.dump()
96 |
97 | def test_11_cbfeed_using_list_of_cbreport_objects(self):
98 | """
99 | Verify that a list of CbReport objects can be used in creating a CbFeed object.
100 | """
101 | info, feed = self._load_feed_file()
102 | rp = [cbfeeds.CbReport(**rep) for rep in info['reports']]
103 | cbf = cbfeeds.CbFeed(info['feedinfo'], rp)
104 | assert cbf.dump() == feed.dump()
105 |
106 |
--------------------------------------------------------------------------------
/example/tor/generate_tor_feed.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 | # stdlib imports
6 | import logging
7 | import os
8 | import sys
9 | import time
10 | from typing import Dict, List
11 |
12 | # third party lib imports
13 | import requests
14 |
15 |
16 | # our imports
17 | sys.path.insert(0, "../../")
18 | from cbfeeds import CbReport
19 | from cbfeeds import CbFeed
20 | from cbfeeds import CbFeedInfo
21 |
22 | logger = logging.getLogger(__name__)
23 |
24 |
25 | def get_tor_nodes() -> List[Dict]:
26 | """
27 | Read the remote source and return the tor node information.
28 | :return: list of node info
29 | """
30 | nodes = []
31 | url = "https://onionoo.torproject.org/details?type=relay&running=true"
32 | jsonurl = requests.get(url)
33 | text = jsonurl.json()
34 | for entry in text['relays']:
35 | try:
36 | for address in entry['or_addresses']:
37 | # IPv4 addresses are ip:port, IPv6 addresses are [ip]:port:
38 | # "or_addresses":["80.101.115.170:5061","[2001:980:3b4f:1:240:caff:fe8d:f02c]:5061"],
39 | # process only IPv4 addresses for now
40 | if address.count(':') == 1:
41 | # All IPv4 addresses will end up here.
42 | ipv4, port = address.split(':')
43 | nodes.append({'ip': ipv4,
44 | 'name': entry['nickname'],
45 | 'port': port,
46 | 'firstseen': entry['first_seen'],
47 | 'lastseen': entry['last_seen'],
48 | 'contact': entry.get("contact", "none")})
49 | except Exception as err:
50 | logger.warning(f"{err} while parsing: {entry}")
51 | return nodes
52 |
53 |
54 | def build_reports(nodes: List[Dict]) -> List[CbReport]:
55 | """
56 | Convert tor nodes to reports.
57 |
58 | :param nodes: list of tor nodes
59 | :return: list of reports
60 | """
61 | # TODO - this is one "report" per TOR node IP. Not ideal.
62 | reports = []
63 | unique_ips = set()
64 | for node in nodes:
65 | # avoid duplicated reports
66 | # CBAPI-22
67 | if node['ip'] in unique_ips:
68 | continue
69 | else:
70 | unique_ips.add(node['ip'])
71 |
72 | fields = {'iocs': {
73 | 'ipv4': [node['ip'], ]
74 | },
75 | 'score': 0,
76 | 'timestamp': int(time.mktime(time.gmtime())),
77 | 'link': 'http://www.torproject.org',
78 | 'id': "TOR-Node-%s" % node['ip'],
79 | 'title': "%s has been a TOR exit node since %s and was last seen %s on port %s. Contact: %s"
80 | % (node['ip'], node['firstseen'], node['lastseen'], node['port'], node['contact'])}
81 | reports.append(CbReport(**fields))
82 |
83 | return reports
84 |
85 |
86 | def create() -> str:
87 | """
88 | Create tor feed.
89 |
90 | :return: feed info as JSON string
91 | """
92 | nodes = get_tor_nodes()
93 | reports = build_reports(nodes)
94 |
95 | iconhome = os.path.dirname(__file__)
96 |
97 | feedinfo = {'name': 'tor',
98 | 'display_name': "Tor Exit Nodes",
99 | 'provider_url': 'https://www.torproject.org/',
100 | 'summary': "This feed is a list of Tor Node IP addresses, updated every 30 minutes.",
101 | 'tech_data': "There are no requirements to share any data to receive this feed.",
102 | 'icon': os.path.join(iconhome, 'tor.png'),
103 | 'icon_small': os.path.join(iconhome, 'tor.small.jpg'),
104 | 'category': 'Open Source',
105 | }
106 |
107 | logger.info(f">> Feed `{feedinfo['display_name']}` generated with {len(reports)} reports")
108 |
109 | feedinfo = CbFeedInfo(**feedinfo)
110 | feed = CbFeed(feedinfo, reports)
111 | created_feed = feed.dump()
112 |
113 | return created_feed
114 |
115 |
116 | if __name__ == "__main__":
117 | if len(sys.argv) != 2:
118 | print("usage: %s [outfile]" % sys.argv[0])
119 | sys.exit(0)
120 |
121 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
122 |
123 | info = create()
124 | with open(sys.argv[1], "w") as fp2:
125 | fp2.write(info)
126 |
--------------------------------------------------------------------------------
/example/mdl/generate_mdl_feed.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 |
6 | import csv
7 | import logging
8 | import os
9 | import sys
10 | import time
11 | import urllib.parse as urlparse
12 | from datetime import datetime, timedelta
13 | from distutils.version import StrictVersion
14 | from typing import List, Optional
15 |
16 | # third party lib imports
17 | import requests
18 |
19 | if StrictVersion(requests.__version__) < StrictVersion("1.2.3"):
20 | # only in 1.2.3+ did response objects support iteration
21 | raise ImportError("requires requests >= 1.2.3")
22 |
23 | # our imports
24 | sys.path.insert(0, "../../")
25 | from cbfeeds import CbReport
26 | from cbfeeds import CbFeed
27 | from cbfeeds import CbFeedInfo
28 |
29 | # NOTE: as of 10/03/2020, the feed only returns data in the year 2009; adding functionality for ALL data
30 | DAYS_BACK: Optional[int] = 90 # use number for days back from today, None for all data
31 |
32 | logger = logging.getLogger(__name__)
33 |
34 |
35 | def reports_from_csv(lines: List[str]) -> List[CbReport]:
36 | """
37 | Takes a list of CSV lines from malwaredomainlist and
38 | creates a report per line.
39 | """
40 | reports = []
41 | unique_domains = set()
42 |
43 | # define `line` before the loop so the outer except clause can reference it.
44 | line = None
45 | try:
46 | for line in lines:
47 | line = line.strip() # trim spaces
48 | if len(line) == 0:
49 | continue
50 | try:
51 | rawdate, url, ip, reverse_lookup, desc, registrant, asn, _, _, _ = list(csv.reader([line]))[0]
52 |
53 | # rawdate 2013/10/27_03:06
54 | report_date = time.strptime(rawdate, "%Y/%m/%d_%H:%M")
55 |
56 | # skip any report older than DAYS_BACK, unless defined as None
57 | if DAYS_BACK is not None:
58 | report_datetime = datetime.fromtimestamp(time.mktime(report_date))
59 | start = datetime.now() - timedelta(days=DAYS_BACK)
60 | if report_datetime < start:
61 | continue
62 |
63 | # url www.slivki.com.ua/as/Ponynl.exe
64 | url = urlparse.urlsplit(f"http://{url}")
65 | host = url.netloc
66 | if ":" in host:
67 | host = host.split(":", 1)[0]
68 |
69 | if len(host) <= 3:
70 | logger.debug(f"WARNING: no domain, skipping line {line}")
71 | continue
72 |
73 | # avoid duplicate report ids
74 | # CBAPI-21
75 | if host in unique_domains:
76 | continue
77 | else:
78 | unique_domains.add(host)
79 |
80 | fields = {'iocs': {
81 | "dns": [host],
82 | },
83 | 'timestamp': int(time.mktime(report_date)),
84 | 'link': "http://www.malwaredomainlist.com/mdl.php",
85 | 'id': 'MDL-%s-%s' % (time.strftime("%Y%m%d-%H%M", report_date), host),
86 | 'title': '%s found on malware domain list: "%s"' % (host, desc) +
87 | ' IP (reverse lookup) at the time: %s (%s)' % (ip, reverse_lookup),
88 | 'score': 100,
89 | }
90 |
91 | reports.append(CbReport(**fields))
92 |
93 | except Exception as err:
94 | logger.warning(f"WARNING: error parsing {line}\n{err}")
95 | except Exception as err2:
96 | logger.info(f"Unexpected exception with linw `{line}:\n{err2}")
97 |
98 | return reports
99 |
100 |
101 | def create(local_csv_file: str = None) -> str:
102 | """
103 | Create a feed from www.malwaredomainlist.com.
104 |
105 | :param local_csv_file: path to local file to use instead of remote call
106 | :return: feed JSON.
107 | """
108 | if local_csv_file: # use local
109 | with open(local_csv_file, "r") as fp2:
110 | lines = fp2.readlines()
111 | else: # use remote
112 | r = requests.get("http://www.malwaredomainlist.com/mdlcsv.php", stream=True)
113 | lines = r.text.split("\r\n")
114 |
115 | iconhome = os.path.dirname(__file__)
116 | reports = reports_from_csv(lines)
117 | feedinfo = {'name': 'mdl',
118 | 'display_name': "Malware Domain List",
119 | 'provider_url': "http://www.malwaredomainlist.com/mdl.php",
120 | 'summary': "Malware Domain List is a non-commercial community project to track domains used by " +
121 | "malware. This feed contains the most recent 180 days of entries.",
122 | 'tech_data': "There are no requirements to share any data to receive this feed.",
123 | 'icon': os.path.join(iconhome, "mdl.png"),
124 | 'icon_small': os.path.join(iconhome, "mdl.small.jpg"),
125 | 'category': "Open Source"
126 | }
127 |
128 | logger.info(f">> Feed `{feedinfo['display_name']}` generated with {len(reports)} reports")
129 | feedinfo = CbFeedInfo(**feedinfo)
130 | the_feed = CbFeed(feedinfo, reports)
131 | feed_json = the_feed.dump()
132 |
133 | return feed_json
134 |
135 |
136 | if __name__ == "__main__":
137 | if len(sys.argv) <= 1:
138 | print("usage: generate_mdl_feed.py [local.csv]")
139 | sys.exit()
140 |
141 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
142 |
143 | outfile = sys.argv[1]
144 | localcsv = None
145 | if len(sys.argv) > 2:
146 | localcsv = sys.argv[2]
147 |
148 | feed = create(localcsv)
149 | with open(outfile, "w") as fp:
150 | fp.write(feed)
151 |
--------------------------------------------------------------------------------
/test/test_02_cbfeed.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 |
6 | import json
7 |
8 | import cbfeeds
9 | from common import TestCommon
10 |
11 |
12 | class TestCbFeedMethods(TestCommon):
13 | """
14 | Validate the core methods of the CBFeed class.
15 | """
16 |
17 | # ----- Basic Validation ----------------------------------------- #
18 |
19 | def test_01_validate_feed(self):
20 | """
21 | Verify that overall feed validation works.
22 | """
23 | _, feed = self._load_feed_file()
24 | feed.validate()
25 |
26 | def test_02_validate_feed_serialized(self):
27 | """
28 | Verify that overall feed validation works with serialized data.
29 | """
30 | _, feed = self._load_feed_file()
31 | feed.validate(serialized_data=feed.dump())
32 |
33 | # ----- Method validation ----------------------------------------- #
34 |
35 | def test_03a_neg_validate_feedinfo_missing(self):
36 | """
37 | Verify that CBFeed.validate detects missing feedinfo.
38 | """
39 | _, feed = self._load_feed_file()
40 | del feed.data['feedinfo']
41 | try:
42 | feed.validate()
43 | self.fail("Did not get expected exception!")
44 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
45 | assert "Feed missing 'feedinfo' data" in err.args[0]
46 |
47 | def test_03b_neg_validate_feedinfo_missing_serialized(self):
48 | """
49 | Verify that CBFeed.validate detects missing feedinfo in serialized mode.
50 | """
51 | info, feed = self._load_feed_file()
52 | del info['feedinfo']
53 | try:
54 | feed.validate(serialized_data=json.dumps(info))
55 | self.fail("Did not get expected exception!")
56 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
57 | assert "Feed missing 'feedinfo' data" in err.args[0]
58 |
59 | def test_04a_neg_validate_reports_missing(self):
60 | """
61 | Verify that CBFeed.validate detects missing reports.
62 | """
63 | _, feed = self._load_feed_file()
64 | del feed.data['reports']
65 | try:
66 | feed.validate()
67 | self.fail("Did not get expected exception!")
68 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
69 | assert "Feed missing 'reports' structure" in err.args[0]
70 |
71 | def test_04b_neg_validate_reports_missing_serialized(self):
72 | """
73 | Verify that CBFeed.validate detects missing reports in serialized mode.
74 | """
75 | info, feed = self._load_feed_file()
76 | del info['reports']
77 | try:
78 | feed.validate(serialized_data=json.dumps(info))
79 | self.fail("Did not get expected exception!")
80 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
81 | assert "Feed missing 'reports' structure" in err.args[0]
82 |
83 | def test_05a_neg_validate_feed_strict_bad_feedinfo(self):
84 | """
85 | Verify that CBFeed.validate detects non-CB feedinfo fields if strict.
86 | """
87 | info, _ = self._load_feed_file()
88 | info['feedinfo']['booga'] = "foobar"
89 | try:
90 | feed = cbfeeds.CbFeed(info['feedinfo'], info['reports'])
91 | feed.validate(strict=True)
92 | self.fail("Did not get expected exception!")
93 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
94 | assert "Problem with feed `QA Feed BWF912316192`: Feedinfo includes unknown field: booga" in err.args[0]
95 |
96 | def test_05b_neg_validate_feed_strict_bad_report(self):
97 | """
98 | Verify that CBFeed.validate detects non-CB feedinfo fields if strict.
99 | """
100 | info, _ = self._load_feed_file()
101 | info['reports'][1]['booga'] = "foobar"
102 | try:
103 | feed = cbfeeds.CbFeed(info['feedinfo'], info['reports'])
104 | feed.validate(strict=True)
105 | self.fail("Did not get expected exception!")
106 | except cbfeeds.exceptions.CbInvalidReport as err:
107 | assert ("Problem with feed `QA Feed BWF912316192`, report `WithSha256`: Report includes "
108 | f"unknown field: booga") in err.args[0]
109 |
110 | def test_06_neg_validate_reports_list_dup_id(self):
111 | """
112 | Verify that validate_report_list detects duplicate ids.
113 | """
114 | info, feed = self._load_feed_file()
115 | reports = info['reports']
116 | reports[0]['id'] = reports[1]['id']
117 |
118 | try:
119 | feed.validate_report_list(reports)
120 | self.fail("Did not get expected exception!")
121 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
122 | assert "Duplicate report id 'WithSha256" in err.args[0]
123 |
124 | def test_07_validate_iter_iocs(self):
125 | """
126 | Verify that iter_iocs returns all iocs properly.
127 | """
128 | _, feed = self._load_feed_file()
129 |
130 | checkoff = {'md5|dbb379c9337cc31b24743e7cf81ee8bd': True,
131 | 'sha256|94dcf0531121e13a73114e8806f096d31e21dab4a8b1bfef95b5e0171a9a0556': True,
132 | 'ipv4|158.106.122.248': True,
133 | 'ipv6|7F1F:67E6:4BA0:5935:453A:A3AA:D69C:6146': True,
134 | 'dns|spend.policy.issue.net': True,
135 | 'ja3|07f362079e7f3d5a8855549fcc9a441e': True,
136 | 'ja3s|0fa6b3b35df905b209742cf80c06f7da': True,
137 | 'query|process_name:foobar.exe': True,
138 | }
139 | extras = []
140 | for item in feed.iter_iocs():
141 | key = f"{item['type']}|{item['ioc']}"
142 | if key in checkoff:
143 | del checkoff[key]
144 | else:
145 | extras.append(key)
146 |
147 | def test_07_validate_dump(self):
148 | """
149 | Verify that dump() works as expected.
150 | """
151 | info, feed = self._load_feed_file()
152 | check = feed.dump()
153 | assert check == json.dumps(info, indent=2, sort_keys=True)
154 |
--------------------------------------------------------------------------------
/example/raw/generate_feed_from_raw_iocs.py:
--------------------------------------------------------------------------------
1 | # stdlib imports
2 | import re
3 | import sys
4 | import time
5 | import optparse
6 | import socket
7 | import base64
8 | import hashlib
9 |
10 | # cb imports
11 | sys.path.insert(0, "../../")
12 | from cbfeeds import CbReport
13 | from cbfeeds import CbFeed
14 | from cbfeeds import CbFeedInfo
15 |
16 | def gen_report_id(iocs):
17 | """
18 |     A report id should be unique.
19 |     Because generate_feed_from_raw may be run repeatedly on the same data, it should
20 |     also be deterministic.
21 |     This routine sorts all the indicators, then hashes them in order to meet these criteria.
22 | """
23 | md5 = hashlib.md5()
24 |
25 | # sort the iocs so that a re-order of the same set of iocs results in the same report id
26 | iocs.sort()
27 |
28 | for ioc in iocs:
29 | md5.update(ioc.strip().encode("utf-8"))
30 |
31 | return md5.hexdigest()
32 |
33 | def build_reports(options):
34 |
35 | reports = []
36 |
37 | ips = []
38 | domains = []
39 | md5s = []
40 |
41 | # read all of the lines (of text) from the provided
42 | # input file (of IOCs)
43 | #
44 | raw_iocs = open(options.ioc_filename, encoding='utf-8').readlines()
45 |
46 | # iterate over each of the lines
47 | # attempt to determine if each line is a suitable
48 | # ipv4 address, dns name, or md5
49 | #
50 | for raw_ioc in raw_iocs:
51 |
52 | # strip off any leading or trailing whitespace
53 | # skip any empty lines
54 | #
55 | raw_ioc = raw_ioc.strip()
56 | if len(raw_ioc) == 0:
57 | continue
58 |
59 | try:
60 | # attempt to parse the line as an ipv4 address
61 | #
62 | socket.inet_aton(raw_ioc)
63 |
64 | # parsed as an ipv4 address!
65 | #
66 | ips.append(raw_ioc)
67 | except Exception as e:
68 |
69 |             # attempt to parse the line as an md5 and, if that fails,
70 | # as a domain. use trivial parsing
71 | #
72 | if 32 == len(raw_ioc) and \
73 | re.findall(r"([a-fA-F\d]{32})", raw_ioc):
74 | md5s.append(raw_ioc)
75 | elif -1 != raw_ioc.find("."):
76 | domains.append(raw_ioc)
77 |
78 | fields = {'iocs': {
79 | },
80 | 'timestamp': int(time.mktime(time.gmtime())),
81 | 'link': options.url,
82 | 'title': options.report,
83 | 'id': gen_report_id(ips + domains + md5s),
84 | 'score': 100}
85 |
86 | if options.tags is not None:
87 | fields['tags'] = options.tags.split(',')
88 |
89 | if options.description is not None:
90 | fields['description'] = options.description
91 |
92 | if len(ips) > 0:
93 | fields['iocs']['ipv4'] = ips
94 | if len(domains) > 0:
95 | fields['iocs']['dns'] = domains
96 | if len(md5s) > 0:
97 | fields['iocs']['md5'] = md5s
98 |
99 | reports.append(CbReport(**fields))
100 |
101 | return reports
102 |
103 | def create_feed(options):
104 |
105 | # generate the required feed information fields
106 | # based on command-line arguments
107 | #
108 | feedinfo = {'name': options.name,
109 | 'display_name': options.display_name,
110 | 'provider_url': options.url,
111 | 'summary': options.summary,
112 | 'tech_data': options.techdata}
113 |
114 | # if an icon was provided, encode as base64 and
115 | # include in the feed information
116 | #
117 | if options.icon:
118 | bytes = base64.b64encode(open(options.icon,'rb').read())
119 | feedinfo['icon'] = bytes.decode("utf-8")
120 |
121 | # if a small icon was provided, encode as base64 and
122 | # include in the feed information
123 | #
124 | if options.small_icon:
125 | bytes = base64.b64encode(open(options.small_icon, 'rb').read())
126 | feedinfo['icon_small'] = bytes.decode('utf-8')
127 |
128 | # if a feed category was provided, include it in the feed information
129 | #
130 | if options.category:
131 | feedinfo['category'] = options.category
132 |
133 | # build a CbFeedInfo instance
134 | # this does field validation
135 | #
136 | feedinfo = CbFeedInfo(**feedinfo)
137 |
138 | # build a list of reports (always one report in this
139 | # case). the single report will include all the IOCs
140 | #
141 | reports = build_reports(options)
142 |
143 | # build a CbFeed instance
144 | # this does field validation (including on the report data)
145 | #
146 | feed = CbFeed(feedinfo, reports)
147 |
148 | return feed.dump()
149 |
150 | def _build_cli_parser():
151 | usage = "usage: %prog [options]"
152 | desc = "Convert a flat file of IOCs to a Carbon Black feed"
153 |
154 | parser = optparse.OptionParser(usage=usage, description=desc)
155 |
156 | parser.add_option("-n", "--name", action="store", type="string", dest="name",
157 | help="Feed Name")
158 | parser.add_option("-d", "--displayname", action="store", type="string", dest="display_name",
159 | help="Feed Display Name")
160 | parser.add_option("-u", "--url", action="store", type="string", dest="url",
161 | help="Feed Provider URL")
162 | parser.add_option("-s", "--summary", action="store", type="string", dest="summary",
163 | help="Feed Summary")
164 | parser.add_option("-t", "--techdata", action="store", type="string", dest="techdata",
165 | help="Feed Technical Description")
166 | parser.add_option("-c", "--category", action="store", type="string", dest="category",
167 | help="Feed Category")
168 | parser.add_option("-i", "--icon", action="store", type="string", dest="icon",
169 | help="Icon File (PNG format)")
170 | parser.add_option("-S", "--small-icon", action="store", type="string", dest="small_icon",
171 | help="Small icon file (50x50 pixels) (PNG format)")
172 | parser.add_option("-I", "--iocs", action="store", type="string", dest="ioc_filename",
173 | help="IOC filename")
174 | parser.add_option("-r", "--report", action="store", type="string", dest="report",
175 | help="Report Name")
176 | parser.add_option("-g", "--tags", action="store", type="string", dest="tags",
177 | help="Optional comma-delimited report tags")
178 | parser.add_option("-D", "--description", action="store", type="string", dest="description",
179 | help="A brief description of the report.")
180 |
181 | return parser
182 |
183 | if __name__ == "__main__":
184 |
185 | parser = _build_cli_parser()
186 | options, args = parser.parse_args(sys.argv)
187 |
188 | if not options.name or \
189 | not options.display_name or \
190 | not options.url or \
191 | not options.summary or \
192 | not options.techdata or \
193 | not options.ioc_filename or \
194 | not options.report:
195 | print("-> Missing option")
196 | sys.exit(0)
197 |
198 | print((create_feed(options)))
199 |
--------------------------------------------------------------------------------
/validate_feed.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved.
4 | ################################################################################
5 |
6 | import argparse
7 | import json
8 | import logging
9 | import os
10 | import sys
11 | from typing import Any, Dict, Set, Tuple
12 |
13 | import cbfeeds
14 |
15 | logger = logging.getLogger(__name__)
16 |
17 |
18 | ################################################################################
19 | # Utility Functions
20 | ################################################################################
21 |
22 | def build_cli_parser() -> argparse.ArgumentParser:
23 | """
24 | generate ArgumentParser to handle command line switches.
25 | """
26 | desc = "Validate a Carbon Black Response feed"
27 |
28 | cmd_parser = argparse.ArgumentParser(description=desc)
29 |
30 | cmd_parser.add_argument("-f", "--feed_filename",
31 | help="Feed Filename(s) to validate",
32 | type=str, required=True, action="append")
33 |
34 | cmd_parser.add_argument("-p", "--pedantic",
35 | help="Validates that no non-standard JSON elements exist",
36 | action="store_true", default=False)
37 |
38 | cmd_parser.add_argument("-e", "--exclude",
39 | help="Filename of 'exclude' list - newline delimited indicators to consider invalid",
40 | default=None)
41 |
42 | cmd_parser.add_argument("-i", "--include",
43 | help="Filename of 'include' list - newline delimited indicators to consider valid",
44 | default=None)
45 |
46 | return cmd_parser
47 |
48 |
49 | def validate_file(filename: str) -> str:
50 | """
51 | Validate that the file exists and is readable.
52 |
53 | :param filename: The name of the file to read
54 | :return: file contents
55 | """
56 | if filename.strip() == "" or not os.path.exists(filename):
57 | raise cbfeeds.CbException(f"No such feed file: `{filename}`")
58 |
59 | try:
60 | with open(filename, 'r') as fp:
61 | return fp.read()
62 | except Exception as err:
63 | raise cbfeeds.CbException(f"Unable to read feed file: `{filename}`: {err}")
64 |
65 |
66 | def validate_json(contents: str) -> Dict[str, Any]:
67 | """
68 | Validate that the file is well-formed JSON.
69 |
70 | :param contents: file contents in supposed JSON format
71 | :return: json object
72 | """
73 | try:
74 | return json.loads(contents)
75 | except Exception as err:
76 | raise cbfeeds.CbException(f"Unable to process feed JSON: {err}")
77 |
78 |
79 | def validate_feed(feed: Dict[str, Any], pedantic: bool = False) -> cbfeeds.CbFeed:
80 | """
81 | Validate that the file is valid as compared to the CB feeds schema.
82 |
83 | :param feed: the digested feed
84 | :param pedantic: If True, perform pedantic validation
85 | :return: CbFeed object
86 | """
87 | # verify that we have both of the required feedinfo and reports elements
88 | if "feedinfo" not in feed:
89 | raise cbfeeds.CbException("No 'feedinfo' element found!")
90 | if "reports" not in feed:
91 | raise cbfeeds.CbException("No 'reports' element found!")
92 |
93 | # Create the cbfeed object
94 | feed = cbfeeds.CbFeed(feed["feedinfo"], feed["reports"])
95 |
96 | # Validate the feed -- this validates that all required fields are present, and that
97 | # all required values are within valid ranges
98 | feed.validate()
99 |
100 | return feed
101 |
102 |
103 | def validate_against_include_exclude(feed: cbfeeds.CbFeed, include: Set, exclude: Set) -> None:
104 | """
105 |     Ensure that no feed indicators are 'excluded' (blacklisted) unless they are also 'included' (whitelisted).
106 |
107 | :param feed: feed to be validated
108 | :param include: set of included IOCs
109 | :param exclude: set of excluded IOCs
110 | """
111 | for ioc in feed.iter_iocs():
112 | if ioc["ioc"] in exclude and not ioc["ioc"] in include:
113 | raise Exception(ioc)
114 |
115 |
116 | def gen_include_exclude_sets(include_filename: str = None, exclude_filename: str = None) -> Tuple[Set, Set]:
117 | """
118 | Generate an include and an exclude set of indicators by reading indicators from flat, newline-delimited files.
119 |
120 | :param include_filename: path to file containing include entries
121 | :param exclude_filename: path to file containing exclude entries
122 | """
123 | include = set()
124 | exclude = set()
125 |
126 | if include_filename:
127 | if not os.path.exists(include_filename):
128 | raise cbfeeds.CbException(f"No such include file: {include_filename}")
129 | for indicator in open(include_filename).readlines():
130 | include.add(indicator.strip())
131 |
132 | if exclude_filename:
133 | if not os.path.exists(exclude_filename):
134 |             raise cbfeeds.CbException(f"No such exclude file: {exclude_filename}")
135 | for indicator in open(exclude_filename).readlines():
136 | exclude.add(indicator.strip())
137 |
138 | return include, exclude
139 |
140 |
141 | def validation_cycle(filename: str) -> bool:
142 | """
143 |     Validate a single feed file. Include and exclude (whitelist and blacklist) sets of indicators are generated
144 |     first; feed validation will fail if a feed ioc is blacklisted unless it is also whitelisted.
145 | 
146 |     :param filename: filename containing feed information
147 | :return: False if there were problems, True if ok
148 | """
149 | include, exclude = gen_include_exclude_sets(options.include, options.exclude)
150 |
151 | try:
152 | contents = validate_file(filename)
153 | except Exception as err:
154 | logger.error(f"Feed file invalid: {err}")
155 | return False
156 |
157 | try:
158 | jsondict = validate_json(contents)
159 | except Exception as err:
160 | logger.error(f"Feed json for `{filename}` is invalid: {err}")
161 | return False
162 |
163 | try:
164 | feed = validate_feed(jsondict)
165 | except Exception as err:
166 | logger.error(f"Feed `{filename}` is invalid: {err}")
167 | return False
168 |
169 | if len(exclude) > 0 or len(include) > 0:
170 | try:
171 | validate_against_include_exclude(feed, include, exclude)
172 | logger.info(" ... validated against include and exclude lists")
173 | except Exception as err:
174 |             logger.error(f" ... unable to validate against the include and exclude lists:\n{err}")
175 | return False
176 |
177 | extra = "" if not options.pedantic else " and contains no non-CB elements"
178 | logger.info(f"Feed `{filename}` is good{extra}!")
179 | return True
180 |
181 |
182 | ################################################################################
183 | # Main
184 | ################################################################################
185 |
186 | if __name__ == "__main__":
187 | parser = build_cli_parser()
188 | options = parser.parse_args()
189 |
190 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
191 |
192 | feed_filenames = options.feed_filename
193 | if not feed_filenames:
194 | logger.error("-> Must specify one or more feed filenames to validate; use the -f switch or --help for usage")
195 | sys.exit(0)
196 |
197 | sep = False
198 | for feed_filename in feed_filenames:
199 | if sep:
200 | logger.info('\n ----- \n')
201 | validation_cycle(feed_filename)
202 | sep = True
203 |
--------------------------------------------------------------------------------
/example/stix/stix_to_feed.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 | import optparse
5 |
6 | sys.path.insert(0, "../../")
7 | from cbfeeds import CbReport
8 | from cbfeeds import CbFeed
9 | from cbfeeds import CbFeedInfo
10 |
11 | from stix.core import STIXPackage
12 | from stix.utils.parser import EntityParser, UnsupportedVersionError
13 | from cybox.bindings.file_object import FileObjectType
14 | from cybox.bindings.domain_name_object import DomainNameObjectType
15 | from cybox.bindings.address_object import AddressObjectType
16 |
17 | from stix.utils import nsparser
18 | import mixbox.namespaces
19 | from mixbox.namespaces import Namespace
20 |
21 | ADDITIONAL_NAMESPACES = [
22 | Namespace('http://us-cert.gov/ciscp', 'CISCP',
23 | 'http://www.us-cert.gov/sites/default/files/STIX_Namespace/ciscp_vocab_v1.1.1.xsd')
24 | ]
25 |
26 |
27 | def merge(d1, d2):
28 | """ given two dictionaries, return a single dictionary
29 | that merges the two.
30 | """
31 |
32 | result = d1
33 | if not d2: return result
34 | for k in d2:
35 | if k in result:
36 | result[k].extend(d2[k])
37 | else:
38 | result[k] = d2[k]
39 | return result
40 |
41 |
42 | def no_conditionals(obj):
43 | """ return true only if:
44 | - object has no conditionals applied or
45 |         - conditionals are just "Any Equals"
46 | """
47 | # if they're not on the object...
48 | if not hasattr(obj, "apply_condition") or not hasattr(obj, "condition"):
49 | return True
50 |
51 | # ...or if they're not defined...
52 | if not obj.apply_condition or not obj.condition:
53 | return True
54 |
55 | # ... or if they're defined and any equals...
56 | if obj.apply_condition.lower() == "any" and \
57 | obj.condition.lower() == "equals":
58 | return True
59 |
60 | return False
61 |
62 |
63 | def parse_File(file_obj):
64 | """ parse a FileObjectType and return a list of md5s
65 | if they exist and not subject to any conditionals. """
66 |
67 | if not hasattr(file_obj, "Hashes") or not hasattr(file_obj.Hashes, "Hash"):
68 | return
69 |
70 | iocs = {}
71 | iocs['md5'] = []
72 | for h in file_obj.Hashes.Hash:
73 | if not hasattr(h, "Type"):
74 | continue
75 |
76 |         # only take md5 hashes whose Type carries no conditional, or an "Any Equals" conditional
77 | if no_conditionals(h.Type) and \
78 | (h.Type.valueOf_ and h.Type.valueOf_.lower() == "md5"):
79 | md5s = h.Simple_Hash_Value
80 | iocs['md5'].extend(md5s.valueOf_.split(md5s.delimiter))
81 | return iocs
82 |
83 |
84 | def parse_observable(observable):
85 | """ for each observable, if it's of a supported type,
86 |         then parse out the values and return. """
87 |
88 | obj = observable.to_obj()
89 | if not obj or not hasattr(obj, "Object") or not hasattr(obj.Object, "Properties"): return
90 | prop = obj.Object.Properties
91 |
92 | iocs = {}
93 |
94 | if type(prop) == AddressObjectType:
95 | ips = prop.Address_Value
96 | if no_conditionals(ips):
97 | iocs['ipv4'] = ips.valueOf_.split(ips.delimiter)
98 |
99 | elif type(prop) == DomainNameObjectType:
100 | domains = prop.Value
101 | if no_conditionals(domains):
102 | iocs['dns'] = domains.valueOf_.split(domains.delimiter)
103 |
104 | elif type(prop) == FileObjectType:
105 | merge(iocs, parse_File(prop))
106 |
107 | return iocs
108 |
109 |
110 | def parse_observables(observables):
111 | """ iterate over the set of observables, parse out
112 |         visible indicators and return a dictionary of
113 | iocs present and suitable for feed inclusion. """
114 |
115 | iocs = {}
116 | for observable in observables:
117 | try:
118 | merge(iocs, parse_observable(observable))
119 | except Exception as e:
120 | print(("-> Unexpected error parsing observable: {0}; continuing.".format(e)))
121 |
122 | return iocs
123 |
124 |
125 | def build_report(fname):
126 | """ parse the provided STIX package and create a
127 | CB Feed Report that includes all suitable observables
128 | as CB IOCs """
129 |
130 | # The python STIX libs are pedantic about document versions. See
131 | # https://github.com/STIXProject/python-stix/issues/124
132 | # parser = EntityParser()
133 | # pkg = parser.parse_xml(fname, check_version=False)
134 |
135 | pkg = STIXPackage.from_xml(fname)
136 |
137 | iocs = {}
138 | if pkg.observables:
139 | iocs = parse_observables(pkg.observables.observables)
140 |
141 | if pkg.indicators:
142 | for indicator in pkg.indicators:
143 | iocs = merge(iocs, parse_observables(indicator.observables))
144 |
145 | ts = int(time.mktime(pkg.timestamp.timetuple())) if pkg.timestamp else int(time.mktime(time.gmtime()))
146 | fields = {'iocs': iocs,
147 | 'score': 100, # does STIX have a severity field?
148 | 'timestamp': ts,
149 | 'link': 'http://stix.mitre.org',
150 | 'id': pkg.id_,
151 | 'title': pkg.stix_header.title,
152 | }
153 |
154 | if len(list(iocs.keys())) == 0 or all(len(iocs[k]) == 0 for k in iocs):
155 | print(("-> No suitable observables found in {0}; skipping.".format(fname)))
156 | return None
157 |
158 |     print(("-> Including {0} observables from {1}.".format(sum(len(iocs[k]) for k in iocs), fname)))
159 | return CbReport(**fields)
160 |
161 |
162 | def build_cli_parser():
163 | """
164 | generate OptionParser to handle command line switches
165 | """
166 |
167 | usage = "usage: %prog [options]"
168 |     desc = "Best-effort conversion of one or more STIX Packages into a CB Feed"
169 |
170 | parser = optparse.OptionParser(usage=usage, description=desc)
171 |
172 | parser.add_option("-i", "--input", action="store", default=None, type="string", dest="input",
173 | help="STIX Package(s) to process. If a directory, will recursively process all .xml")
174 | parser.add_option("-o", "--output", action="store", default=None, type="string", dest="output",
175 | help="CB Feed output filename")
176 |
177 | return parser
178 |
179 |
180 | def build_reports(input_source):
181 | """ given an input file or directory,
182 | build a list of Cb Feed Reports.
183 |
184 | This structure chooses to have one
185 | report per STIX Package, with all
186 | suitable observables associated.
187 |
188 | Based on your STIX Package structure,
189 | you may prefer a different arrangement.
190 | """
191 |
192 | reports = []
193 | if os.path.isfile(input_source):
194 | reports.append(build_report(input_source))
195 | else:
196 | for root, dirs, files in os.walk(input_source):
197 | for f in files:
198 | if not f.endswith("xml"): continue
199 | try:
200 | rep = build_report(os.path.join(root, f))
201 | if rep: reports.append(rep)
202 | except UnsupportedVersionError as e:
203 | print(("-> Skipping {0}\n"
204 | "UnsupportedVersionError: {1}\n"
205 | "see https://github.com/STIXProject/python-stix/issues/124".format(
206 | f, e)))
207 | except Exception as e:
208 | print(("-> Unexpected error parsing {0}: {1}; skipping.".format(f, e)))
209 |
210 | return reports
211 |
212 |
213 | def create(input_source):
214 | reports = build_reports(input_source)
215 |
216 | # ****************************
217 | # TODO - you probably want to change these values to reflect your
218 | # local input source
219 | feedinfo = {'name': 'stiximport',
220 | 'display_name': "STIX Package Import",
221 | 'provider_url': 'http://stix.mitre.org',
222 | 'summary': "This feed was imported from stix package(s) at %s" % input_source,
223 | 'tech_data': "There are no requirements to share any data to receive this feed.",
224 | 'icon': 'images/stix.gif'
225 | }
226 |
227 | feedinfo = CbFeedInfo(**feedinfo)
228 | feed = CbFeed(feedinfo, reports)
229 | return feed.dump()
230 |
231 |
232 | if __name__ == "__main__":
233 | parser = build_cli_parser()
234 | options, args = parser.parse_args(sys.argv)
235 | if not options.input or not options.output:
236 |         print("-> Must specify an input file/directory and an output filename")
237 | sys.exit(-1)
238 |
239 |
240 | #
241 | # Adding namespaces that aren't in defaults
242 | #
243 | def _update_namespaces():
244 | for i in ADDITIONAL_NAMESPACES:
245 | nsparser.STIX_NAMESPACES.add_namespace(i)
246 | mixbox.namespaces.register_namespace(i)
247 |
248 |
249 | _update_namespaces()
250 |
251 | bytes = create(options.input)
252 | open(options.output, "w").write(bytes)
253 |
--------------------------------------------------------------------------------
/test/resources/template.json:
--------------------------------------------------------------------------------
1 | {
2 | "feedinfo": {
3 | "category": "Carbon Black",
4 | "display_name": "QA Feed BWF912316192",
5 | "icon": "/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBT/wAARCAAyADIDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD9MKhvL630+3ee6uIraBPvSzOEUfUnpWZ401afw/4O13VLYKbmysLi5iDDI3JGzDI9Mivj66nsNY8L6f4w+Iet6xrtxqU00djpVmyoo8ogMWY/Ki5YcIoPI98AH07qfx48AaTIUn8UWbsvX7NunH5xhqr2f7Q/w7vpAkfiaFSf+e0E0Q/NkFfJrfEvQLE7dJ+HuhxR9jqck945+pLqP0pq/FPT7g7b3wD4XmiPUW9vLA/4MsnH5UAfdGjeI9K8RQmbStStNSiHVrWdZAPrtPFaNfEvhOy8H+OtU8jw5/a3gbxQIpJrXyro3Fs5RC5UPxIhwp5yRwetfQf7NXjbV/HXw8lu9auPtd3a30los7KAzoEjYFsdT85GfYd6APV6KKKAM3xNpp1rw3q2nqMm7tJoB/wNCv8AWvhu+jbVPgbp8m0+boutTQSL3SOeNWBPoN0bD8q+9q+XV0XTfAXx11nwxr1pHP4U8WFZYklyI/ML74+R02yb06/xAnigD550Pw7qniW7FrpOnXWo3HeO1iaQj3OBwPc16r4f/ZT8Z6rGJtRNjoUGNzfapt7geu1Mj8yK6Xx3+0jd+ELy98NeD/D9r4cgsZntzJLCN+5SQWWMYVenfdmvEfEnjrxD4vkL6zrN5qAJz5c0pMY+ifdH4CgR7/ofwh8N/DDSfEHiaHxbDr+q6VptxiG1MarFJJE0a7gGZuSxAyR1rvf2VdLbT/hHazMu37bdTXA9xkR5/wDIf8q+fV0O60H4faV4XtYi3iXxlcw3MsH8UdorYgVvTe5L/RRmvs3wr4fg8KeG9M0e35hsbdIA3TdtUAsfcnn8TQM1aKKKACvO/jb8K4vij4V8iErDrNmTNYztwN3dCeytgfQgHtXolFAHxJq2ky/FVHs7uP8As/4l6Wv2e4tbjCHVUQYBBP8Ay3UDGD94DI9szw34Ht/BNovibxxaSW8EbH7BoU6lJ9QlXpuU8rED95iOegzmvqb4sfBHSviZGt5HIdK8QQAeRqUI5OOivj7w9DnI7elc18P/ANneWz1weIfHWq/8JRrUZAhSR2lhj2/dYl+XI7AgAe9AFf4E/DrVNS1q5+Ivi5c6zqGTZW7rjyIyMbsfw/L8qjsufXj3WiigAooooAKKKKACiiigAooooAKKKKAP/9k=",
6 | "icon_small": "/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBT/wAARCAAyADIDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD9MKhvL630+3ee6uIraBPvSzOEUfUnpWZ401afw/4O13VLYKbmysLi5iDDI3JGzDI9Mivj66nsNY8L6f4w+Iet6xrtxqU00djpVmyoo8ogMWY/Ki5YcIoPI98AH07qfx48AaTIUn8UWbsvX7NunH5xhqr2f7Q/w7vpAkfiaFSf+e0E0Q/NkFfJrfEvQLE7dJ+HuhxR9jqck945+pLqP0pq/FPT7g7b3wD4XmiPUW9vLA/4MsnH5UAfdGjeI9K8RQmbStStNSiHVrWdZAPrtPFaNfEvhOy8H+OtU8jw5/a3gbxQIpJrXyro3Fs5RC5UPxIhwp5yRwetfQf7NXjbV/HXw8lu9auPtd3a30los7KAzoEjYFsdT85GfYd6APV6KKKAM3xNpp1rw3q2nqMm7tJoB/wNCv8AWvhu+jbVPgbp8m0+boutTQSL3SOeNWBPoN0bD8q+9q+XV0XTfAXx11nwxr1pHP4U8WFZYklyI/ML74+R02yb06/xAnigD550Pw7qniW7FrpOnXWo3HeO1iaQj3OBwPc16r4f/ZT8Z6rGJtRNjoUGNzfapt7geu1Mj8yK6Xx3+0jd+ELy98NeD/D9r4cgsZntzJLCN+5SQWWMYVenfdmvEfEnjrxD4vkL6zrN5qAJz5c0pMY+ifdH4CgR7/ofwh8N/DDSfEHiaHxbDr+q6VptxiG1MarFJJE0a7gGZuSxAyR1rvf2VdLbT/hHazMu37bdTXA9xkR5/wDIf8q+fV0O60H4faV4XtYi3iXxlcw3MsH8UdorYgVvTe5L/RRmvs3wr4fg8KeG9M0e35hsbdIA3TdtUAsfcnn8TQM1aKKKACvO/jb8K4vij4V8iErDrNmTNYztwN3dCeytgfQgHtXolFAHxJq2ky/FVHs7uP8As/4l6Wv2e4tbjCHVUQYBBP8Ay3UDGD94DI9szw34Ht/BNovibxxaSW8EbH7BoU6lJ9QlXpuU8rED95iOegzmvqb4sfBHSviZGt5HIdK8QQAeRqUI5OOivj7w9DnI7elc18P/ANneWz1weIfHWq/8JRrUZAhSR2lhj2/dYl+XI7AgAe9AFf4E/DrVNS1q5+Ivi5c6zqGTZW7rjyIyMbsfw/L8qjsufXj3WiigAooooAKKKKACiiigAooooAKKKKAP/9k=",
7 | "name": "qafeedbwf912316192",
8 | "provider_rating": 2.0,
9 | "provider_url": "https://confluence.carbonblack.local/display/CB/CB+Response+QA",
10 | "summary": "Feed generated by QA Integration Framework (QA Feed BWF912316192)",
11 | "tech_data": "Will trigger on MD5 hash of ['dbb379c9337cc31b24743e7cf81ee8bd'] SHA-256 hash of ['94dcf0531121e13a73114e8806f096d31e21dab4a8b1bfef95b5e0171a9a0556']Event query of ['process_name:foobar.exe']IPV4 of ['158.106.122.248']IPV6 of ['7F1F:67E6:4BA0:5935:453A:A3AA:D69C:6146']Domain of ['spend.policy.issue.net']JA3 hash of ['07f362079e7f3d5a8855549fcc9a441e']JA3s hash of ['0fa6b3b35df905b209742cf80c06f7da']",
12 | "version": 2
13 | },
14 | "reports": [
15 | {
16 | "description": "MD5 hash ['dbb379c9337cc31b24743e7cf81ee8bd'] ",
17 | "id": "WithMd5",
18 | "iocs": {
19 | "md5": [
20 | "dbb379c9337cc31b24743e7cf81ee8bd"
21 | ]
22 | },
23 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests",
24 | "score": 42,
25 | "tags": [
26 | "harmless",
27 | "test",
28 | "md5"
29 | ],
30 | "timestamp": 1603400539,
31 | "title": "CB Response QA ID79998635109553919580397252507510773"
32 | },
33 | {
34 | "description": "SHA-256 hash ['94dcf0531121e13a73114e8806f096d31e21dab4a8b1bfef95b5e0171a9a0556'] ",
35 | "id": "WithSha256",
36 | "iocs": {
37 | "sha256": [
38 | "94dcf0531121e13a73114e8806f096d31e21dab4a8b1bfef95b5e0171a9a0556"
39 | ]
40 | },
41 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests",
42 | "score": 42,
43 | "tags": [
44 | "harmless",
45 | "test",
46 | "sha256"
47 | ],
48 | "timestamp": 1603400539,
49 | "title": "CB Response QA ID21537004960345562837781950598866988"
50 | },
51 | {
52 | "description": "Query ['process_name:notepad.exe'] ",
53 | "id": "WithQueryEvent",
54 | "iocs": {
55 | "query": {
56 | "index_type": "events",
57 | "search_query": [
58 | "cb.urlver=1&q=process_name%3Anotepad.exe"
59 | ]
60 | }
61 | },
62 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests",
63 | "score": 42,
64 | "tags": [
65 | "harmless",
66 | "test",
67 | "event_query"
68 | ],
69 | "timestamp": 1603400539,
70 | "title": "CB Response QA ID92415881359400855663635535031560171"
71 | },
72 | {
73 | "description": "Query ['observed_filename:notepad.exe'] ",
74 | "id": "WithQueryModule",
75 | "iocs": {
76 | "query": {
77 | "index_type": "modules",
78 | "search_query": [
79 | "cb.urlver=1&cb.q.observed_filename=notepad.exe"
80 | ]
81 | }
82 | },
83 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests",
84 | "score": 42,
85 | "tags": [
86 | "harmless",
87 | "test",
88 | "event_query"
89 | ],
90 | "timestamp": 1603400539,
91 | "title": "CB Response QA ID92415881359400855663635535031560171"
92 | },
93 | {
94 | "description": "IPV4 addr ['158.106.122.248'] ",
95 | "id": "WithIpv4",
96 | "iocs": {
97 | "ipv4": [
98 | "158.106.122.248"
99 | ]
100 | },
101 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests",
102 | "score": 42,
103 | "tags": [
104 | "harmless",
105 | "test",
106 | "ipv4"
107 | ],
108 | "timestamp": 1603400539,
109 | "title": "CB Response QA ID10273408120814052893561200852998840"
110 | },
111 | {
112 | "description": "IPV6 addr ['7F1F:67E6:4BA0:5935:453A:A3AA:D69C:6146'] ",
113 | "id": "WithIpv6",
114 | "iocs": {
115 | "ipv6": [
116 | "7F1F:67E6:4BA0:5935:453A:A3AA:D69C:6146"
117 | ]
118 | },
119 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests",
120 | "score": 42,
121 | "tags": [
122 | "harmless",
123 | "test",
124 | "ipv6"
125 | ],
126 | "timestamp": 1603400539,
127 | "title": "CB Response QA ID12770488195773987668619301433420948"
128 | },
129 | {
130 | "description": "DNS ['spend.policy.issue.net'] ",
131 | "id": "WithDns",
132 | "iocs": {
133 | "dns": [
134 | "spend.policy.issue.net"
135 | ]
136 | },
137 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests",
138 | "score": 42,
139 | "tags": [
140 | "harmless",
141 | "test",
142 | "dns"
143 | ],
144 | "timestamp": 1603400539,
145 | "title": "CB Response QA ID14254988097523942646333256976090972"
146 | },
147 | {
148 | "description": "ja3 hash ['07f362079e7f3d5a8855549fcc9a441e'] ",
149 | "id": "WithJa3",
150 | "iocs": {
151 | "ja3": [
152 | "07f362079e7f3d5a8855549fcc9a441e"
153 | ]
154 | },
155 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests",
156 | "score": 42,
157 | "tags": [
158 | "harmless",
159 | "test",
160 | "ja3"
161 | ],
162 | "timestamp": 1603400539,
163 | "title": "CB Response QA ID81650673725828231840001285253822155"
164 | },
165 | {
166 | "description": "ja3s hash ['0fa6b3b35df905b209742cf80c06f7da'] ",
167 | "id": "WithJa3s",
168 | "iocs": {
169 | "ja3s": [
170 | "0fa6b3b35df905b209742cf80c06f7da"
171 | ]
172 | },
173 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests",
174 | "score": 42,
175 | "tags": [
176 | "harmless",
177 | "test",
178 | "ja3s"
179 | ],
180 | "timestamp": 1603400539,
181 | "title": "CB Response QA ID64694413371534478005243943570681010"
182 | }
183 | ]
184 | }
185 |
--------------------------------------------------------------------------------
/example/isight/generate_isight_feed.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 | import json
5 | import pprint
6 | import isight_api
7 | import isight_config
8 | import isight_helpers
9 | import xml.etree.ElementTree as ET
10 |
11 | score_stats = {}
12 |
13 | # our imports
14 | sys.path.insert(0, "../../")
15 | from cbfeeds import CbReport
16 | from cbfeeds import CbFeed
17 | from cbfeeds import CbFeedInfo
18 |
19 | def generate_feed_information():
20 | """
21 | return a dictionary of feed information
22 | this is feed 'metadata' - the description of the feed, and not the feed contents
23 | """
24 | feed = {}
25 | feed["name"] = "iSIGHT"
26 | feed["display_name"] = "iSIGHT Partners feed"
27 | feed["summary"] = "iSIGHT Partners provides a cyber intelligence feed"
28 | feed["tech_data"] = "There are no requirements to share any data with Carbon Black to receive this feed. The underlying IOC data is provided by iSIGHT Partners"
29 | feed["provider_url"] = "http://www.isightpartners.com/"
30 | feed["icon"] = "isight.png"
31 | feed["icon_small"] = "isight.small.jpg"
32 | feed["category"] = "Partner"
33 |
34 | return CbFeedInfo(**feed)
35 |
36 | def retrieve_report_score(report_name, api, default_score):
37 | """
38 | return a numeric score, between 1 and 100, corresponding
39 | with the report. This requires a round-trip to the iSight api
40 | endpoint to retrieve an XML encoded report. That report, in
41 | turn, includes a 'criticality' rating which we can translate
42 | into a numeric score.
43 | """
44 |
45 | global score_stats
46 |
47 | #print " -> looking up score for %s..." % (report_name)
48 | data = api.get_report(report_name, "xml")
49 |
50 | root = ET.fromstring(data)
51 |
52 | # @todo don't hardcode offset here, but look for named indicator?
53 | # @todo "intel" reports don't have a risk rating
54 | #
55 | for field in root[1]:
56 | if field.tag != "Field":
57 | continue
58 | if field.attrib['name'] == 'Risk Rating':
59 | if field.text.strip() in score_stats:
60 | score_stats[field.text.strip()] = score_stats[field.text.strip()] + 1
61 | else:
62 | score_stats[field.text.strip()] = 1
63 |
64 | rating = field._children[0].text
65 | if 'HIGH' == rating:
66 | return 100
67 | elif 'MEDIUM' == rating:
68 | return 80
69 | elif 'LOW' == rating:
70 | return 60
71 | else:
72 | print(("WARNING: can't find score for %s; using default" % report_name))
73 | return default_score
74 |
75 | if "MISSING" in score_stats:
76 | score_stats["MISSING"] = score_stats["MISSING"] + 1
77 | else:
78 | score_stats["MISSING"] = 1
79 |
80 | print(("WARNING: can't find score for %s; using default" % report_name))
81 | return default_score
82 |
83 | def generate_reports(raw, api):
84 | """
85 | generate the reports data as a list of dictionaries.
86 |
87 |     each list entry corresponds to a single Carbon Black report,
88 |     which maps one-to-one to an iSIGHT report.
89 | """
90 |
91 | reports = []
92 |
93 | for rawkey in list(raw.keys()):
94 |
95 | entry = {}
96 |
97 | rawentry = raw[rawkey]
98 |
99 | entry["id"] = rawkey
100 | entry["title"] = rawentry["title"]
101 | entry["link"] = "https://mysight.isightpartners.com/report/full/%s" % (rawkey)
102 | entry["timestamp"] = rawentry["report_timestamp"]
103 | entry["iocs"] = {}
104 |
105 | for rawmd5 in rawentry["md5"]:
106 | if not "md5" in entry["iocs"]:
107 | entry["iocs"]["md5"] = []
108 |
109 | entry["iocs"]["md5"].append(rawmd5)
110 |
111 | # @todo uncomment this block to support ips
112 | #
113 | #for rawip in rawentry["ipaddr"]:
114 | # if not "ipv4" in entry["iocs"]:
115 | # entry["iocs"]["ipv4"] = []
116 | #
117 | # entry["iocs"]["ipv4"].append(rawip)
118 |
119 | for rawdns in rawentry["domain"]:
120 | if not "dns" in entry["iocs"]:
121 | entry["iocs"]["dns"] = []
122 |
123 | entry["iocs"]["dns"].append(rawdns)
124 |
125 | # if we ended up with no IOCs for this report, just skip it.
126 | #
127 | if len(entry["iocs"]) == 0:
128 | continue
129 |
130 | # the score or severity is not provided as part of the iSight
131 | # report enumeration (their "i_and_w" or "indications and warnings"
132 |         # api). instead, we must retrieve the report in XML format, parse the
133 | # report, and look for the criticality.
134 | #
135 | # Some iSIGHT reports have NO criticality rating.
136 | # For lack of clear obvious next steps, simply report the score as
137 | # 75 -- "medium high"
138 | #
139 | entry["score"] = retrieve_report_score(entry["id"], api, 75)
140 |
141 | reports.append(CbReport(**entry))
142 |
143 | return reports
144 |
145 | def create(config_file, existing_csv=None, reports_to_skip=[]):
146 | # parse the configuration file
147 | # this configuration file includes the keys needed to talk to the
148 | # iSight report server, etc.
149 | #
150 | #print "-> Parsing iSight configuration..."
151 | cfg = isight_config.ISightConfig(config_file)
152 |
153 | # instantiate a local iSight API object
154 | #
155 | #print "-> Instantiating an iSight API object..."
156 | api = isight_api.ISightAPI(cfg.iSightRemoteImportUrl,
157 | cfg.iSightRemoteImportUsername,
158 | cfg.iSightRemoteImportPassword,
159 | cfg.iSightRemoteImportPublicKey,
160 | cfg.iSightRemoteImportPrivateKey)
161 |
162 | if not existing_csv:
163 | # query the iSight report server for raw CSV report data
164 | # query 'back' the specified number of days
165 | #
166 | #print "-> Querying iSight server for last %d days of reports..." % (cfg.iSightRemoteImportDaysBack)
167 | #
168 | # @todo iSIGHT has a new-and-improved REST API which could be used instead of this legacy API
169 | #
170 | raw_report_data = api.get_i_and_w(cfg.iSightRemoteImportDaysBack)
171 |
172 | # save off the raw report data for future reference
173 | #
174 | #print "-> Saving iSight report data to iSight.csv..."
175 | f = open('iSight.csv', 'w')
176 | f.write(raw_report_data)
177 | f.close()
178 | else:
179 | raw_report_data = open(existing_csv, "r").read()
180 |
181 |     # convert the raw report data into something more manageable
182 | # in particular, a list of dictionaries, with each dictionary describing a report
183 | # this helper routine accounts for the fact that report data is spread across
184 | # multiple lines of the raw CSV blob
185 | #
186 | results = isight_helpers.isight_csv_to_iocs_dict([raw_report_data])
187 |
188 | # set up a dictionary for basic stat tracking
189 | #
190 | stats = {'md5' : {'total' : 0, 'max' : 0},
191 | 'ipaddr' : {'total' : 0, 'max' : 0},
192 | 'domain' : {'total' : 0, 'max' : 0}}
193 |
194 | for report_id in list(results.keys()):
195 | stats['md5']['total'] += len(results[report_id]['md5'])
196 | if len(results[report_id]['md5']) > stats['md5']['max']:
197 | stats['md5']['max'] = len(results[report_id]['md5'])
198 | stats['ipaddr']['total'] += len(results[report_id]['ipaddr'])
199 | if len(results[report_id]['ipaddr']) > stats['ipaddr']['max']:
200 | stats['ipaddr']['max'] = len(results[report_id]['ipaddr'])
201 | stats['domain']['total'] += len(results[report_id]['domain'])
202 | if len(results[report_id]['domain']) > stats['domain']['max']:
203 | stats['domain']['max'] = len(results[report_id]['domain'])
204 |
205 | #print " -> Total Reports: %d" % (len(results.keys()))
206 | #print " -> ----------------------------------------------- ---"
207 | #print " -> Maximum number of MD5s in one report: %d" % (stats['md5']['max'])
208 | #print " -> Total MD5s across all reports: %d" % (stats['md5']['total'])
209 | #print " -> Maximum number of IPv4 addresses in one report: %d" % (stats['ipaddr']['max'])
210 | #print " -> Total IPv4 addresses in all reports: %d" % (stats['ipaddr']['total'])
211 | #print " -> Maximum number of DNS names in one report: %d" % (stats['domain']['max'])
212 | #print " -> Total DNS names in all reports: %d" % (stats['domain']['total'])
213 |
214 | # generate the feed data from the raw iSight report data
215 | #
216 | #print "-> Generating feed data..."
217 | reports = generate_reports(results, api)
218 |
219 | # shim to skip entire reports
220 | reports = [report for report in reports if report.data['id'] not in reports_to_skip]
221 |
222 | # generate the feed metadata (feed information)
223 | # this is a static description of the feed itself
224 | #
225 |
226 | # lazy way out
227 | cwd_old = os.getcwd()
228 | os.chdir(os.path.dirname(os.path.realpath(__file__)))
229 |
230 | #print "-> Generating feed metadata..."
231 | feedinfo = generate_feed_information()
232 |
233 | # write out feed document
234 | #
235 | feed = CbFeed(feedinfo, reports)
236 |
237 | #print "-> Writing out completed feed document..."
238 |     feed_data = feed.dump()
239 |     # restore the original working directory before returning
240 |     os.chdir(cwd_old)
241 | 
242 |     return feed_data
243 |
244 | if __name__ == "__main__":
245 | #print "-> iSIGHT Partners Carbon Black feed generator"
246 | if len(sys.argv) < 3:
247 |         print("\n USAGE: generate_isight_feed.py <config> <output> [existing_csv]\n")
248 | sys.exit(0)
249 | cfg = sys.argv[1]
250 | out = sys.argv[2]
251 | csv = None
252 | if len(sys.argv) == 4:
253 |         csv = sys.argv[3]
254 |
255 | reports_to_skip = ["Intel-989749",]
256 |
257 | bytes = create(cfg, existing_csv=csv, reports_to_skip=reports_to_skip)
258 | open(out, "w").write(bytes)
259 |
260 |
--------------------------------------------------------------------------------
/example/stix/sample_data/STIX_Phishing_Indicator.xml:
--------------------------------------------------------------------------------
1 |
2 |
25 |
41 |
42 | STIX Phishing Indicator Example
43 | Indicators - Phishing
44 |
45 |
46 |
47 | "US-China" Phishing Indicator
48 | Malicious E-mail
49 | This is a cyber threat indicator for instances of "US-China" phishing attempts.
50 |
51 | 2012-12-01T09:30:47Z
52 | 2013-02-01T09:30:47Z
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 | @state.gov
61 |
62 |
63 |
64 |
65 |
66 |
67 | pdf
68 | 87022
69 |
70 |
71 | MD5
72 | cf2b3ad32a8a4cfb05e9dfc45875bd70
73 |
74 |
75 |
76 | Contains
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 | Phishing
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 | Remedy
95 | Email Block
96 | Redirect and quarantine new matching email
97 |
98 | Prevent future instances of similar phishing attempts from reaching targeted recipients in order to eliminate possibility of compromise from targeted recipient falling for phishing lure.
99 |
100 |
101 |
102 |
103 |
104 | Remedy
105 | Web Link Block
106 | Block malicous links on web proxies
107 |
108 | Prevent execution/navigation to known malicious web URLs.
109 |
110 |
111 |
112 |
113 |
114 | Remedy
115 | Domain Traffic Block
116 | Block traffic to/from malicous domains via firewalls and DNS servers.
117 |
118 | Prevent any traffic (potentially containing malicious logic, data exfil, C2, etc.) to or from known malicious domains.
119 |
120 |
121 |
122 |
123 |
124 | Response
125 | Malicous Email Cleanup
126 | Remove existing matching email from the mail servers
127 |
128 | Cleanup any known malicious emails from mail servers (potentially in Inboxes, Sent folders, Deleted folders, etc.) to prevent any future exploitation from those particular emails.
129 |
130 |
131 |
132 |
133 |
134 | Response
135 | Phishing Target Identification
136 | Review mail logs to identify other targeted recipients
137 |
138 | Identify all targeted victims of a particular phishing campaign in order to enable notification and to support more strategic cyber threat intelligence activities (TTP characterization, Campaign analysis, ThreatActor attribution, etc.).
139 |
140 |
141 |
142 |
143 |
144 | Response
145 | Phishing Target Notification
146 | Notify targeted recipients
147 |
148 | Notify all targeted victims of a particular phishing campaign to ensure they are aware they have been targeted and to help them understand how to avoid falling for phishing attacks.
149 |
150 |
151 |
152 |
153 |
154 | Response
155 | Super Secret Proprietary Response
156 | Carry out some sensitive action that is applicable only within the environment of the affected organization.
157 |
158 |
159 |
160 |
161 |
162 | ancestor-or-self::stix:Indicator//node()
163 |
164 |
165 |
166 | ancestor-or-self::stix:Indicator//indicator:SuggestedCOAs/indicator:SuggestedCOA/stixCommon:Course_Of_Action[@id="example:COA-e46d2565-754e-4ac3-9f44-2de1bfb1e71d"]
167 |
168 |
169 |
170 |
171 | High
172 | MITRE
173 |
174 |
175 |
176 | MITRE
177 |
178 |
179 |
180 |
181 | MITRE
182 |
183 |
184 | 2012-12-01T09:30:47Z
185 |
186 |
187 |
188 |
189 |
190 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Carbon Black Feeds [](https://travis-ci.org/carbonblack/cbfeeds)
2 |
3 |
4 | ## License
5 |
6 | Use of the Carbon Black Feeds API is governed by the license found in LICENSE.md.
7 |
8 | ## Overview
9 |
10 | Carbon Black Response 4.0+ ships with support for threat intelligence feeds. The Indicators of Compromise (IOCs)
11 | contained in the feeds are compared to the sensor data as it arrives on the server. Any activity matching an
12 | IOC is tagged; users can search for the tags and, optionally, register for e-mail alerts.
13 |
14 | Feeds allow Carbon Black servers to use freely available threat intelligence and proprietary customer threat data,
15 | and provide a mechanism to feed threat indicators from on-premise analytic sources to Carbon Black for verification,
16 | detection, visibility and analysis.
17 |
18 | The CB Response 4.0+ server supports three types of indicators:
19 |
20 | * Binary MD5s
21 | * IPv4 addresses
22 | * DNS names
23 |
24 | The CB Response 5.0+ server adds support for two new types of indicators:
25 |
26 | * Process Queries (Process Searches)
27 | * Binary Queries (Binary Searches)
28 |
29 | The CB Response 6.1+ server adds support for one new type of indicator:
30 |
31 | * IPv6 addresses
32 |
33 | The CB Response 7.0+ server adds support for one new type of indicator:
34 |
35 | * Binary SHA-256
36 |
37 | The CB Response 7.3+ server adds support for two new types of indicator:
38 |
39 | * Ja3 hash
40 | * Ja3s hash
41 |
42 | Please note that query IOC types have significant differences as compared to MD5s, IPv4 and IPv6 addresses, and DNS names. Please see notes below regarding their usage.
43 |
44 | The feed format, described in the "Feed Structure" section below, is designed for simplicity. This should make it
45 | easy to add support for feed data from any input source.
46 |
47 | Example feed creation scripts are included. See the 'Examples' section in this document for a listing of the examples.
48 |
49 | > _**NOTE:** As of this version, python 3 is a requirement._
50 |
51 | ## Using the Carbon Black Feeds API
52 |
53 | The Carbon Black Feeds API (CBFAPI) is found on github at:
54 |
55 | https://github.com/carbonblack/cbfeeds
56 |
57 | The CBFAPI is a collection of documentation, example scripts, and a helper library to help create and validate Carbon
58 | Black feeds. It is not required in order to build a Carbon Black feed - a feed can be created in any language that
59 | allows for building JSON, or even built by hand. The feed file itself must match the feed structure, or schema,
60 | defined in the "Feed Structure" section below.
61 |
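For orientation, the snippet below is a minimal sketch of building and serializing a feed with the helper library, following the same pattern as the example scripts in this repository. All names and field values here are illustrative placeholders, not part of any shipped feed.

```
# minimal sketch: build a one-report feed with the cbfeeds helpers
# (feed name, URLs, IOC values, and score below are illustrative placeholders)
import time

from cbfeeds import CbFeed, CbFeedInfo, CbReport

feedinfo = CbFeedInfo(name="examplefeed",
                      display_name="Example Feed",
                      provider_url="https://example.com/",
                      summary="A short description of this feed.",
                      tech_data="There are no requirements to share any data to receive this feed.")

report = CbReport(id="ExampleReport1",
                  title="Example report",
                  link="https://example.com/report/1",
                  score=50,
                  timestamp=int(time.time()),
                  iocs={"ipv4": ["203.0.113.7"]})

feed = CbFeed(feedinfo, [report])
print(feed.dump())  # JSON string suitable for writing to a .feed file
```
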
62 | ### Getting started with CBFAPI
63 |
64 | #### Ensure Python3.x and pip3 are installed:
65 |     [root@localhost carbonblack]# python3 --version
66 |     [root@localhost carbonblack]# pip3 --version
67 |
68 | #### Installation instructions/guidance
69 |
70 | [root@localhost carbonblack]# yum install git
71 | [root@localhost carbonblack]# yum install centos-release-scl -y
72 | [root@localhost carbonblack]# yum install rh-python38
73 | [root@localhost carbonblack]# scl enable rh-python38 bash
74 |
75 | #### clone the github cbfeed repository:
76 |
77 | [root@localhost carbonblack]# git clone https://github.com/carbonblack/cbfeeds.git
78 | Initialized empty Git repository in /root/repos/carbonblack/cbfeeds/.git/
79 | remote: Reusing existing pack: 80, done.
80 | remote: Counting objects: 25, done.
81 | remote: Compressing objects: 100% (25/25), done.
82 | Receiving objects: 100% (105/105), 38.03 KiB | 17 KiB/s, done.
83 | Resolving deltas: 100% (50/50), done.
84 | remote: Total 105 (delta 10), reused 0 (delta 0)
85 |
86 | #### Navigate to the newly-created cbfeeds directory
87 |
88 | [root@localhost carbonblack]# ls
89 | cbfeeds
90 | [root@localhost carbonblack]# cd cbfeeds/
91 | [root@localhost cbfeeds]# ls
92 | cbfeeds/ LICENSE.md README.md setup.py validate_feed.py
93 | example/ percent_encode_query.py requirements.txt test.py
94 |
95 | #### Install requirements:
96 | [root@localhost carbonblack]# pip3 install -r cbfeeds/requirements.txt
97 |
98 | #### Run the setup.py file to install the cbfeeds module.
99 | [root@localhost carbonblack]# python3 cbfeeds/setup.py install
100 |
101 | #### Navigate to the example directory and use the example `generate_tor_feed.py` (inside the example/tor/ directory) script to generate a feed from live tor egress IPs
102 |
103 | [root@localhost cbfeeds]# cd example/
104 | [root@localhost example]# python3 tor/generate_tor_feed.py example_tor_feed.feed
105 | [root@localhost example]# ls -l example_tor_feed.feed
106 | -rw-r--r--. 1 root root 2179084 Mar 25 08:09 example_tor_feed.feed
107 |
108 | #### Use the example `validate_feed.py` (inside the parent cbfeeds/ directory) script to validate the tor feed (or a feed of your choosing)
109 |
110 |     [root@localhost cbfeeds]# python3 validate_feed.py -f example/example_tor_feed.feed
111 | -> Validated that file exists and is readable
112 | -> Validated that feed file is valid JSON
113 | -> Validated that the feed file includes all necessary CB elements
114 | -> Validated that all element values are within CB feed
115 |
116 | ## Feed Structure
117 |
118 | * Feed: a Carbon Black feed
119 | * FeedInfo: Feed metadata: name, description, etc
120 | * Reports: a list of report
121 | * Report metadata: title, id, URL
122 | * IOCs for this report
123 |
124 | A feed is a JSON structure with two entries:
125 |
126 | * feedinfo
127 | * reports
128 |
129 | The `feedinfo` structure is a list of basic feed metadata. `reports` is a list of `report` structures.
130 | Each `report` has report metadata and a list of IOCs.
131 |
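At the top level, a feed file therefore has the following shape (contents elided):

```
{
  "feedinfo": { ... },
  "reports": [
    { ... },
    { ... }
  ]
}
```
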
132 | ### feedinfo
133 |
134 | `feedinfo` is a JSON structure with the following entries:
135 |
136 | | name | status | description |
137 | | ---------------- | -------- |-------------|
138 | | `display_name` | REQUIRED | Display name for the user interface. |
139 | | `name` | REQUIRED | Internal name; must not include spaces or special characters. See Notes. |
140 | | `provider_url`   | REQUIRED | Human-consumable link to view more information about this feed. |
141 | | `summary` | REQUIRED | A short description of this feed. |
142 | | `tech_data` | REQUIRED | More detailed technical description, to include data sharing requirements (if any) |
143 | | `category`       | _OPTIONAL_ | Category of the feed, e.g. Open Source, Partner, Connector, First Party, etc. |
144 | | `icon` | _OPTIONAL_ | A base64 encoded version of the image to use in the user interface |
145 | | `icon_small` | _OPTIONAL_ | A base64 encoded version of a smaller icon |
146 | | `provider_rating`| _OPTIONAL_ | Provider rating for the feed. |
147 | | `version` | _OPTIONAL_ | Version of the feed source. |
148 |
149 | Notes:
150 |
151 | The 'name' field cannot include spaces or special characters. Typically, it should be unique per-feed on a single server.
152 |
153 | #### Icon
154 |
155 | Recommended size/dpi for regular icon is 370px x 97px, 72 dpi.
156 |
157 | #### Small Icon (icon_small)
158 |
159 | Recommended size/dpi for the small icon is 100px x 100px, 72 dpi.
160 |
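In the generated feed JSON, the `icon` and `icon_small` values carry base64-encoded image data. The sketch below shows one way to prepare such a value, mirroring the approach used by `example/raw/generate_feed_from_raw_iocs.py`; the filename is a hypothetical placeholder.

```
import base64

# "feed_icon.png" is a placeholder path; substitute your own icon file
with open("feed_icon.png", "rb") as fp:
    icon_b64 = base64.b64encode(fp.read()).decode("utf-8")

# merge this into the rest of your feedinfo metadata described above
feedinfo = {"icon": icon_b64}
```
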
161 | Explanation of `category` parameters:
162 |
163 | | Category Name | Description |
164 | | ------------- | ----------- |
165 | | `Carbon Black` | Intelligence based on output from host-based integrations |
166 | | `Carbon Black First Party` | Intelligence generated inside the Threat Intelligence Cloud by the Carbon Black Research team |
167 | | `Connectors` | Intelligence connectors from third party technologies that Carbon Black has integrated with |
168 | | `Meta-feed` | Includes a theme-based aggregate of selected intelligence indicators from other feeds |
169 | | `Partner` | Proprietary threat intelligence provided to the Threat Intelligence Cloud via a partner agreement. |
170 | | `Open Source` | Open Source intelligence that is generally available to the public |
171 |
172 |
173 | An example `feedinfo` structure, from the `generate_tor_feed.py` script:
174 |
175 | ```
176 | "feedinfo": {
177 | "name": "tor",
178 | "display_name": "Tor Exit Nodes",
179 | "provider_url": "https://torproject.org/",
180 | "summary": "This feed is a list of Tor Node IP addresses, updated every 30 minutes.",
181 | "tech_data": "There are no requirements to share any data to receive this feed.",
182 | "icon": "tor.png",
183 | "icon_small": "tor.small.png",
184 | "category": "Open Source"
185 | }
186 | ```
187 |
188 | ### report
189 |
190 | A `report` is a JSON structure with the following entries:
191 |
192 | | name | status | description |
193 | | -------------- | -------- |-------------|
194 | | `id`           | REQUIRED | A report id, must be unique per feed `name` for the lifetime of the feed. May only contain alphanumeric characters, dashes, underscores, and periods (no spaces).|
195 | | `iocs` | REQUIRED | The IOCs for this report. A match on __any__ IOC will cause the activity to be tagged with this report id. The IOC format is described below.|
196 | | `link`         | REQUIRED | Human-consumable link to information about this report.|
197 | | `score` | REQUIRED | The severity of this report from -100 to 100, with 100 most critical.|
198 | | `timestamp` | REQUIRED | Time this report was last updated, in seconds since epoch (GMT). This should always be updated whenever the content of the report changes.|
199 | | `title` | REQUIRED | A one-line title describing this report.|
200 | | `description` | _OPTIONAL_ | A description of the report. |
201 | | `tags` | _OPTIONAL_ | A comma separated list of identifiers to tag the report. |
202 |
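The `timestamp` field is plain seconds-since-epoch. For reference, the sketch below mirrors the default that the cbfeeds `CbReport` class fills in when the field is omitted:

```
import time

# mirrors CbReport's default timestamp (seconds since epoch)
timestamp = int(time.mktime(time.gmtime()))
```
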
203 | ### iocs
204 |
205 | CB Response 4.0+ supports three types of IOCs:
206 |
207 | * IPv4 addresses
208 | * domain names
209 | * md5s
210 |
211 | CB Response 5.0+ supports all 4.0 IOCs and adds one additional type:
212 |
213 | * query - a query against "events" or "modules" data
214 |
215 | CB Response 6.1+ supports all 5.0 IOCs and adds one additional type:
216 |
217 | * IPv6 addresses
218 |
219 | The CB Response 7.0+ server adds support for one new type of indicator:
220 |
221 | * Binary SHA-256
222 |
223 | The CB Response 7.3+ server adds support for two new types of indicator:
224 |
225 | * Ja3 hash
226 | * Ja3s hash
227 |
228 | `iocs` is a structure with one or more of these entries:
229 |
230 | | name | status | description |
231 | | -------------- | -------- |-------------|
232 | | `dns` | _OPTIONAL_ | A list of domain names|
233 | | `ipv4` | _OPTIONAL_ | A list of IPv4 addresses in dotted decimal form|
234 | | `ipv6` | _OPTIONAL_ | A list of IPv6 addresses|
235 | | `ja3` | _OPTIONAL_ | A list of ja3 hashes (md5)|
236 | | `ja3s` | _OPTIONAL_ | A list of ja3s hashes (md5)|
237 | | `md5` | _OPTIONAL_ | A list of md5s|
238 | | `query` | _OPTIONAL_ | A query of type "events" or "modules"|
239 | | `sha256` | _OPTIONAL_ | A list of sha-256s|
240 |
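For illustration only, a single report may mix the non-query IOC types; the values below are placeholders:

```
"iocs": {
    "dns": ["malware.example.com"],
    "ipv4": ["192.0.2.10"],
    "md5": ["d41d8cd98f00b204e9800998ecf8427e"]
}
```

A report that uses a `query` IOC, however, cannot carry any other IOC type (see the validation criteria below).
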
241 | An example `reports` list with two `report` structures, each with one IPv4 IOC, from the `generate_tor_feed.py` script:
242 |
243 | ```
244 | "reports": [
245 | {
246 | "timestamp": 1380773388,
247 | "iocs": {
248 | "ipv4": [
249 | "100.2.142.8"
250 | ]
251 | },
252 | "link": "https://www.dan.me.uk/tornodes",
253 | "id": "TOR-Node-100.2.142.8",
254 | "title": "As of Wed Oct 2 20:09:48 2013 GMT, 100.2.142.8 has been a TOR exit for 26 days, 0:44:42. Contact: Adam Langley "
255 | },
256 | {
257 | "timestamp": 1380773388,
258 | "iocs": {
259 | "ipv4": [
260 | "100.4.7.69"
261 | ]
262 | },
263 | "link": "https://www.dan.me.uk/tornodes",
264 | "id": "TOR-Node-100.4.7.69",
265 | "title": "As of Wed Oct 2 20:09:48 2013 GMT, 100.4.7.69 has been a TOR exit for 61 days, 2:07:23. Contact: GPG KeyID: 0x1F40CBDC Jeremy "
266 | }
267 | ]
268 | ```
269 | Another example with "query" IOC:
270 |
271 | ```
272 | "reports":
273 | [
274 | {
275 | "title": "Notepad processes",
276 | "timestamp": 1388538906,
277 | "iocs": {
278 | "query": [
279 | {
280 | "index_type": "events",
281 | "search_query": "cb.urlver=1&q=process_name%3Anotepad.exe"
282 | }
283 | ]
284 | },
285 | "score": 50,
286 | "link": "http://www.myfeedserver/feed/report/notepad_proc",
287 |
288 | "id": "notepad_proc"
289 | },
290 | {
291 | "title": "Newly loaded modules",
292 | "timestamp": 1388570000,
293 | "iocs":
294 | {
295 | "query": [
296 | {
297 | "index_type": "modules",
298 | "search_query": "cb.urlver=1&q=is_executable_image%3Afalse"
299 | }
300 | ]
301 | },
302 | "score": 50,
303 |
304 | "link": "http://www.dxmtest1.org/02",
305 | "id": "new_mod_loads"
306 | }
307 | ]
308 | ```
309 | ## Validation criteria for "query" IOC reports
310 | The following conditions apply to "query" IOC reports:
311 |
312 | * the "iocs" element can only contain one "query" element
313 | * only "events" and "modules" are valid values for "index_type" element
314 | * a report with a query CANNOT also have other IOCs
315 |
316 | The "search_query" syntax is particularly noteworthy. The following conditions apply for the "search_query" field:
317 |
318 | * the "search_query" syntax is described in CB Enterprise Server Query Overview documentation
319 | * the query itself should be prefixed with `q=`
320 | * the query should be percent-encoded. This can be accomplished in several ways, including:
321 | * by copying a query from the Carbon Black UI
322 |     * by using a quoting library such as Python's `urllib.parse.quote` (see the sketch below)
323 | * by using the included percent_encode_query.py script
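
A minimal sketch of the `urllib` approach (the query itself is only an illustration):

```
from urllib.parse import quote

raw_query = "process_name:notepad.exe"                  # illustrative query
search_query = "cb.urlver=1&q=" + quote(raw_query, safe="")
# search_query == "cb.urlver=1&q=process_name%3Anotepad.exe"
```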
324 |
325 | As with all feeds, it is highly recommended to perform initial validation of the feed with the included validate_feed.py script. For any feed that includes query IOCs, it is also recommended to run feed_query_validate.py from the cbapi GitHub repo.
326 |
327 | ## Performance ramifications of "query" IOC reports
328 |
329 | Query IOCs impose a much higher performance cost on the CB Response Server than md5, dns, and ip IOCs. Furthermore, the relative costs of queries can vary significantly. As a general rule, 'events' queries are more expensive than 'modules' queries. The use of wildcards, long paths, joined searches, or multiple terms is also expensive.
330 |
331 | It is recommended that feed developers take care in constructing query IOCs and test against a representative server prior to deploying in production.
332 |
333 | ## Feed Synchronization
334 |
335 | The CB Response server periodically synchronizes enabled feeds. There are two types of feed synchronization:
336 |
337 | * Incremental
338 | * Full
339 |
340 | Incremental synchronization picks up new reports and reports with updated timestamps. Deleted reports, and reports that were changed without a corresponding change to the report timestamp, are not synchronized.
341 |
342 | Full synchronization accounts for all feed changes, even when the report timestamp is not changed or a report is deleted.
343 |
344 | Full synchronization occurs less frequently than incremental synchronization. It can be triggered manually via the web console or via the Carbon Black Client API. Alternatively, the following practices will result in all report changes being synchronized via incremental synchronization:
345 |
346 | * Update all report timestamps whenever there is a change to the report. The accuracy of the timestamp is less important than the fact that the timestamp increases (see the sketch after this list).
347 | * For reports to be deleted, remove all IOCs from the report and update the timestamp rather than removing the report.
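
For example, if a feed generator rebuilds its full report list on every run, one simple (if blunt) approach is to stamp each report with the current time; `reports` below is assumed to be the list of report dictionaries the generator produced:

```
import time

# stamp every regenerated report with the current time so incremental sync picks it up
now = int(time.mktime(time.gmtime()))
for report in reports:   # assumed: the freshly generated report dicts
    report["timestamp"] = now
```
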
348 |
349 | ## Examples
350 |
351 | Several example scripts are included in the `example` subdirectory. These scripts illustrate using the cbfeeds API to generate Carbon Black feeds from a variety of data sources.
352 |
353 | | directory | name | description |
354 | | --------- | --------------- | ------------|
355 | | abuse_ch | abuse.ch | The Swiss security blog abuse.ch tracks C&C servers for Zeus, SpyEye and Palevo malware.|
356 | | isight | iSIGHT Partners | iSIGHT Partners customers can use their API key to generate a Carbon Black feed from iSIGHT Partners cyber threat intelligence.|
357 | | mdl | Malware Domain List | Malware Domain List is a non-commercial community project to track domains used by malware.|
358 | | raw | raw | Build a Carbon Black feed from a raw list of IOCs.|
359 | | tor       | Tor             | Build a Carbon Black feed from a live list of Tor exit nodes published by torproject.org|
360 |
--------------------------------------------------------------------------------
/test/test_03_cbfeedinfo.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 |
6 | import base64
7 | import os
8 |
9 | import cbfeeds
10 | from cbfeeds.feed import CbFeedInfo
11 | from common import TestCommon
12 |
13 | RESOURCES = os.path.abspath(os.path.join(os.path.dirname(__file__), "resources"))
14 |
15 |
16 | class TestCbFeedInfoMethods(TestCommon):
17 | """
18 | Validate the methods in the CbFeedInfo class.
19 | """
20 |
21 | def test_00a_init_unknown_key(self):
22 | """
23 | Verify that an initialized feedinfo object only retains known keys.
24 | """
25 | info, _ = self._load_feed_file()
26 | info['feedinfo']['foobar'] = "should vanish"
27 | cfi = CbFeedInfo(**info['feedinfo'])
28 | assert "foobar" not in cfi.data
29 |
30 | def test_00b_init_unknown_key_strict(self):
31 | """
32 | Verify that an initialized feedinfo object only retains known keys.
33 | """
34 | info, _ = self._load_feed_file()
35 | info['feedinfo']['foobar'] = "should vanish"
36 | try:
37 | CbFeedInfo(strict=True, **info['feedinfo'])
38 | self.fail("Did not get expected exception!")
39 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
40 | assert "Feedinfo includes unknown field: foobar" in err.args[0]
41 |
42 | def test_00c_validate_unknown_key_unstrict(self):
43 | """
44 | Verify that validate with strict=False will turn off strictness in addition to validation.
45 | """
46 | info, _ = self._load_feed_file()
47 | cfi = CbFeedInfo(strict=True, **info['feedinfo'])
48 | cfi._data['foobar'] = "should vanish"
49 | cfi.validate(strict=False)
50 | assert "foobar" not in cfi.data
51 | assert not cfi.strict
52 |
53 | def test_00d_validate_unknown_key_strict(self):
54 | """
55 | Verify that validate with strict=True will turn on strictness in addition to validation.
56 | """
57 | info, _ = self._load_feed_file()
58 | cfi = CbFeedInfo(**info['feedinfo'])
59 | cfi._data['foobar'] = "should vanish"
60 | try:
61 | cfi.validate(strict=True)
62 | self.fail("Did not get expected exception!")
63 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
64 | assert "Feedinfo includes unknown field: foobar" in err.args[0]
65 | assert cfi.strict
66 |
67 | def test_01a_update_unknown_key(self):
68 | """
69 | Verify that updated feedinfo data only retains known keys.
70 | """
71 | info, _ = self._load_feed_file()
72 | cfi = CbFeedInfo(**info['feedinfo'])
73 | info['feedinfo']['foobar'] = "should vanish"
74 | cfi.data = info['feedinfo']
75 | assert "foobar" not in cfi.data
76 |
77 | def test_01b_neg_update_unknown_key_strict(self):
78 | """
79 | Verify that updated feedinfo data only retains known keys.
80 | """
81 | info, _ = self._load_feed_file()
82 | cfi = CbFeedInfo(strict=True, **info['feedinfo'])
83 | info['feedinfo']['foobar'] = "should vanish"
84 | try:
85 | cfi.data = info['feedinfo']
86 | self.fail("Did not get expected exception!")
87 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
88 | assert "Feedinfo includes unknown field: foobar" in err.args[0]
89 |
90 | # ----- Icon checks when data is initialized/updated ------------------------------
91 |
92 | # NOTE: both icon and icon_small go through the same checks for validity, so these tests are not duplicated
93 |
94 | def test_02a_init_icon_path(self):
95 | """
96 | Verify that a path supplied for icon is read and the contents used for the icon.
97 | """
98 | info, _ = self._load_feed_file()
99 | info['feedinfo']['icon'] = os.path.join(RESOURCES, "taxii-logov2.png")
100 | cfi = CbFeedInfo(**info['feedinfo'])
101 | assert cfi.data['icon'] != info['feedinfo']['icon']
102 |
103 | def test_02b_neg_init_icon_path_invalid(self):
104 | """
105 | On initialization, detect an icon path that does not exist.
106 | """
107 | info, _ = self._load_feed_file()
108 | info['feedinfo']['icon'] = "./foobar.png"
109 | try:
110 | CbFeedInfo(validate=False, **info['feedinfo'])
111 | self.fail("Did not get expected exception!")
112 | except cbfeeds.exceptions.CbIconError as err:
113 | assert "Unknown error reading/encoding icon data" in err.args[0]
114 |
115 | def test_02c_neg_init_icon_path_unreadable(self):
116 | """
117 | On initialization, detect an icon path that cannot be read.
118 | """
119 | source = "./foobar.png"
120 | with open(source, 'w') as fp:
121 | fp.write("Text that won't be read")
122 | os.chmod(source, 0o000)
123 |
124 | info, _ = self._load_feed_file()
125 | info['feedinfo']['icon'] = source
126 | try:
127 | CbFeedInfo(validate=False, **info['feedinfo'])
128 | self.fail("Did not get expected exception!")
129 | except cbfeeds.exceptions.CbIconError as err:
130 | assert "Permission denied" in err.args[0]
131 | finally:
132 | os.chmod(source, 0o777)
133 | os.remove(source)
134 |
135 | def test_02d_neg_init_icon_data_invalid_bad_padding(self):
136 | """
137 | Verify that bad encoding for the icon field is detected.
138 | """
139 | info, _ = self._load_feed_file()
140 | info['feedinfo']['icon'] = info['feedinfo']['icon'][:-2]
141 | try:
142 | CbFeedInfo(**info['feedinfo'])
143 | self.fail("Did not get expected exception!")
144 | except cbfeeds.exceptions.CbIconError as err:
145 | assert "Unknown error reading/encoding icon data: Incorrect padding" in err.args[0]
146 |
147 | def test_02e_neg_init_icon_data_invalid_bad_encoding(self):
148 | """
149 | Verify that bad encoding for the icon field is detected.
150 | """
151 | info, _ = self._load_feed_file()
152 | info['feedinfo']['icon'] = info['feedinfo']['icon'] + "%$"
153 | try:
154 | CbFeedInfo(**info['feedinfo'])
155 | self.fail("Did not get expected exception!")
156 | except cbfeeds.exceptions.CbIconError as err:
157 | assert "Unknown error reading/encoding icon data: Non-base64 digit found" in err.args[0]
158 |
159 | def test_02f_neg_init_icon_not_str(self):
160 | """
161 | Verify that a non-string entry for icon is detected.
162 | """
163 | info, _ = self._load_feed_file()
164 | info['feedinfo']['icon'] = 12345
165 | try:
166 | CbFeedInfo(**info['feedinfo'])
167 | self.fail("Did not get expected exception!")
168 | except cbfeeds.exceptions.CbIconError as err:
169 | assert "`icon` field is not a string (path or base64 data)" in err.args[0]
170 |
171 | # ----- validate() method testing --------------------------------------------------
172 |
173 | def test_03a_neg_validate_display_name_missing(self):
174 | """
175 | Verify that missing "display_name" is detected.
176 | """
177 | info, _ = self._load_feed_file()
178 | del info['feedinfo']['display_name']
179 | try:
180 | CbFeedInfo(**info['feedinfo'])
181 | self.fail("Did not get expected exception!")
182 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
183 | assert "FeedInfo missing required field(s): display_name" in err.args[0]
184 |
185 | def test_03b_neg_validate_name_missing(self):
186 | """
187 | Verify that missing "name" is detected.
188 | """
189 | info, _ = self._load_feed_file()
190 | del info['feedinfo']['name']
191 | try:
192 | CbFeedInfo(**info['feedinfo'])
193 | self.fail("Did not get expected exception!")
194 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
195 | assert "FeedInfo missing required field(s): name" in err.args[0]
196 |
197 | def test_03c_neg_validate_provider_url_missing(self):
198 | """
199 | Verify that missing "provider_url" is detected.
200 | """
201 | info, _ = self._load_feed_file()
202 | del info['feedinfo']['provider_url']
203 | try:
204 | CbFeedInfo(**info['feedinfo'])
205 | self.fail("Did not get expected exception!")
206 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
207 | assert "FeedInfo missing required field(s): provider_url" in err.args[0]
208 |
209 | def test_03d_neg_validate_summary_missing(self):
210 | """
211 | Verify that missing "summary" is detected.
212 | """
213 | info, _ = self._load_feed_file()
214 | del info['feedinfo']['summary']
215 | try:
216 | CbFeedInfo(**info['feedinfo'])
217 | self.fail("Did not get expected exception!")
218 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
219 | assert "FeedInfo missing required field(s): summary" in err.args[0]
220 |
221 | def test_03e_neg_validate_tech_data_missing(self):
222 | """
223 | Verify that missing "tech_data" is detected.
224 | """
225 | info, _ = self._load_feed_file()
226 | del info['feedinfo']['tech_data']
227 | try:
228 | CbFeedInfo(**info['feedinfo'])
229 | self.fail("Did not get expected exception!")
230 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
231 | assert "FeedInfo missing required field(s): tech_data" in err.args[0]
232 |
233 | def test_04a_validate_optional_category_missing(self):
234 | """
235 | Verify that missing optional "category" is allowed.
236 | """
237 | info, _ = self._load_feed_file()
238 | del info['feedinfo']['category']
239 | cfi = CbFeedInfo(**info['feedinfo'])
240 | assert 'category' not in cfi.data
241 |
242 | def test_04b_validate_optional_icon_missing(self):
243 | """
244 | Verify that missing optional "icon" is allowed.
245 | """
246 | info, _ = self._load_feed_file()
247 | del info['feedinfo']['icon']
248 | cfi = CbFeedInfo(**info['feedinfo'])
249 | assert 'icon' not in cfi.data
250 |
251 | def test_04c_validate_optional_icon_small_missing(self):
252 | """
253 | Verify that missing optional "icon_small" is allowed.
254 | """
255 | info, _ = self._load_feed_file()
256 | del info['feedinfo']['icon_small']
257 | cfi = CbFeedInfo(**info['feedinfo'])
258 | assert 'icon_small' not in cfi.data
259 |
260 | def test_04d_validate_optional_provider_rating_missing(self):
261 | """
262 | Verify that missing optional "provider_rating" is allowed.
263 | """
264 | info, _ = self._load_feed_file()
265 | del info['feedinfo']['provider_rating']
266 | cfi = CbFeedInfo(**info['feedinfo'])
267 | assert 'provider_rating' not in cfi.data
268 |
269 | def test_04e_validate_optional_version_missing(self):
270 | """
271 | Verify that missing optional "version" is allowed.
272 | """
273 | info, _ = self._load_feed_file()
274 | del info['feedinfo']['version']
275 | cfi = CbFeedInfo(**info['feedinfo'])
276 | assert 'version' not in cfi.data
277 |
278 | # NOTE: both icon and icon_small go through the same checks for validity, so these tests are not duplicated
279 |
280 | def test_05a_neg_validate_icon_bad_data_not_image(self):
281 | """
282 | Verify that bad data (not jpg, png or gif) for the icon field is detected.
283 | """
284 | info, _ = self._load_feed_file()
285 | info['feedinfo']['icon'] = base64.b64encode(bytes("This is bad data!", "utf-8")).decode('ascii')
286 | try:
287 | CbFeedInfo(**info['feedinfo'])
288 | self.fail("Did not get expected exception!")
289 | except cbfeeds.exceptions.CbIconError as err:
290 | assert "Supplied data does not appear to be a usable image format" in err.args[0]
291 |
292 | def test_05b_validate_icon_not_str(self):
293 | """
294 |         Verify that non-string data for the icon field is detected (it should have been converted to a base64 string).
295 | """
296 | info, _ = self._load_feed_file()
297 | cfi = CbFeedInfo(**info['feedinfo'])
298 | # noinspection PyTypeChecker
299 | cfi.data['icon'] = bytes(info['feedinfo']['icon'], 'ascii')
300 | try:
301 | cfi.validate()
302 | self.fail("Did not get expected exception!")
303 | except cbfeeds.exceptions.CbIconError as err:
304 | assert "Icon must be string of base64 data" in err.args[0]
305 |
306 | def test_06a_neg_validate_provider_rating_not_numeric(self):
307 | """
308 | Verify that provider_rating with a non-numeric value is detected.
309 | """
310 | info, _ = self._load_feed_file()
311 | info['feedinfo']['provider_rating'] = "foobar"
312 | try:
313 | CbFeedInfo(**info['feedinfo'])
314 | self.fail("Did not get expected exception!")
315 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
316 | assert "FeedInfo field `provider_rating` must be int or float" in err.args[0]
317 |
318 | def test_06b_neg_validate_version_not_numeric(self):
319 | """
320 | Verify that version with a non-numeric value is detected.
321 | """
322 | info, _ = self._load_feed_file()
323 | info['feedinfo']['version'] = "foobar"
324 | try:
325 | CbFeedInfo(**info['feedinfo'])
326 | self.fail("Did not get expected exception!")
327 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
328 | assert "FeedInfo field `version` must be int or float" in err.args[0]
329 |
330 | def test_07a_neg_validate_category_not_str_or_bytes(self):
331 | """
332 | Verify that category with a non-string value is detected.
333 | """
334 | info, _ = self._load_feed_file()
335 | info['feedinfo']['category'] = 4
336 | try:
337 | CbFeedInfo(**info['feedinfo'])
338 | self.fail("Did not get expected exception!")
339 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
340 | assert "FeedInfo field `category` must be str or bytes" in err.args[0]
341 |
342 | def test_07b_neg_validate_display_name_not_str_or_bytes(self):
343 | """
344 | Verify that display_name with a non-string value is detected.
345 | """
346 | info, _ = self._load_feed_file()
347 | info['feedinfo']['display_name'] = 4
348 | try:
349 | CbFeedInfo(**info['feedinfo'])
350 | self.fail("Did not get expected exception!")
351 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
352 | assert "FeedInfo field `display_name` must be str or bytes" in err.args[0]
353 |
354 | def test_07c_neg_validate_name_not_str_or_bytes(self):
355 | """
356 | Verify that name with a non-string value is detected.
357 | """
358 | info, _ = self._load_feed_file()
359 | info['feedinfo']['name'] = 4
360 | try:
361 | CbFeedInfo(**info['feedinfo'])
362 | self.fail("Did not get expected exception!")
363 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
364 | assert "FeedInfo field `name` must be str or bytes" in err.args[0]
365 |
366 | def test_07d_neg_validate_provider_url_not_str_or_bytes(self):
367 | """
368 | Verify that provider_url with a non-string value is detected.
369 | """
370 | info, _ = self._load_feed_file()
371 | info['feedinfo']['provider_url'] = 4
372 | try:
373 | CbFeedInfo(**info['feedinfo'])
374 | self.fail("Did not get expected exception!")
375 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
376 | assert "FeedInfo field `provider_url` must be str or bytes" in err.args[0]
377 |
378 | def test_07e_neg_validate_summary_not_str_or_bytes(self):
379 | """
380 | Verify that summary with a non-string value is detected.
381 | """
382 | info, _ = self._load_feed_file()
383 | info['feedinfo']['summary'] = 4
384 | try:
385 | CbFeedInfo(**info['feedinfo'])
386 | self.fail("Did not get expected exception!")
387 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
388 | assert "FeedInfo field `summary` must be str or bytes" in err.args[0]
389 |
390 | def test_07f_neg_validate_tech_data_not_str_or_bytes(self):
391 | """
392 | Verify that tech_data with a non-string value is detected.
393 | """
394 | info, _ = self._load_feed_file()
395 | info['feedinfo']['tech_data'] = 4
396 | try:
397 | CbFeedInfo(**info['feedinfo'])
398 | self.fail("Did not get expected exception!")
399 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
400 | assert "FeedInfo field `tech_data` must be str or bytes" in err.args[0]
401 |
402 | def test_08a_neg_validate_category_empty_string(self):
403 | """
404 |         Verify that category with an empty string value is detected.
405 | """
406 | info, _ = self._load_feed_file()
407 | info['feedinfo']['category'] = ""
408 | try:
409 | CbFeedInfo(**info['feedinfo'])
410 | self.fail("Did not get expected exception!")
411 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
412 | assert "The 'category' field must not be an empty string" in err.args[0]
413 |
414 | def test_08b_neg_validate_display_name_empty_string(self):
415 | """
416 |         Verify that display_name with an empty string value is detected.
417 | """
418 | info, _ = self._load_feed_file()
419 | info['feedinfo']['display_name'] = ""
420 | try:
421 | CbFeedInfo(**info['feedinfo'])
422 | self.fail("Did not get expected exception!")
423 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
424 | assert "The 'display_name' field must not be an empty string" in err.args[0]
425 |
426 | def test_08c_neg_validate_name_empty_string(self):
427 | """
428 |         Verify that name with an empty string value is detected.
429 | """
430 | info, _ = self._load_feed_file()
431 | info['feedinfo']['name'] = ""
432 | try:
433 | CbFeedInfo(**info['feedinfo'])
434 | self.fail("Did not get expected exception!")
435 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
436 | assert "The 'name' field must not be an empty string" in err.args[0]
437 |
438 | def test_08d_neg_validate_summary_empty_string(self):
439 | """
440 |         Verify that summary with an empty string value is detected.
441 | """
442 | info, _ = self._load_feed_file()
443 | info['feedinfo']['summary'] = ""
444 | try:
445 | CbFeedInfo(**info['feedinfo'])
446 | self.fail("Did not get expected exception!")
447 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
448 | assert "The 'summary' field must not be an empty string" in err.args[0]
449 |
450 | def test_08e_neg_validate_tech_data_empty_string(self):
451 | """
452 |         Verify that tech_data with an empty string value is detected.
453 | """
454 | info, _ = self._load_feed_file()
455 | info['feedinfo']['tech_data'] = ""
456 | try:
457 | CbFeedInfo(**info['feedinfo'])
458 | self.fail("Did not get expected exception!")
459 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
460 | assert "The 'tech_data' field must not be an empty string" in err.args[0]
461 |
462 | def test_09_neg_validate_name_alphanumeric(self):
463 | """
464 |         Verify that name with a non-alphanumeric string value is detected.
465 | """
466 | info, _ = self._load_feed_file()
467 | info['feedinfo']['name'] = "invalid_name"
468 | try:
469 | CbFeedInfo(**info['feedinfo'])
470 | self.fail("Did not get expected exception!")
471 | except cbfeeds.exceptions.CbInvalidFeedInfo as err:
472 | assert "Feed name `invalid_name` may only contain a-z, A-Z, 0-9 and must have one character" in err.args[0]
473 |
--------------------------------------------------------------------------------
/cbfeeds/feed.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved.
3 | ################################################################################
4 |
5 | import base64
6 | import imghdr
7 | import ipaddress
8 | import json
9 | import logging
10 | import os
11 | import re
12 | import tempfile
13 | import time
14 | from typing import Any, Dict, Generator, List, Optional, Tuple, Union
15 |
16 | from cbfeeds import CbIconError, CbInvalidFeed, CbInvalidFeedInfo, CbInvalidReport
17 |
18 | logger = logging.getLogger(__name__)
19 |
20 |
21 | class CbFeedInfo(object):
22 | """
23 | Class to handle the data in the feedinfo section of a feed.
24 | """
25 |
26 | def __init__(self, validate: bool = True, strict: bool = False, **kwargs):
27 | """
28 | Initialize the class. Any keys that are not required or optional will be ignored.
29 |
30 | :param validate: If True, validate after initialization (default True)
31 | :param strict: If True, raise exception on unknown fields instead of dropping them
32 | :param kwargs: feedinfo data as a dict
33 | """
34 | # internal data
35 | self._data: Dict[str, Union[str, int, float]] = {}
36 |
37 | # these fields are required in every feed descriptor
38 | self.required = ["display_name", "name", "provider_url", "summary", "tech_data", ]
39 |
40 |         # optional, may not be in every feed
41 | self.optional = ["category", "icon", "icon_small", "provider_rating", "version"]
42 |
43 | # these fields are expected to be numeric
44 | self.is_numeric = ["provider_rating", "version"]
45 |
46 | # these fields are strings that cannot be empty
47 | self.noemptystrings = ["name", "display_name", "summary", "tech_data", "category"]
48 |
49 | self.strict = strict
50 |
51 | self.data = kwargs
52 | if validate:
53 | self.validate()
54 |
55 | def __str__(self):
56 | """Return a descriptive string of the object."""
57 | return f"CbFeedInfo({self._data.get('name', 'unnamed')})"
58 |
59 | def __repr__(self):
60 | """Return the canonical string representation of the object."""
61 | return repr(self._data)
62 |
63 | @property
64 | def data(self) -> Dict[str, Union[str, int, float]]:
65 | """
66 | :return: the internally stored value
67 | """
68 | return self._data
69 |
70 | @data.setter
71 | def data(self, new_data: Dict[str, Union[str, int, float]]) -> None:
72 | """
73 | Update the internal data, ignoring unknown keys.
74 |
75 | :param new_data: new structure to update data with
76 | """
77 | self._data = new_data
78 |
79 | pruner = []
80 | for key in self._data.keys():
81 | if key not in self.required and key not in self.optional:
82 | if self.strict:
83 | raise CbInvalidFeedInfo(f"Feedinfo includes unknown field: {key}")
84 | else:
85 | pruner.append(key)
86 | for item in pruner:
87 | del self._data[item]
88 | logger.debug(f"Pruned unknown field `{item}` from feedinfo")
89 |
90 | def is_base64(data: str, strict: bool = False) -> Tuple[bool, Optional[str]]:
91 | try:
92 | if isinstance(data, str):
93 | use_data = data
94 | elif isinstance(data, bytes):
95 | use_data = data.decode('ascii')
96 | else:
97 | raise CbIconError("Data must be str or bytes in base64 encoding format")
98 | check = base64.b64encode(base64.b64decode(use_data, validate=strict)).decode('ascii') == use_data
99 | return check, None
100 | except Exception as err2:
101 | return False, f"{err2}"
102 |
103 | # NOTE: if they are present, the icon fields could just be paths to actual data (for convenience)
104 |
105 | for icon_field in ["icon", "icon_small"]:
106 | if icon_field in self._data and self._data[icon_field] is not None and self._data[icon_field] != "":
107 | if not isinstance(self._data[icon_field], (str, bytes)):
108 | raise CbIconError(f"`{icon_field}` field is not a string (path or base64 data)")
109 |
110 | # Check to see if it is base64 encodable data (no strict check)
111 | if is_base64(self._data[icon_field])[0]: # looks to be valid base64, as far as we can tell
112 | continue
113 |
114 | # Failed decoding check, check for path
115 | if os.path.exists(self._data[icon_field]):
116 | icon_path = self._data.pop(icon_field)
117 | try:
118 | with open(icon_path, "rb") as icon_file:
119 | self._data[icon_field] = base64.b64encode(icon_file.read()).decode('ascii')
120 | except Exception as err:
121 | raise CbIconError(f"Unknown error reading/encoding {icon_field} data: {err}")
122 |
123 | # not a path, may be data
124 | ok, err = is_base64(self._data[icon_field], strict=True)
125 | if not ok:
126 | raise CbIconError(f"Unknown error reading/encoding {icon_field} data: {err}")
127 |
128 | # --------------------------------------------------
129 |
130 | def validate(self, strict: bool = None) -> None:
131 | """
132 | Perform a set of checks to validate data before we export the feed.
133 |
134 |         :param strict: If True or False, changes the strict setting of the class; True raises an exception on non-CB
135 |             fields, False prunes them
136 | :raises: CbInvalidFeed if there are validation problems
137 | """
138 | if strict is not None:
139 | if isinstance(strict, bool):
140 | self.strict = strict
141 | else:
142 | raise TypeError("`strict` parameter must be a boolean")
143 |         self.data = self._data  # re-assess
144 |
145 | # verify that all required fields are there
146 | if not all([x in self.data.keys() for x in self.required]):
147 | missing_fields = ", ".join(set(self.required).difference(set(self.data.keys())))
148 | raise CbInvalidFeedInfo("FeedInfo missing required field(s): %s" % missing_fields)
149 |
150 | # check to see if icon_field is a string or bytes base64 decoded
151 | for icon_field in ["icon", "icon_small"]:
152 | if icon_field in self.data:
153 | try:
154 | # If there's any bytes or unicode here, an exception will be thrown
155 | if not isinstance(self.data[icon_field], str):
156 | raise CbIconError("Icon must be string of base64 data")
157 |
158 | # check data for image information
159 | tf = tempfile.NamedTemporaryFile()
160 | tf.write(base64.b64decode(self.data[icon_field]))
161 | tf.flush()
162 | what = imghdr.what(tf.name)
163 | if what not in ['png', 'gif', 'jpeg']:
164 | raise CbIconError(f"Supplied data does not appear to be a usable image format (is {what})")
165 | except TypeError as err:
166 | raise CbIconError("Icon must either be path or base64 data. \
167 | Path does not exist and base64 decode failed with: %s" % err)
168 |
169 | # All fields in feedinfo must be strings unless otherwise stated
170 | for key in self.data.keys():
171 | if key in self.is_numeric:
172 | if not isinstance(self.data[key], (int, float)):
173 | raise CbInvalidFeedInfo(
174 | f"FeedInfo field `{key}` must be int or float, not type {type(self.data[key])}")
175 | else:
176 | if not isinstance(self.data[key], (str, bytes)):
177 | raise CbInvalidFeedInfo(
178 | f"FeedInfo field `{key}` must be str or bytes, not type {type(self.data[key])}")
179 |
180 | # certain fields, when present, must not be empty strings
181 | for key in self.data.keys():
182 | if key in self.noemptystrings and self.data[key] == "":
183 | raise CbInvalidFeedInfo(f"The '{key}' field must not be an empty string")
184 |
185 |         # validate the feed short name ('name') is alphanumeric only, with at least one character
186 | if not self.data["name"].isalnum():
187 | raise CbInvalidFeedInfo(f"Feed name `{self.data['name']}` may only contain a-z, A-Z, "
188 | "0-9 and must have one character")
189 |
190 |
191 | class CbReport(object):
192 | """
193 | Class to handle the data in the reports section of a feed.
194 | """
195 |
196 | def __init__(self, allow_negative_scores: bool = False, validate: bool = True, strict: bool = False, **kwargs):
197 | """
198 | Initialize the class.
199 |
200 | :param allow_negative_scores: If True, allow for negative scores
201 | :param validate: If True, validate
202 | :param strict: If True, raise exception on unknown fields instead of dropping them
203 | :param kwargs: actual report data
204 | """
205 | # negative scores introduced in CB 4.2
206 | # negative scores indicate a measure of "goodness" versus "badness"
207 | self.allow_negative_scores = allow_negative_scores
208 |
209 | # these fields are required in every report
210 | self.required = ["iocs", "timestamp", "link", "title", "id", "score"]
211 |
212 | # these fields must be of type string
213 | self.typestring = ["link", "title", "id", "description"]
214 |
215 | # these fields must be of type int
216 | self.typeint = ["timestamp", "score"]
217 |
218 | # these fields are optional
219 | self.optional = ["tags", "description"]
220 |
221 |         # valid IOC types are "md5", "sha256", "ipv4", "ipv6", "dns", "query", "ja3", "ja3s"
222 | self.valid_ioc_types = ["md5", "sha256", "ipv4", "ipv6", "dns", "query", "ja3", "ja3s"]
223 |
224 | # valid index_type options for "query" IOC
225 | self.valid_query_ioc_types = ["events", "modules"]
226 |
227 | # valid query fields
228 | self.valid_query_fields = ["index_type", "search_query"]
229 |
230 | if "timestamp" not in kwargs:
231 | kwargs["timestamp"] = int(time.mktime(time.gmtime()))
232 |
233 | self.strict = strict
234 | self._rid = f"Report '" + f"{kwargs.get('id', '???')}" + "'" # for exception identification
235 |
236 | self.data = kwargs
237 | if validate:
238 | self.validate()
239 |
240 | def __str__(self):
241 | """Return a descriptive string of the object."""
242 | return "CbReport(%s)" % (self.data.get("title", self.data.get("id", '')))
243 |
244 | def __repr__(self):
245 | """Return the canonical string representation of the object."""
246 | return repr(self.data)
247 |
248 | @property
249 | def data(self) -> Dict[str, Union[str, int, Dict, List]]:
250 | """
251 | :return: the internally stored value
252 | """
253 | return self._data
254 |
255 | @data.setter
256 | def data(self, new_data: Dict[str, Union[str, int, Dict, List]]) -> None:
257 | """
258 | Update the internal data, ignoring unknown keys.
259 |
260 | :param new_data: new structure to update data with
261 | """
262 | self._data = new_data
263 |
264 | pruner = []
265 | for key, value in new_data.items():
266 | if key not in self.required and key not in self.optional:
267 | if self.strict:
268 | raise CbInvalidReport(f"Report includes unknown field: {key}")
269 | else:
270 | pruner.append(key)
271 |
272 | # handle query dict
273 | if key == "iocs":
274 | if isinstance(value, Dict):
275 | for key2, value2 in value.items():
276 | if key2 == "query" and isinstance(value2, Dict): # cope with bad data (for now)
277 | pruner2 = []
278 | for key3 in value2.keys():
279 | if key3 not in self.valid_query_fields:
280 | if self.strict:
281 | raise CbInvalidReport(f"{self._rid}, field 'ioc' query includes"
282 | f" unknown field: {key3}")
283 | else:
284 | pruner2.append(key3)
285 | for item in pruner2:
286 | del self._data[key][key2][item]
287 | logger.debug(f"Pruned unknown query ioc field `{item}` from report")
288 |
289 | for item in pruner:
290 | del self._data[item]
291 | logger.debug(f"Pruned unknown field `{item}` from feedinfo")
292 |
293 | # --------------------------------------------------
294 |
295 | def validate(self, strict: bool = None) -> None:
296 | """
297 | Perform a set of checks to validate report data.
298 |
299 |         :param strict: If True or False, changes the strict setting of the class; True raises an exception on non-CB
300 |             fields, False prunes them
301 | :raises: CbInvalidReport if there are validation problems
302 | """
303 | if strict is not None:
304 | if isinstance(strict, bool):
305 | self.strict = strict
306 | else:
307 | raise TypeError("`strict` parameter must be a boolean")
308 |         self.data = self._data  # re-assess
309 |
310 | # validate we have all required keys
311 | if not all([x in self.data.keys() for x in self.required]):
312 | missing_fields = ", ".join(set(self.required).difference(set(self.data.keys())))
313 | raise CbInvalidReport(f"Report missing required field(s): {missing_fields}")
314 |
315 | # CBAPI-36
316 | # verify that all fields that should be strings are strings or bytes
317 | for key in self.typestring:
318 | if key in self.data.keys():
319 | if not isinstance(self.data[key], (str, bytes)):
320 | raise CbInvalidReport(f"{self._rid}, field '{key}', must be of type str or bytes, but seems to"
321 | f" be of type {type(self.data[key])}")
322 |
323 | # verify that all fields that should be ints are ints
324 | for key in self.typeint:
325 | if key in self.data.keys():
326 | if not isinstance(self.data[key], (int, float)):
327 | raise CbInvalidReport(f"{self._rid}, field '{key}', must be an int")
328 | else:
329 | self.data[key] = int(self.data[key]) # make sure it's int
330 |
331 | # validate that tags is a list of alphanumeric strings
332 | if "tags" in self.data.keys():
333 | if not isinstance(self.data["tags"], List):
334 | raise CbInvalidReport(f"{self._rid}, field 'tags', must be a list of str")
335 | for tag in self.data["tags"]:
336 | if not isinstance(tag, str):
337 | raise CbInvalidReport(f"{self._rid}, field 'tag', has entry not a string ({tag}, type {type(tag)})")
338 |
339 | if tag.lower() == "event_query": # the one exception
340 | pass
341 | else:
342 | if len(tag) > 32 or len(tag) < 1:
343 | raise CbInvalidReport(f"{self._rid}, field 'tag', has an entry that is not 1-32"
344 | f" characters in length ({tag})")
345 | if not str(tag).isalnum():
346 | raise CbInvalidReport(
347 | f"{self._rid}, field 'tag', has an entry that is not alphanumeric ({tag})")
348 |
349 | # validate score is integer between -100 (if so specified) or 0 and 100
350 | bottom = -100 if self.allow_negative_scores else 0
351 | if not self.allow_negative_scores and self.data["score"] < 0:
352 | raise CbInvalidReport(f"{self._rid}, field 'score' ({self.data['score']}), out of range {bottom} to 100")
353 |
354 | if self.data["score"] < -100 or self.data["score"] > 100:
355 | raise CbInvalidReport(f"{self._rid}, field 'score' ({self.data['score']}), out of range {bottom} to 100")
356 |
357 |         # validate the id of this report contains only a-z, A-Z, 0-9, '-', '_', '.', with at least one character
358 | if not re.match("^[a-zA-Z0-9-_.]+$", self.data["id"]):
359 | raise CbInvalidReport(
360 | f"{self._rid} (the id) is invalid and may only contain a-z, A-Z, 0-9, or one of [-_.]")
361 |
362 | # convenience variable for next tests
363 | iocs = self.data['iocs']
364 |
365 |         # validate that there is at least one type of ioc present
366 | if not isinstance(iocs, Dict):
367 | raise CbInvalidReport(f"{self._rid}, field 'iocs', has bad format (must be dict)")
368 |
369 | if len(iocs.keys()) == 0:
370 | raise CbInvalidReport(f"{self._rid}, field 'iocs', has no entries")
371 |
372 | # validate there is at least one IOC for each report and each IOC entry has at least one entry
373 | for key, item in iocs.items():
374 | if key not in self.valid_ioc_types:
375 | raise CbInvalidReport(f"{self._rid}, field 'iocs', unknown ioc '{key}'")
376 |
377 | if key.lower() == "query":
378 | if not isinstance(item, Dict):
379 | raise CbInvalidReport(f"{self._rid}, field 'iocs', ioc '{key}', is not a dictionary")
380 | # NOTE: other query ioc testing below
381 | else:
382 | if not isinstance(item, List):
383 | raise CbInvalidReport(f"{self._rid}, field 'iocs', ioc '{key}', is not a list of str")
384 | if len(item) == 0:
385 | raise CbInvalidReport(f"{self._rid}, field 'iocs', ioc '{key}', must have at least 1 entry")
386 | for i in item:
387 | if not isinstance(i, str):
388 | raise CbInvalidReport(
389 | f"{self._rid}, field 'iocs', ioc '{key}', has non-str entry (({i}, type {type(i)})")
390 |
391 |         # check that a report with a "query" ioc does not also contain other ioc types
392 | query_ioc = "query" in iocs.keys()
393 | if query_ioc:
394 | extras = []
395 | for key in iocs.keys():
396 | if key not in ["query"]:
397 | extras.append(key)
398 | if len(extras) > 0:
399 | raise CbInvalidReport(f"{self._rid}, field 'iocs', has extra keys: {extras}")
400 |
401 | iocs_query = iocs["query"] # for cleaner code
402 |
403 | # validate that the index_type field exists
404 | if "index_type" not in iocs_query.keys():
405 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'query' section missing 'index_type'")
406 |
407 | # validate that the index_type is a valid value
408 |             if iocs_query.get("index_type", None) not in self.valid_query_ioc_types:
409 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'index_type' is not a known type"
410 | f" ({iocs_query.get('index_type', None)})")
411 |
412 | # validate that the search_query field exists
413 | if "search_query" not in iocs_query.keys():
414 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'query' section missing 'search_query'")
415 |
416 | # validate that the search_query field is at least minimally valid
417 | # in particular, we are looking for a "q=" (process) or "cb.q.????=" (binary)
418 | # this is by no means a complete validation, but it does provide a protection
419 | # against leaving the actual query unqualified
420 | for item in iocs_query["search_query"]:
421 | if "q=" not in item and "cb.q." not in item:
422 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'query' has bad 'search_query': {item}")
423 |
424 | for kvpair in item.split('&'):
425 | if len(kvpair.split('=')) != 2:
426 | continue # ignore simple items
427 | qparts = kvpair.split('=')
428 | if qparts[0] == 'q' or qparts[0].startswith("cb.q."):
429 | self._is_valid_query(qparts[1])
430 |
431 | # validate md5 hashes
432 | for md5 in iocs.get("md5", []):
433 | x = re.findall(r"^([a-fA-F\d]{32})$", md5)
434 | if len(x) == 0:
435 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'mp5' has invalid hash: {md5}")
436 |
437 | # validate ja3 hashes
438 | for ja3 in iocs.get("ja3", []):
439 | x = re.findall(r"^([a-fA-F\d]{32})$", ja3)
440 | if len(x) == 0:
441 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'ja3' has invalid hash: {ja3}")
442 |
443 | # validate ja3s hashes
444 | for ja3s in iocs.get("ja3s", []):
445 | x = re.findall(r"^([a-fA-F\d]{32})$", ja3s)
446 | if len(x) == 0:
447 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'ja3s' has invalid hash: {ja3s}")
448 |
449 | # validate sha256 hashes
450 | for sha256 in iocs.get("sha256", []):
451 | x = re.findall(r"^([a-fA-F\d]{64})$", sha256)
452 | if len(x) == 0:
453 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'sha256' has invalid hash: {sha256}")
454 |
455 | # validate ipv4
456 | for ipv4 in iocs.get("ipv4", []):
457 | try:
458 | ipaddress.ip_address(ipv4)
459 | except ValueError as err:
460 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'ipv4' value of {err}")
461 |
462 | # validate ipv6
463 | for ipv6 in iocs.get("ipv6", []):
464 | try:
465 | ipaddress.ip_address(ipv6)
466 | except ValueError as err:
467 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'ipv6' value of {err}")
468 |
469 | # validate domains
470 | # NOTE: as per spec: https://datatracker.ietf.org/doc/rfc1035/?include_text=1
471 | for dns in iocs.get("dns", []):
472 | if len(dns.strip()) == 0:
473 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'dns' is empty")
474 | if len(dns.strip()) > 253:
475 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'dns' exceeds maximum size of 253 characters")
476 |
477 | # break into octets
478 | parts = dns.split('.')
479 | if len(parts) == 1:
480 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'dns' value has too few octets ({dns})")
481 |
482 | # trailing . is valid, as per http://www.dns-sd.org/TrailingDotsInDomainNames.html
483 | if len(parts[-1]) == 0:
484 |                 parts = parts[:-1]  # clip the empty trailing label
485 |
486 | # spec limits dns to 127 octets, will likely never hit this due to overall length checks
487 | if len(parts) > 127:
488 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'dns' value has too many octets ({dns})")
489 |
490 | # parts defined as per https://datatracker.ietf.org/doc/rfc1035/?include_text=1, section 2.3.1
491 | # However, examples draw upon sources that provide domains that seem to break this, so we will
492 | # loosen the strict validation.
493 | for part in parts:
494 | x = re.findall(r'^[a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9]?$', part)
495 | if len(x) == 0:
496 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'dns' is invalid : {dns}")
497 |
498 | def _is_valid_query(self, q: str) -> None:
499 | """
500 | Make a determination as to if this is a valid query.
501 |
502 | :param q: query entry
503 | """
504 | # the query itself must be percent-encoded
505 | # verify there are only non-reserved characters present
506 | # no logic to detect unescaped '%' characters
507 | for c in q:
508 | if c not in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~%*()":
509 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'query' has unescaped non-reserved character "
510 | f"'{c}' found in query; use percent-encoding")
511 |
512 |
513 | # --------------------------------------------------------------------------------
514 |
515 | class CbJSONEncoder(json.JSONEncoder):
516 | """Custom JSON encoder for CbFeed."""
517 |
518 | def default(self, o):
519 | return o.dump()
520 |
521 |
522 | class CbFeed(object):
523 | """
524 | Class to hold feed information.
525 | """
526 |
527 | def __init__(self, feedinfo: Union[CbFeedInfo, Dict[str, Union[str, int, float]]],
528 | reports: List[Union[CbReport, Dict[str, Union[str, int, Dict, List]]]]):
529 | """
530 | Initialize the class.
531 |
532 | :param feedinfo: feedinfo portion of a feed, as dict or CbFeedInfo object
533 | :param reports: reports portion of a feed, as list of dict or list of CbReport objects
534 | """
535 | # basic sanity check!
536 | if not isinstance(feedinfo, (Dict, CbFeedInfo)):
537 | raise CbInvalidFeed("The supplied `feedinfo` parameter does not appear to be a valid dictionary"
538 | f" or CbFeedInfo object (is {type(feedinfo)})")
539 | if not isinstance(reports, List):
540 | raise CbInvalidFeed(f"The supplied `reports` parameter does not appear to be a valid list"
541 | f" (is {type(reports)})")
542 | else:
543 | for item in reports:
544 | if not isinstance(item, (Dict, CbReport)):
545 | raise CbInvalidFeed(f"The `reports` parameter must be a list of dictionaries"
546 | f" or CbReport objects (saw {type(item)})")
547 |
548 | use_feed = feedinfo if isinstance(feedinfo, Dict) else feedinfo.data
549 | use_rep = [rep if isinstance(rep, Dict) else rep.data for rep in reports]
550 |
551 | # save raw data internally
552 | self.data = {'feedinfo': use_feed,
553 | 'reports': use_rep}
554 |
555 | def __repr__(self):
556 | """Return the canonical string representation of the object."""
557 | return repr(self.data)
558 |
559 | def __str__(self):
560 | """Return a descriptive string of the object."""
561 | return f"CbFeed({self.data.get('feedinfo', 'unknown')})"
562 |
563 | # --------------------------------------------------
564 |
565 | def validate(self, serialized_data: str = None, strict: bool = False) -> None:
566 | """
567 | Validates the feed information.
568 |
569 | :param serialized_data: serialized data for the feed (JSON string)
570 | :param strict: If True, throw exception for non-CB fields, otherwise just prune them
571 | """
572 | if not serialized_data:
573 | # this should be identity, but just to be safe.
574 | serialized_data = self.dump(validate=False)
575 |
576 | data = json.loads(serialized_data)
577 |
578 | if "feedinfo" not in data:
579 | raise CbInvalidFeedInfo("Feed missing 'feedinfo' data")
580 |
581 | if 'reports' not in data:
582 | raise CbInvalidFeedInfo("Feed missing 'reports' structure")
583 |
584 | dispname = data['feedinfo'].get('display_name', "???")
585 |
586 | # validate the feed info
587 | try:
588 | CbFeedInfo(strict=strict, validate=True, **data["feedinfo"])
589 | except Exception as err:
590 | raise CbInvalidFeedInfo(f"Problem with feed `{dispname}`: {err}")
591 |
592 | # validate each report individually
593 | for rep in data["reports"]:
594 | try:
595 | CbReport(strict=strict, validate=True, **rep)
596 | except Exception as err:
597 | raise CbInvalidReport(f"Problem with feed `{dispname}`, report `{rep['id']}`: {err}")
598 |
599 | # validate the reports as a whole
600 | self.validate_report_list(data["reports"])
601 |
602 | def dump(self, validate: bool = True, sort_keys: bool = True) -> str:
603 | """
604 | Dumps the feed data.
605 |
606 |         :param validate: if set, validates the feed before dumping
607 |         :param sort_keys: If True, pretty it up by sorting the keys
608 | :return: json string of feed data
609 | """
610 | if validate:
611 | self.validate()
612 | return json.dumps(self.data, cls=CbJSONEncoder, indent=2, sort_keys=sort_keys)
613 |
614 | def iter_iocs(self) -> Generator:
615 | """
616 | Yields all iocs in the feed.
617 |
618 | :return: iterator of all iocs
619 | """
620 | data = json.loads(self.dump(validate=False))
621 | for report in data["reports"]:
622 | for md5 in report.get("iocs", {}).get("md5", []):
623 | yield {"type": "md5", "ioc": md5, "report_id": report.get("id", "")}
624 | for sha256 in report.get("iocs", {}).get("sha256", []):
625 | yield {"type": "sha256", "ioc": sha256, "report_id": report.get("id", "")}
626 | for ipv4 in report.get("iocs", {}).get("ipv4", []):
627 | yield {"type": "ipv4", "ioc": ipv4, "report_id": report.get("id", "")}
628 | for ipv6 in report.get("iocs", {}).get("ipv6", []):
629 | yield {"type": "ipv6", "ioc": ipv6, "report_id": report.get("id", "")}
630 | for domain in report.get("iocs", {}).get("dns", []):
631 | yield {"type": "dns", "ioc": domain, "report_id": report.get("id", "")}
632 | for ja3 in report.get("iocs", {}).get("ja3", []):
633 | yield {"type": "ja3", "ioc": ja3, "report_id": report.get("id", "")}
634 | for ja3s in report.get("iocs", {}).get("ja3s", []):
635 | yield {"type": "ja3s", "ioc": ja3s, "report_id": report.get("id", "")}
636 | for query in report.get("iocs", {}).get("query", {}).get("search_query", {}):
637 | yield {"type": "query", "ioc": query, "report_id": report.get("id", "")}
638 |
639 | @staticmethod
640 | def validate_report_list(reports: List[Dict[str, Any]]) -> None:
641 | """
642 | Validates reports as a set, as compared to each report as a standalone entity.
643 |
644 | :param reports: list of reports
645 | """
646 |
647 | reportids = set()
648 |
649 | # Verify that no two reports have the same feed id -- see CBAPI-17
650 | for report in reports:
651 | if report['id'] in reportids:
652 | raise CbInvalidFeedInfo(f"Duplicate report id '{report['id']}'")
653 | reportids.add(report['id'])
654 |
--------------------------------------------------------------------------------