├── example ├── __init__.py ├── abuse_ch │ ├── __init__.py │ ├── abuse.ch.jpg │ ├── abuse.ch.small.jpg │ └── generate_abusech_feed.py ├── isight │ ├── __init__.py │ ├── isight.png │ ├── isight.small.jpg │ ├── isight.config.template │ ├── isight_api.py │ ├── isight_config.py │ ├── isight_helpers.py │ ├── importer.py │ └── generate_isight_feed.py ├── mdl │ ├── mdl.png │ ├── mdl.small.jpg │ ├── __init__.py │ └── generate_mdl_feed.py ├── tor │ ├── tor.png │ ├── tor.small.jpg │ ├── __init__.py │ └── generate_tor_feed.py ├── stix │ ├── images │ │ └── stix.gif │ ├── sample_data │ │ ├── indicator-for-c2-ip-address.xml │ │ ├── STIX_Domain_Watchlist.xml │ │ ├── STIX_URL_Watchlist.xml │ │ ├── STIX_IP_Watchlist.xml │ │ ├── STIX_FileHash_Watchlist.xml.badmd5s │ │ ├── command-and-control-ip-range.xml │ │ └── STIX_Phishing_Indicator.xml │ ├── README.md │ └── stix_to_feed.py ├── README.md └── raw │ └── generate_feed_from_raw_iocs.py ├── test ├── resources │ ├── taxii-logov2.png │ └── template.json ├── __init__.py ├── common.py ├── test_05_validate_feed.py ├── test_01_common_integrity.py ├── test_02_cbfeed.py └── test_03_cbfeedinfo.py ├── fslds-build.md ├── .travis.yml ├── CHANGELOG.md ├── requirements.txt ├── requirements.in ├── cbfeeds ├── __init__.py ├── exceptions.py └── feed.py ├── .gitignore ├── test.py ├── setup.py ├── LICENSE.md ├── percent_encode_query.py ├── validate_feed.py └── README.md /example/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /example/abuse_ch/__init__.py: -------------------------------------------------------------------------------- 1 | from .generate_abusech_feed import create 2 | -------------------------------------------------------------------------------- /example/isight/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'cb' 2 | from .create_feed import create 3 | -------------------------------------------------------------------------------- /example/mdl/mdl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/mdl/mdl.png -------------------------------------------------------------------------------- /example/tor/tor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/tor/tor.png -------------------------------------------------------------------------------- /example/isight/isight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/isight/isight.png -------------------------------------------------------------------------------- /example/mdl/mdl.small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/mdl/mdl.small.jpg -------------------------------------------------------------------------------- /example/tor/tor.small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/tor/tor.small.jpg -------------------------------------------------------------------------------- /example/abuse_ch/abuse.ch.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/abuse_ch/abuse.ch.jpg -------------------------------------------------------------------------------- /example/stix/images/stix.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/stix/images/stix.gif -------------------------------------------------------------------------------- /example/isight/isight.small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/isight/isight.small.jpg -------------------------------------------------------------------------------- /test/resources/taxii-logov2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/test/resources/taxii-logov2.png -------------------------------------------------------------------------------- /example/abuse_ch/abuse.ch.small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carbonblack/cbfeeds/HEAD/example/abuse_ch/abuse.ch.small.jpg -------------------------------------------------------------------------------- /fslds-build.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/fslds/cbfeeds.svg?branch=python3-constr-validation)](https://travis-ci.org/fslds/cbfeeds) -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | install: 5 | - "pip install -r requirements.txt" 6 | - "python setup.py install" 7 | script: "python test.py" 8 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved. 3 | ################################################################################ 4 | -------------------------------------------------------------------------------- /example/mdl/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved. 3 | ################################################################################ 4 | 5 | from .generate_mdl_feed import create 6 | -------------------------------------------------------------------------------- /example/tor/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved. 3 | ################################################################################ 4 | 5 | from .generate_tor_feed import create 6 | -------------------------------------------------------------------------------- /example/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | This folder contains examples of using CbFeeds with various external sources. 
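Each generator in this folder follows the same basic pattern: build a list of `CbReport` objects from the external source, wrap them with a `CbFeedInfo`, and serialize the resulting `CbFeed` via `dump()`. The sketch below illustrates that pattern; all metadata and IOC values are placeholders, and the `icon`/`icon_small` fields that the real generators supply (and that strict feed validation may require) are omitted for brevity.

```python
# Minimal sketch of the pattern shared by the generators in this folder.
# All metadata and IOC values below are placeholders.
import time

from cbfeeds import CbFeed, CbFeedInfo, CbReport

report = CbReport(
    iocs={"dns": ["malicious.example.com"]},    # other ioc types include ipv4, md5, ...
    timestamp=int(time.mktime(time.gmtime())),
    link="http://feed-source.example.com",
    id="example-report-1",
    title="Example report",
    score=100,
)

feedinfo = CbFeedInfo(
    name="example",
    display_name="Example Feed",
    provider_url="http://feed-source.example.com",
    summary="Illustrative feed built from a single placeholder report.",
    tech_data="There are no requirements to share any data to receive this feed.",
    category="Open Source",
    # the real generators also pass icon= and icon_small= paths to image files
)

feed = CbFeed(feedinfo, [report])
print(feed.dump())  # JSON string, suitable for writing out as a .feed file
```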
3 | 4 | > *NOTE*: At present, for the current project scope, only the `mdl` and `tor` examples have been converted to python 3 due to their use in `test.py` (`abuse_ch` example removed from testing since feed data is no longer returned as of 2019.) 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | CHANGELOG.md 2 | # Carbon Black EDR Alliance Feed Library Changelog 3 | 4 | ## v1.0.0 5 | #### Features 6 | * Converted to python3 7 | * Added handling of sha256, ja3, ja3s and query reports 8 | * Added unit tests 9 | 10 | > _NOTE: Not all examples not converted at this time!_ 11 | 12 | ## v0.8.0 13 | #### Features 14 | * Initial Release 15 | 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile requirements.in 6 | # 7 | 8 | certifi==2020.6.20 # via requests 9 | chardet==3.0.4 # via requests 10 | idna==2.10 # via requests 11 | requests==2.24.0 # via -r requirements.in 12 | urllib3==1.25.11 # via requests 13 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | # cbfeeds requirements file 2 | # ~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | # If changes to this file are made, use `pip-compile -U -q requirements.in` at the 4 | # command line from within the repository root folder. 5 | # 6 | # NOTE: When compiled, you need to remove the line: 7 | # --index-url https://artifactory-pub.bit9.local/artifactory/api/pypi/pypi-virtual/simple 8 | ################################################################################ 9 | 10 | requests>=1.2.3 11 | -------------------------------------------------------------------------------- /cbfeeds/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved. 
3 | ################################################################################ 4 | 5 | __all__ = ["CbFeed", "CbFeedInfo", "CbReport", "CbIconError", "CbInvalidFeed", "CbInvalidFeedInfo", "CbInvalidReport", 6 | "CbException"] 7 | 8 | from .exceptions import CbException, CbIconError, CbInvalidFeed, CbInvalidFeedInfo, CbInvalidReport 9 | from .feed import CbFeed, CbFeedInfo, CbReport 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | 37 | # PyCharm 38 | .idea/ 39 | 40 | 41 | # Test Feeds 42 | *.feed 43 | .DS_Store 44 | -------------------------------------------------------------------------------- /example/isight/isight.config.template: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Carbon Black iSIGHT Partners Feed Genreator Config Template 3 | ############################################################################### 4 | 5 | # These credentials come from iSight 6 | # 7 | iSightRemoteImportUsername= 8 | iSightRemoteImportPassword= 9 | 10 | iSightRemoteImportPublicKey= 11 | iSightRemoteImportPrivateKey= 12 | 13 | # URL of iSight REST API endpoint 14 | # Effective 15-Oct-2014, mysight-api.isightpartners.com is deprecated in favor of api.isightpartners.com 15 | # 16 | iSightRemoteImportUrl=https://api.isightpartners.com/ 17 | 18 | # Number of days (relative to today) to back-pull reports from 19 | # 20 | iSightRemoteImportDaysBack=80 21 | -------------------------------------------------------------------------------- /cbfeeds/exceptions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved. 3 | ################################################################################ 4 | 5 | __all__ = ["CbException", "CbIconError", "CbInvalidFeed", "CbInvalidFeedInfo", "CbInvalidReport"] 6 | 7 | 8 | # CBFeeds Exception set 9 | class CbException(Exception): 10 | """CBFeeds base exception class""" 11 | pass 12 | 13 | 14 | class CbIconError(CbException): 15 | """Exception for icon related issues""" 16 | pass 17 | 18 | 19 | class CbInvalidFeed(CbException): 20 | """Exception for problems with overall feed structure""" 21 | pass 22 | 23 | 24 | class CbInvalidFeedInfo(CbException): 25 | """Exception for problems with feedinfo information""" 26 | pass 27 | 28 | 29 | class CbInvalidReport(CbException): 30 | """Exception for problems with report information""" 31 | pass 32 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved. 
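# NOTE: the example tests below fetch live data (malwaredomainlist.com for the mdl
# feed, onionoo.torproject.org for the tor feed), so running this suite requires
# network access to those services.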
3 | ################################################################################ 4 | 5 | import logging 6 | import sys 7 | import unittest 8 | 9 | 10 | class TestCbFeedExamples(unittest.TestCase): 11 | # NOTE: zeus tracker returns: "# ZeuS Tracker has been discontinued on Jul 8th, 2019", so 12 | # test_abusech has been removed. 13 | 14 | def test_mdl(self): 15 | import example.mdl as mdl 16 | mdl.generate_mdl_feed.DAYS_BACK = None # get all data 17 | mdl.create() 18 | 19 | def test_tor(self): 20 | import example.tor as tor 21 | tor.create() 22 | 23 | 24 | if __name__ == '__main__': 25 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='[%(filename)s:%(lineno)d] %(message)s') 26 | 27 | # run the unit tests 28 | # 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | python-cbfeeds 4 | """ 5 | 6 | from setuptools import setup 7 | 8 | setup( 9 | name='cbfeeds', 10 | version='1.0.0', 11 | url='http://github.com/carbonblack/cbfeeds', 12 | license='MIT', 13 | author='Carbon Black', 14 | author_email='dev-support@carbonblack.com', 15 | description='Carbon Black Alliance Feeds', 16 | long_description=__doc__, 17 | packages=['cbfeeds', ], 18 | include_package_data=True, 19 | #package_dir = {'': 'src'}, 20 | zip_safe=False, 21 | platforms='any', 22 | classifiers=[ 23 | 'Environment :: Web Environment', 24 | 'Intended Audience :: Developers', 25 | 'Operating System :: OS Independent', 26 | 'Programming Language :: Python', 27 | 'Topic :: Software Development :: Libraries :: Python Modules' 28 | ], 29 | scripts=['validate_feed.py'], 30 | requires=['requests'] 31 | 32 | ) 33 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | ``` 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2016-2018 Carbon Black 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of 9 | this software and associated documentation files (the "Software"), to deal in 10 | the Software without restriction, including without limitation the rights to 11 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 12 | the Software, and to permit persons to whom the Software is furnished to do so, 13 | subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 20 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 21 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 22 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 | ``` 25 | -------------------------------------------------------------------------------- /test/common.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved. 
3 | ################################################################################ 4 | 5 | 6 | import json 7 | import os 8 | import unittest 9 | from typing import Any, Dict, Tuple 10 | 11 | import cbfeeds 12 | 13 | HOME = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) 14 | RESOURCE_PATH_PREFIX = os.path.join(HOME, 'test', 'resources') 15 | 16 | 17 | class TestCommon(unittest.TestCase): 18 | """ 19 | Common class for all tests. 20 | """ 21 | 22 | # standard test feed file 23 | _test_feed = "./testfeed.json" 24 | 25 | def tearDown(self): 26 | self._clear_test_feed() 27 | 28 | def _clear_test_feed(self) -> None: 29 | """ 30 | Remove any local test feeds, if they exist. 31 | """ 32 | if os.path.exists(self._test_feed): 33 | os.chmod(self._test_feed, mode=0o777) 34 | os.remove(self._test_feed) 35 | 36 | def _load_feed_file(self, source: str = None) -> Tuple[Dict[str, Any], cbfeeds.CbFeed]: 37 | """ 38 | Copy template feed file into memory, mangle as needed, save locally for testing. 39 | 40 | :param source: Alternate template file to read 41 | :return: Tuple of json object (to optionally mangle) and feed object 42 | """ 43 | use_source = "template.json" if source is None else source 44 | with open(os.path.join(RESOURCE_PATH_PREFIX, use_source), 'r') as fp: 45 | json_obj = json.load(fp) 46 | self._save_test_feed(json_obj) 47 | 48 | feed = cbfeeds.CbFeed(json_obj["feedinfo"], json_obj["reports"]) 49 | return json_obj, feed 50 | 51 | def _save_test_feed(self, json_obj: Dict[str, Any]) -> cbfeeds.CbFeed: 52 | """ 53 | Save json object (potentially mangled) to test feed file. 54 | 55 | :param json_obj: source json 56 | :return: potentially mangled feed object 57 | """ 58 | with open(self._test_feed, 'w') as fp: 59 | json.dump(json_obj, fp, indent=4, sort_keys=True) 60 | feed = cbfeeds.CbFeed(json_obj["feedinfo"], json_obj["reports"]) 61 | return feed 62 | -------------------------------------------------------------------------------- /example/isight/isight_api.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import hmac 3 | import logging 4 | import requests 5 | 6 | _logger = logging.getLogger(__name__) 7 | 8 | class ISightAPI(object): 9 | """ 10 | Helper class for talking to iSIGHT Partners remote API. 11 | """ 12 | @staticmethod 13 | def from_config(config): 14 | return ISightAPI( config.iSightRemoteImportUrl, 15 | config.iSightRemoteImportUsername, 16 | config.iSightRemoteImportPassword, 17 | config.iSightRemoteImportPublicKey, 18 | config.iSightRemoteImportPrivateKey) 19 | 20 | def __init__(self, base_url, username, password, public_key, private_key): 21 | self.base_url = base_url 22 | self.username = username 23 | self.password = password 24 | self.public_key = public_key 25 | self.private_key = private_key 26 | 27 | query = None 28 | hashed_query = hmac.new(private_key, query, hashlib.sha256).hexdigest() 29 | 30 | self.headers = { 31 | 'X-Auth' : public_key, 32 | 'X-Auth-Hash' : hashed_query, 33 | 'Authorization' : self.__encode_user_creds(username, password) 34 | } 35 | 36 | def __encode_user_creds(self, user, passw): 37 | """ 38 | Private function to setup some Basic Auth stuff... 39 | """ 40 | return "Basic " + (user + ":" + passw).encode("base64").rstrip() 41 | 42 | def get_i_and_w(self, days_back_to_retrieve): 43 | """ 44 | Retrieve a CSV file of data of all reports from (now-days_back_to_retrieve) until now. 
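The request is made against the report/view/i_and_w endpoint with both the
'daysBack' and 'days' query parameters set to the requested look-back, and the
raw CSV response body is returned unmodified.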
45 | """ 46 | params = {'daysBack': days_back_to_retrieve, 'days': days_back_to_retrieve} 47 | url = "%sreport/view/i_and_w" % (self.base_url) 48 | 49 | _logger.info("Connecting to remote API '%s' using params: %s" % (url, params)) 50 | 51 | resp = requests.get(url, params=params, headers=self.headers) 52 | resp.raise_for_status() 53 | return resp.content 54 | 55 | def get_report(self, report_id, format='xml'): 56 | """ 57 | Download a report in a particular format. 58 | """ 59 | url = "%sreport/view/docid/%s" % (self.base_url, report_id) 60 | params = {'format':format} 61 | resp = requests.get(url, params=params, headers=self.headers) 62 | resp.raise_for_status() 63 | return resp.content 64 | -------------------------------------------------------------------------------- /test/test_05_validate_feed.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved. 3 | ################################################################################ 4 | 5 | 6 | import os 7 | 8 | import cbfeeds 9 | import validate_feed 10 | from common import TestCommon 11 | 12 | 13 | class TestValidateFeed(TestCommon): 14 | """ 15 | Verify that the validate_feed utility methods work as expected. 16 | """ 17 | 18 | def test_01a_neg_file_missing(self): 19 | """ 20 | Verify that a non-existant file is trapped 21 | """ 22 | try: 23 | validate_feed.validate_file("./nonesuch.json") 24 | self.fail("Did not get expected exception!") 25 | except cbfeeds.CbException: 26 | pass 27 | 28 | def test_01b_neg_file_unreadable(self): 29 | """ 30 | Verify that a file that cannot be read is trapped. 31 | """ 32 | info, feed = self._load_feed_file() 33 | os.chmod(self._test_feed, mode=0o000) 34 | 35 | try: 36 | validate_feed.validate_file(self._test_feed) 37 | self.fail("Did not get expected exception!") 38 | except cbfeeds.CbException: 39 | pass 40 | 41 | def test_02_neg_not_json(self): 42 | """ 43 | Verify that non-json file contents are trapped 44 | """ 45 | try: 46 | validate_feed.validate_json("This is not JSON!") 47 | self.fail("Did not get expected exception!") 48 | except cbfeeds.CbException: 49 | pass 50 | 51 | def test_03a_neg_missing_feedinfo(self): 52 | """ 53 | Verify that feed information missing a feedinfo entry is detected. 54 | """ 55 | info, _ = self._load_feed_file() 56 | del info['feedinfo'] 57 | try: 58 | validate_feed.validate_feed(info) 59 | self.fail("Did not get expected exception!") 60 | except cbfeeds.CbException as err: 61 | assert "No 'feedinfo' element found!" in f"{err}" 62 | 63 | def test_03b_neg_missing_reports(self): 64 | """ 65 | Verify that feed information missing a reports entry is detected. 66 | """ 67 | info, _ = self._load_feed_file() 68 | del info['reports'] 69 | try: 70 | validate_feed.validate_feed(info) 71 | self.fail("Did not get expected exception!") 72 | except cbfeeds.CbException as err: 73 | assert "No 'reports' element found!" in f"{err}" 74 | -------------------------------------------------------------------------------- /example/isight/isight_config.py: -------------------------------------------------------------------------------- 1 | 2 | class ISightConfig(object): 3 | """ 4 | Configuration for iSight Connector. 5 | 6 | This class populates fields by reading a config file. 
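The config file is a plain name=value listing: blank lines and lines beginning
with '#' are ignored, and a value that overrides one of the hard-coded defaults
below is coerced to the default's type (so iSightRemoteImportDaysBack, for
example, stays an int).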
7 | """ 8 | def __init__(self, config_filepath): 9 | 10 | self.keys = [ 11 | "source_path", 12 | "iSightRemoteImportUsername", 13 | "iSightRemoteImportPassword", 14 | "iSightRemoteImportPublicKey", 15 | "iSightRemoteImportPrivateKey", 16 | "iSightRemoteImportUrl", 17 | "iSightRemoteImportDaysBack", 18 | "iSightLocalRawDataFilename", 19 | ] 20 | 21 | self.source_path = config_filepath 22 | 23 | # HARDCODED DEFAULTS 24 | self.iSightRemoteImportUsername = None 25 | self.iSightRemoteImportPassword = None 26 | self.iSightRemoteImportPublicKey = None 27 | self.iSightRemoteImportPrivateKey = None 28 | self.iSightRemoteImportUrl = "https://mysight-api.isightpartners.com/" 29 | self.iSightRemoteImportDaysBack=180 30 | self.iSightLocalRawDataFilename = None 31 | 32 | with open(config_filepath, "r") as cfg: 33 | lineno = 0 34 | for line in cfg: 35 | try: 36 | lineno += 1 37 | 38 | line = line.strip() 39 | if not line or line[0] == "#": 40 | continue 41 | 42 | name, val = line.split("=", 1) 43 | 44 | # TODO validate name is within spec 45 | # -- this will require careful re-evalutaion of config params as we are 46 | # now relying on properties being listed in .conf file even 47 | # though some of those properties were never listed in this class 48 | 49 | # if we are reading a new value for an existing attribute, lets make 50 | # sure we preserve the type 51 | try: 52 | existing_attr = getattr(self, name) 53 | if existing_attr is not None: 54 | val = type(existing_attr)(val) 55 | except AttributeError: 56 | pass 57 | 58 | setattr(self, name, val) 59 | 60 | except Exception as e: 61 | pass 62 | 63 | def as_dict(self): 64 | """ 65 | """ 66 | res = {} 67 | for key in self.keys: 68 | res[key] = getattr(self, key) 69 | 70 | return res 71 | -------------------------------------------------------------------------------- /percent_encode_query.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved. 
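# Example usage:
#   python percent_encode_query.py -q "process_name:evil.exe"
# prints the query percent-encoded; unless -n/--no-prepend is given, "cb.urlver=1&q="
# is prepended when the query does not already start with q=, cb.q=, cb.fq= or cb.urlver=.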
4 | ################################################################################ 5 | 6 | import logging 7 | import optparse 8 | import sys 9 | from urllib import parse 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def build_cli_parser() -> optparse.OptionParser: 15 | """ 16 | Generate OptionParser to handle command line switches 17 | 18 | :return: optparse.OptionParser 19 | """ 20 | usage = "usage: %prog [options]" 21 | desc = "Encode, using percent encoding, a Carbon Black query" 22 | 23 | cmd_parser = optparse.OptionParser(usage=usage, description=desc) 24 | 25 | cmd_parser.add_option("-q", "--query", action="store", type="string", dest="query", 26 | help="Query to encode") 27 | cmd_parser.add_option("-n", "--no-prepend", action="store_false", default=True, dest="prepend", 28 | help=('Do NOT prepend "q=" and "cb.urlver=1" when not found ' 29 | 'in the query specified with "--query"')) 30 | return cmd_parser 31 | 32 | 33 | def is_query_complete(query: str) -> bool: 34 | """ 35 | Returns indication as to if query includes a q=, cb.q=, or cb.fq 36 | 37 | :param query: the query string to be checked 38 | :return: True if this looks like a CBR query 39 | """ 40 | # check for raw query captured from the browser 41 | if query.startswith("cb.urlver="): 42 | return True 43 | 44 | # check for simpler versions 45 | if query.startswith("q=") or query.startswith("cb.q=") or query.startswith("cb.fq="): 46 | return True 47 | return False 48 | 49 | 50 | if __name__ == "__main__": 51 | parser = build_cli_parser() 52 | options, args = parser.parse_args(sys.argv) 53 | 54 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') 55 | 56 | if not options.query: 57 | logger.error("-> Must specify a query to encode; use the -q switch or --help for usage") 58 | sys.exit(0) 59 | 60 | logger.info(f"Converting `{options.query}`...") 61 | 62 | # unless overridden by operator, prepend a cb.urlver=1&q= to the query if 63 | # if does not already exist. this makes it possible for customer to copy and 64 | # paste query from CB UI, pass through this script, and add to a feed 65 | # 66 | # see CBAPI-7 67 | # 68 | prepend = "cb.urlver=1&q=" if options.prepend and not is_query_complete(options.query) else "" 69 | print("-" * 80 + f"\n {prepend}" + parse.quote_plus(options.query) + "\n" + "-" * 80) 70 | -------------------------------------------------------------------------------- /example/stix/sample_data/indicator-for-c2-ip-address.xml: -------------------------------------------------------------------------------- 1 | 21 | 22 | Example watchlist that contains IP information. 23 | Indicators - Watchlist 24 | 25 | 26 | 27 | IP Address for known C2 channel 28 | IP Watchlist 29 | 30 | 31 | 32 | 10.0.0.0 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | C2 Behavior 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /example/stix/sample_data/STIX_Domain_Watchlist.xml: -------------------------------------------------------------------------------- 1 | 16 | 35 | 36 | Example watchlist that contains domain information. 
37 | Indicators - Watchlist 38 | 39 | 40 | 41 | Domain Watchlist 42 | Sample domain Indicator for this watchlist 43 | 44 | 45 | 46 | malicious1.example.com##comma##malicious2.example.com##comma##malicious3.example.com 47 | 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /example/stix/sample_data/STIX_URL_Watchlist.xml: -------------------------------------------------------------------------------- 1 | 19 | 38 | 39 | Example watchlist that contains URL information. 40 | Indicators - Watchlist 41 | 42 | 43 | 44 | URL Watchlist 45 | Sample URL Indicator for this watchlist 46 | 47 | 48 | 49 | http://example.com/foo/malicious1.html##comma##http://example.com/foo/malicious2.html##comma##http://example.com/foo/malicious3.html 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /example/stix/sample_data/STIX_IP_Watchlist.xml: -------------------------------------------------------------------------------- 1 | 19 | 38 | 39 | Example watchlist that contains IP information. 40 | Indicators - Watchlist 41 | 42 | 43 | 44 | IP Watchlist 45 | Sample IP Address Indicator for this watchlist. This contains one indicator with a set of three IP addresses in the watchlist. 46 | 47 | 48 | 49 | 10.0.0.0##comma##10.0.0.1##comma##10.0.0.2 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /example/stix/README.md: -------------------------------------------------------------------------------- 1 | # STIX to Cb Feed 2 | 3 | STIX is the Structured Threat Information eXpression, developed and curated by Mitre as a serialization format to share Cyber Threat Intelligence Information. You can find more information at http://stix.mitre.org. 4 | 5 | The objectives of STIX are notably larger than the CB Feeds format, so a one-to-one translation is not possible. However, for simpler STIX Package formats, it is possible to translate the STIX Package into a Carbon Black feed. 6 | 7 | *Note*: The diversity of STIX package structures can cause the translation to have unexpected results. Send us feedback (or a pull request!) with any recommendations or improvements surfaced by your source data! 8 | 9 | # stix_to_feed.py 10 | 11 | This script requires: 12 | 13 | * cbfeeds 14 | * python-stix 15 | * Docs: http://stix.readthedocs.org/en/latest/ 16 | * Github: https://github.com/STIXProject/python-stix 17 | * PyPI: https://pypi.python.org/pypi/stix/ 18 | 19 | Given a STIX Package or a directory of STIX Packages, it will translate all suitable indicators into a Cb Feed Report. Example: 20 | 21 | [root@localhost stix]$ python stix_to_feed.py -i sample_data/ -o stix.feed 22 | -> Including 3 observables from sample_data/command-and-control-ip-range.xml. 23 | -> Including 1 observables from sample_data/indicator-for-c2-ip-address.xml. 24 | -> Including 3 observables from sample_data/STIX_Domain_Watchlist.xml. 25 | -> Including 3 observables from sample_data/STIX_IP_Watchlist.xml. 26 | -> No suitable observables found in sample_data/STIX_Phishing_Indicator.xml; skipping. 27 | -> No suitable observables found in sample_data/STIX_URL_Watchlist.xml; skipping. 28 | 29 | Suitable indicators are: 30 | 31 | * DomainNameObjects 32 | * AddressValueObjects 33 | * FileObjects with MD5 Hash 34 | 35 | Only these objects with no conditionals or Any Equals conditions are translated. 
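For orientation, the traversal involved looks roughly like the sketch below. This is an illustration only, not the actual stix_to_feed.py implementation, and it assumes the python-stix/cybox object model (`STIXPackage.from_xml`, an observable's `object_.properties`) supplied by the requirements listed above:

    # Illustrative sketch only -- not the code in stix_to_feed.py.
    # Assumes the python-stix / cybox APIs named in the requirements above.
    from cybox.objects.address_object import Address
    from cybox.objects.domain_name_object import DomainName
    from cybox.objects.file_object import File
    from stix.core import STIXPackage

    def collect_iocs(xml_path):
        """Gather dns / ipv4 / md5 values from the indicators in one STIX package."""
        iocs = {"dns": [], "ipv4": [], "md5": []}
        package = STIXPackage.from_xml(xml_path)
        for indicator in package.indicators or []:
            for observable in indicator.observables or []:
                obj = observable.object_
                props = obj.properties if obj is not None else None
                if isinstance(props, DomainName) and props.value:
                    iocs["dns"].append(str(props.value))
                elif isinstance(props, Address) and props.address_value:
                    iocs["ipv4"].append(str(props.address_value))
                elif isinstance(props, File) and props.md5:
                    iocs["md5"].append(str(props.md5))
        return iocs

The resulting dictionary maps directly onto the `iocs` field of a `CbReport`, using the same `dns`, `ipv4` and `md5` keys that the other feed generators in this repository use.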
36 | 37 | The sample packages in the sample\_data directory are collected from the STIX documentation. This parser was also tested against the Mandiant APT1 and FireEye Poison Ivy reports. Those results: 38 | 39 | [root@localhost other_data]$ python stix_to_feed.py -i sample_data/ -o stix.feed 40 | -> Including 2046 observables from sample_data/APT1/Appendix_D_FQDNs.xml. 41 | -> Including 1007 observables from sample_data/APT1/Appendix_E_MD5s.xml. 42 | -> No suitable observables found in sample_data/APT1/Appendix_F_SSLCertificates.xml; skipping. 43 | -> Including 1797 observables from sample_data/APT1/Appendix_G_IOCs_Full.xml. 44 | -> No suitable observables found in sample_data/APT1/Appendix_G_IOCs_No_Observables.xml; skipping. 45 | -> Including 1797 observables from sample_data/APT1/Appendix_G_IOCs_No_OpenIOC.xml. 46 | -> No suitable observables found in sample_data/APT1/Mandiant_APT1_Report.xml; skipping. 47 | -> Including 506 observables from sample_data/Poison Ivy/fireeye-pivy-indicators.xml. 48 | -> Including 506 observables from sample_data/Poison Ivy/fireeye-pivy-observables.xml. 49 | -> Including 506 observables from sample_data/Poison Ivy/fireeye-pivy-report-with-indicators.xml. 50 | -> No suitable observables found in sample_data/Poison Ivy/fireeye-pivy-report.xml; skipping. 51 | 52 | Those packages are too large to include in the sample data, they are available from the Samples page at Mitre STIX: http://stix.mitre.org/language/version1.1/samples.html. 53 | 54 | # Changelog 55 | 56 | 4 Aug 14 - 1.0 - initial cut 57 | 58 | -------------------------------------------------------------------------------- /example/isight/isight_helpers.py: -------------------------------------------------------------------------------- 1 | 2 | import csv 3 | import time 4 | 5 | def remove_non_ascii(s): return "".join([x for x in s if ord(x)<128]) 6 | 7 | def get_field(row, field_name, do_remove_non_ascii=False): 8 | val = row.get(field_name) or row.get(field_name.lower()) 9 | if val: 10 | if do_remove_non_ascii: 11 | val = remove_non_ascii(val) 12 | return val.strip() 13 | return None 14 | 15 | def isight_csv_to_iocs_dict(isight_csv_entries): 16 | """ 17 | Converts CSV data (with header) to dictionary of dict[tuple] = another dict, 18 | where tuple = (report_id, title, product_type, report_timestamp_in_epoch_secs) 19 | 20 | and dict[tuple] = {'md5':[...], 'ipaddr':[...], 'domain':[...]} 21 | """ 22 | iocs_by_report_dict = {} 23 | if not isight_csv_entries: 24 | print("no entries provided") 25 | return iocs_by_report_dict 26 | 27 | reports = [] 28 | 29 | for isight_csv in isight_csv_entries: 30 | 31 | iwcsv = csv.DictReader(isight_csv.split('\n'), delimiter=',', quotechar='"') 32 | 33 | i = 0 34 | 35 | for row in iwcsv: 36 | report_id = get_field(row, "ReportID") 37 | report_timestamp = int(get_field(row, "Publishdate_Mysql", True) or time.time()) 38 | title = get_field(row, 'Title') 39 | product_type = get_field(row, 'Product_Type') 40 | ip = get_field(row, 'IPs', True) 41 | domain = get_field(row, 'Domain', True) 42 | md5 = get_field(row, 'MD5', True) 43 | attachment_md5 = get_field(row, 'Attachment_MD5', True) 44 | 45 | i = i + 1 46 | 47 | if not report_id: 48 | print(("Report did not have a report_id: %s" % title)) 49 | continue 50 | 51 | # @todo consider using 'Related_Domains' 52 | 53 | network_identifier = row.get('Network_Identifier') or row.get('network_identifier') 54 | file_identifier = row.get('File_Identifier') or row.get('file_identifier') 55 | 56 | #tup = (report_id, title, 
product_type, report_timestamp) 57 | tup = report_id 58 | 59 | ips = set() 60 | md5s = set() 61 | domains = set() 62 | 63 | if tup in iocs_by_report_dict: 64 | ips = set(iocs_by_report_dict[tup]['ipaddr']) 65 | md5s = set(iocs_by_report_dict[tup]['md5']) 66 | domains = set(iocs_by_report_dict[tup]['domain']) 67 | 68 | else: 69 | iocs_by_report_dict[tup] = {} 70 | 71 | 72 | iocs_by_report_dict[tup]["title"] = title 73 | iocs_by_report_dict[tup]["product_type"] = product_type 74 | iocs_by_report_dict[tup]["report_timestamp"] = report_timestamp 75 | 76 | if network_identifier and network_identifier.lower() == "attacker": 77 | if ip and len(ip) > 0: 78 | ips.add(ip) 79 | 80 | if domain and len(domain) > 0: 81 | domains.add(domain) 82 | 83 | if file_identifier and file_identifier.lower() == "attacker": 84 | if md5 and len(md5) > 0: 85 | md5s.add(md5) 86 | 87 | if attachment_md5 and len(attachment_md5) > 0: 88 | md5s.add(attachment_md5) 89 | 90 | iocs_by_report_dict[tup]['ipaddr'] = list(ips) 91 | iocs_by_report_dict[tup]['domain'] = list(domains) 92 | iocs_by_report_dict[tup]['md5'] = list(md5s) 93 | 94 | return iocs_by_report_dict 95 | -------------------------------------------------------------------------------- /example/stix/sample_data/STIX_FileHash_Watchlist.xml.badmd5s: -------------------------------------------------------------------------------- 1 | 20 | 39 | 40 | Example file watchlist 41 | Indicators - Watchlist 42 | 43 | 44 | 45 | File Hash Watchlist 46 | Indicator that contains malicious file hashes. 47 | 48 | 49 | 50 | 51 | 52 | MD5 53 | 01234567890abcdef01234567890abcdef##comma##abcdef1234567890abcdef1234567890##comma##00112233445566778899aabbccddeeff 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /example/abuse_ch/generate_abusech_feed.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved. 
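# NOTE: ZeuS Tracker was discontinued in July 2019 and the blocklist URL used below
# no longer returns feed data, so this example has been dropped from test.py
# (see the note in example/README.md).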
3 | ################################################################################ 4 | 5 | import sys 6 | import os 7 | import time 8 | 9 | # third part lib imports 10 | import requests 11 | 12 | from distutils.version import StrictVersion 13 | 14 | if StrictVersion(requests.__version__) < StrictVersion("1.2.3"): 15 | # only in 1.2.3+ did response objects support iteration 16 | raise ImportError("requires requests >= 1.2.3") 17 | 18 | # our imports 19 | sys.path.insert(0, "../../") 20 | from cbfeeds import CbReport 21 | from cbfeeds import CbFeed 22 | from cbfeeds import CbFeedInfo 23 | 24 | 25 | def get_zeus(): 26 | reports = [] 27 | r = requests.get("https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist") 28 | lines = r.text.split("\n") 29 | domains = [] 30 | for line in lines: 31 | if len(line) < 3: continue 32 | if line[0] == "#": continue 33 | 34 | domains.append(line.strip()) 35 | 36 | fields = {'iocs': { 37 | "dns": domains, 38 | }, 39 | 'timestamp': int(time.mktime(time.gmtime())), 40 | 'link': "https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist", 41 | 'id': 'abusech-zeus', 42 | 'title': 'abuse.ch Zeus hit on Standard domain blocklist', 43 | 'score': 100, 44 | } 45 | reports.append(CbReport(**fields)) 46 | return reports 47 | 48 | 49 | # 50 | # Gives 500 error, keeping it for historical reasons 51 | # 52 | # def get_palevo(): 53 | # reports = [] 54 | # r = requests.get("https://palevotracker.abuse.ch/blocklists.php?download=domainblocklist") 55 | # lines = r.text.split("\n") 56 | # domains = [] 57 | # for line in lines: 58 | # if len(line) < 3: continue 59 | # if line[0] == "#": continue 60 | # 61 | # domains.append(line.strip()) 62 | # 63 | # fields = {'iocs': { 64 | # "dns": domains, 65 | # }, 66 | # 'timestamp': int(time.mktime(time.gmtime())), 67 | # 'link': "https://palevotracker.abuse.ch/blocklists.php?download=domainblocklist", 68 | # 'id': 'abusech-palevo', 69 | # 'title': 'abuse.ch Palevo hit on domain blocklist', 70 | # 'score': 100, 71 | # } 72 | # reports.append(CbReport(**fields)) 73 | # return reports 74 | 75 | 76 | def create(): 77 | reports = [] 78 | reports.extend(get_zeus()) 79 | #reports.extend(get_palevo()) 80 | 81 | feedinfo = {'name': 'abusech', 82 | 'display_name': "abuse.ch Malware Domains", 83 | 'provider_url': "http://www.abuse.ch", 84 | 'summary': "abuse.ch tracks C&C servers for Zeus and Palevo malware. 
" + 85 | "This feed combines the two domain names blocklists.", 86 | 'tech_data': "There are no requirements to share any data to receive this feed.", 87 | 'icon': "abuse.ch.jpg", 88 | 'icon_small': "abuse.ch.small.jpg", 89 | 'category': "Open Source" 90 | } 91 | 92 | # the lazy way to the icon 93 | old_cwd = os.getcwd() 94 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 95 | 96 | feedinfo = CbFeedInfo(**feedinfo) 97 | feed = CbFeed(feedinfo, reports) 98 | feed_bytes = feed.dump() 99 | 100 | os.chdir(old_cwd) 101 | 102 | return feed_bytes 103 | 104 | 105 | if __name__ == "__main__": 106 | if len(sys.argv) != 2: 107 | print("usage: generate_abuse.ch_feed.py [outfile]") 108 | sys.exit() 109 | 110 | feed_created = create() 111 | open(sys.argv[1], "w").write(feed_created) 112 | -------------------------------------------------------------------------------- /example/isight/importer.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import logging 4 | import time 5 | from cbisight.isight_api import ISightAPI 6 | 7 | _logger = logging.getLogger(__name__) 8 | 9 | class ImporterDisabled(Exception): 10 | def __init__(self, *args, **kwargs): 11 | Exception.__init__(self, *args, **kwargs) 12 | 13 | class iSightLocalImporter(object): 14 | """ 15 | Reads CSV files from a directory 16 | """ 17 | def __init__(self, local_directory): 18 | """ 19 | TODO 20 | """ 21 | self.local_directory = local_directory 22 | 23 | if not self.local_directory: 24 | raise ImporterDisabled("iSightLocalImporter missing required field!") 25 | 26 | if not os.path.exists(local_directory): 27 | raise Exception("iSightLocalImporter specified directory not found!") 28 | 29 | self.processed_files = [] 30 | 31 | def get_csv_data(self): 32 | """ 33 | TODO 34 | """ 35 | filepaths = os.listdir(self.local_directory) 36 | results = [] 37 | for filepath in filepaths: 38 | if filepath.endswith('-processed'): 39 | continue 40 | try: 41 | full_filepath = os.path.join(self.local_directory, filepath) 42 | data = file(full_filepath, 'rb').read() 43 | results.append(data) 44 | self.processed_files.append(full_filepath) 45 | except: 46 | _logger.exception("Caught exception for: %s" % filepath) 47 | return results 48 | 49 | def on_processing_done(self): 50 | """ 51 | We don't want to keep importing the same files (although presumably we protect 52 | against that with our database), so rename it after. 53 | """ 54 | for filepath in self.processed_files: 55 | try: 56 | os.rename(filepath, filepath + "-processed") 57 | except: 58 | _logger.exception("Caught exception for: %s" % filepath) 59 | 60 | 61 | class iSightRemoteImporter(object): 62 | """ 63 | Basic API for downloading IOCs and Reports from iSight Partners 64 | """ 65 | def __init__(self, base_url, username, password, public_key, private_key, days_back_to_retrieve, save_responses_directory): 66 | """ 67 | TODO 68 | """ 69 | if not base_url or \ 70 | not username or \ 71 | not password or \ 72 | not public_key or \ 73 | not private_key or \ 74 | not days_back_to_retrieve: 75 | raise ImporterDisabled("iSightRemoteImporter missing required field(s)") 76 | 77 | self.api = ISightAPI(base_url, username, password, public_key, private_key) 78 | self.days_back_to_retrieve = days_back_to_retrieve 79 | self.save_responses_directory = save_responses_directory 80 | 81 | def get_csv_data(self): 82 | """ 83 | Uses the iSight API Class to download the file, optionally save the response, 84 | and return the data. 
85 | """ 86 | rawcsv = self.api.get_i_and_w(self.days_back_to_retrieve) 87 | if len(rawcsv) > 0: 88 | if self.save_responses_directory and os.path.exists(self.save_responses_directory): 89 | try: 90 | filename = "isight-remote-api-%s.csv" % time.strftime('%Y-%m-%d-%H_%M_%S', time.gmtime(time.time())) 91 | file(os.path.join(self.save_responses_directory, filename), 'wb').write(rawcsv) 92 | except: 93 | _logger.exception("Trying to save response!") 94 | return [rawcsv] 95 | else: 96 | _logger.error("Received blank response!") 97 | return [] 98 | 99 | def on_processing_done(self): 100 | """ 101 | Nothing to see here. 102 | """ 103 | return 104 | -------------------------------------------------------------------------------- /example/stix/sample_data/command-and-control-ip-range.xml: -------------------------------------------------------------------------------- 1 | 2 | 23 | 24 | Example Command and Control IP Range 25 | 26 | 27 | 28 | 29 | 30 | 198.51.100.2 31 | 32 | 33 | 34 | 35 | 36 | 37 | 198.51.100.17 38 | 39 | 40 | 41 | 42 | 43 | 44 | 203.0.113.19 45 | 46 | 47 | 48 | 49 | 50 | 51 | Malware C2 Channel 52 | 53 | 54 | Malware C2 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /test/test_01_common_integrity.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved. 3 | ################################################################################ 4 | 5 | 6 | import cbfeeds 7 | from common import TestCommon 8 | 9 | 10 | class TestCommonIntegrity(TestCommon): 11 | """ 12 | Verify that the unit test common methods work as expected. 13 | """ 14 | 15 | def test_01_neg_feedinfo_missing(self): 16 | """ 17 | Verify that missing feed info is trapped. 18 | """ 19 | info, _ = self._load_feed_file() 20 | del info['feedinfo'] 21 | try: 22 | self._save_test_feed(info) 23 | self.fail("Did not get expected exception!") 24 | except KeyError: 25 | pass 26 | 27 | def test_02_neg_feedinfo_not_dict(self): 28 | """ 29 | Verify that missing feed info is trapped. 30 | """ 31 | info, _ = self._load_feed_file() 32 | info['feedinfo'] = "bogus" 33 | try: 34 | self._save_test_feed(info) 35 | self.fail("Did not get expected exception!") 36 | except cbfeeds.exceptions.CbInvalidFeed as err: 37 | assert "The supplied `feedinfo` parameter does not appear to be a valid dictionary" in err.args[0] 38 | 39 | def test_03_neg_feedinfo_empty_dict(self): 40 | """ 41 | Verify that missing feed info is trapped. 42 | """ 43 | info, _ = self._load_feed_file() 44 | info['feedinfo'] = {} 45 | try: 46 | feed = self._save_test_feed(info) 47 | feed.validate() 48 | self.fail("Did not get expected exception!") 49 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 50 | assert "FeedInfo missing required field(s)" in err.args[0] 51 | 52 | def test_04_neg_reports_missing(self): 53 | """ 54 | Verify that missing reports info is trapped. 55 | """ 56 | info, _ = self._load_feed_file() 57 | del info['reports'] 58 | try: 59 | self._save_test_feed(info) 60 | self.fail("Did not get expected exception!") 61 | except KeyError: 62 | pass 63 | 64 | def test_05_neg_reports_not_list(self): 65 | """ 66 | Verify that invalid reports info (not list) is trapped. 
67 | """ 68 | info, _ = self._load_feed_file() 69 | info['reports'] = "bogus" 70 | try: 71 | self._save_test_feed(info) 72 | self.fail("Did not get expected exception!") 73 | except cbfeeds.exceptions.CbInvalidFeed as err: 74 | assert "The supplied `reports` parameter does not appear to be a valid list" in err.args[0] 75 | 76 | def test_06_neg_reports_not_list_of_dict(self): 77 | """ 78 | Verify that invalid reports info (list item not dict) is trapped. 79 | """ 80 | info, _ = self._load_feed_file() 81 | info['reports'] = ["bogus"] 82 | try: 83 | self._save_test_feed(info) 84 | self.fail("Did not get expected exception!") 85 | except cbfeeds.exceptions.CbInvalidFeed as err: 86 | assert "The `reports` parameter must be a list of dictionaries" in err.args[0] 87 | 88 | def test_10_cbfeed_using_cbfeedinfo_object(self): 89 | """ 90 | Verify that a CbFeedInfo object can be used in creating a CbFeed object. 91 | """ 92 | info, feed = self._load_feed_file() 93 | fi = cbfeeds.CbFeedInfo(**info['feedinfo']) 94 | cbf = cbfeeds.CbFeed(fi, info['reports']) 95 | assert cbf.dump() == feed.dump() 96 | 97 | def test_11_cbfeed_using_list_of_cbreport_objects(self): 98 | """ 99 | Verify that a CbFeedInfo object can be used in creating a CbFeed object. 100 | """ 101 | info, feed = self._load_feed_file() 102 | rp = [cbfeeds.CbReport(**rep) for rep in info['reports']] 103 | cbf = cbfeeds.CbFeed(info['feedinfo'], rp) 104 | assert cbf.dump() == feed.dump() 105 | 106 | -------------------------------------------------------------------------------- /example/tor/generate_tor_feed.py: -------------------------------------------------------------------------------- 1 | # stdlib imports 2 | import logging 3 | import os 4 | import sys 5 | import time 6 | from typing import Dict, List 7 | 8 | import requests 9 | 10 | # coding: utf-8 11 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved. 12 | ################################################################################ 13 | 14 | # third part lib imports 15 | 16 | # our imports 17 | sys.path.insert(0, "../../") 18 | from cbfeeds import CbReport 19 | from cbfeeds import CbFeed 20 | from cbfeeds import CbFeedInfo 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | def get_tor_nodes() -> List[Dict]: 26 | """ 27 | Read the remote source and return the tor node information. 28 | :return: list of node info 29 | """ 30 | nodes = [] 31 | url = "https://onionoo.torproject.org/details?type=relay&running=true" 32 | jsonurl = requests.get(url) 33 | text = jsonurl.json() 34 | for entry in text['relays']: 35 | try: 36 | for address in entry['or_addresses']: 37 | # IPv4 addresses are ip:port, IPv6 addresses are [ip]:port: 38 | # "or_addresses":["80.101.115.170:5061","[2001:980:3b4f:1:240:caff:fe8d:f02c]:5061"], 39 | # process only IPv4 addresses for now 40 | if address.count(':') == 1: 41 | # All IPv4 addresses will end up here. 42 | ipv4, port = address.split(':') 43 | nodes.append({'ip': ipv4, 44 | 'name': entry['nickname'], 45 | 'port': port, 46 | 'firstseen': entry['first_seen'], 47 | 'lastseen': entry['last_seen'], 48 | 'contact': entry.get("contact", "none")}) 49 | except Exception as err: 50 | logger.warning(f"{err} while parsing: {entry}") 51 | return nodes 52 | 53 | 54 | def build_reports(nodes: List[Dict]) -> List[CbReport]: 55 | """ 56 | Convert tor nodes to reports. 57 | 58 | :param nodes: list of tor nodes 59 | :return: list of reports 60 | """ 61 | # TODO - this is one "report" per TOR node IP. Not ideal. 
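# Each unique exit-node IPv4 address becomes its own CbReport carrying a single
# "ipv4" IOC; the unique_ips set below keeps duplicate addresses (and therefore
# duplicate report ids) out of the feed.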
62 | reports = [] 63 | unique_ips = set() 64 | for node in nodes: 65 | # avoid duplicated reports 66 | # CBAPI-22 67 | if node['ip'] in unique_ips: 68 | continue 69 | else: 70 | unique_ips.add(node['ip']) 71 | 72 | fields = {'iocs': { 73 | 'ipv4': [node['ip'], ] 74 | }, 75 | 'score': 0, 76 | 'timestamp': int(time.mktime(time.gmtime())), 77 | 'link': 'http://www.torproject.org', 78 | 'id': "TOR-Node-%s" % node['ip'], 79 | 'title': "%s has been a TOR exit node since %s and was last seen %s on port %s. Contact: %s" 80 | % (node['ip'], node['firstseen'], node['lastseen'], node['port'], node['contact'])} 81 | reports.append(CbReport(**fields)) 82 | 83 | return reports 84 | 85 | 86 | def create() -> str: 87 | """ 88 | Create tor feed. 89 | 90 | :return: feed info as JSON string 91 | """ 92 | nodes = get_tor_nodes() 93 | reports = build_reports(nodes) 94 | 95 | iconhome = os.path.dirname(__file__) 96 | 97 | feedinfo = {'name': 'tor', 98 | 'display_name': "Tor Exit Nodes", 99 | 'provider_url': 'https://www.torproject.org/', 100 | 'summary': "This feed is a list of Tor Node IP addresses, updated every 30 minutes.", 101 | 'tech_data': "There are no requirements to share any data to receive this feed.", 102 | 'icon': os.path.join(iconhome, 'tor.png'), 103 | 'icon_small': os.path.join(iconhome, 'tor.small.jpg'), 104 | 'category': 'Open Source', 105 | } 106 | 107 | logger.info(f">> Feed `{feedinfo['display_name']}` generated with {len(reports)} reports") 108 | 109 | feedinfo = CbFeedInfo(**feedinfo) 110 | feed = CbFeed(feedinfo, reports) 111 | created_feed = feed.dump() 112 | 113 | return created_feed 114 | 115 | 116 | if __name__ == "__main__": 117 | if len(sys.argv) != 2: 118 | print("usage: %s [outfile]" % sys.argv[0]) 119 | sys.exit(0) 120 | 121 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') 122 | 123 | info = create() 124 | with open(sys.argv[1], "w") as fp2: 125 | fp2.write(info) 126 | -------------------------------------------------------------------------------- /example/mdl/generate_mdl_feed.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved. 3 | ################################################################################ 4 | 5 | 6 | import csv 7 | import logging 8 | import os 9 | import sys 10 | import time 11 | import urllib.parse as urlparse 12 | from datetime import datetime, timedelta 13 | from distutils.version import StrictVersion 14 | from typing import List, Optional 15 | 16 | # third part lib imports 17 | import requests 18 | 19 | if StrictVersion(requests.__version__) < StrictVersion("1.2.3"): 20 | # only in 1.2.3+ did response objects support iteration 21 | raise ImportError("requires requests >= 1.2.3") 22 | 23 | # our imports 24 | sys.path.insert(0, "../../") 25 | from cbfeeds import CbReport 26 | from cbfeeds import CbFeed 27 | from cbfeeds import CbFeedInfo 28 | 29 | # NOTE: as of 10/03/2020, the feed only returns data in the year 2009; adding functionality for ALL data 30 | DAYS_BACK: Optional[int] = 90 # use number for days back from today, None for all data 31 | 32 | logger = logging.getLogger(__name__) 33 | 34 | 35 | def reports_from_csv(lines: List[str]) -> List[CbReport]: 36 | """ 37 | Takes a file-like object that is full list of CSV data from rom malwaredomainlist. 38 | creates a report per line. 
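Rows older than DAYS_BACK days are skipped (unless DAYS_BACK is None, which
keeps everything), and duplicate domains are collapsed so that each generated
report id is unique.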
39 | """ 40 | reports = [] 41 | unique_domains = set() 42 | 43 | # fixing line referencing in except clause before it is actually referenced. 44 | line = None 45 | try: 46 | for line in lines: 47 | line = line.strip() # trim spaces 48 | if len(line) == 0: 49 | continue 50 | try: 51 | rawdate, url, ip, reverse_lookup, desc, registrant, asn, _, _, _ = list(csv.reader([line]))[0] 52 | 53 | # rawdate 2013/10/27_03:06 54 | report_date = time.strptime(rawdate, "%Y/%m/%d_%H:%M") 55 | 56 | # skip any report older than DAYS_BACK, unless defined as None 57 | if DAYS_BACK is not None: 58 | report_datetime = datetime.fromtimestamp(time.mktime(report_date)) 59 | start = datetime.now() - timedelta(days=DAYS_BACK) 60 | if report_datetime < start: 61 | continue 62 | 63 | # url www.slivki.com.ua/as/Ponynl.exe 64 | url = urlparse.urlsplit(f"http://{url}") 65 | host = url.netloc 66 | if ":" in host: 67 | host = host.split(":", 1)[0] 68 | 69 | if len(host) <= 3: 70 | logger.debug(f"WARNING: no domain, skipping line {line}") 71 | continue 72 | 73 | # avoid duplicate report ids 74 | # CBAPI-21 75 | if host in unique_domains: 76 | continue 77 | else: 78 | unique_domains.add(host) 79 | 80 | fields = {'iocs': { 81 | "dns": [host], 82 | }, 83 | 'timestamp': int(time.mktime(report_date)), 84 | 'link': "http://www.malwaredomainlist.com/mdl.php", 85 | 'id': 'MDL-%s-%s' % (time.strftime("%Y%m%d-%H%M", report_date), host), 86 | 'title': '%s found on malware domain list: "%s"' % (host, desc) + 87 | ' IP (reverse lookup) at the time: %s (%s)' % (ip, reverse_lookup), 88 | 'score': 100, 89 | } 90 | 91 | reports.append(CbReport(**fields)) 92 | 93 | except Exception as err: 94 | logger.warning(f"WARNING: error parsing {line}\n{err}") 95 | except Exception as err2: 96 | logger.info(f"Unexpected exception with linw `{line}:\n{err2}") 97 | 98 | return reports 99 | 100 | 101 | def create(local_csv_file: str = None) -> str: 102 | """ 103 | Create a feed from www.malwaredomainlist.com. 104 | 105 | :param local_csv_file: path to local file to use instead of remote call 106 | :return: feed JSON. 107 | """ 108 | if local_csv_file: # use local 109 | with open(local_csv_file, "r") as fp2: 110 | lines = fp2.readlines() 111 | else: # use remote 112 | r = requests.get("http://www.malwaredomainlist.com/mdlcsv.php", stream=True) 113 | lines = r.text.split("\r\n") 114 | 115 | iconhome = os.path.dirname(__file__) 116 | reports = reports_from_csv(lines) 117 | feedinfo = {'name': 'mdl', 118 | 'display_name': "Malware Domain List", 119 | 'provider_url': "http://www.malwaredomainlist.com/mdl.php", 120 | 'summary': "Malware Domain List is a non-commercial community project to track domains used by " + 121 | "malware. 
This feed contains the most recent 180 days of entries.", 122 | 'tech_data': "There are no requirements to share any data to receive this feed.", 123 | 'icon': os.path.join(iconhome, "mdl.png"), 124 | 'icon_small': os.path.join(iconhome, "mdl.small.jpg"), 125 | 'category': "Open Source" 126 | } 127 | 128 | logger.info(f">> Feed `{feedinfo['display_name']}` generated with {len(reports)} reports") 129 | feedinfo = CbFeedInfo(**feedinfo) 130 | the_feed = CbFeed(feedinfo, reports) 131 | feed_json = the_feed.dump() 132 | 133 | return feed_json 134 | 135 | 136 | if __name__ == "__main__": 137 | if len(sys.argv) <= 1: 138 | print("usage: generate_mdl_feed.py [local.csv]") 139 | sys.exit() 140 | 141 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') 142 | 143 | outfile = sys.argv[1] 144 | localcsv = None 145 | if len(sys.argv) > 2: 146 | localcsv = sys.argv[2] 147 | 148 | feed = create(localcsv) 149 | with open(outfile, "w") as fp: 150 | fp.write(feed) 151 | -------------------------------------------------------------------------------- /test/test_02_cbfeed.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved. 3 | ################################################################################ 4 | 5 | 6 | import json 7 | 8 | import cbfeeds 9 | from common import TestCommon 10 | 11 | 12 | class TestCbFeedMethods(TestCommon): 13 | """ 14 | Validate the core methods of the CBFeed class. 15 | """ 16 | 17 | # ----- Basic Validation ----------------------------------------- # 18 | 19 | def test_01_validate_feed(self): 20 | """ 21 | Verify that overall feed validation works. 22 | """ 23 | _, feed = self._load_feed_file() 24 | feed.validate() 25 | 26 | def test_02_validate_feed_serialized(self): 27 | """ 28 | Verify that overall feed validation works with serialized data. 29 | """ 30 | _, feed = self._load_feed_file() 31 | feed.validate(serialized_data=feed.dump()) 32 | 33 | # ----- Method validation ----------------------------------------- # 34 | 35 | def test_03a_neg_validate_feedinfo_missing(self): 36 | """ 37 | Verify that CBFeed.validate detects missing feedinfo. 38 | """ 39 | _, feed = self._load_feed_file() 40 | del feed.data['feedinfo'] 41 | try: 42 | feed.validate() 43 | self.fail("Did not get expected exception!") 44 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 45 | assert "Feed missing 'feedinfo' data" in err.args[0] 46 | 47 | def test_03b_neg_validate_feedinfo_missing_serialized(self): 48 | """ 49 | Verify that CBFeed.validate detects missing feedinfo in serialized mode. 50 | """ 51 | info, feed = self._load_feed_file() 52 | del info['feedinfo'] 53 | try: 54 | feed.validate(serialized_data=json.dumps(info)) 55 | self.fail("Did not get expected exception!") 56 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 57 | assert "Feed missing 'feedinfo' data" in err.args[0] 58 | 59 | def test_04a_neg_validate_reports_missing(self): 60 | """ 61 | Verify that CBFeed.validate detects missing reports. 
62 | """ 63 | _, feed = self._load_feed_file() 64 | del feed.data['reports'] 65 | try: 66 | feed.validate() 67 | self.fail("Did not get expected exception!") 68 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 69 | assert "Feed missing 'reports' structure" in err.args[0] 70 | 71 | def test_04b_neg_validate_reports_missing_serialized(self): 72 | """ 73 | Verify that CBFeed.validate detects missing reports in serialized mode. 74 | """ 75 | info, feed = self._load_feed_file() 76 | del info['reports'] 77 | try: 78 | feed.validate(serialized_data=json.dumps(info)) 79 | self.fail("Did not get expected exception!") 80 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 81 | assert "Feed missing 'reports' structure" in err.args[0] 82 | 83 | def test_05a_neg_validate_feed_strict_bad_feedinfo(self): 84 | """ 85 | Verify that CBFeed.validate detects non-CB feedinfo fields if strict. 86 | """ 87 | info, _ = self._load_feed_file() 88 | info['feedinfo']['booga'] = "foobar" 89 | try: 90 | feed = cbfeeds.CbFeed(info['feedinfo'], info['reports']) 91 | feed.validate(strict=True) 92 | self.fail("Did not get expected exception!") 93 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 94 | assert "Problem with feed `QA Feed BWF912316192`: Feedinfo includes unknown field: booga" in err.args[0] 95 | 96 | def test_05b_neg_validate_feed_strict_bad_report(self): 97 | """ 98 | Verify that CBFeed.validate detects non-CB feedinfo fields if strict. 99 | """ 100 | info, _ = self._load_feed_file() 101 | info['reports'][1]['booga'] = "foobar" 102 | try: 103 | feed = cbfeeds.CbFeed(info['feedinfo'], info['reports']) 104 | feed.validate(strict=True) 105 | self.fail("Did not get expected exception!") 106 | except cbfeeds.exceptions.CbInvalidReport as err: 107 | assert ("Problem with feed `QA Feed BWF912316192`, report `WithSha256`: Report includes " 108 | f"unknown field: booga") in err.args[0] 109 | 110 | def test_06_neg_validate_reports_list_dup_id(self): 111 | """ 112 | Verify that validate_report_list detects duplicate ids. 113 | """ 114 | info, feed = self._load_feed_file() 115 | reports = info['reports'] 116 | reports[0]['id'] = reports[1]['id'] 117 | 118 | try: 119 | feed.validate_report_list(reports) 120 | self.fail("Did not get expected exception!") 121 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 122 | assert "Duplicate report id 'WithSha256" in err.args[0] 123 | 124 | def test_07_validate_iter_iocs(self): 125 | """ 126 | Verify that iter_iocs returns all iocs properly. 127 | """ 128 | _, feed = self._load_feed_file() 129 | 130 | checkoff = {'md5|dbb379c9337cc31b24743e7cf81ee8bd': True, 131 | 'sha256|94dcf0531121e13a73114e8806f096d31e21dab4a8b1bfef95b5e0171a9a0556': True, 132 | 'ipv4|158.106.122.248': True, 133 | 'ipv6|7F1F:67E6:4BA0:5935:453A:A3AA:D69C:6146': True, 134 | 'dns|spend.policy.issue.net': True, 135 | 'ja3|07f362079e7f3d5a8855549fcc9a441e': True, 136 | 'ja3s|0fa6b3b35df905b209742cf80c06f7da': True, 137 | 'query|process_name:foobar.exe': True, 138 | } 139 | extras = [] 140 | for item in feed.iter_iocs(): 141 | key = f"{item['type']}|{item['ioc']}" 142 | if key in checkoff: 143 | del checkoff[key] 144 | else: 145 | extras.append(key) 146 | 147 | def test_07_validate_dump(self): 148 | """ 149 | Verify that dump() works as expected. 
150 | """ 151 | info, feed = self._load_feed_file() 152 | check = feed.dump() 153 | assert check == json.dumps(info, indent=2, sort_keys=True) 154 | -------------------------------------------------------------------------------- /example/raw/generate_feed_from_raw_iocs.py: -------------------------------------------------------------------------------- 1 | # stdlib imports 2 | import re 3 | import sys 4 | import time 5 | import optparse 6 | import socket 7 | import base64 8 | import hashlib 9 | 10 | # cb imports 11 | sys.path.insert(0, "../../") 12 | from cbfeeds import CbReport 13 | from cbfeeds import CbFeed 14 | from cbfeeds import CbFeedInfo 15 | 16 | def gen_report_id(iocs): 17 | """ 18 | a report id should be unique 19 | because generate_feed_from_raw may be run repeatedly on the same data, it should 20 | also be deterministic. 21 | this routine sorts all the indicators, then hashes in order to meet these criteria 22 | """ 23 | md5 = hashlib.md5() 24 | 25 | # sort the iocs so that a re-order of the same set of iocs results in the same report id 26 | iocs.sort() 27 | 28 | for ioc in iocs: 29 | md5.update(ioc.strip().encode("utf-8")) 30 | 31 | return md5.hexdigest() 32 | 33 | def build_reports(options): 34 | 35 | reports = [] 36 | 37 | ips = [] 38 | domains = [] 39 | md5s = [] 40 | 41 | # read all of the lines (of text) from the provided 42 | # input file (of IOCs) 43 | # 44 | raw_iocs = open(options.ioc_filename, encoding='utf-8').readlines() 45 | 46 | # iterate over each of the lines 47 | # attempt to determine if each line is a suitable 48 | # ipv4 address, dns name, or md5 49 | # 50 | for raw_ioc in raw_iocs: 51 | 52 | # strip off any leading or trailing whitespace 53 | # skip any empty lines 54 | # 55 | raw_ioc = raw_ioc.strip() 56 | if len(raw_ioc) == 0: 57 | continue 58 | 59 | try: 60 | # attempt to parse the line as an ipv4 address 61 | # 62 | socket.inet_aton(raw_ioc) 63 | 64 | # parsed as an ipv4 address! 65 | # 66 | ips.append(raw_ioc) 67 | except Exception as e: 68 | 69 | # attept to parse the line as a md5 and, if that fails, 70 | # as a domain. 
use trivial parsing 71 | # 72 | if 32 == len(raw_ioc) and \ 73 | re.findall(r"([a-fA-F\d]{32})", raw_ioc): 74 | md5s.append(raw_ioc) 75 | elif -1 != raw_ioc.find("."): 76 | domains.append(raw_ioc) 77 | 78 | fields = {'iocs': { 79 | }, 80 | 'timestamp': int(time.mktime(time.gmtime())), 81 | 'link': options.url, 82 | 'title': options.report, 83 | 'id': gen_report_id(ips + domains + md5s), 84 | 'score': 100} 85 | 86 | if options.tags is not None: 87 | fields['tags'] = options.tags.split(',') 88 | 89 | if options.description is not None: 90 | fields['description'] = options.description 91 | 92 | if len(ips) > 0: 93 | fields['iocs']['ipv4'] = ips 94 | if len(domains) > 0: 95 | fields['iocs']['dns'] = domains 96 | if len(md5s) > 0: 97 | fields['iocs']['md5'] = md5s 98 | 99 | reports.append(CbReport(**fields)) 100 | 101 | return reports 102 | 103 | def create_feed(options): 104 | 105 | # generate the required feed information fields 106 | # based on command-line arguments 107 | # 108 | feedinfo = {'name': options.name, 109 | 'display_name': options.display_name, 110 | 'provider_url': options.url, 111 | 'summary': options.summary, 112 | 'tech_data': options.techdata} 113 | 114 | # if an icon was provided, encode as base64 and 115 | # include in the feed information 116 | # 117 | if options.icon: 118 | bytes = base64.b64encode(open(options.icon,'rb').read()) 119 | feedinfo['icon'] = bytes.decode("utf-8") 120 | 121 | # if a small icon was provided, encode as base64 and 122 | # include in the feed information 123 | # 124 | if options.small_icon: 125 | bytes = base64.b64encode(open(options.small_icon, 'rb').read()) 126 | feedinfo['icon_small'] = bytes.decode('utf-8') 127 | 128 | # if a feed category was provided, include it in the feed information 129 | # 130 | if options.category: 131 | feedinfo['category'] = options.category 132 | 133 | # build a CbFeedInfo instance 134 | # this does field validation 135 | # 136 | feedinfo = CbFeedInfo(**feedinfo) 137 | 138 | # build a list of reports (always one report in this 139 | # case). 
the single report will include all the IOCs 140 | # 141 | reports = build_reports(options) 142 | 143 | # build a CbFeed instance 144 | # this does field validation (including on the report data) 145 | # 146 | feed = CbFeed(feedinfo, reports) 147 | 148 | return feed.dump() 149 | 150 | def _build_cli_parser(): 151 | usage = "usage: %prog [options]" 152 | desc = "Convert a flat file of IOCs to a Carbon Black feed" 153 | 154 | parser = optparse.OptionParser(usage=usage, description=desc) 155 | 156 | parser.add_option("-n", "--name", action="store", type="string", dest="name", 157 | help="Feed Name") 158 | parser.add_option("-d", "--displayname", action="store", type="string", dest="display_name", 159 | help="Feed Display Name") 160 | parser.add_option("-u", "--url", action="store", type="string", dest="url", 161 | help="Feed Provider URL") 162 | parser.add_option("-s", "--summary", action="store", type="string", dest="summary", 163 | help="Feed Summary") 164 | parser.add_option("-t", "--techdata", action="store", type="string", dest="techdata", 165 | help="Feed Technical Description") 166 | parser.add_option("-c", "--category", action="store", type="string", dest="category", 167 | help="Feed Category") 168 | parser.add_option("-i", "--icon", action="store", type="string", dest="icon", 169 | help="Icon File (PNG format)") 170 | parser.add_option("-S", "--small-icon", action="store", type="string", dest="small_icon", 171 | help="Small icon file (50x50 pixels) (PNG format)") 172 | parser.add_option("-I", "--iocs", action="store", type="string", dest="ioc_filename", 173 | help="IOC filename") 174 | parser.add_option("-r", "--report", action="store", type="string", dest="report", 175 | help="Report Name") 176 | parser.add_option("-g", "--tags", action="store", type="string", dest="tags", 177 | help="Optional comma-delimited report tags") 178 | parser.add_option("-D", "--description", action="store", type="string", dest="description", 179 | help="A brief description of the report.") 180 | 181 | return parser 182 | 183 | if __name__ == "__main__": 184 | 185 | parser = _build_cli_parser() 186 | options, args = parser.parse_args(sys.argv) 187 | 188 | if not options.name or \ 189 | not options.display_name or \ 190 | not options.url or \ 191 | not options.summary or \ 192 | not options.techdata or \ 193 | not options.ioc_filename or \ 194 | not options.report: 195 | print("-> Missing option") 196 | sys.exit(0) 197 | 198 | print((create_feed(options))) 199 | -------------------------------------------------------------------------------- /validate_feed.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved. 4 | ################################################################################ 5 | 6 | import argparse 7 | import json 8 | import logging 9 | import os 10 | import sys 11 | from typing import Any, Dict, Set, Tuple 12 | 13 | import cbfeeds 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | ################################################################################ 19 | # Utility Functions 20 | ################################################################################ 21 | 22 | def build_cli_parser() -> argparse.ArgumentParser: 23 | """ 24 | generate ArgumentParser to handle command line switches. 
25 | """ 26 | desc = "Validate a Carbon Black Response feed" 27 | 28 | cmd_parser = argparse.ArgumentParser(description=desc) 29 | 30 | cmd_parser.add_argument("-f", "--feed_filename", 31 | help="Feed Filename(s) to validate", 32 | type=str, required=True, action="append") 33 | 34 | cmd_parser.add_argument("-p", "--pedantic", 35 | help="Validates that no non-standard JSON elements exist", 36 | action="store_true", default=False) 37 | 38 | cmd_parser.add_argument("-e", "--exclude", 39 | help="Filename of 'exclude' list - newline delimited indicators to consider invalid", 40 | default=None) 41 | 42 | cmd_parser.add_argument("-i", "--include", 43 | help="Filename of 'include' list - newline delimited indicators to consider valid", 44 | default=None) 45 | 46 | return cmd_parser 47 | 48 | 49 | def validate_file(filename: str) -> str: 50 | """ 51 | Validate that the file exists and is readable. 52 | 53 | :param filename: The name of the file to read 54 | :return: file contents 55 | """ 56 | if filename.strip() == "" or not os.path.exists(filename): 57 | raise cbfeeds.CbException(f"No such feed file: `{filename}`") 58 | 59 | try: 60 | with open(filename, 'r') as fp: 61 | return fp.read() 62 | except Exception as err: 63 | raise cbfeeds.CbException(f"Unable to read feed file: `{filename}`: {err}") 64 | 65 | 66 | def validate_json(contents: str) -> Dict[str, Any]: 67 | """ 68 | Validate that the file is well-formed JSON. 69 | 70 | :param contents: file contents in supposed JSON format 71 | :return: json object 72 | """ 73 | try: 74 | return json.loads(contents) 75 | except Exception as err: 76 | raise cbfeeds.CbException(f"Unable to process feed JSON: {err}") 77 | 78 | 79 | def validate_feed(feed: Dict[str, Any], pedantic: bool = False) -> cbfeeds.CbFeed: 80 | """ 81 | Validate that the file is valid as compared to the CB feeds schema. 82 | 83 | :param feed: the digested feed 84 | :param pedantic: If True, perform pedantic validation 85 | :return: CbFeed object 86 | """ 87 | # verify that we have both of the required feedinfo and reports elements 88 | if "feedinfo" not in feed: 89 | raise cbfeeds.CbException("No 'feedinfo' element found!") 90 | if "reports" not in feed: 91 | raise cbfeeds.CbException("No 'reports' element found!") 92 | 93 | # Create the cbfeed object 94 | feed = cbfeeds.CbFeed(feed["feedinfo"], feed["reports"]) 95 | 96 | # Validate the feed -- this validates that all required fields are present, and that 97 | # all required values are within valid ranges 98 | feed.validate() 99 | 100 | return feed 101 | 102 | 103 | def validate_against_include_exclude(feed: cbfeeds.CbFeed, include: Set, exclude: Set) -> None: 104 | """ 105 | Ensure that no feed indicators are 'excluded' or blacklisted. 106 | 107 | :param feed: feed to be validated 108 | :param include: set of included IOCs 109 | :param exclude: set of excluded IOCs 110 | """ 111 | for ioc in feed.iter_iocs(): 112 | if ioc["ioc"] in exclude and not ioc["ioc"] in include: 113 | raise Exception(ioc) 114 | 115 | 116 | def gen_include_exclude_sets(include_filename: str = None, exclude_filename: str = None) -> Tuple[Set, Set]: 117 | """ 118 | Generate an include and an exclude set of indicators by reading indicators from flat, newline-delimited files. 
119 | 120 | :param include_filename: path to file containing include entries 121 | :param exclude_filename: path to file containing exclude entries 122 | """ 123 | include = set() 124 | exclude = set() 125 | 126 | if include_filename: 127 | if not os.path.exists(include_filename): 128 | raise cbfeeds.CbException(f"No such include file: {include_filename}") 129 | for indicator in open(include_filename).readlines(): 130 | include.add(indicator.strip()) 131 | 132 | if exclude_filename: 133 | if not os.path.exists(exclude_filename): 134 | raise cbfeeds.CbException(f"No such exclude file: {exclude_filename}") 135 | for indicator in open(exclude_filename).readlines(): 136 | exclude.add(indicator.strip()) 137 | 138 | return include, exclude 139 | 140 | 141 | def validation_cycle(filename: str) -> bool: 142 | """ 143 | Run the full validation cycle for a single feed file. Feed validation will fail if a feed 144 | ioc is excluded (blacklisted) unless it is also included (whitelisted). 145 | 146 | :param filename: filename containing feed information 147 | :return: False if there were problems, True if ok 148 | """ 149 | include, exclude = gen_include_exclude_sets(options.include, options.exclude) 150 | 151 | try: 152 | contents = validate_file(filename) 153 | except Exception as err: 154 | logger.error(f"Feed file invalid: {err}") 155 | return False 156 | 157 | try: 158 | jsondict = validate_json(contents) 159 | except Exception as err: 160 | logger.error(f"Feed json for `{filename}` is invalid: {err}") 161 | return False 162 | 163 | try: 164 | feed = validate_feed(jsondict) 165 | except Exception as err: 166 | logger.error(f"Feed `{filename}` is invalid: {err}") 167 | return False 168 | 169 | if len(exclude) > 0 or len(include) > 0: 170 | try: 171 | validate_against_include_exclude(feed, include, exclude) 172 | logger.info(" ... validated against include and exclude lists") 173 | except Exception as err: 174 | logger.error(f" ... 
unnable to validate against the include and exclude lists:\n{err}") 175 | return False 176 | 177 | extra = "" if not options.pedantic else " and contains no non-CB elements" 178 | logger.info(f"Feed `{filename}` is good{extra}!") 179 | return True 180 | 181 | 182 | ################################################################################ 183 | # Main 184 | ################################################################################ 185 | 186 | if __name__ == "__main__": 187 | parser = build_cli_parser() 188 | options = parser.parse_args() 189 | 190 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') 191 | 192 | feed_filenames = options.feed_filename 193 | if not feed_filenames: 194 | logger.error("-> Must specify one or more feed filenames to validate; use the -f switch or --help for usage") 195 | sys.exit(0) 196 | 197 | sep = False 198 | for feed_filename in feed_filenames: 199 | if sep: 200 | logger.info('\n ----- \n') 201 | validation_cycle(feed_filename) 202 | sep = True 203 | -------------------------------------------------------------------------------- /example/stix/stix_to_feed.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import optparse 5 | 6 | sys.path.insert(0, "../../") 7 | from cbfeeds import CbReport 8 | from cbfeeds import CbFeed 9 | from cbfeeds import CbFeedInfo 10 | 11 | from stix.core import STIXPackage 12 | from stix.utils.parser import EntityParser, UnsupportedVersionError 13 | from cybox.bindings.file_object import FileObjectType 14 | from cybox.bindings.domain_name_object import DomainNameObjectType 15 | from cybox.bindings.address_object import AddressObjectType 16 | 17 | from stix.utils import nsparser 18 | import mixbox.namespaces 19 | from mixbox.namespaces import Namespace 20 | 21 | ADDITIONAL_NAMESPACES = [ 22 | Namespace('http://us-cert.gov/ciscp', 'CISCP', 23 | 'http://www.us-cert.gov/sites/default/files/STIX_Namespace/ciscp_vocab_v1.1.1.xsd') 24 | ] 25 | 26 | 27 | def merge(d1, d2): 28 | """ given two dictionaries, return a single dictionary 29 | that merges the two. 30 | """ 31 | 32 | result = d1 33 | if not d2: return result 34 | for k in d2: 35 | if k in result: 36 | result[k].extend(d2[k]) 37 | else: 38 | result[k] = d2[k] 39 | return result 40 | 41 | 42 | def no_conditionals(obj): 43 | """ return true only if: 44 | - object has no conditionals applied or 45 | - conditionals are jsut "Any Equals" 46 | """ 47 | # if they're not on the object... 48 | if not hasattr(obj, "apply_condition") or not hasattr(obj, "condition"): 49 | return True 50 | 51 | # ...or if they're not defined... 52 | if not obj.apply_condition or not obj.condition: 53 | return True 54 | 55 | # ... or if they're defined and any equals... 56 | if obj.apply_condition.lower() == "any" and \ 57 | obj.condition.lower() == "equals": 58 | return True 59 | 60 | return False 61 | 62 | 63 | def parse_File(file_obj): 64 | """ parse a FileObjectType and return a list of md5s 65 | if they exist and not subject to any conditionals. """ 66 | 67 | if not hasattr(file_obj, "Hashes") or not hasattr(file_obj.Hashes, "Hash"): 68 | return 69 | 70 | iocs = {} 71 | iocs['md5'] = [] 72 | for h in file_obj.Hashes.Hash: 73 | if not hasattr(h, "Type"): 74 | continue 75 | 76 | # only get md5s that are true if any are present. if not specified, assume so. 
77 | if no_conditionals(h.Type) and \ 78 | (h.Type.valueOf_ and h.Type.valueOf_.lower() == "md5"): 79 | md5s = h.Simple_Hash_Value 80 | iocs['md5'].extend(md5s.valueOf_.split(md5s.delimiter)) 81 | return iocs 82 | 83 | 84 | def parse_observable(observable): 85 | """ for each observable, if it's of a supported type, 86 | then parse out the values and return. """ 87 | 88 | obj = observable.to_obj() 89 | if not obj or not hasattr(obj, "Object") or not hasattr(obj.Object, "Properties"): return 90 | prop = obj.Object.Properties 91 | 92 | iocs = {} 93 | 94 | if type(prop) == AddressObjectType: 95 | ips = prop.Address_Value 96 | if no_conditionals(ips): 97 | iocs['ipv4'] = ips.valueOf_.split(ips.delimiter) 98 | 99 | elif type(prop) == DomainNameObjectType: 100 | domains = prop.Value 101 | if no_conditionals(domains): 102 | iocs['dns'] = domains.valueOf_.split(domains.delimiter) 103 | 104 | elif type(prop) == FileObjectType: 105 | merge(iocs, parse_File(prop)) 106 | 107 | return iocs 108 | 109 | 110 | def parse_observables(observables): 111 | """ iterate over the set of observables, parse out 112 | visible indicators and return a dictionary of 113 | iocs present and suitable for feed inclusion. """ 114 | 115 | iocs = {} 116 | for observable in observables: 117 | try: 118 | merge(iocs, parse_observable(observable)) 119 | except Exception as e: 120 | print(("-> Unexpected error parsing observable: {0}; continuing.".format(e))) 121 | 122 | return iocs 123 | 124 | 125 | def build_report(fname): 126 | """ parse the provided STIX package and create a 127 | CB Feed Report that includes all suitable observables 128 | as CB IOCs """ 129 | 130 | # The python STIX libs are pedantic about document versions. See 131 | # https://github.com/STIXProject/python-stix/issues/124 132 | # parser = EntityParser() 133 | # pkg = parser.parse_xml(fname, check_version=False) 134 | 135 | pkg = STIXPackage.from_xml(fname) 136 | 137 | iocs = {} 138 | if pkg.observables: 139 | iocs = parse_observables(pkg.observables.observables) 140 | 141 | if pkg.indicators: 142 | for indicator in pkg.indicators: 143 | iocs = merge(iocs, parse_observables(indicator.observables)) 144 | 145 | ts = int(time.mktime(pkg.timestamp.timetuple())) if pkg.timestamp else int(time.mktime(time.gmtime())) 146 | fields = {'iocs': iocs, 147 | 'score': 100, # does STIX have a severity field? 148 | 'timestamp': ts, 149 | 'link': 'http://stix.mitre.org', 150 | 'id': pkg.id_, 151 | 'title': pkg.stix_header.title, 152 | } 153 | 154 | if len(list(iocs.keys())) == 0 or all(len(iocs[k]) == 0 for k in iocs): 155 | print(("-> No suitable observables found in {0}; skipping.".format(fname))) 156 | return None 157 | 158 | print(("-> Including {0} observables from {1}.".format(sum(len(iocs[k]) for k in iocs), fname))) 159 | return CbReport(**fields) 160 | 161 | 162 | def build_cli_parser(): 163 | """ 164 | generate OptionParser to handle command line switches 165 | """ 166 | 167 | usage = "usage: %prog [options]" 168 | desc = "Best-effort conversion of one or more STIX Packages into a CB Feed" 169 | 170 | parser = optparse.OptionParser(usage=usage, description=desc) 171 | 172 | parser.add_option("-i", "--input", action="store", default=None, type="string", dest="input", 173 | help="STIX Package(s) to process. 
If a directory, will recursively process all .xml") 174 | parser.add_option("-o", "--output", action="store", default=None, type="string", dest="output", 175 | help="CB Feed output filename") 176 | 177 | return parser 178 | 179 | 180 | def build_reports(input_source): 181 | """ given an input file or directory, 182 | build a list of Cb Feed Reports. 183 | 184 | This structure chooses to have one 185 | report per STIX Package, with all 186 | suitable observables associated. 187 | 188 | Based on your STIX Package structure, 189 | you may prefer a different arrangement. 190 | """ 191 | 192 | reports = [] 193 | if os.path.isfile(input_source): 194 | reports.append(build_report(input_source)) 195 | else: 196 | for root, dirs, files in os.walk(input_source): 197 | for f in files: 198 | if not f.endswith("xml"): continue 199 | try: 200 | rep = build_report(os.path.join(root, f)) 201 | if rep: reports.append(rep) 202 | except UnsupportedVersionError as e: 203 | print(("-> Skipping {0}\n" 204 | "UnsupportedVersionError: {1}\n" 205 | "see https://github.com/STIXProject/python-stix/issues/124".format( 206 | f, e))) 207 | except Exception as e: 208 | print(("-> Unexpected error parsing {0}: {1}; skipping.".format(f, e))) 209 | 210 | return reports 211 | 212 | 213 | def create(input_source): 214 | reports = build_reports(input_source) 215 | 216 | # **************************** 217 | # TODO - you probably want to change these values to reflect your 218 | # local input source 219 | feedinfo = {'name': 'stiximport', 220 | 'display_name': "STIX Package Import", 221 | 'provider_url': 'http://stix.mitre.org', 222 | 'summary': "This feed was imported from stix package(s) at %s" % input_source, 223 | 'tech_data': "There are no requirements to share any data to receive this feed.", 224 | 'icon': 'images/stix.gif' 225 | } 226 | 227 | feedinfo = CbFeedInfo(**feedinfo) 228 | feed = CbFeed(feedinfo, reports) 229 | return feed.dump() 230 | 231 | 232 | if __name__ == "__main__": 233 | parser = build_cli_parser() 234 | options, args = parser.parse_args(sys.argv) 235 | if not options.input or not options.output: 236 | print("-> Must specify and input file/directory and output filename") 237 | sys.exit(-1) 238 | 239 | 240 | # 241 | # Adding namespaces that aren't in defaults 242 | # 243 | def _update_namespaces(): 244 | for i in ADDITIONAL_NAMESPACES: 245 | nsparser.STIX_NAMESPACES.add_namespace(i) 246 | mixbox.namespaces.register_namespace(i) 247 | 248 | 249 | _update_namespaces() 250 | 251 | bytes = create(options.input) 252 | open(options.output, "w").write(bytes) 253 | -------------------------------------------------------------------------------- /test/resources/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "feedinfo": { 3 | "category": "Carbon Black", 4 | "display_name": "QA Feed BWF912316192", 5 | "icon": 
"/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBT/wAARCAAyADIDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD9MKhvL630+3ee6uIraBPvSzOEUfUnpWZ401afw/4O13VLYKbmysLi5iDDI3JGzDI9Mivj66nsNY8L6f4w+Iet6xrtxqU00djpVmyoo8ogMWY/Ki5YcIoPI98AH07qfx48AaTIUn8UWbsvX7NunH5xhqr2f7Q/w7vpAkfiaFSf+e0E0Q/NkFfJrfEvQLE7dJ+HuhxR9jqck945+pLqP0pq/FPT7g7b3wD4XmiPUW9vLA/4MsnH5UAfdGjeI9K8RQmbStStNSiHVrWdZAPrtPFaNfEvhOy8H+OtU8jw5/a3gbxQIpJrXyro3Fs5RC5UPxIhwp5yRwetfQf7NXjbV/HXw8lu9auPtd3a30los7KAzoEjYFsdT85GfYd6APV6KKKAM3xNpp1rw3q2nqMm7tJoB/wNCv8AWvhu+jbVPgbp8m0+boutTQSL3SOeNWBPoN0bD8q+9q+XV0XTfAXx11nwxr1pHP4U8WFZYklyI/ML74+R02yb06/xAnigD550Pw7qniW7FrpOnXWo3HeO1iaQj3OBwPc16r4f/ZT8Z6rGJtRNjoUGNzfapt7geu1Mj8yK6Xx3+0jd+ELy98NeD/D9r4cgsZntzJLCN+5SQWWMYVenfdmvEfEnjrxD4vkL6zrN5qAJz5c0pMY+ifdH4CgR7/ofwh8N/DDSfEHiaHxbDr+q6VptxiG1MarFJJE0a7gGZuSxAyR1rvf2VdLbT/hHazMu37bdTXA9xkR5/wDIf8q+fV0O60H4faV4XtYi3iXxlcw3MsH8UdorYgVvTe5L/RRmvs3wr4fg8KeG9M0e35hsbdIA3TdtUAsfcnn8TQM1aKKKACvO/jb8K4vij4V8iErDrNmTNYztwN3dCeytgfQgHtXolFAHxJq2ky/FVHs7uP8As/4l6Wv2e4tbjCHVUQYBBP8Ay3UDGD94DI9szw34Ht/BNovibxxaSW8EbH7BoU6lJ9QlXpuU8rED95iOegzmvqb4sfBHSviZGt5HIdK8QQAeRqUI5OOivj7w9DnI7elc18P/ANneWz1weIfHWq/8JRrUZAhSR2lhj2/dYl+XI7AgAe9AFf4E/DrVNS1q5+Ivi5c6zqGTZW7rjyIyMbsfw/L8qjsufXj3WiigAooooAKKKKACiiigAooooAKKKKAP/9k=", 6 | "icon_small": 
"/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBT/wAARCAAyADIDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD9MKhvL630+3ee6uIraBPvSzOEUfUnpWZ401afw/4O13VLYKbmysLi5iDDI3JGzDI9Mivj66nsNY8L6f4w+Iet6xrtxqU00djpVmyoo8ogMWY/Ki5YcIoPI98AH07qfx48AaTIUn8UWbsvX7NunH5xhqr2f7Q/w7vpAkfiaFSf+e0E0Q/NkFfJrfEvQLE7dJ+HuhxR9jqck945+pLqP0pq/FPT7g7b3wD4XmiPUW9vLA/4MsnH5UAfdGjeI9K8RQmbStStNSiHVrWdZAPrtPFaNfEvhOy8H+OtU8jw5/a3gbxQIpJrXyro3Fs5RC5UPxIhwp5yRwetfQf7NXjbV/HXw8lu9auPtd3a30los7KAzoEjYFsdT85GfYd6APV6KKKAM3xNpp1rw3q2nqMm7tJoB/wNCv8AWvhu+jbVPgbp8m0+boutTQSL3SOeNWBPoN0bD8q+9q+XV0XTfAXx11nwxr1pHP4U8WFZYklyI/ML74+R02yb06/xAnigD550Pw7qniW7FrpOnXWo3HeO1iaQj3OBwPc16r4f/ZT8Z6rGJtRNjoUGNzfapt7geu1Mj8yK6Xx3+0jd+ELy98NeD/D9r4cgsZntzJLCN+5SQWWMYVenfdmvEfEnjrxD4vkL6zrN5qAJz5c0pMY+ifdH4CgR7/ofwh8N/DDSfEHiaHxbDr+q6VptxiG1MarFJJE0a7gGZuSxAyR1rvf2VdLbT/hHazMu37bdTXA9xkR5/wDIf8q+fV0O60H4faV4XtYi3iXxlcw3MsH8UdorYgVvTe5L/RRmvs3wr4fg8KeG9M0e35hsbdIA3TdtUAsfcnn8TQM1aKKKACvO/jb8K4vij4V8iErDrNmTNYztwN3dCeytgfQgHtXolFAHxJq2ky/FVHs7uP8As/4l6Wv2e4tbjCHVUQYBBP8Ay3UDGD94DI9szw34Ht/BNovibxxaSW8EbH7BoU6lJ9QlXpuU8rED95iOegzmvqb4sfBHSviZGt5HIdK8QQAeRqUI5OOivj7w9DnI7elc18P/ANneWz1weIfHWq/8JRrUZAhSR2lhj2/dYl+XI7AgAe9AFf4E/DrVNS1q5+Ivi5c6zqGTZW7rjyIyMbsfw/L8qjsufXj3WiigAooooAKKKKACiiigAooooAKKKKAP/9k=", 7 | "name": "qafeedbwf912316192", 8 | "provider_rating": 2.0, 9 | "provider_url": "https://confluence.carbonblack.local/display/CB/CB+Response+QA", 10 | "summary": "Feed generated by QA Integration Framework (QA Feed BWF912316192)", 11 | "tech_data": "Will trigger on MD5 hash of ['dbb379c9337cc31b24743e7cf81ee8bd'] SHA-256 hash of ['94dcf0531121e13a73114e8806f096d31e21dab4a8b1bfef95b5e0171a9a0556']Event query of ['process_name:foobar.exe']IPV4 of ['158.106.122.248']IPV6 of ['7F1F:67E6:4BA0:5935:453A:A3AA:D69C:6146']Domain of ['spend.policy.issue.net']JA3 hash of ['07f362079e7f3d5a8855549fcc9a441e']JA3s hash of ['0fa6b3b35df905b209742cf80c06f7da']", 12 | "version": 2 13 | }, 14 | "reports": [ 15 | { 16 | "description": "MD5 hash ['dbb379c9337cc31b24743e7cf81ee8bd'] ", 17 | "id": "WithMd5", 18 | "iocs": { 19 | "md5": [ 20 | "dbb379c9337cc31b24743e7cf81ee8bd" 21 | ] 22 | }, 23 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests", 24 | "score": 42, 25 | "tags": [ 26 | "harmless", 27 | "test", 28 | "md5" 29 | ], 30 | "timestamp": 1603400539, 31 | "title": "CB Response QA ID79998635109553919580397252507510773" 32 | }, 33 | { 34 | "description": "SHA-256 hash ['94dcf0531121e13a73114e8806f096d31e21dab4a8b1bfef95b5e0171a9a0556'] ", 35 | "id": "WithSha256", 36 | "iocs": { 37 | "sha256": [ 38 | "94dcf0531121e13a73114e8806f096d31e21dab4a8b1bfef95b5e0171a9a0556" 39 | ] 40 | }, 41 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests", 42 | "score": 42, 43 | "tags": [ 44 | "harmless", 45 | "test", 46 | "sha256" 47 | ], 48 | 
"timestamp": 1603400539, 49 | "title": "CB Response QA ID21537004960345562837781950598866988" 50 | }, 51 | { 52 | "description": "Query ['process_name:notepad.exe'] ", 53 | "id": "WithQueryEvent", 54 | "iocs": { 55 | "query": { 56 | "index_type": "events", 57 | "search_query": [ 58 | "cb.urlver=1&q=process_name%3Anotepad.exe" 59 | ] 60 | } 61 | }, 62 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests", 63 | "score": 42, 64 | "tags": [ 65 | "harmless", 66 | "test", 67 | "event_query" 68 | ], 69 | "timestamp": 1603400539, 70 | "title": "CB Response QA ID92415881359400855663635535031560171" 71 | }, 72 | { 73 | "description": "Query ['observed_filename:notepad.exe'] ", 74 | "id": "WithQueryModule", 75 | "iocs": { 76 | "query": { 77 | "index_type": "modules", 78 | "search_query": [ 79 | "cb.urlver=1&cb.q.observed_filename=notepad.exe" 80 | ] 81 | } 82 | }, 83 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests", 84 | "score": 42, 85 | "tags": [ 86 | "harmless", 87 | "test", 88 | "event_query" 89 | ], 90 | "timestamp": 1603400539, 91 | "title": "CB Response QA ID92415881359400855663635535031560171" 92 | }, 93 | { 94 | "description": "IPV4 addr ['158.106.122.248'] ", 95 | "id": "WithIpv4", 96 | "iocs": { 97 | "ipv4": [ 98 | "158.106.122.248" 99 | ] 100 | }, 101 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests", 102 | "score": 42, 103 | "tags": [ 104 | "harmless", 105 | "test", 106 | "ipv4" 107 | ], 108 | "timestamp": 1603400539, 109 | "title": "CB Response QA ID10273408120814052893561200852998840" 110 | }, 111 | { 112 | "description": "IPV6 addr ['7F1F:67E6:4BA0:5935:453A:A3AA:D69C:6146'] ", 113 | "id": "WithIpv6", 114 | "iocs": { 115 | "ipv6": [ 116 | "7F1F:67E6:4BA0:5935:453A:A3AA:D69C:6146" 117 | ] 118 | }, 119 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests", 120 | "score": 42, 121 | "tags": [ 122 | "harmless", 123 | "test", 124 | "ipv6" 125 | ], 126 | "timestamp": 1603400539, 127 | "title": "CB Response QA ID12770488195773987668619301433420948" 128 | }, 129 | { 130 | "description": "DNS ['spend.policy.issue.net'] ", 131 | "id": "WithDns", 132 | "iocs": { 133 | "dns": [ 134 | "spend.policy.issue.net" 135 | ] 136 | }, 137 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests", 138 | "score": 42, 139 | "tags": [ 140 | "harmless", 141 | "test", 142 | "dns" 143 | ], 144 | "timestamp": 1603400539, 145 | "title": "CB Response QA ID14254988097523942646333256976090972" 146 | }, 147 | { 148 | "description": "ja3 hash ['07f362079e7f3d5a8855549fcc9a441e'] ", 149 | "id": "WithJa3", 150 | "iocs": { 151 | "ja3": [ 152 | "07f362079e7f3d5a8855549fcc9a441e" 153 | ] 154 | }, 155 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests", 156 | "score": 42, 157 | "tags": [ 158 | "harmless", 159 | "test", 160 | "ja3" 161 | ], 162 | "timestamp": 1603400539, 163 | "title": "CB Response QA ID81650673725828231840001285253822155" 164 | }, 165 | { 166 | "description": "ja3s hash ['0fa6b3b35df905b209742cf80c06f7da'] ", 167 | "id": "WithJa3s", 168 | "iocs": { 169 | "ja3s": [ 170 | "0fa6b3b35df905b209742cf80c06f7da" 171 | ] 172 | }, 173 | "link": "https://gitlab.bit9.local/cbqa/cbr-server-tests", 174 | "score": 42, 175 | "tags": [ 176 | "harmless", 177 | "test", 178 | "ja3s" 179 | ], 180 | "timestamp": 1603400539, 181 | "title": "CB Response QA ID64694413371534478005243943570681010" 182 | } 183 | ] 184 | } 185 | -------------------------------------------------------------------------------- /example/isight/generate_isight_feed.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import json 5 | import pprint 6 | import isight_api 7 | import isight_config 8 | import isight_helpers 9 | import xml.etree.ElementTree as ET 10 | 11 | score_stats = {} 12 | 13 | # our imports 14 | sys.path.insert(0, "../../") 15 | from cbfeeds import CbReport 16 | from cbfeeds import CbFeed 17 | from cbfeeds import CbFeedInfo 18 | 19 | def generate_feed_information(): 20 | """ 21 | return a dictionary of feed information 22 | this is feed 'metadata' - the description of the feed, and not the feed contents 23 | """ 24 | feed = {} 25 | feed["name"] = "iSIGHT" 26 | feed["display_name"] = "iSIGHT Partners feed" 27 | feed["summary"] = "iSIGHT Partners provides a cyber intelligence feed" 28 | feed["tech_data"] = "There are no requirements to share any data with Carbon Black to receive this feed. The underlying IOC data is provided by iSIGHT Partners" 29 | feed["provider_url"] = "http://www.isightpartners.com/" 30 | feed["icon"] = "isight.png" 31 | feed["icon_small"] = "isight.small.jpg" 32 | feed["category"] = "Partner" 33 | 34 | return CbFeedInfo(**feed) 35 | 36 | def retrieve_report_score(report_name, api, default_score): 37 | """ 38 | return a numeric score, between 1 and 100, corresponding 39 | with the report. This requires a round-trip to the iSight api 40 | endpoint to retrieve an XML encoded report. That report, in 41 | turn, includes a 'criticality' rating which we can translate 42 | into a numeric score. 43 | """ 44 | 45 | global score_stats 46 | 47 | #print " -> looking up score for %s..." % (report_name) 48 | data = api.get_report(report_name, "xml") 49 | 50 | root = ET.fromstring(data) 51 | 52 | # @todo don't hardcode offset here, but look for named indicator? 53 | # @todo "intel" reports don't have a risk rating 54 | # 55 | for field in root[1]: 56 | if field.tag != "Field": 57 | continue 58 | if field.attrib['name'] == 'Risk Rating': 59 | if field.text.strip() in score_stats: 60 | score_stats[field.text.strip()] = score_stats[field.text.strip()] + 1 61 | else: 62 | score_stats[field.text.strip()] = 1 63 | 64 | rating = field._children[0].text 65 | if 'HIGH' == rating: 66 | return 100 67 | elif 'MEDIUM' == rating: 68 | return 80 69 | elif 'LOW' == rating: 70 | return 60 71 | else: 72 | print(("WARNING: can't find score for %s; using default" % report_name)) 73 | return default_score 74 | 75 | if "MISSING" in score_stats: 76 | score_stats["MISSING"] = score_stats["MISSING"] + 1 77 | else: 78 | score_stats["MISSING"] = 1 79 | 80 | print(("WARNING: can't find score for %s; using default" % report_name)) 81 | return default_score 82 | 83 | def generate_reports(raw, api): 84 | """ 85 | generate the reports data as a list of dictionaries. 86 | 87 | each list entry corresponds to a single report, 88 | which is a single report in the case of iSight. 
89 | """ 90 | 91 | reports = [] 92 | 93 | for rawkey in list(raw.keys()): 94 | 95 | entry = {} 96 | 97 | rawentry = raw[rawkey] 98 | 99 | entry["id"] = rawkey 100 | entry["title"] = rawentry["title"] 101 | entry["link"] = "https://mysight.isightpartners.com/report/full/%s" % (rawkey) 102 | entry["timestamp"] = rawentry["report_timestamp"] 103 | entry["iocs"] = {} 104 | 105 | for rawmd5 in rawentry["md5"]: 106 | if not "md5" in entry["iocs"]: 107 | entry["iocs"]["md5"] = [] 108 | 109 | entry["iocs"]["md5"].append(rawmd5) 110 | 111 | # @todo uncomment this block to support ips 112 | # 113 | #for rawip in rawentry["ipaddr"]: 114 | # if not "ipv4" in entry["iocs"]: 115 | # entry["iocs"]["ipv4"] = [] 116 | # 117 | # entry["iocs"]["ipv4"].append(rawip) 118 | 119 | for rawdns in rawentry["domain"]: 120 | if not "dns" in entry["iocs"]: 121 | entry["iocs"]["dns"] = [] 122 | 123 | entry["iocs"]["dns"].append(rawdns) 124 | 125 | # if we ended up with no IOCs for this report, just skip it. 126 | # 127 | if len(entry["iocs"]) == 0: 128 | continue 129 | 130 | # the score or severity is not provided as part of the iSight 131 | # report enumeration (their "i_and_w" or "indications and warnings" 132 | # api. instead, we must retreive the report in XML format, parse the 133 | # report, and look for the criticality. 134 | # 135 | # Some iSIGHT reports have NO criticality rating. 136 | # For lack of clear obvious next steps, simply report the score as 137 | # 75 -- "medium high" 138 | # 139 | entry["score"] = retrieve_report_score(entry["id"], api, 75) 140 | 141 | reports.append(CbReport(**entry)) 142 | 143 | return reports 144 | 145 | def create(config_file, existing_csv=None, reports_to_skip=[]): 146 | # parse the configuration file 147 | # this configuration file includes the keys needed to talk to the 148 | # iSight report server, etc. 149 | # 150 | #print "-> Parsing iSight configuration..." 151 | cfg = isight_config.ISightConfig(config_file) 152 | 153 | # instantiate a local iSight API object 154 | # 155 | #print "-> Instantiating an iSight API object..." 156 | api = isight_api.ISightAPI(cfg.iSightRemoteImportUrl, 157 | cfg.iSightRemoteImportUsername, 158 | cfg.iSightRemoteImportPassword, 159 | cfg.iSightRemoteImportPublicKey, 160 | cfg.iSightRemoteImportPrivateKey) 161 | 162 | if not existing_csv: 163 | # query the iSight report server for raw CSV report data 164 | # query 'back' the specified number of days 165 | # 166 | #print "-> Querying iSight server for last %d days of reports..." % (cfg.iSightRemoteImportDaysBack) 167 | # 168 | # @todo iSIGHT has a new-and-improved REST API which could be used instead of this legacy API 169 | # 170 | raw_report_data = api.get_i_and_w(cfg.iSightRemoteImportDaysBack) 171 | 172 | # save off the raw report data for future reference 173 | # 174 | #print "-> Saving iSight report data to iSight.csv..." 
175 | f = open('iSight.csv', 'w') 176 | f.write(raw_report_data) 177 | f.close() 178 | else: 179 | raw_report_data = open(existing_csv, "r").read() 180 | 181 | # convert the raw report data into something more managable 182 | # in particular, a list of dictionaries, with each dictionary describing a report 183 | # this helper routine accounts for the fact that report data is spread across 184 | # multiple lines of the raw CSV blob 185 | # 186 | results = isight_helpers.isight_csv_to_iocs_dict([raw_report_data]) 187 | 188 | # set up a dictionary for basic stat tracking 189 | # 190 | stats = {'md5' : {'total' : 0, 'max' : 0}, 191 | 'ipaddr' : {'total' : 0, 'max' : 0}, 192 | 'domain' : {'total' : 0, 'max' : 0}} 193 | 194 | for report_id in list(results.keys()): 195 | stats['md5']['total'] += len(results[report_id]['md5']) 196 | if len(results[report_id]['md5']) > stats['md5']['max']: 197 | stats['md5']['max'] = len(results[report_id]['md5']) 198 | stats['ipaddr']['total'] += len(results[report_id]['ipaddr']) 199 | if len(results[report_id]['ipaddr']) > stats['ipaddr']['max']: 200 | stats['ipaddr']['max'] = len(results[report_id]['ipaddr']) 201 | stats['domain']['total'] += len(results[report_id]['domain']) 202 | if len(results[report_id]['domain']) > stats['domain']['max']: 203 | stats['domain']['max'] = len(results[report_id]['domain']) 204 | 205 | #print " -> Total Reports: %d" % (len(results.keys())) 206 | #print " -> ----------------------------------------------- ---" 207 | #print " -> Maximum number of MD5s in one report: %d" % (stats['md5']['max']) 208 | #print " -> Total MD5s across all reports: %d" % (stats['md5']['total']) 209 | #print " -> Maximum number of IPv4 addresses in one report: %d" % (stats['ipaddr']['max']) 210 | #print " -> Total IPv4 addresses in all reports: %d" % (stats['ipaddr']['total']) 211 | #print " -> Maximum number of DNS names in one report: %d" % (stats['domain']['max']) 212 | #print " -> Total DNS names in all reports: %d" % (stats['domain']['total']) 213 | 214 | # generate the feed data from the raw iSight report data 215 | # 216 | #print "-> Generating feed data..." 217 | reports = generate_reports(results, api) 218 | 219 | # shim to skip entire reports 220 | reports = [report for report in reports if report.data['id'] not in reports_to_skip] 221 | 222 | # generate the feed metadata (feed information) 223 | # this is a static description of the feed itself 224 | # 225 | 226 | # lazy way out 227 | cwd_old = os.getcwd() 228 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 229 | 230 | #print "-> Generating feed metadata..." 231 | feedinfo = generate_feed_information() 232 | 233 | # write out feed document 234 | # 235 | feed = CbFeed(feedinfo, reports) 236 | 237 | #print "-> Writing out completed feed document..." 238 | return feed.dump() 239 | 240 | os.chdir(cwd_old) 241 | 242 | #print "-> Done!" 
243 | 244 | if __name__ == "__main__": 245 | #print "-> iSIGHT Partners Carbon Black feed generator" 246 | if len(sys.argv) < 3: 247 | print("\n USAGE: generate_isight_feed.py [existing_csv]\n") 248 | sys.exit(0) 249 | cfg = sys.argv[1] 250 | out = sys.argv[2] 251 | csv = None 252 | if len(sys.argv) == 4: 253 | csv = sys.arv[3] 254 | 255 | reports_to_skip = ["Intel-989749",] 256 | 257 | bytes = create(cfg, existing_csv=csv, reports_to_skip=reports_to_skip) 258 | open(out, "w").write(bytes) 259 | 260 | -------------------------------------------------------------------------------- /example/stix/sample_data/STIX_Phishing_Indicator.xml: -------------------------------------------------------------------------------- 1 | 2 | 25 | 41 | 42 | STIX Phishing Indicator Example 43 | Indicators - Phishing 44 | 45 | 46 | 47 | "US-China" Phishing Indicator 48 | Malicious E-mail 49 | This is a cyber threat indicator for instances of "US-China" phishing attempts. 50 | 51 | 2012-12-01T09:30:47Z 52 | 2013-02-01T09:30:47Z 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | @state.gov 61 | 62 | 63 | 64 | 65 | 66 | 67 | pdf 68 | 87022 69 | 70 | 71 | MD5 72 | cf2b3ad32a8a4cfb05e9dfc45875bd70 73 | 74 | 75 | 76 | Contains 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | Phishing 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | Remedy 95 | Email Block 96 | Redirect and quarantine new matching email 97 | 98 | Prevent future instances of similar phishing attempts from reaching targeted recipients in order to eliminate possibility of compromise from targeted recipient falling for phishing lure. 99 | 100 | 101 | 102 | 103 | 104 | Remedy 105 | Web Link Block 106 | Block malicous links on web proxies 107 | 108 | Prevent execution/navigation to known malicious web URLs. 109 | 110 | 111 | 112 | 113 | 114 | Remedy 115 | Domain Traffic Block 116 | Block traffic to/from malicous domains via firewalls and DNS servers. 117 | 118 | Prevent any traffic (potentially containing malicious logic, data exfil, C2, etc.) to or from known malicious domains. 119 | 120 | 121 | 122 | 123 | 124 | Response 125 | Malicous Email Cleanup 126 | Remove existing matching email from the mail servers 127 | 128 | Cleanup any known malicious emails from mail servers (potentially in Inboxes, Sent folders, Deleted folders, etc.) to prevent any future exploitation from those particular emails. 129 | 130 | 131 | 132 | 133 | 134 | Response 135 | Phishing Target Identification 136 | Review mail logs to identify other targeted recipients 137 | 138 | Identify all targeted victims of a particular phishing campaign in order to enable notification and to support more strategic cyber threat intelligence activities (TTP characterization, Campaign analysis, ThreatActor attribution, etc.). 139 | 140 | 141 | 142 | 143 | 144 | Response 145 | Phishing Target Notification 146 | Notify targeted recipients 147 | 148 | Notify all targeted victims of a particular phishing campaign to ensure they are aware they have been targeted and to help them understand how to avoid falling for phishing attacks. 149 | 150 | 151 | 152 | 153 | 154 | Response 155 | Super Secret Proprietary Response 156 | Carry out some sensitive action that is applicable only within the environment of the affected organization. 
157 | 158 | 159 | 160 | 161 | 162 | ancestor-or-self::stix:Indicator//node() 163 | 164 | 165 | 166 | ancestor-or-self::stix:Indicator//indicator:SuggestedCOAs/indicator:SuggestedCOA/stixCommon:Course_Of_Action[@id="example:COA-e46d2565-754e-4ac3-9f44-2de1bfb1e71d"] 167 | 168 | 169 | 170 | 171 | High 172 | MITRE 173 | 174 | 175 | 176 | MITRE 177 | 178 | 179 | 180 | 181 | MITRE 182 | 183 | 184 | 2012-12-01T09:30:47Z 185 | 186 | 187 | 188 | 189 | 190 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Carbon Black Feeds [![Build Status](https://travis-ci.org/carbonblack/cbfeeds.svg?branch=master)](https://travis-ci.org/carbonblack/cbfeeds) 2 | 3 | 4 | ## License 5 | 6 | Use of the Carbon Black Feeds API is governed by the license found in LICENSE.md. 7 | 8 | ## Overview 9 | 10 | Carbon Black Response 4.0+ ships with support for threat intelligence feeds. The Indicators of Compromise (IOCs) 11 | contained in the feeds are compared to the sensor data as it arrives on the server. Any activity matching an 12 | IOC is tagged; users can search for the tags and, optionally, register for e-mail alerts. 13 | 14 | Feeds allow Carbon Black servers to use freely available threat intelligence, proprietary customer threat data, 15 | and provides a mechanism to feed threat indicators from on-premise analytic sources to Carbon Black for verification, 16 | detection, visibility and analysis. 17 | 18 | The CB Response 4.0+ server supports three types of indicators: 19 | 20 | * Binary MD5s 21 | * IPv4 addresses 22 | * DNS names 23 | 24 | The CB Response 5.0+ server adds support for two new types of indicators: 25 | 26 | * Process Queries (Process Searches) 27 | * Binary Queries (Binary Searches) 28 | 29 | The CB Response 6.1+ server adds support for one new type of indicator: 30 | 31 | * IPv6 addresses 32 | 33 | The CB Response 7.0+ server adds support for one new type of indicator: 34 | 35 | * Binary SHA-256 36 | 37 | The CB Response 7.3+ server adds support for two new types of indicator: 38 | 39 | * Ja3 hash 40 | * Ja3s hash 41 | 42 | Please note that query IOC types have significant differences as compared to MD5s, IPv4 and IPv6 addresses, and DNS names. Please see notes below regarding their usage. 43 | 44 | The feed format, described in the "Feed Structure" section below, is designed for simplicity. This should make it 45 | easy to add support for feed data from any input source. 46 | 47 | Example feed creation scripts are included. See the 'Examples' section in this document for a listing of the examples. 48 | 49 | > _**NOTE:** As of this version, python 3 is a requirement._ 50 | 51 | ## Using the Carbon Black Feeds API 52 | 53 | The Carbon Black Feeds API (CBFAPI) is found on github at: 54 | 55 | https://github.com/carbonblack/cbfeeds 56 | 57 | The CBFAPI is a collection of documentation, example scripts, and a helper library to help create and validate Carbon 58 | Black feeds. It is not required in order to build a Carbon Black feed - a feed can be created in any language that 59 | allows for building JSON, or even built by hand. The feed file itself must match the feed structure, or schema, 60 | defined in the "Feed Structure" section below. 
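For orientation, the snippet below is a minimal sketch of building and validating a feed with the helper library; every value shown (names, URLs, the IOC) is a placeholder, and the full set of required fields is described in the "Feed Structure" section below.

```
import time

from cbfeeds import CbFeed, CbFeedInfo, CbReport

# One report carrying a single (placeholder) DNS IOC.
report = CbReport(**{
    "id": "ExampleReport1",
    "title": "Example report with one DNS IOC",
    "link": "https://example.org/reports/ExampleReport1",
    "score": 50,
    "timestamp": int(time.time()),
    "iocs": {"dns": ["malicious.example.org"]},
})

# Feed metadata; these five fields are required (see 'feedinfo' below).
feedinfo = CbFeedInfo(**{
    "name": "examplefeed",
    "display_name": "Example Feed",
    "provider_url": "https://example.org/",
    "summary": "A minimal example feed.",
    "tech_data": "There are no requirements to share any data to receive this feed.",
})

feed = CbFeed(feedinfo, [report])
feed.validate()      # raises a cbfeeds exception if anything is malformed
print(feed.dump())   # the feed as JSON, ready to be written to a .feed file
```

The example generators under `example/` follow this same pattern against real data sources.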
61 | 62 | ### Getting started with CBFAPI 63 | 64 | #### Ensure Python3.x and pip3 are installed: 65 | [root@localhost carbonblack]# python3 --version 66 | [root@localhost carbonblack]# pip3 --version 67 | 68 | #### Installation instructions/guidance 69 | 70 | [root@localhost carbonblack]# yum install git 71 | [root@localhost carbonblack]# yum install centos-release-scl -y 72 | [root@localhost carbonblack]# yum install rh-python38 73 | [root@localhost carbonblack]# scl enable rh-python38 bash 74 | 75 | #### Clone the GitHub cbfeeds repository: 76 | 77 | [root@localhost carbonblack]# git clone https://github.com/carbonblack/cbfeeds.git 78 | Initialized empty Git repository in /root/repos/carbonblack/cbfeeds/.git/ 79 | remote: Reusing existing pack: 80, done. 80 | remote: Counting objects: 25, done. 81 | remote: Compressing objects: 100% (25/25), done. 82 | Receiving objects: 100% (105/105), 38.03 KiB | 17 KiB/s, done. 83 | Resolving deltas: 100% (50/50), done. 84 | remote: Total 105 (delta 10), reused 0 (delta 0) 85 | 86 | #### Navigate to the newly-created cbfeeds directory 87 | 88 | [root@localhost carbonblack]# ls 89 | cbfeeds 90 | [root@localhost carbonblack]# cd cbfeeds/ 91 | [root@localhost cbfeeds]# ls 92 | cbfeeds/ LICENSE.md README.md setup.py validate_feed.py 93 | example/ percent_encode_query.py requirements.txt test.py 94 | 95 | #### Install requirements: 96 | [root@localhost carbonblack]# pip3 install -r cbfeeds/requirements.txt 97 | 98 | #### Run the setup.py file to install the cbfeeds module. 99 | [root@localhost carbonblack]# python3 cbfeeds/setup.py install 100 | 101 | #### Navigate to the example directory and use the example `generate_tor_feed.py` (inside the example/tor/ directory) script to generate a feed from live tor egress IPs 102 | 103 | [root@localhost cbfeeds]# cd example/ 104 | [root@localhost example]# python3 tor/generate_tor_feed.py example_tor_feed.feed 105 | [root@localhost example]# ls -l example_tor_feed.feed 106 | -rw-r--r--. 1 root root 2179084 Mar 25 08:09 example_tor_feed.feed 107 | 108 | #### Use the example `validate_feed.py` (inside the parent cbfeeds/ directory) script to validate the tor feed (or a feed of your choosing) 109 | 110 | [root@localhost cbfeeds]# python3 validate_feed.py -f example/example_tor_feed.feed 111 | -> Validated that file exists and is readable 112 | -> Validated that feed file is valid JSON 113 | -> Validated that the feed file includes all necessary CB elements 114 | -> Validated that all element values are within CB feed 115 | 116 | ## Feed Structure 117 | 118 | * Feed: a Carbon Black feed 119 | * FeedInfo: Feed metadata: name, description, etc 120 | * Reports: a list of reports 121 | * Report metadata: title, id, URL 122 | * IOCs for this report 123 | 124 | A feed is a JSON structure with two entries: 125 | 126 | * feedinfo 127 | * reports 128 | 129 | The `feedinfo` structure is a list of basic feed metadata. `reports` is a list of `report` structures. 130 | Each `report` has report metadata and a list of IOCs. 131 | 132 | ### feedinfo 133 | 134 | `feedinfo` is a JSON structure with the following entries: 135 | 136 | | name | status | description | 137 | | ---------------- | -------- |-------------| 138 | | `display_name` | REQUIRED | Display name for the user interface. | 139 | | `name` | REQUIRED | Internal name; must not include spaces or special characters. See Notes. | 140 | | `provider_url` | REQUIRED | Human-consumable link to view more information about this feed. 
| 141 | | `summary` | REQUIRED | A short description of this feed. | 142 | | `tech_data` | REQUIRED | More detailed technical description, to include data sharing requirements (if any) | 143 | | `category` | _OPTIONAL_ | Category of the feed, e.g. Open Source, Partner, Connector, First Party, etc. | 144 | | `icon` | _OPTIONAL_ | A base64 encoded version of the image to use in the user interface | 145 | | `icon_small` | _OPTIONAL_ | A base64 encoded version of a smaller icon | 146 | | `provider_rating`| _OPTIONAL_ | Provider rating for the feed. | 147 | | `version` | _OPTIONAL_ | Version of the feed source. | 148 | 149 | Notes: 150 | 151 | The 'name' field must not include spaces or special characters. Typically, it should be unique per-feed on a single server. 152 | 153 | #### Icon 154 | 155 | Recommended size/dpi for the regular icon is 370px x 97px, 72 dpi. 156 | 157 | #### Small Icon (icon_small) 158 | 159 | Recommended size/dpi for the small icon is 100px x 100px, 72 dpi. 160 | 161 | Explanation of `category` parameters: 162 | 163 | | Category Name | Description | 164 | | ------------- | ----------- | 165 | | `Carbon Black` | Intelligence based on output from host-based integrations | 166 | | `Carbon Black First Party` | Intelligence generated inside the Threat Intelligence Cloud by the Carbon Black Research team | 167 | | `Connectors` | Intelligence connectors from third party technologies Carbon Black has integrated with | 168 | | `Meta-feed` | Includes a theme-based aggregate of selected intelligence indicators from other feeds | 169 | | `Partner` | Proprietary threat intelligence provided to the Threat Intelligence Cloud via a partner agreement. | 170 | | `Open Source` | Open Source intelligence that is generally available to the public | 171 | 172 | 173 | An example `feedinfo` structure, from the `generate_tor_feed.py` script: 174 | 175 | ``` 176 | "feedinfo": { 177 | "name": "tor", 178 | "display_name": "Tor Exit Nodes", 179 | "provider_url": "https://torproject.org/", 180 | "summary": "This feed is a list of Tor Node IP addresses, updated every 30 minutes.", 181 | "tech_data": "There are no requirements to share any data to receive this feed.", 182 | "icon": "tor.png", 183 | "icon_small": "tor.small.png", 184 | "category": "Open Source" 185 | } 186 | ``` 187 | 188 | ### report 189 | 190 | A `report` is a JSON structure with the following entries: 191 | 192 | | name | status | description | 193 | | -------------- | -------- |-------------| 194 | | `id` | REQUIRED | A report id, must be unique per feed `name` for the lifetime of the feed. Must be alphanumeric (including no spaces).| 195 | | `iocs` | REQUIRED | The IOCs for this report. A match on __any__ IOC will cause the activity to be tagged with this report id. The IOC format is described below.| 196 | | `link` | REQUIRED | Human-consumable link to information about this report.| 197 | | `score` | REQUIRED | The severity of this report from -100 to 100, with 100 most critical.| 198 | | `timestamp` | REQUIRED | Time this report was last updated, in seconds since epoch (GMT). This should always be updated whenever the content of the report changes.| 199 | | `title` | REQUIRED | A one-line title describing this report.| 200 | | `description` | _OPTIONAL_ | A description of the report. | 201 | | `tags` | _OPTIONAL_ | A comma-separated list of identifiers to tag the report. 
202 | 
203 | ### iocs
204 | 
205 | CB Response 4.0+ ships with support for three types of IOCs:
206 | 
207 | * IPv4 addresses
208 | * domain names
209 | * md5s
210 | 
211 | CB Response 5.0+ supports all 4.0 IOCs and adds one additional type:
212 | 
213 | * query - a query related to modules or events
214 | 
215 | CB Response 6.1+ supports all 5.0 IOCs and adds one additional type:
216 | 
217 | * ipv6 addresses
218 | 
219 | The CB Response 7.0+ server adds support for one new type of indicator:
220 | 
221 | * Binary SHA-256
222 | 
223 | The CB Response 7.3+ server adds support for two new types of indicators:
224 | 
225 | * Ja3 hash
226 | * Ja3s hash
227 | 
228 | `iocs` is a structure with one or more of these entries:
229 | 
230 | | name | status | description |
231 | | -------------- | -------- |-------------|
232 | | `dns` | _OPTIONAL_ | A list of domain names |
233 | | `ipv4` | _OPTIONAL_ | A list of IPv4 addresses in dotted decimal form |
234 | | `ipv6` | _OPTIONAL_ | A list of IPv6 addresses |
235 | | `ja3` | _OPTIONAL_ | A list of ja3 hashes (md5) |
236 | | `ja3s` | _OPTIONAL_ | A list of ja3s hashes (md5) |
237 | | `md5` | _OPTIONAL_ | A list of md5s |
238 | | `query` | _OPTIONAL_ | A query of type "events" or "modules" |
239 | | `sha256` | _OPTIONAL_ | A list of sha-256s |
240 | 
241 | An example `reports` list with two `report` structures, each with one IPv4 IOC, from the `generate_tor_feed.py` script:
242 | 
243 | ```
244 | "reports": [
245 |   {
246 |     "timestamp": 1380773388,
247 |     "iocs": {
248 |       "ipv4": [
249 |         "100.2.142.8"
250 |       ]
251 |     },
252 |     "link": "https://www.dan.me.uk/tornodes",
253 |     "id": "TOR-Node-100.2.142.8",
254 |     "title": "As of Wed Oct 2 20:09:48 2013 GMT, 100.2.142.8 has been a TOR exit for 26 days, 0:44:42. Contact: Adam Langley "
255 |   },
256 |   {
257 |     "timestamp": 1380773388,
258 |     "iocs": {
259 |       "ipv4": [
260 |         "100.4.7.69"
261 |       ]
262 |     },
263 |     "link": "https://www.dan.me.uk/tornodes",
264 |     "id": "TOR-Node-100.4.7.69",
265 |     "title": "As of Wed Oct 2 20:09:48 2013 GMT, 100.4.7.69 has been a TOR exit for 61 days, 2:07:23. Contact: GPG KeyID: 0x1F40CBDC Jeremy "
266 |   }
267 | ]
268 | ```
269 | Another example with a "query" IOC:
270 | 
271 | ```
272 | "reports":
273 | [
274 |   {
275 |     "title": "Notepad processes",
276 |     "timestamp": 1388538906,
277 |     "iocs": {
278 |       "query": [
279 |         {
280 |           "index_type": "events",
281 |           "search_query": "cb.urlver=1&q=process_name%3Anotepad.exe"
282 |         }
283 |       ]
284 |     },
285 |     "score": 50,
286 |     "link": "http://www.myfeedserver/feed/report/notepad_proc",
287 | 
288 |     "id": "notepad_proc"
289 |   },
290 |   {
291 |     "title": "Newly loaded modules",
292 |     "timestamp": 1388570000,
293 |     "iocs":
294 |     {
295 |       "query": [
296 |         {
297 |           "index_type": "modules",
298 |           "search_query": "cb.urlver=1&q=is_executable_image%3Afalse"
299 |         }
300 |       ]
301 |     },
302 |     "score": 50,
303 | 
304 |     "link": "http://www.dxmtest1.org/02",
305 |     "id": "new_mod_loads"
306 |   }
307 | ]
308 | ```
309 | ## Validation criteria for "query" IOC reports
310 | The following conditions apply to "query" IOC reports:
311 | 
312 | * the "iocs" element can only contain one "query" element
313 | * only "events" and "modules" are valid values for the "index_type" element
314 | * a report with a query CANNOT also have other IOCs
315 | 
316 | The "search_query" syntax deserves particular attention. The following conditions apply to the "search_query" field:
317 | 
318 | * the "search_query" syntax is described in the CB Enterprise Server Query Overview documentation
319 | * the query itself should be prepended with `q=`
320 | * the query should be percent-encoded. This can be accomplished in several ways, including:
321 |   * by copying a query from the Carbon Black UI
322 |   * by using a quoting library such as the one included with Python in `urllib`
323 |   * by using the included percent_encode_query.py script
324 | 
325 | As with all feeds, it is highly recommended to perform initial validation of the feed with the included validate_feed.py script. For any feeds that include query IOCs, it is also recommended to run feed_query_validate.py from the cbapi GitHub repo.
326 | 
327 | ## Performance ramifications of "query" IOC reports
328 | 
329 | Query IOCs impose a much higher performance cost on the CB Response Server than md5, dns, and ip IOCs. Furthermore, the relative costs of queries can vary significantly. As a general rule, 'events' queries are more expensive than 'modules' queries. The use of wildcards, long paths, joined searches, or multiple terms is also expensive.
330 | 
331 | It is recommended that feed developers take care in constructing query IOCs and test against a representative server prior to deploying in production.
332 | 
333 | ## Feed Synchronization
334 | 
335 | The CB Response server periodically synchronizes enabled feeds. There are two types of feed synchronization:
336 | 
337 | * Incremental
338 | * Full
339 | 
340 | Incremental synchronization updates any new reports and reports with updated timestamps. Deleted reports, and reports that have been changed without a change to the report timestamp, are not synchronized.
341 | 
342 | Full synchronization accounts for all feed changes, even when the report timestamp is not changed or a report is deleted.
343 | 
344 | Full synchronization occurs less frequently than incremental synchronization. It can be triggered manually via the web console or via the Carbon Black Client API. Alternatively, the following practices will result in all report changes being synchronized via incremental synchronization:
345 | 
346 | * Update all report timestamps whenever there is a change to the report. The accuracy of the timestamp is less important than the fact that the timestamp increases.
347 | * For reports to be deleted, remove all IOCs from the report and update the timestamp rather than removing the report.
348 | 
349 | ## Examples
350 | 
351 | Several example scripts are included in the 'example' subdirectory. These example scripts illustrate using the Carbon Black cbfeeds API to generate Carbon Black feeds from a variety of data sources; a minimal sketch of the common pattern is shown below, followed by the list of examples.
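The sketch below shows the pattern the example scripts share: build a feedinfo structure, build one or more reports, wrap both in a `CbFeed`, and write the validated JSON to a file. All names, URLs, IOC values, and the output filename are hypothetical placeholders, not taken from any of the example scripts:

```
import time

from cbfeeds.feed import CbFeed, CbFeedInfo, CbReport

# Hypothetical feed metadata; the required feedinfo fields are described above.
feedinfo = CbFeedInfo(
    name="exampleips",                  # alphanumeric only
    display_name="Example IP Feed",
    provider_url="https://example.org/",
    summary="A hypothetical feed of example IP addresses.",
    tech_data="There are no requirements to share any data to receive this feed.",
)

# One placeholder report with a single IPv4 IOC.
reports = [
    CbReport(
        id="ExampleReport1",
        title="Example report",
        link="https://example.org/reports/1",
        score=50,
        timestamp=int(time.time()),
        iocs={"ipv4": ["192.0.2.1"]},
    )
]

feed = CbFeed(feedinfo, reports)

# dump() validates the feed and returns the serialized JSON document.
with open("example_ips.feed", "w") as fp:
    fp.write(feed.dump())
```

The resulting file can then be checked with validate_feed.py as shown in the getting-started section above. The example scripts listed below follow this same shape, differing mainly in how they gather their source IOCs.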
352 | 353 | | directory | name | description | 354 | | --------- | --------------- | ------------| 355 | | abuse_ch | abuse.ch | The Swiss security blog abuse.ch tracks C&C servers for Zeus, SpyEye and Palevo malware.| 356 | | isight | iSIGHT Partners | iSIGHT Partners customers can use their API key to generate a Carbon Black feed from iSIGHT Partners cyber threat intelligence.| 357 | | mdl | Malware Domain List | Malware Domain List is a non-commercial community project to track domains used by malware.| 358 | | raw | raw | Build a Carbon Black feed from a raw list of IOCs.| 359 | | tor | Tor | Provide a Carbon Black feed from a live list of Tor exit nodes provided by torproject.org| 360 | -------------------------------------------------------------------------------- /test/test_03_cbfeedinfo.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # VMware Carbon Black EDR Taxii Connector © 2013-2020 VMware, Inc. All Rights Reserved. 3 | ################################################################################ 4 | 5 | 6 | import base64 7 | import os 8 | 9 | import cbfeeds 10 | from cbfeeds.feed import CbFeedInfo 11 | from common import TestCommon 12 | 13 | RESOURCES = os.path.abspath(os.path.join(os.path.dirname(__file__), "resources")) 14 | 15 | 16 | class TestCbFeedInfoMethods(TestCommon): 17 | """ 18 | Validate the methods in the CbFeedInfo class. 19 | """ 20 | 21 | def test_00a_init_unknown_key(self): 22 | """ 23 | Verify that an initialized feedinfo object only retains known keys. 24 | """ 25 | info, _ = self._load_feed_file() 26 | info['feedinfo']['foobar'] = "should vanish" 27 | cfi = CbFeedInfo(**info['feedinfo']) 28 | assert "foobar" not in cfi.data 29 | 30 | def test_00b_init_unknown_key_strict(self): 31 | """ 32 | Verify that an initialized feedinfo object only retains known keys. 33 | """ 34 | info, _ = self._load_feed_file() 35 | info['feedinfo']['foobar'] = "should vanish" 36 | try: 37 | CbFeedInfo(strict=True, **info['feedinfo']) 38 | self.fail("Did not get expected exception!") 39 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 40 | assert "Feedinfo includes unknown field: foobar" in err.args[0] 41 | 42 | def test_00c_validate_unknown_key_unstrict(self): 43 | """ 44 | Verify that validate with strict=False will turn off strictness in addition to validation. 45 | """ 46 | info, _ = self._load_feed_file() 47 | cfi = CbFeedInfo(strict=True, **info['feedinfo']) 48 | cfi._data['foobar'] = "should vanish" 49 | cfi.validate(strict=False) 50 | assert "foobar" not in cfi.data 51 | assert not cfi.strict 52 | 53 | def test_00d_validate_unknown_key_strict(self): 54 | """ 55 | Verify that validate with strict=True will turn on strictness in addition to validation. 56 | """ 57 | info, _ = self._load_feed_file() 58 | cfi = CbFeedInfo(**info['feedinfo']) 59 | cfi._data['foobar'] = "should vanish" 60 | try: 61 | cfi.validate(strict=True) 62 | self.fail("Did not get expected exception!") 63 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 64 | assert "Feedinfo includes unknown field: foobar" in err.args[0] 65 | assert cfi.strict 66 | 67 | def test_01a_update_unknown_key(self): 68 | """ 69 | Verify that updated feedinfo data only retains known keys. 
70 | """ 71 | info, _ = self._load_feed_file() 72 | cfi = CbFeedInfo(**info['feedinfo']) 73 | info['feedinfo']['foobar'] = "should vanish" 74 | cfi.data = info['feedinfo'] 75 | assert "foobar" not in cfi.data 76 | 77 | def test_01b_neg_update_unknown_key_strict(self): 78 | """ 79 | Verify that updated feedinfo data only retains known keys. 80 | """ 81 | info, _ = self._load_feed_file() 82 | cfi = CbFeedInfo(strict=True, **info['feedinfo']) 83 | info['feedinfo']['foobar'] = "should vanish" 84 | try: 85 | cfi.data = info['feedinfo'] 86 | self.fail("Did not get expected exception!") 87 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 88 | assert "Feedinfo includes unknown field: foobar" in err.args[0] 89 | 90 | # ----- Icon checks when data is initialized/updated ------------------------------ 91 | 92 | # NOTE: both icon and icon_small go through the same checks for validity, so these tests are not duplicated 93 | 94 | def test_02a_init_icon_path(self): 95 | """ 96 | Verify that a path supplied for icon is read and the contents used for the icon. 97 | """ 98 | info, _ = self._load_feed_file() 99 | info['feedinfo']['icon'] = os.path.join(RESOURCES, "taxii-logov2.png") 100 | cfi = CbFeedInfo(**info['feedinfo']) 101 | assert cfi.data['icon'] != info['feedinfo']['icon'] 102 | 103 | def test_02b_neg_init_icon_path_invalid(self): 104 | """ 105 | On initialization, detect an icon path that does not exist. 106 | """ 107 | info, _ = self._load_feed_file() 108 | info['feedinfo']['icon'] = "./foobar.png" 109 | try: 110 | CbFeedInfo(validate=False, **info['feedinfo']) 111 | self.fail("Did not get expected exception!") 112 | except cbfeeds.exceptions.CbIconError as err: 113 | assert "Unknown error reading/encoding icon data" in err.args[0] 114 | 115 | def test_02c_neg_init_icon_path_unreadable(self): 116 | """ 117 | On initialization, detect an icon path that cannot be read. 118 | """ 119 | source = "./foobar.png" 120 | with open(source, 'w') as fp: 121 | fp.write("Text that won't be read") 122 | os.chmod(source, 0o000) 123 | 124 | info, _ = self._load_feed_file() 125 | info['feedinfo']['icon'] = source 126 | try: 127 | CbFeedInfo(validate=False, **info['feedinfo']) 128 | self.fail("Did not get expected exception!") 129 | except cbfeeds.exceptions.CbIconError as err: 130 | assert "Permission denied" in err.args[0] 131 | finally: 132 | os.chmod(source, 0o777) 133 | os.remove(source) 134 | 135 | def test_02d_neg_init_icon_data_invalid_bad_padding(self): 136 | """ 137 | Verify that bad encoding for the icon field is detected. 138 | """ 139 | info, _ = self._load_feed_file() 140 | info['feedinfo']['icon'] = info['feedinfo']['icon'][:-2] 141 | try: 142 | CbFeedInfo(**info['feedinfo']) 143 | self.fail("Did not get expected exception!") 144 | except cbfeeds.exceptions.CbIconError as err: 145 | assert "Unknown error reading/encoding icon data: Incorrect padding" in err.args[0] 146 | 147 | def test_02e_neg_init_icon_data_invalid_bad_encoding(self): 148 | """ 149 | Verify that bad encoding for the icon field is detected. 150 | """ 151 | info, _ = self._load_feed_file() 152 | info['feedinfo']['icon'] = info['feedinfo']['icon'] + "%$" 153 | try: 154 | CbFeedInfo(**info['feedinfo']) 155 | self.fail("Did not get expected exception!") 156 | except cbfeeds.exceptions.CbIconError as err: 157 | assert "Unknown error reading/encoding icon data: Non-base64 digit found" in err.args[0] 158 | 159 | def test_02f_neg_init_icon_not_str(self): 160 | """ 161 | Verify that a non-string entry for icon is detected. 
162 | """ 163 | info, _ = self._load_feed_file() 164 | info['feedinfo']['icon'] = 12345 165 | try: 166 | CbFeedInfo(**info['feedinfo']) 167 | self.fail("Did not get expected exception!") 168 | except cbfeeds.exceptions.CbIconError as err: 169 | assert "`icon` field is not a string (path or base64 data)" in err.args[0] 170 | 171 | # ----- validate() method testing -------------------------------------------------- 172 | 173 | def test_03a_neg_validate_display_name_missing(self): 174 | """ 175 | Verify that missing "display_name" is detected. 176 | """ 177 | info, _ = self._load_feed_file() 178 | del info['feedinfo']['display_name'] 179 | try: 180 | CbFeedInfo(**info['feedinfo']) 181 | self.fail("Did not get expected exception!") 182 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 183 | assert "FeedInfo missing required field(s): display_name" in err.args[0] 184 | 185 | def test_03b_neg_validate_name_missing(self): 186 | """ 187 | Verify that missing "name" is detected. 188 | """ 189 | info, _ = self._load_feed_file() 190 | del info['feedinfo']['name'] 191 | try: 192 | CbFeedInfo(**info['feedinfo']) 193 | self.fail("Did not get expected exception!") 194 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 195 | assert "FeedInfo missing required field(s): name" in err.args[0] 196 | 197 | def test_03c_neg_validate_provider_url_missing(self): 198 | """ 199 | Verify that missing "provider_url" is detected. 200 | """ 201 | info, _ = self._load_feed_file() 202 | del info['feedinfo']['provider_url'] 203 | try: 204 | CbFeedInfo(**info['feedinfo']) 205 | self.fail("Did not get expected exception!") 206 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 207 | assert "FeedInfo missing required field(s): provider_url" in err.args[0] 208 | 209 | def test_03d_neg_validate_summary_missing(self): 210 | """ 211 | Verify that missing "summary" is detected. 212 | """ 213 | info, _ = self._load_feed_file() 214 | del info['feedinfo']['summary'] 215 | try: 216 | CbFeedInfo(**info['feedinfo']) 217 | self.fail("Did not get expected exception!") 218 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 219 | assert "FeedInfo missing required field(s): summary" in err.args[0] 220 | 221 | def test_03e_neg_validate_tech_data_missing(self): 222 | """ 223 | Verify that missing "tech_data" is detected. 224 | """ 225 | info, _ = self._load_feed_file() 226 | del info['feedinfo']['tech_data'] 227 | try: 228 | CbFeedInfo(**info['feedinfo']) 229 | self.fail("Did not get expected exception!") 230 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 231 | assert "FeedInfo missing required field(s): tech_data" in err.args[0] 232 | 233 | def test_04a_validate_optional_category_missing(self): 234 | """ 235 | Verify that missing optional "category" is allowed. 236 | """ 237 | info, _ = self._load_feed_file() 238 | del info['feedinfo']['category'] 239 | cfi = CbFeedInfo(**info['feedinfo']) 240 | assert 'category' not in cfi.data 241 | 242 | def test_04b_validate_optional_icon_missing(self): 243 | """ 244 | Verify that missing optional "icon" is allowed. 245 | """ 246 | info, _ = self._load_feed_file() 247 | del info['feedinfo']['icon'] 248 | cfi = CbFeedInfo(**info['feedinfo']) 249 | assert 'icon' not in cfi.data 250 | 251 | def test_04c_validate_optional_icon_small_missing(self): 252 | """ 253 | Verify that missing optional "icon_small" is allowed. 
254 | """ 255 | info, _ = self._load_feed_file() 256 | del info['feedinfo']['icon_small'] 257 | cfi = CbFeedInfo(**info['feedinfo']) 258 | assert 'icon_small' not in cfi.data 259 | 260 | def test_04d_validate_optional_provider_rating_missing(self): 261 | """ 262 | Verify that missing optional "provider_rating" is allowed. 263 | """ 264 | info, _ = self._load_feed_file() 265 | del info['feedinfo']['provider_rating'] 266 | cfi = CbFeedInfo(**info['feedinfo']) 267 | assert 'provider_rating' not in cfi.data 268 | 269 | def test_04e_validate_optional_version_missing(self): 270 | """ 271 | Verify that missing optional "version" is allowed. 272 | """ 273 | info, _ = self._load_feed_file() 274 | del info['feedinfo']['version'] 275 | cfi = CbFeedInfo(**info['feedinfo']) 276 | assert 'version' not in cfi.data 277 | 278 | # NOTE: both icon and icon_small go through the same checks for validity, so these tests are not duplicated 279 | 280 | def test_05a_neg_validate_icon_bad_data_not_image(self): 281 | """ 282 | Verify that bad data (not jpg, png or gif) for the icon field is detected. 283 | """ 284 | info, _ = self._load_feed_file() 285 | info['feedinfo']['icon'] = base64.b64encode(bytes("This is bad data!", "utf-8")).decode('ascii') 286 | try: 287 | CbFeedInfo(**info['feedinfo']) 288 | self.fail("Did not get expected exception!") 289 | except cbfeeds.exceptions.CbIconError as err: 290 | assert "Supplied data does not appear to be a usable image format" in err.args[0] 291 | 292 | def test_05b_validate_icon_not_str(self): 293 | """ 294 | Verify that bad data not a string for icon field (should have been converted to base64 string) 295 | """ 296 | info, _ = self._load_feed_file() 297 | cfi = CbFeedInfo(**info['feedinfo']) 298 | # noinspection PyTypeChecker 299 | cfi.data['icon'] = bytes(info['feedinfo']['icon'], 'ascii') 300 | try: 301 | cfi.validate() 302 | self.fail("Did not get expected exception!") 303 | except cbfeeds.exceptions.CbIconError as err: 304 | assert "Icon must be string of base64 data" in err.args[0] 305 | 306 | def test_06a_neg_validate_provider_rating_not_numeric(self): 307 | """ 308 | Verify that provider_rating with a non-numeric value is detected. 309 | """ 310 | info, _ = self._load_feed_file() 311 | info['feedinfo']['provider_rating'] = "foobar" 312 | try: 313 | CbFeedInfo(**info['feedinfo']) 314 | self.fail("Did not get expected exception!") 315 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 316 | assert "FeedInfo field `provider_rating` must be int or float" in err.args[0] 317 | 318 | def test_06b_neg_validate_version_not_numeric(self): 319 | """ 320 | Verify that version with a non-numeric value is detected. 321 | """ 322 | info, _ = self._load_feed_file() 323 | info['feedinfo']['version'] = "foobar" 324 | try: 325 | CbFeedInfo(**info['feedinfo']) 326 | self.fail("Did not get expected exception!") 327 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 328 | assert "FeedInfo field `version` must be int or float" in err.args[0] 329 | 330 | def test_07a_neg_validate_category_not_str_or_bytes(self): 331 | """ 332 | Verify that category with a non-string value is detected. 
333 | """ 334 | info, _ = self._load_feed_file() 335 | info['feedinfo']['category'] = 4 336 | try: 337 | CbFeedInfo(**info['feedinfo']) 338 | self.fail("Did not get expected exception!") 339 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 340 | assert "FeedInfo field `category` must be str or bytes" in err.args[0] 341 | 342 | def test_07b_neg_validate_display_name_not_str_or_bytes(self): 343 | """ 344 | Verify that display_name with a non-string value is detected. 345 | """ 346 | info, _ = self._load_feed_file() 347 | info['feedinfo']['display_name'] = 4 348 | try: 349 | CbFeedInfo(**info['feedinfo']) 350 | self.fail("Did not get expected exception!") 351 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 352 | assert "FeedInfo field `display_name` must be str or bytes" in err.args[0] 353 | 354 | def test_07c_neg_validate_name_not_str_or_bytes(self): 355 | """ 356 | Verify that name with a non-string value is detected. 357 | """ 358 | info, _ = self._load_feed_file() 359 | info['feedinfo']['name'] = 4 360 | try: 361 | CbFeedInfo(**info['feedinfo']) 362 | self.fail("Did not get expected exception!") 363 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 364 | assert "FeedInfo field `name` must be str or bytes" in err.args[0] 365 | 366 | def test_07d_neg_validate_provider_url_not_str_or_bytes(self): 367 | """ 368 | Verify that provider_url with a non-string value is detected. 369 | """ 370 | info, _ = self._load_feed_file() 371 | info['feedinfo']['provider_url'] = 4 372 | try: 373 | CbFeedInfo(**info['feedinfo']) 374 | self.fail("Did not get expected exception!") 375 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 376 | assert "FeedInfo field `provider_url` must be str or bytes" in err.args[0] 377 | 378 | def test_07e_neg_validate_summary_not_str_or_bytes(self): 379 | """ 380 | Verify that summary with a non-string value is detected. 381 | """ 382 | info, _ = self._load_feed_file() 383 | info['feedinfo']['summary'] = 4 384 | try: 385 | CbFeedInfo(**info['feedinfo']) 386 | self.fail("Did not get expected exception!") 387 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 388 | assert "FeedInfo field `summary` must be str or bytes" in err.args[0] 389 | 390 | def test_07f_neg_validate_tech_data_not_str_or_bytes(self): 391 | """ 392 | Verify that tech_data with a non-string value is detected. 393 | """ 394 | info, _ = self._load_feed_file() 395 | info['feedinfo']['tech_data'] = 4 396 | try: 397 | CbFeedInfo(**info['feedinfo']) 398 | self.fail("Did not get expected exception!") 399 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 400 | assert "FeedInfo field `tech_data` must be str or bytes" in err.args[0] 401 | 402 | def test_08a_neg_validate_category_empty_string(self): 403 | """ 404 | Verify that category with a empty string value is detected. 405 | """ 406 | info, _ = self._load_feed_file() 407 | info['feedinfo']['category'] = "" 408 | try: 409 | CbFeedInfo(**info['feedinfo']) 410 | self.fail("Did not get expected exception!") 411 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 412 | assert "The 'category' field must not be an empty string" in err.args[0] 413 | 414 | def test_08b_neg_validate_display_name_empty_string(self): 415 | """ 416 | Verify that display_name with a empty string value is detected. 
417 | """ 418 | info, _ = self._load_feed_file() 419 | info['feedinfo']['display_name'] = "" 420 | try: 421 | CbFeedInfo(**info['feedinfo']) 422 | self.fail("Did not get expected exception!") 423 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 424 | assert "The 'display_name' field must not be an empty string" in err.args[0] 425 | 426 | def test_08c_neg_validate_name_empty_string(self): 427 | """ 428 | Verify that name with a empty string value is detected. 429 | """ 430 | info, _ = self._load_feed_file() 431 | info['feedinfo']['name'] = "" 432 | try: 433 | CbFeedInfo(**info['feedinfo']) 434 | self.fail("Did not get expected exception!") 435 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 436 | assert "The 'name' field must not be an empty string" in err.args[0] 437 | 438 | def test_08d_neg_validate_summary_empty_string(self): 439 | """ 440 | Verify that summary with a empty string value is detected. 441 | """ 442 | info, _ = self._load_feed_file() 443 | info['feedinfo']['summary'] = "" 444 | try: 445 | CbFeedInfo(**info['feedinfo']) 446 | self.fail("Did not get expected exception!") 447 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 448 | assert "The 'summary' field must not be an empty string" in err.args[0] 449 | 450 | def test_08e_neg_validate_tech_data_empty_string(self): 451 | """ 452 | Verify that tech_data with a empty string value is detected. 453 | """ 454 | info, _ = self._load_feed_file() 455 | info['feedinfo']['tech_data'] = "" 456 | try: 457 | CbFeedInfo(**info['feedinfo']) 458 | self.fail("Did not get expected exception!") 459 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 460 | assert "The 'tech_data' field must not be an empty string" in err.args[0] 461 | 462 | def test_09_neg_validate_name_alphanumeric(self): 463 | """ 464 | Verify that name with a non alphanumeric string value is detected. 465 | """ 466 | info, _ = self._load_feed_file() 467 | info['feedinfo']['name'] = "invalid_name" 468 | try: 469 | CbFeedInfo(**info['feedinfo']) 470 | self.fail("Did not get expected exception!") 471 | except cbfeeds.exceptions.CbInvalidFeedInfo as err: 472 | assert "Feed name `invalid_name` may only contain a-z, A-Z, 0-9 and must have one character" in err.args[0] 473 | -------------------------------------------------------------------------------- /cbfeeds/feed.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # Carbon Black EDR Copyright © 2013-2020 VMware, Inc. All Rights Reserved. 3 | ################################################################################ 4 | 5 | import base64 6 | import imghdr 7 | import ipaddress 8 | import json 9 | import logging 10 | import os 11 | import re 12 | import tempfile 13 | import time 14 | from typing import Any, Dict, Generator, List, Optional, Tuple, Union 15 | 16 | from cbfeeds import CbIconError, CbInvalidFeed, CbInvalidFeedInfo, CbInvalidReport 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | class CbFeedInfo(object): 22 | """ 23 | Class to handle the data in the feedinfo section of a feed. 24 | """ 25 | 26 | def __init__(self, validate: bool = True, strict: bool = False, **kwargs): 27 | """ 28 | Initialize the class. Any keys that are not required or optional will be ignored. 
29 | 30 | :param validate: If True, validate after initialization (default True) 31 | :param strict: If True, raise exception on unknown fields instead of dropping them 32 | :param kwargs: feedinfo data as a dict 33 | """ 34 | # internal data 35 | self._data: Dict[str, Union[str, int, float]] = {} 36 | 37 | # these fields are required in every feed descriptor 38 | self.required = ["display_name", "name", "provider_url", "summary", "tech_data", ] 39 | 40 | # optional, my not be in every feed 41 | self.optional = ["category", "icon", "icon_small", "provider_rating", "version"] 42 | 43 | # these fields are expected to be numeric 44 | self.is_numeric = ["provider_rating", "version"] 45 | 46 | # these fields are strings that cannot be empty 47 | self.noemptystrings = ["name", "display_name", "summary", "tech_data", "category"] 48 | 49 | self.strict = strict 50 | 51 | self.data = kwargs 52 | if validate: 53 | self.validate() 54 | 55 | def __str__(self): 56 | """Return a descriptive string of the object.""" 57 | return f"CbFeedInfo({self._data.get('name', 'unnamed')})" 58 | 59 | def __repr__(self): 60 | """Return the canonical string representation of the object.""" 61 | return repr(self._data) 62 | 63 | @property 64 | def data(self) -> Dict[str, Union[str, int, float]]: 65 | """ 66 | :return: the internally stored value 67 | """ 68 | return self._data 69 | 70 | @data.setter 71 | def data(self, new_data: Dict[str, Union[str, int, float]]) -> None: 72 | """ 73 | Update the internal data, ignoring unknown keys. 74 | 75 | :param new_data: new structure to update data with 76 | """ 77 | self._data = new_data 78 | 79 | pruner = [] 80 | for key in self._data.keys(): 81 | if key not in self.required and key not in self.optional: 82 | if self.strict: 83 | raise CbInvalidFeedInfo(f"Feedinfo includes unknown field: {key}") 84 | else: 85 | pruner.append(key) 86 | for item in pruner: 87 | del self._data[item] 88 | logger.debug(f"Pruned unknown field `{item}` from feedinfo") 89 | 90 | def is_base64(data: str, strict: bool = False) -> Tuple[bool, Optional[str]]: 91 | try: 92 | if isinstance(data, str): 93 | use_data = data 94 | elif isinstance(data, bytes): 95 | use_data = data.decode('ascii') 96 | else: 97 | raise CbIconError("Data must be str or bytes in base64 encoding format") 98 | check = base64.b64encode(base64.b64decode(use_data, validate=strict)).decode('ascii') == use_data 99 | return check, None 100 | except Exception as err2: 101 | return False, f"{err2}" 102 | 103 | # NOTE: if they are present, the icon fields could just be paths to actual data (for convenience) 104 | 105 | for icon_field in ["icon", "icon_small"]: 106 | if icon_field in self._data and self._data[icon_field] is not None and self._data[icon_field] != "": 107 | if not isinstance(self._data[icon_field], (str, bytes)): 108 | raise CbIconError(f"`{icon_field}` field is not a string (path or base64 data)") 109 | 110 | # Check to see if it is base64 encodable data (no strict check) 111 | if is_base64(self._data[icon_field])[0]: # looks to be valid base64, as far as we can tell 112 | continue 113 | 114 | # Failed decoding check, check for path 115 | if os.path.exists(self._data[icon_field]): 116 | icon_path = self._data.pop(icon_field) 117 | try: 118 | with open(icon_path, "rb") as icon_file: 119 | self._data[icon_field] = base64.b64encode(icon_file.read()).decode('ascii') 120 | except Exception as err: 121 | raise CbIconError(f"Unknown error reading/encoding {icon_field} data: {err}") 122 | 123 | # not a path, may be data 124 | ok, err = 
is_base64(self._data[icon_field], strict=True) 125 | if not ok: 126 | raise CbIconError(f"Unknown error reading/encoding {icon_field} data: {err}") 127 | 128 | # -------------------------------------------------- 129 | 130 | def validate(self, strict: bool = None) -> None: 131 | """ 132 | Perform a set of checks to validate data before we export the feed. 133 | 134 | :param strict: If True or False, changes srict setting of class; True raises exception on non-CB fields, False 135 | prunes them 136 | :raises: CbInvalidFeed if there are validation problems 137 | """ 138 | if strict is not None: 139 | if isinstance(strict, bool): 140 | self.strict = strict 141 | else: 142 | raise TypeError("`strict` parameter must be a boolean") 143 | self.data = self._data # re-asess 144 | 145 | # verify that all required fields are there 146 | if not all([x in self.data.keys() for x in self.required]): 147 | missing_fields = ", ".join(set(self.required).difference(set(self.data.keys()))) 148 | raise CbInvalidFeedInfo("FeedInfo missing required field(s): %s" % missing_fields) 149 | 150 | # check to see if icon_field is a string or bytes base64 decoded 151 | for icon_field in ["icon", "icon_small"]: 152 | if icon_field in self.data: 153 | try: 154 | # If there's any bytes or unicode here, an exception will be thrown 155 | if not isinstance(self.data[icon_field], str): 156 | raise CbIconError("Icon must be string of base64 data") 157 | 158 | # check data for image information 159 | tf = tempfile.NamedTemporaryFile() 160 | tf.write(base64.b64decode(self.data[icon_field])) 161 | tf.flush() 162 | what = imghdr.what(tf.name) 163 | if what not in ['png', 'gif', 'jpeg']: 164 | raise CbIconError(f"Supplied data does not appear to be a usable image format (is {what})") 165 | except TypeError as err: 166 | raise CbIconError("Icon must either be path or base64 data. \ 167 | Path does not exist and base64 decode failed with: %s" % err) 168 | 169 | # All fields in feedinfo must be strings unless otherwise stated 170 | for key in self.data.keys(): 171 | if key in self.is_numeric: 172 | if not isinstance(self.data[key], (int, float)): 173 | raise CbInvalidFeedInfo( 174 | f"FeedInfo field `{key}` must be int or float, not type {type(self.data[key])}") 175 | else: 176 | if not isinstance(self.data[key], (str, bytes)): 177 | raise CbInvalidFeedInfo( 178 | f"FeedInfo field `{key}` must be str or bytes, not type {type(self.data[key])}") 179 | 180 | # certain fields, when present, must not be empty strings 181 | for key in self.data.keys(): 182 | if key in self.noemptystrings and self.data[key] == "": 183 | raise CbInvalidFeedInfo(f"The '{key}' field must not be an empty string") 184 | 185 | # validate shortname of this field is just a-z and 0-9, with at least one character 186 | if not self.data["name"].isalnum(): 187 | raise CbInvalidFeedInfo(f"Feed name `{self.data['name']}` may only contain a-z, A-Z, " 188 | "0-9 and must have one character") 189 | 190 | 191 | class CbReport(object): 192 | """ 193 | Class to handle the data in the reports section of a feed. 194 | """ 195 | 196 | def __init__(self, allow_negative_scores: bool = False, validate: bool = True, strict: bool = False, **kwargs): 197 | """ 198 | Initialize the class. 
199 | 200 | :param allow_negative_scores: If True, allow for negative scores 201 | :param validate: If True, validate 202 | :param strict: If True, raise exception on unknown fields instead of dropping them 203 | :param kwargs: actual report data 204 | """ 205 | # negative scores introduced in CB 4.2 206 | # negative scores indicate a measure of "goodness" versus "badness" 207 | self.allow_negative_scores = allow_negative_scores 208 | 209 | # these fields are required in every report 210 | self.required = ["iocs", "timestamp", "link", "title", "id", "score"] 211 | 212 | # these fields must be of type string 213 | self.typestring = ["link", "title", "id", "description"] 214 | 215 | # these fields must be of type int 216 | self.typeint = ["timestamp", "score"] 217 | 218 | # these fields are optional 219 | self.optional = ["tags", "description"] 220 | 221 | # valid IOC types are "md5", "ipv4", "dns", "query" 222 | self.valid_ioc_types = ["md5", "sha256", "ipv4", "ipv6", "dns", "query", "ja3", "ja3s"] 223 | 224 | # valid index_type options for "query" IOC 225 | self.valid_query_ioc_types = ["events", "modules"] 226 | 227 | # valid query fields 228 | self.valid_query_fields = ["index_type", "search_query"] 229 | 230 | if "timestamp" not in kwargs: 231 | kwargs["timestamp"] = int(time.mktime(time.gmtime())) 232 | 233 | self.strict = strict 234 | self._rid = f"Report '" + f"{kwargs.get('id', '???')}" + "'" # for exception identification 235 | 236 | self.data = kwargs 237 | if validate: 238 | self.validate() 239 | 240 | def __str__(self): 241 | """Return a descriptive string of the object.""" 242 | return "CbReport(%s)" % (self.data.get("title", self.data.get("id", ''))) 243 | 244 | def __repr__(self): 245 | """Return the canonical string representation of the object.""" 246 | return repr(self.data) 247 | 248 | @property 249 | def data(self) -> Dict[str, Union[str, int, Dict, List]]: 250 | """ 251 | :return: the internally stored value 252 | """ 253 | return self._data 254 | 255 | @data.setter 256 | def data(self, new_data: Dict[str, Union[str, int, Dict, List]]) -> None: 257 | """ 258 | Update the internal data, ignoring unknown keys. 259 | 260 | :param new_data: new structure to update data with 261 | """ 262 | self._data = new_data 263 | 264 | pruner = [] 265 | for key, value in new_data.items(): 266 | if key not in self.required and key not in self.optional: 267 | if self.strict: 268 | raise CbInvalidReport(f"Report includes unknown field: {key}") 269 | else: 270 | pruner.append(key) 271 | 272 | # handle query dict 273 | if key == "iocs": 274 | if isinstance(value, Dict): 275 | for key2, value2 in value.items(): 276 | if key2 == "query" and isinstance(value2, Dict): # cope with bad data (for now) 277 | pruner2 = [] 278 | for key3 in value2.keys(): 279 | if key3 not in self.valid_query_fields: 280 | if self.strict: 281 | raise CbInvalidReport(f"{self._rid}, field 'ioc' query includes" 282 | f" unknown field: {key3}") 283 | else: 284 | pruner2.append(key3) 285 | for item in pruner2: 286 | del self._data[key][key2][item] 287 | logger.debug(f"Pruned unknown query ioc field `{item}` from report") 288 | 289 | for item in pruner: 290 | del self._data[item] 291 | logger.debug(f"Pruned unknown field `{item}` from feedinfo") 292 | 293 | # -------------------------------------------------- 294 | 295 | def validate(self, strict: bool = None) -> None: 296 | """ 297 | Perform a set of checks to validate report data. 
298 | 299 | :param strict: If True or False, changes srict setting of class; True raises exception on non-CB fields, False 300 | prunes them 301 | :raises: CbInvalidReport if there are validation problems 302 | """ 303 | if strict is not None: 304 | if isinstance(strict, bool): 305 | self.strict = strict 306 | else: 307 | raise TypeError("`strict` parameter must be a boolean") 308 | self.data = self._data # re-asess 309 | 310 | # validate we have all required keys 311 | if not all([x in self.data.keys() for x in self.required]): 312 | missing_fields = ", ".join(set(self.required).difference(set(self.data.keys()))) 313 | raise CbInvalidReport(f"Report missing required field(s): {missing_fields}") 314 | 315 | # CBAPI-36 316 | # verify that all fields that should be strings are strings or bytes 317 | for key in self.typestring: 318 | if key in self.data.keys(): 319 | if not isinstance(self.data[key], (str, bytes)): 320 | raise CbInvalidReport(f"{self._rid}, field '{key}', must be of type str or bytes, but seems to" 321 | f" be of type {type(self.data[key])}") 322 | 323 | # verify that all fields that should be ints are ints 324 | for key in self.typeint: 325 | if key in self.data.keys(): 326 | if not isinstance(self.data[key], (int, float)): 327 | raise CbInvalidReport(f"{self._rid}, field '{key}', must be an int") 328 | else: 329 | self.data[key] = int(self.data[key]) # make sure it's int 330 | 331 | # validate that tags is a list of alphanumeric strings 332 | if "tags" in self.data.keys(): 333 | if not isinstance(self.data["tags"], List): 334 | raise CbInvalidReport(f"{self._rid}, field 'tags', must be a list of str") 335 | for tag in self.data["tags"]: 336 | if not isinstance(tag, str): 337 | raise CbInvalidReport(f"{self._rid}, field 'tag', has entry not a string ({tag}, type {type(tag)})") 338 | 339 | if tag.lower() == "event_query": # the one exception 340 | pass 341 | else: 342 | if len(tag) > 32 or len(tag) < 1: 343 | raise CbInvalidReport(f"{self._rid}, field 'tag', has an entry that is not 1-32" 344 | f" characters in length ({tag})") 345 | if not str(tag).isalnum(): 346 | raise CbInvalidReport( 347 | f"{self._rid}, field 'tag', has an entry that is not alphanumeric ({tag})") 348 | 349 | # validate score is integer between -100 (if so specified) or 0 and 100 350 | bottom = -100 if self.allow_negative_scores else 0 351 | if not self.allow_negative_scores and self.data["score"] < 0: 352 | raise CbInvalidReport(f"{self._rid}, field 'score' ({self.data['score']}), out of range {bottom} to 100") 353 | 354 | if self.data["score"] < -100 or self.data["score"] > 100: 355 | raise CbInvalidReport(f"{self._rid}, field 'score' ({self.data['score']}), out of range {bottom} to 100") 356 | 357 | # validate id of this report is just a-z and 0-9 and - and ., with at least one character 358 | if not re.match("^[a-zA-Z0-9-_.]+$", self.data["id"]): 359 | raise CbInvalidReport( 360 | f"{self._rid} (the id) is invalid and may only contain a-z, A-Z, 0-9, or one of [-_.]") 361 | 362 | # convenience variable for next tests 363 | iocs = self.data['iocs'] 364 | 365 | # validate that there are at least one type of ioc present 366 | if not isinstance(iocs, Dict): 367 | raise CbInvalidReport(f"{self._rid}, field 'iocs', has bad format (must be dict)") 368 | 369 | if len(iocs.keys()) == 0: 370 | raise CbInvalidReport(f"{self._rid}, field 'iocs', has no entries") 371 | 372 | # validate there is at least one IOC for each report and each IOC entry has at least one entry 373 | for key, item in iocs.items(): 374 | if 
key not in self.valid_ioc_types: 375 | raise CbInvalidReport(f"{self._rid}, field 'iocs', unknown ioc '{key}'") 376 | 377 | if key.lower() == "query": 378 | if not isinstance(item, Dict): 379 | raise CbInvalidReport(f"{self._rid}, field 'iocs', ioc '{key}', is not a dictionary") 380 | # NOTE: other query ioc testing below 381 | else: 382 | if not isinstance(item, List): 383 | raise CbInvalidReport(f"{self._rid}, field 'iocs', ioc '{key}', is not a list of str") 384 | if len(item) == 0: 385 | raise CbInvalidReport(f"{self._rid}, field 'iocs', ioc '{key}', must have at least 1 entry") 386 | for i in item: 387 | if not isinstance(i, str): 388 | raise CbInvalidReport( 389 | f"{self._rid}, field 'iocs', ioc '{key}', has non-str entry (({i}, type {type(i)})") 390 | 391 | # Let us check and make sure that for "query" ioc type does not contain other types of ioc 392 | query_ioc = "query" in iocs.keys() 393 | if query_ioc: 394 | extras = [] 395 | for key in iocs.keys(): 396 | if key not in ["query"]: 397 | extras.append(key) 398 | if len(extras) > 0: 399 | raise CbInvalidReport(f"{self._rid}, field 'iocs', has extra keys: {extras}") 400 | 401 | iocs_query = iocs["query"] # for cleaner code 402 | 403 | # validate that the index_type field exists 404 | if "index_type" not in iocs_query.keys(): 405 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'query' section missing 'index_type'") 406 | 407 | # validate that the index_type is a valid value 408 | if not iocs_query.get("index_type", None) in self.valid_query_ioc_types: 409 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'index_type' is not a known type" 410 | f" ({iocs_query.get('index_type', None)})") 411 | 412 | # validate that the search_query field exists 413 | if "search_query" not in iocs_query.keys(): 414 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'query' section missing 'search_query'") 415 | 416 | # validate that the search_query field is at least minimally valid 417 | # in particular, we are looking for a "q=" (process) or "cb.q.????=" (binary) 418 | # this is by no means a complete validation, but it does provide a protection 419 | # against leaving the actual query unqualified 420 | for item in iocs_query["search_query"]: 421 | if "q=" not in item and "cb.q." 
not in item: 422 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'query' has bad 'search_query': {item}") 423 | 424 | for kvpair in item.split('&'): 425 | if len(kvpair.split('=')) != 2: 426 | continue # ignore simple items 427 | qparts = kvpair.split('=') 428 | if qparts[0] == 'q' or qparts[0].startswith("cb.q."): 429 | self._is_valid_query(qparts[1]) 430 | 431 | # validate md5 hashes 432 | for md5 in iocs.get("md5", []): 433 | x = re.findall(r"^([a-fA-F\d]{32})$", md5) 434 | if len(x) == 0: 435 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'mp5' has invalid hash: {md5}") 436 | 437 | # validate ja3 hashes 438 | for ja3 in iocs.get("ja3", []): 439 | x = re.findall(r"^([a-fA-F\d]{32})$", ja3) 440 | if len(x) == 0: 441 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'ja3' has invalid hash: {ja3}") 442 | 443 | # validate ja3s hashes 444 | for ja3s in iocs.get("ja3s", []): 445 | x = re.findall(r"^([a-fA-F\d]{32})$", ja3s) 446 | if len(x) == 0: 447 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'ja3s' has invalid hash: {ja3s}") 448 | 449 | # validate sha256 hashes 450 | for sha256 in iocs.get("sha256", []): 451 | x = re.findall(r"^([a-fA-F\d]{64})$", sha256) 452 | if len(x) == 0: 453 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'sha256' has invalid hash: {sha256}") 454 | 455 | # validate ipv4 456 | for ipv4 in iocs.get("ipv4", []): 457 | try: 458 | ipaddress.ip_address(ipv4) 459 | except ValueError as err: 460 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'ipv4' value of {err}") 461 | 462 | # validate ipv6 463 | for ipv6 in iocs.get("ipv6", []): 464 | try: 465 | ipaddress.ip_address(ipv6) 466 | except ValueError as err: 467 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'ipv6' value of {err}") 468 | 469 | # validate domains 470 | # NOTE: as per spec: https://datatracker.ietf.org/doc/rfc1035/?include_text=1 471 | for dns in iocs.get("dns", []): 472 | if len(dns.strip()) == 0: 473 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'dns' is empty") 474 | if len(dns.strip()) > 253: 475 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'dns' exceeds maximum size of 253 characters") 476 | 477 | # break into octets 478 | parts = dns.split('.') 479 | if len(parts) == 1: 480 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'dns' value has too few octets ({dns})") 481 | 482 | # trailing . is valid, as per http://www.dns-sd.org/TrailingDotsInDomainNames.html 483 | if len(parts[-1]) == 0: 484 | parts = parts[:-2] # clip it 485 | 486 | # spec limits dns to 127 octets, will likely never hit this due to overall length checks 487 | if len(parts) > 127: 488 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'dns' value has too many octets ({dns})") 489 | 490 | # parts defined as per https://datatracker.ietf.org/doc/rfc1035/?include_text=1, section 2.3.1 491 | # However, examples draw upon sources that provide domains that seem to break this, so we will 492 | # loosen the strict validation. 493 | for part in parts: 494 | x = re.findall(r'^[a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9]?$', part) 495 | if len(x) == 0: 496 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'dns' is invalid : {dns}") 497 | 498 | def _is_valid_query(self, q: str) -> None: 499 | """ 500 | Make a determination as to if this is a valid query. 
501 | 502 | :param q: query entry 503 | """ 504 | # the query itself must be percent-encoded 505 | # verify there are only non-reserved characters present 506 | # no logic to detect unescaped '%' characters 507 | for c in q: 508 | if c not in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~%*()": 509 | raise CbInvalidReport(f"{self._rid}, field 'iocs', 'query' has unescaped non-reserved character " 510 | f"'{c}' found in query; use percent-encoding") 511 | 512 | 513 | # -------------------------------------------------------------------------------- 514 | 515 | class CbJSONEncoder(json.JSONEncoder): 516 | """Custom JSON encoder for CbFeed.""" 517 | 518 | def default(self, o): 519 | return o.dump() 520 | 521 | 522 | class CbFeed(object): 523 | """ 524 | Class to hold feed information. 525 | """ 526 | 527 | def __init__(self, feedinfo: Union[CbFeedInfo, Dict[str, Union[str, int, float]]], 528 | reports: List[Union[CbReport, Dict[str, Union[str, int, Dict, List]]]]): 529 | """ 530 | Initialize the class. 531 | 532 | :param feedinfo: feedinfo portion of a feed, as dict or CbFeedInfo object 533 | :param reports: reports portion of a feed, as list of dict or list of CbReport objects 534 | """ 535 | # basic sanity check! 536 | if not isinstance(feedinfo, (Dict, CbFeedInfo)): 537 | raise CbInvalidFeed("The supplied `feedinfo` parameter does not appear to be a valid dictionary" 538 | f" or CbFeedInfo object (is {type(feedinfo)})") 539 | if not isinstance(reports, List): 540 | raise CbInvalidFeed(f"The supplied `reports` parameter does not appear to be a valid list" 541 | f" (is {type(reports)})") 542 | else: 543 | for item in reports: 544 | if not isinstance(item, (Dict, CbReport)): 545 | raise CbInvalidFeed(f"The `reports` parameter must be a list of dictionaries" 546 | f" or CbReport objects (saw {type(item)})") 547 | 548 | use_feed = feedinfo if isinstance(feedinfo, Dict) else feedinfo.data 549 | use_rep = [rep if isinstance(rep, Dict) else rep.data for rep in reports] 550 | 551 | # save raw data internally 552 | self.data = {'feedinfo': use_feed, 553 | 'reports': use_rep} 554 | 555 | def __repr__(self): 556 | """Return the canonical string representation of the object.""" 557 | return repr(self.data) 558 | 559 | def __str__(self): 560 | """Return a descriptive string of the object.""" 561 | return f"CbFeed({self.data.get('feedinfo', 'unknown')})" 562 | 563 | # -------------------------------------------------- 564 | 565 | def validate(self, serialized_data: str = None, strict: bool = False) -> None: 566 | """ 567 | Validates the feed information. 568 | 569 | :param serialized_data: serialized data for the feed (JSON string) 570 | :param strict: If True, throw exception for non-CB fields, otherwise just prune them 571 | """ 572 | if not serialized_data: 573 | # this should be identity, but just to be safe. 
574 | serialized_data = self.dump(validate=False) 575 | 576 | data = json.loads(serialized_data) 577 | 578 | if "feedinfo" not in data: 579 | raise CbInvalidFeedInfo("Feed missing 'feedinfo' data") 580 | 581 | if 'reports' not in data: 582 | raise CbInvalidFeedInfo("Feed missing 'reports' structure") 583 | 584 | dispname = data['feedinfo'].get('display_name', "???") 585 | 586 | # validate the feed info 587 | try: 588 | CbFeedInfo(strict=strict, validate=True, **data["feedinfo"]) 589 | except Exception as err: 590 | raise CbInvalidFeedInfo(f"Problem with feed `{dispname}`: {err}") 591 | 592 | # validate each report individually 593 | for rep in data["reports"]: 594 | try: 595 | CbReport(strict=strict, validate=True, **rep) 596 | except Exception as err: 597 | raise CbInvalidReport(f"Problem with feed `{dispname}`, report `{rep['id']}`: {err}") 598 | 599 | # validate the reports as a whole 600 | self.validate_report_list(data["reports"]) 601 | 602 | def dump(self, validate: bool = True, sort_keys: bool = True) -> str: 603 | """ 604 | Dumps the feed data. 605 | 606 | :param validate: is set, validates feed before dumping 607 | :param sort_keys: If True, pretty it up by storing the keys 608 | :return: json string of feed data 609 | """ 610 | if validate: 611 | self.validate() 612 | return json.dumps(self.data, cls=CbJSONEncoder, indent=2, sort_keys=sort_keys) 613 | 614 | def iter_iocs(self) -> Generator: 615 | """ 616 | Yields all iocs in the feed. 617 | 618 | :return: iterator of all iocs 619 | """ 620 | data = json.loads(self.dump(validate=False)) 621 | for report in data["reports"]: 622 | for md5 in report.get("iocs", {}).get("md5", []): 623 | yield {"type": "md5", "ioc": md5, "report_id": report.get("id", "")} 624 | for sha256 in report.get("iocs", {}).get("sha256", []): 625 | yield {"type": "sha256", "ioc": sha256, "report_id": report.get("id", "")} 626 | for ipv4 in report.get("iocs", {}).get("ipv4", []): 627 | yield {"type": "ipv4", "ioc": ipv4, "report_id": report.get("id", "")} 628 | for ipv6 in report.get("iocs", {}).get("ipv6", []): 629 | yield {"type": "ipv6", "ioc": ipv6, "report_id": report.get("id", "")} 630 | for domain in report.get("iocs", {}).get("dns", []): 631 | yield {"type": "dns", "ioc": domain, "report_id": report.get("id", "")} 632 | for ja3 in report.get("iocs", {}).get("ja3", []): 633 | yield {"type": "ja3", "ioc": ja3, "report_id": report.get("id", "")} 634 | for ja3s in report.get("iocs", {}).get("ja3s", []): 635 | yield {"type": "ja3s", "ioc": ja3s, "report_id": report.get("id", "")} 636 | for query in report.get("iocs", {}).get("query", {}).get("search_query", {}): 637 | yield {"type": "query", "ioc": query, "report_id": report.get("id", "")} 638 | 639 | @staticmethod 640 | def validate_report_list(reports: List[Dict[str, Any]]) -> None: 641 | """ 642 | Validates reports as a set, as compared to each report as a standalone entity. 643 | 644 | :param reports: list of reports 645 | """ 646 | 647 | reportids = set() 648 | 649 | # Verify that no two reports have the same feed id -- see CBAPI-17 650 | for report in reports: 651 | if report['id'] in reportids: 652 | raise CbInvalidFeedInfo(f"Duplicate report id '{report['id']}'") 653 | reportids.add(report['id']) 654 | --------------------------------------------------------------------------------