├── tests ├── __init__.py └── output_filters │ ├── __init__.py │ ├── alexa │ ├── __init__.py │ └── lookup_domains_test.py │ ├── util │ ├── __init__.py │ ├── config_test.py │ ├── error_messages_test.py │ ├── domains_test.py │ └── blacklist_test.py │ ├── opendns │ ├── __init__.py │ ├── lookup_domains_test.py │ └── related_domains_test.py │ ├── virustotal │ ├── __init__.py │ ├── lookup_domains_test.py │ └── lookup_hashes_test.py │ ├── base_filters │ ├── __init__.py │ ├── threat_feed_test.py │ ├── chain_test.py │ └── output_filter_test.py │ ├── shadowserver │ ├── __init__.py │ └── lookup_hashes_test.py │ ├── data │ ├── domains_whitelist.txt │ ├── domains_blacklist.txt │ ├── hashes_blacklist.txt │ ├── opendns │ │ └── lookup_domains │ │ │ ├── categorization.json │ │ │ ├── expected.json │ │ │ └── security.json │ ├── cache.shadowserver.LookupHashesFilter.json │ ├── test_osxcollector_config.yaml │ ├── cache.virustotal.LookupDomainsFilter.json │ └── cache.virustotal.LookupHashesFilter.json │ ├── find_blacklisted_test.py │ ├── run_filter_test.py │ ├── find_domains_test.py │ └── related_files_test.py ├── .deactivate.sh ├── .activate.sh ├── osxcollector ├── output_filters │ ├── alexa │ │ ├── __init__.py │ │ └── lookup_rankings.py │ ├── chrome │ │ ├── __init__.py │ │ ├── find_extensions.py │ │ └── sort_history.py │ ├── firefox │ │ ├── __init__.py │ │ ├── find_extensions.py │ │ └── sort_history.py │ ├── opendns │ │ ├── __init__.py │ │ ├── lookup_domains.py │ │ └── related_domains.py │ ├── util │ │ ├── __init__.py │ │ ├── error_messages.py │ │ ├── config.py │ │ ├── domains.py │ │ ├── dict_utils.py │ │ └── blacklist.py │ ├── base_filters │ │ ├── __init__.py │ │ ├── chain.py │ │ ├── threat_feed.py │ │ └── output_filter.py │ ├── shadowserver │ │ ├── __init__.py │ │ └── lookup_hashes.py │ ├── virustotal │ │ ├── __init__.py │ │ ├── lookup_hashes.py │ │ ├── lookup_urls.py │ │ └── lookup_domains.py │ ├── summary_filters │ │ ├── __init__.py │ │ ├── summary.py │ │ └── text.py │ ├── 
__init__.py │ ├── exceptions.py │ ├── find_blacklisted.py │ ├── related_files.py │ ├── find_domains.py │ └── analyze.py └── __init__.py ├── MANIFEST.in ├── requirements.txt ├── requirements-dev.txt ├── .gitignore ├── Makefile ├── LICENSE.md ├── .coveragerc ├── .secrets.baseline ├── tox.ini ├── setup.py ├── .travis.yml ├── .pre-commit-config.yaml └── osxcollector.yaml.example /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.deactivate.sh: -------------------------------------------------------------------------------- 1 | deactivate 2 | -------------------------------------------------------------------------------- /tests/output_filters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/alexa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.activate.sh: -------------------------------------------------------------------------------- 1 | virtualenv_run/bin/activate -------------------------------------------------------------------------------- /tests/output_filters/opendns/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/virustotal/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /osxcollector/output_filters/alexa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/chrome/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/firefox/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/opendns/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/base_filters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/shadowserver/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/base_filters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/shadowserver/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /osxcollector/output_filters/virustotal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.md 3 | -------------------------------------------------------------------------------- /osxcollector/output_filters/summary_filters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/data/domains_whitelist.txt: -------------------------------------------------------------------------------- 1 | yelp.com 2 | yelp.co.uk 3 | -------------------------------------------------------------------------------- /tests/output_filters/data/domains_blacklist.txt: -------------------------------------------------------------------------------- 1 | example.com 2 | example.co.uk 3 | example.org 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyYAML==5.1 2 | simplejson==3.10.0 3 | six==1.12.0 4 | threat_intel==0.1.29 5 | tldextract==2.0.2 6 | -------------------------------------------------------------------------------- /osxcollector/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | 
coverage==4.5.3 3 | flake8==3.7.7 4 | mock==2.0.0 5 | pre-commit>=1.0.0 6 | pytest==4.4.0 7 | tox==3.8.6 8 | -------------------------------------------------------------------------------- /tests/output_filters/data/hashes_blacklist.txt: -------------------------------------------------------------------------------- 1 | ffff5f60462c38b1d235cb3509876543 2 | ffff234d2a50a42a87389f1234561a21 3 | ffff51e77b442ee23188d87e4abcdef0 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .tox/ 3 | *.egg-info/ 4 | .DS_Store 5 | .idea/ 6 | config.yaml 7 | osxcollector.yaml 8 | virtualenv_run/ 9 | .coverage 10 | dist/ 11 | sdist/ 12 | -------------------------------------------------------------------------------- /osxcollector/output_filters/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | 7 | # Suppress output from tldextract module 8 | logging.getLogger('tldextract').addHandler(logging.NullHandler()) 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DELETE_ON_ERROR: 2 | 3 | all: 4 | echo >&2 "Must specify target." 5 | 6 | test: 7 | tox 8 | 9 | venv: 10 | tox -evenv 11 | 12 | install-hooks: 13 | tox -e pre-commit -- install -f --install-hooks 14 | 15 | clean: 16 | rm -rf build/ dist/ osxcollector_output_filters.egg-info/ .tox/ virtualenv_run/ 17 | find . -name '*.pyc' -delete 18 | find . 
-name '__pycache__' -delete 19 | 20 | .PHONY: all test venv install-hooks clean 21 | -------------------------------------------------------------------------------- /tests/output_filters/data/opendns/lookup_domains/categorization.json: -------------------------------------------------------------------------------- 1 | { 2 | "bango.com": { 3 | "status": 1, 4 | "content_categories": [ 5 | "Search Engines", 6 | "Business Services", 7 | "Research/Reference" 8 | ], 9 | "security_categories": [] 10 | }, 11 | "dango.com": { 12 | "status": 0, 13 | "content_categories": [ 14 | "Phishing" 15 | ], 16 | "security_categories": [] 17 | }, 18 | "dingo.com": null, 19 | "bingo.com": null 20 | } 21 | -------------------------------------------------------------------------------- /osxcollector/output_filters/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # All exceptions thrown by the osxcollector.output_filters module 4 | # 5 | from __future__ import absolute_import 6 | from __future__ import unicode_literals 7 | 8 | 9 | class OutputFilterError(Exception): 10 | pass 11 | 12 | 13 | class MissingConfigError(OutputFilterError): 14 | 15 | """An error to throw when configuration is missing""" 16 | pass 17 | 18 | 19 | class BadDomainError(OutputFilterError): 20 | 21 | """An error to throw when a domain is invalid.""" 22 | pass 23 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
2 | 3 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 4 | 5 | You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/. 6 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = 4 | . 5 | omit = 6 | .tox/* 7 | virtualenv_run/* 8 | setup.py 9 | 10 | [report] 11 | show_missing = True 12 | skip_covered = True 13 | 14 | exclude_lines = 15 | # Have to re-enable the standard pragma 16 | \#\s*pragma: no cover 17 | 18 | # Don't complain if tests don't hit defensive assertion code: 19 | ^\s*raise AssertionError\b 20 | ^\s*raise NotImplementedError\b 21 | ^\s*return NotImplemented\b 22 | ^\s*raise$ 23 | 24 | # Don't complain if non-runnable code isn't run: 25 | ^if __name__ == ['"]__main__['"]:$ 26 | 27 | [html] 28 | directory = coverage-html 29 | 30 | # vim:ft=dosini 31 | -------------------------------------------------------------------------------- /tests/output_filters/data/opendns/lookup_domains/expected.json: -------------------------------------------------------------------------------- 1 | [[{ 2 | "domain": "dango.com", 3 | "categorization": { 4 | "status": 0, 5 | "content_categories": [ 6 | "Phishing" 7 | ], 8 | "security_categories": [], 9 | "suspicious": true 10 | }, 11 | "link": "https://investigate.opendns.com/domain-view/name/dango.com/view", 12 | "security": { 13 | "dga_score": 0, 14 | "asn_score": -0.1608560065526172, 15 | "rip_score": 0, 16 | "securerank2": 0.04721624022600212, 17 | "prefix_score": 0, 18 | "attack": "", 19 | "found": true, 20 | "threat_type": "" 21 | } 22 | }]] 23 | 
-------------------------------------------------------------------------------- /.secrets.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": { 3 | "files": ".*tests/.*|\\\\.pre-commit-config\\\\.yaml", 4 | "lines": null 5 | }, 6 | "generated_at": "2019-04-05T11:02:14Z", 7 | "plugins_used": [ 8 | { 9 | "base64_limit": 4.5, 10 | "name": "Base64HighEntropyString" 11 | }, 12 | { 13 | "hex_limit": 3, 14 | "name": "HexHighEntropyString" 15 | }, 16 | { 17 | "name": "PrivateKeyDetector" 18 | } 19 | ], 20 | "results": { 21 | ".travis.yml": [ 22 | { 23 | "hashed_secret": "468b2bbbf250e477bd35e81cafa3bf8c7ce36285", 24 | "line_number": 19, 25 | "type": "Base64 High Entropy String" 26 | } 27 | ] 28 | }, 29 | "version": "0.12.2" 30 | } 31 | -------------------------------------------------------------------------------- /tests/output_filters/base_filters/threat_feed_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter 6 | from tests.output_filters.run_filter_test import RunFilterTest 7 | 8 | 9 | class TestThreatFeedFilter(RunFilterTest): 10 | 11 | def test_run_threat_feed_filter(self): 12 | input_blobs = [ 13 | {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}}, 14 | {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}}, 15 | ] 16 | self.run_test(lambda: ThreatFeedFilter('dinky', 'feed_test'), input_blobs, expected_output_blobs=input_blobs) 17 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | project = osxcollector_output_filters 3 | envlist = py27,py36 4 | tox_pip_extensions_ext_pip_custom_platform = true 5 | 
tox_pip_extensions_ext_venv_update = true 6 | 7 | [testenv] 8 | deps = -rrequirements-dev.txt 9 | commands = 10 | flake8 . 11 | {envpython} --version 12 | coverage --version 13 | coverage run -m pytest --strict {posargs:tests} 14 | coverage report -m --show-missing 15 | 16 | [testenv:pre-commit] 17 | deps = pre-commit>=1.0.0 18 | commands = pre-commit run --all-files 19 | 20 | [testenv:venv] 21 | envdir = virtualenv_run 22 | basepython = python3.6 23 | commands = 24 | 25 | [flake8] 26 | exclude = .git,__pycache__,.tox,virtualenv_run 27 | # E501 - long lines 28 | ignore = E501 29 | max_line_length = 140 30 | -------------------------------------------------------------------------------- /osxcollector/output_filters/util/error_messages.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # A set of simple methods for writing messages to stderr 4 | # 5 | from __future__ import absolute_import 6 | from __future__ import unicode_literals 7 | 8 | import sys 9 | from traceback import extract_tb 10 | from traceback import format_list 11 | 12 | 13 | def write_exception(e): 14 | exc_type, _, exc_traceback = sys.exc_info() 15 | msg = ', '.join(str(a) for a in e.args) 16 | sys.stderr.write('[ERROR] {0} {1}\n'.format(exc_type.__name__, msg)) 17 | for line in format_list(extract_tb(exc_traceback)): 18 | sys.stderr.write(line) 19 | 20 | 21 | def write_error_message(message): 22 | sys.stderr.write('[ERROR] ') 23 | sys.stderr.write(message) 24 | sys.stderr.write('\n') 25 | -------------------------------------------------------------------------------- /tests/output_filters/base_filters/chain_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from osxcollector.output_filters.base_filters.chain import ChainFilter 6 | from 
osxcollector.output_filters.base_filters.output_filter import OutputFilter 7 | from tests.output_filters.run_filter_test import RunFilterTest 8 | 9 | 10 | class TestChainFilter(RunFilterTest): 11 | 12 | def test_run_chain_filter(self): 13 | input_blobs = [ 14 | {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}}, 15 | {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}}, 16 | ] 17 | self.run_test(lambda: ChainFilter([OutputFilter(), OutputFilter()]), input_blobs=input_blobs, expected_output_blobs=input_blobs) 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from setuptools import find_packages 3 | from setuptools import setup 4 | 5 | 6 | with open('README.md', 'r') as fh: 7 | long_description = fh.read() 8 | 9 | setup( 10 | name='osxcollector_output_filters', 11 | version='1.1.1', 12 | author='Yelp Security', 13 | author_email='opensource@yelp.com', 14 | description='Filters that process and transform the output of OSXCollector', 15 | long_description=long_description, 16 | long_description_content_type='text/markdown', 17 | license='GNU General Public License', 18 | url='https://github.com/Yelp/osxcollector_output_filters', 19 | setup_requires='setuptools', 20 | packages=find_packages(exclude=['tests']), 21 | provides=['osxcollector'], 22 | install_requires=[ 23 | 'PyYAML>=5.0', 24 | 'threat_intel', 25 | 'tldextract', 26 | 'simplejson', 27 | 'six', 28 | ], 29 | ) 30 | -------------------------------------------------------------------------------- /tests/output_filters/data/cache.shadowserver.LookupHashesFilter.json: -------------------------------------------------------------------------------- 1 | {"shadowserver-bin-test": {"5d87de61cb368c93325dd910c202b8647f8e90ed": {"os_version": "10.10", "filesize": "48976", "reference": "os_all", "sha1": 
"5D87DE61CB368C93325DD910C202B8647F8E90ED", "dirname": "/System/Library/Extensions/System.kext/PlugIns/Libkern.kext", "binary": "1", "sha256": "1FAFE48F626FDC030B0A0EFC1008D51CD3078D1B3EC95F808D12AFBFEF458B23", "filetimestamp": "09/19/2014 00:42:35", "source": "MacAppInfo", "sha512": "C1CAEB26F892FE3C00B3B6BAB462058C772F91824092BF9B2E183F66D885278B6F0C6DA65D06994A45166501F1A889E38D5D234AE18ECBD2EF3CFD9F4388DC8F", "language": "English", "md5": "6746005C822CEB6737B871698D3ED22F", "bit": "64", "filename": "Libkern", "os_name": "Mac OS X 10.10 (build 14A389)", "application_type": "Mach-O 64-bit kext bundle x86_64", "crc32": "5332564F", "os_mfg": "Apple Inc.", "source_version": "1.1"}, "816a85d89ae34d2dc73b8c768eecb03935c568ba": {"sha1": "816a85d89ae34d2dc73b8c768eecb03935c568ba"}}} -------------------------------------------------------------------------------- /tests/output_filters/util/config_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import pytest 6 | from mock import patch 7 | 8 | from osxcollector.output_filters.util.config import config_get_deep 9 | 10 | 11 | class TestCreateBlacklist: 12 | 13 | @pytest.fixture(scope='module', autouse=True) 14 | def patched_config(self): 15 | config_initial_contents = { 16 | 'a': 'b', 17 | 'c': {'d': 'e'}, 18 | 'f': 1, 19 | 'g': ['apple', 'banana'], 20 | } 21 | with patch('osxcollector.output_filters.util.config._read_config', return_value=config_initial_contents): 22 | yield 23 | 24 | def test_read_top_level_key(self): 25 | assert config_get_deep('a') == 'b' 26 | 27 | def test_read_multi_level_key(self): 28 | assert config_get_deep('c.d') == 'e' 29 | 30 | def test_numeric_val(self): 31 | assert config_get_deep('f') == 1 32 | 33 | def test_list_val(self): 34 | assert config_get_deep('g') == ['apple', 'banana'] 35 | 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | matrix: 3 | include: 4 | - env: TOXENV=py27 5 | python: '2.7' 6 | - env: TOXENV=py36 7 | python: '3.6' 8 | - env: TOXENV=pre-commit 9 | 10 | install: sudo pip install tox-travis 11 | 12 | script: tox 13 | deploy: 14 | provider: pypi 15 | user: yelplabs 16 | skip_existing: true 17 | distributions: "sdist bdist_wheel" 18 | password: 19 | secure: WMygawcYeJGbe4cJlQECKKEZJLYsirgutlihp8Yn4iAKRjpDFmOTwB0B8NjaYsB9pBvz7MLz913ukIhzsHhZLMYE6GRpwjiGfaSXupC4zDVkdi14KPJIo7dff/1p0rGGtZmYa/iohC/HDgbF4iXcBcwdzrvDBDqwPFaM/5J4LxF+KunXCVopsmQTkBEsMNz/K55By3xCO3qxupixTDYy+VOVv3F6Bs8hChqKmql9vvi2ZZPVq9y7io13T7JREKKv8ZOyIq+AGXtrZvnzVuNPfW6PE3eBUv2BUy0xEuwsqjX4goQ0bzRiVKS4XdIl8HmZD/aj2mkaXSw6HLST4+/+im2uNVIPHTEfyqDgUwtZInQF9zML3wGANGIfS+z1ZKfirSO0DiTKpFMkbiM5K0D+VRKFChblCOqQ5WiU9jhcLrHDHED7aLT7pIAlEcizeTpTwF9ZR5Eg48wB59A3q5b6aTsw1t9Q2kIyEnXHgf3JUGtRu0BO2ATGL324Dkzoa8DN2CeZ0F3fWCaI7gqFssrytDPwG+ct17yJkLQXnRTemFm/fCw9YC+onSufa+pV2qlRZRFywifgwIaAwL7hXC4w2qF4d5RzEOtCZjlmOBCLZ6/r/dwhnS+bBdHSuxxcfV1WB5RZLemqNJfbRdQheM0Ld6tzqZZRi5SB7gohnLEJWlc= 20 | on: 21 | tags: true 22 | repo: Yelp/osxcollector_output_filters 23 | branch: master 24 | -------------------------------------------------------------------------------- /tests/output_filters/base_filters/output_filter_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 6 | from tests.output_filters.run_filter_test import RunFilterTest 7 | 8 | 9 | class TestOutputFilter(RunFilterTest): 10 | 11 | def test_filter_line(self): 12 | input_blobs = [ 13 | {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}}, 14 | 
{'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}}, 15 | ] 16 | output_filter = OutputFilter() 17 | for blob in input_blobs: 18 | output = output_filter.filter_line(blob) 19 | assert output == blob 20 | 21 | def test_end_of_lines(self): 22 | output_filter = OutputFilter() 23 | output = output_filter.end_of_lines() 24 | assert output == [] 25 | 26 | def test_run_output_filter(self): 27 | input_blobs = [ 28 | {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}}, 29 | {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}}, 30 | ] 31 | self.run_test(OutputFilter, input_blobs, expected_output_blobs=input_blobs) 32 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: '^tests/output_filters/data/.*$' 2 | repos: 3 | - repo: git://github.com/pre-commit/pre-commit-hooks 4 | rev: v2.1.0 5 | hooks: 6 | - id: check-json 7 | - id: check-yaml 8 | - id: end-of-file-fixer 9 | - id: trailing-whitespace 10 | - id: name-tests-test 11 | - id: requirements-txt-fixer 12 | - id: double-quote-string-fixer 13 | - id: flake8 14 | - id: fix-encoding-pragma 15 | - repo: git://github.com/pre-commit/mirrors-autopep8 16 | rev: v1.4.3 17 | hooks: 18 | - id: autopep8 19 | - repo: git://github.com/asottile/reorder_python_imports 20 | rev: v1.4.0 21 | hooks: 22 | - id: reorder-python-imports 23 | args: [ 24 | '--add-import', 'from __future__ import absolute_import', 25 | '--add-import', 'from __future__ import unicode_literals', 26 | ] 27 | exclude: setup.py 28 | - repo: git://github.com/asottile/add-trailing-comma 29 | rev: v1.0.0 30 | hooks: 31 | - id: add-trailing-comma 32 | - repo: https://github.com/Yelp/detect-secrets 33 | rev: v0.12.2 34 | hooks: 35 | - id: detect-secrets 36 | args: ['--baseline', '.secrets.baseline'] 37 | exclude: .*tests/.*|\.pre-commit-config\.yaml 38 | language_version: python2.7 39 | 
-------------------------------------------------------------------------------- /osxcollector/output_filters/util/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Config is a very simplistic class for reading YAML config. 4 | # 5 | from __future__ import absolute_import 6 | from __future__ import unicode_literals 7 | 8 | import os 9 | 10 | import yaml 11 | try: 12 | from yaml import CSafeLoader as SafeLoader 13 | except ImportError: 14 | from yaml import SafeLoader 15 | 16 | from osxcollector.output_filters.exceptions import MissingConfigError 17 | from osxcollector.output_filters.util.dict_utils import DictUtils 18 | 19 | 20 | def config_get_deep(key, default=None): 21 | """Reads from the config. 22 | 23 | Args: 24 | key: Dictionary key to lookup in config 25 | default: Value to return if key is not found 26 | Returns: 27 | Value from config or default if not found otherwise 28 | """ 29 | return DictUtils.get_deep(_read_config(), key, default) 30 | 31 | 32 | def _read_config(): 33 | """Reads and parses the YAML file. 34 | 35 | Returns: 36 | dict of config 37 | """ 38 | with open(_config_file_path()) as source: 39 | return yaml.load(source.read(), Loader=SafeLoader) 40 | 41 | 42 | def _config_file_path(): 43 | """Find the path to the config file. 
44 | 45 | Returns: 46 | String file path 47 | Raises: 48 | MissingConfigError if no config file is found 49 | """ 50 | for loc in os.curdir, os.path.expanduser('~'), os.environ.get('OSXCOLLECTOR_CONF', ''): 51 | path = os.path.join(loc, 'osxcollector.yaml') 52 | if os.path.exists(path): 53 | return path 54 | raise MissingConfigError() 55 | -------------------------------------------------------------------------------- /tests/output_filters/util/error_messages_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from osxcollector.output_filters.exceptions import BadDomainError 6 | from osxcollector.output_filters.util.error_messages import write_error_message 7 | from osxcollector.output_filters.util.error_messages import write_exception 8 | 9 | 10 | class TestWriteException: 11 | 12 | def test_simple_exception(self, capsys): 13 | try: 14 | raise Exception() 15 | except Exception as e: 16 | write_exception(e) 17 | 18 | output = capsys.readouterr().err 19 | assert 0 == output.find('[ERROR]') 20 | 21 | def test_specific_exception(self, capsys): 22 | try: 23 | raise BadDomainError() 24 | except Exception as e: 25 | write_exception(e) 26 | 27 | output = capsys.readouterr().err 28 | assert output.find('[ERROR] BadDomainError') == 0 29 | 30 | def test_exception_message(self, capsys): 31 | try: 32 | raise BadDomainError('Look for me in validation') 33 | except Exception as e: 34 | write_exception(e) 35 | 36 | output = capsys.readouterr().err 37 | assert output.find('[ERROR] BadDomainError Look for me in validation') == 0 38 | 39 | 40 | class TestWriteErrorMessage: 41 | 42 | def test_write_error_message(self, capsys): 43 | message = 'Look for me in validation' 44 | expected = '[ERROR] Look for me in validation\n' 45 | 46 | write_error_message(message) 47 | 48 | output = capsys.readouterr().err 49 | assert output == 
expected 50 | -------------------------------------------------------------------------------- /osxcollector/output_filters/summary_filters/summary.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import sys 6 | from collections import defaultdict 7 | 8 | import six 9 | 10 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 11 | 12 | 13 | class SummaryFilter(OutputFilter): 14 | """Base class for summary filters.""" 15 | 16 | def __init__(self, show_signature_chain=False, show_browser_ext=False, summary_output_file=None, group_by_iocs=False, group_key=None, **kwargs): 17 | super(SummaryFilter, self).__init__(**kwargs) 18 | self._iocs = [] 19 | self._iocs_by_key = defaultdict(list) 20 | self._vthash = [] 21 | self._vtdomain = [] 22 | self._opendns = [] 23 | self._alexarank = [] 24 | self._blacklist = [] 25 | self._related = [] 26 | self._signature_chain = [] 27 | self._extensions = [] 28 | self._show_signature_chain = show_signature_chain 29 | self._show_browser_ext = show_browser_ext 30 | self._group_by_iocs = group_by_iocs 31 | self._group_key = group_key 32 | 33 | self._add_to_blacklist = [] 34 | 35 | self._close_file = False 36 | 37 | self._open_output_stream(summary_output_file) 38 | 39 | def _open_output_stream(self, summary_output_file): 40 | if summary_output_file: 41 | if isinstance(summary_output_file, six.string_types): 42 | self._output_stream = open(summary_output_file, 'w') 43 | self._close_file = True 44 | else: 45 | # not a string, most likely already opened output stream 46 | self._output_stream = summary_output_file 47 | else: 48 | self._output_stream = sys.stdout 49 | 50 | def __del__(self): 51 | self._close_output_stream() 52 | 53 | def _close_output_stream(self): 54 | if self._close_file: 55 | self._output_stream.close() 56 | 
-------------------------------------------------------------------------------- /tests/output_filters/opendns/lookup_domains_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import simplejson 6 | from mock import patch 7 | from threat_intel.opendns import InvestigateApi 8 | 9 | from osxcollector.output_filters.opendns.lookup_domains import LookupDomainsFilter 10 | from tests.output_filters.run_filter_test import RunFilterTest 11 | 12 | 13 | class TestLookupDomainsFilter(RunFilterTest): 14 | 15 | def test_no_domains(self): 16 | input_blobs = [ 17 | {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}}, 18 | {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}}, 19 | ] 20 | 21 | self.run_test(LookupDomainsFilter, input_blobs=input_blobs, expected_output_blobs=input_blobs) 22 | 23 | def _read_json(self, file_name): 24 | with(open(file_name, 'r')) as fp: 25 | contents = fp.read() 26 | return simplejson.loads(contents) 27 | 28 | def test_no_security_information(self): 29 | input_blobs = [ 30 | {'osxcollector_domains': ['bingo.com', 'dingo.com', 'bango.com', 'dango.com'], 'banana': {'a': 11}}, 31 | ] 32 | file_name_pattern = 'tests/output_filters/data/opendns/lookup_domains/{0}' 33 | categorization = self._read_json(file_name_pattern.format('categorization.json')) 34 | security = self._read_json(file_name_pattern.format('security.json')) 35 | 36 | with patch.object( 37 | InvestigateApi, 'categorization', autospec=True, 38 | return_value=categorization, 39 | ), patch.object( 40 | InvestigateApi, 'security', autospec=True, return_value=security, 41 | ): 42 | output_blobs = self.run_test(LookupDomainsFilter, input_blobs=input_blobs) 43 | 44 | expected_categorization = self._read_json(file_name_pattern.format('expected.json')) 45 | self.assert_key_added_to_blob('osxcollector_opendns', 
def expand_domain(domain):
    """A generator that returns the input with and without the subdomain.

    Args:
        domain - string
    Returns:
        generator that returns strings
    """
    parts = tldextract.extract(domain)

    # Yield the fully-qualified name only when a subdomain actually exists.
    if parts.subdomain:
        yield '.'.join(parts)

    # Always yield the registered domain (domain + suffix, no subdomain).
    yield '.'.join(parts[1:])


def clean_domain(unclean_domain):
    """Removing errant characters and stuff from a domain name.

    A bit of careful dancing with character encodings. Eventually, some consumer of the domain string is gonna
    deal with it as ASCII. Make sure to encode as ASCII explicitly, so ASCII encoding errors can be ignored.

    Args:
        unclean_domain: string
    Returns:
        string domain name
    Raises:
        BadDomainError - when a clean domain can't be made
    """
    if not isinstance(unclean_domain, six.text_type):
        unclean_domain = unclean_domain.decode('utf-8', errors='ignore')

    # Strip any leading/trailing non-alphanumeric junk (dots, slashes, soft hyphens, ...).
    unclean_domain = re.sub(r'^[^a-zA-Z0-9]*(.*?)[^a-zA-Z0-9]*$', r'\1', unclean_domain)

    extracted = tldextract.extract(unclean_domain)
    if extracted.domain and extracted.suffix:
        # Drop the empty subdomain slot when there is no subdomain.
        first_index = 0 if extracted.subdomain else 1
        cleaned = '.'.join(extracted[first_index:]).lstrip('.')
        if six.PY2:
            return cleaned.encode('ascii', errors='ignore')
        return cleaned.encode('utf8').decode('ascii', errors='ignore')

    raise BadDomainError(u'Can not clean {0} {1}'.format(unclean_domain, repr(extracted)))
class LookupHashesFilter(ThreatFeedFilter):

    """A class to lookup hashes using ShadowServer API."""

    def __init__(self, lookup_when=None, **kwargs):
        """Looks up the values of 'sha1' keys and adds 'osxcollector_shadowserver' on a match.

        Args:
            lookup_when - Optional predicate of a blob deciding whether to perform the lookup.
        """
        super(LookupHashesFilter, self).__init__('sha1', 'osxcollector_shadowserver', lookup_when=lookup_when, **kwargs)

    def _lookup_iocs(self, all_iocs):
        """Looks up the ShadowServer info for a set of hashes.

        Args:
            all_iocs - a list of hashes.
        Returns:
            A dict with hash as key and threat info as value
        """
        cache_file_name = config_get_deep('shadowserver.LookupHashesFilter.cache_file_name', None)
        ss = ShadowServerApi(cache_file_name=cache_file_name)
        return ss.get_bin_test(all_iocs)

    def _should_add_threat_info_to_blob(self, blob, threat_info):
        """Only add info from ShadowServer if the hash and the filename match.

        Args:
            blob - A dict of data representing a line of output from OSXCollector
            threat_info - The threat info from ShadowServer
        Returns:
            boolean
        """
        # os.path.basename is the idiomatic spelling of os.path.split(...)[-1].
        blob_filename = os.path.basename(blob.get('file_path', ''))
        return blob_filename == threat_info.get('filename', '')
class DictUtils(object):

    """A set of methods for manipulating dictionaries."""

    @classmethod
    def _link_path_to_chain(cls, path):
        """Helper method for get_deep.

        Args:
            path: A str representing a chain of keys separated by '.' or an enumerable set of strings
        Returns:
            an enumerable set of strings
        """
        if path == '':
            return []
        # isinstance also accepts subclasses, unlike the exact type() check.
        if isinstance(path, (list, tuple, set)):
            return path
        return path.split('.')

    @classmethod
    def _get_deep_by_chain(cls, x, chain, default=None):
        """Grab data from a dict using a ['key1', 'key2', 'key3'] chain param to do deep traversal.

        Args:
            x: A dict
            chain: an enumerable set of strings
            default: A value to return if the path can not be found
        Returns:
            The value of the key or default
        """
        # An empty chain means "no path" and always yields the default.
        # `not chain` also covers empty tuples/sets, which the old
        # `chain == []` comparison silently let fall through.
        if not chain:
            return default
        try:
            for link in chain:
                try:
                    x = x[link]
                except (KeyError, TypeError):
                    # Fall back to treating the link as an integer index.
                    x = x[int(link)]
        except (KeyError, TypeError, ValueError):
            x = default
        return x

    @classmethod
    def get_deep(cls, x, path='', default=None):
        """Grab data from a dict using a 'key1.key2.key3' path param to do deep traversal.

        Args:
            x: A dict
            path: A 'deep path' to retrieve in the dict
            default: A value to return if the path can not be found
        Returns:
            The value of the key or default
        """
        chain = cls._link_path_to_chain(path)
        return cls._get_deep_by_chain(x, chain, default=default)
Any line starting with # is skipped 11 | # - blacklist_is_regex, should values in the file be treated as Python regex 12 | blacklists: 13 | - blacklist_name: "hashes" 14 | blacklist_keys: 15 | - "md5" 16 | - "sha1" 17 | - "sha2" 18 | blacklist_file_path: "./tests/output_filters/data/hashes_blacklist.txt" 19 | blacklist_is_regex: False 20 | - blacklist_name: "domains" 21 | blacklist_keys: 22 | - "osxcollector_domains" 23 | blacklist_file_path: "./tests/output_filters/data/domains_blacklist.txt" 24 | blacklist_is_domains: True 25 | blacklist_is_regex: True 26 | 27 | domain_whitelist: 28 | blacklist_name: "domain_whitelist" 29 | blacklist_keys: 30 | - "osxcollector_domains" 31 | blacklist_file_path: "./tests/output_filters/data/domains_whitelist.txt" 32 | blacklist_is_domains: True 33 | blacklist_is_regex: True 34 | 35 | shadowserver: 36 | LookupHashesFilter: 37 | cache_file_name: "./tests/output_filters/data/cache.shadowserver.LookupHashesFilter.json" 38 | 39 | virustotal: 40 | LookupHashesFilter: 41 | cache_file_name: "./tests/output_filters/data/cache.virustotal.LookupHashesFilter.json" 42 | LookupDomainsFilter: 43 | cache_file_name: "./tests/output_filters/data/cache.virustotal.LookupDomainsFilter.json" 44 | 45 | # No cache data is available or opendns yet 46 | opendns: 47 | # LookupDomainsFilter: 48 | # cache_file_name: "./tests/output_filters/data/cache.opendns.LookupDomainsFilter.json" 49 | RelatedDomainsFilter: 50 | cache_file_name: "./tests/output_filters/data/cache.opendns.RelatedDomainsFilter.json" 51 | -------------------------------------------------------------------------------- /tests/output_filters/data/opendns/lookup_domains/security.json: -------------------------------------------------------------------------------- 1 | { 2 | "bango.com": null, 3 | "dango.com": { 4 | "dga_score": 0, 5 | "geodiversity_normalized": [ 6 | [ 7 | "IS", 8 | 0.9996267573230843 9 | ], 10 | [ 11 | "US", 12 | 0.0003732426769157135 13 | ] 14 | ], 15 | "asn_score": 
class TestFindBlacklistedFilter(RunFilterTest):

    """Tests FindBlacklistedFilter against the hash and domain blacklists in the test config."""

    def test_simple_hashes(self):
        blobs_in = [
            {'md5': 'ffff5f60462c38b1d235cb3509876543'},
            {'sha1': 'ffff234d2a50a42a87389f1234561a21'},
            {'sha2': 'ffff51e77b442ee23188d87e4abcdef0'},
        ]
        expected = [
            {'hashes': ['ffff5f60462c38b1d235cb3509876543']},
            {'hashes': ['ffff234d2a50a42a87389f1234561a21']},
            {'hashes': ['ffff51e77b442ee23188d87e4abcdef0']},
        ]
        self._run_test(blobs_in, expected)

    def test_no_hashes(self):
        blobs_in = [
            # Not the right key
            {'apple': 'ffff5f60462c38b1d235cb3509876543'},
            # Value not on blacklist
            {'sha1': 'aaaa234d2a50a42a87389f1234561a21'},
        ]
        self._run_test(blobs_in, [None, None])

    def test_simple_domains(self):
        blobs_in = [
            {'osxcollector_domains': ['biz.example.com']},
            {'osxcollector_domains': ['www.example.co.uk']},
            {'osxcollector_domains': ['example.org']},
        ]
        expected = [
            {'domains': ['example.com']},
            {'domains': ['example.co.uk']},
            {'domains': ['example.org']},
        ]
        self._run_test(blobs_in, expected)

    def _run_test(self, input_blobs, expected_blacklists):
        """Run the filter and check the 'osxcollector_blacklist' key added to each blob."""
        output_blobs = self.run_test(FindBlacklistedFilter, input_blobs)
        self.assert_key_added_to_blob('osxcollector_blacklist', expected_blacklists, input_blobs, output_blobs)
class FindExtensionsFilter(OutputFilter):

    """Reads the Firefox JSON blobs and creates records about the extensions and plugins.

    Looks for lines where:
    ('osxcollector_section' == 'firefox' and 'osxcollector_subsection' == 'json_files')
    and parses the extension details out of the addons JSON.
    """

    def __init__(self, **kwargs):
        super(FindExtensionsFilter, self).__init__(**kwargs)
        self._new_lines = []

    def filter_line(self, blob):
        """Consume firefox addons/extensions JSON lines; pass everything else through unchanged."""
        is_addons_line = (
            blob.get('osxcollector_section') == 'firefox'
            and blob.get('osxcollector_subsection') == 'json_files'
            and blob.get('osxcollector_json_file') in ('addons.json', 'extensions.json')
        )
        if not is_addons_line:
            return blob

        for addon in DictUtils.get_deep(blob, 'contents.addons', []):
            record = {
                'osxcollector_section': 'firefox',
                'osxcollector_subsection': 'extensions',
                'osxcollector_incident_id': blob['osxcollector_incident_id'],
                'name': DictUtils.get_deep(addon, 'defaultLocale.name', addon.get('name')),
                'description': DictUtils.get_deep(addon, 'defaultLocale.description', addon.get('description')),
                'path': addon.get('id'),
            }
            if blob.get('osxcollector_username'):
                record['osxcollector_username'] = blob['osxcollector_username']

            self._new_lines.append(record)

        # The source line is consumed; its extensions are emitted at end_of_lines.
        return None

    def end_of_lines(self):
        """Emit one record per extension found while filtering."""
        return self._new_lines
class FindExtensionsFilter(OutputFilter):

    """Reads the Chrome preferences JSON blob and creates records about the extensions and plugins.

    Looks for lines where:
    ('osxcollector_section' == 'chrome' and 'osxcollector_subsection' == 'preferences')
    and parses the extension settings out of the preferences JSON.
    """

    def __init__(self, **kwargs):
        super(FindExtensionsFilter, self).__init__(**kwargs)
        self._new_lines = []

    def filter_line(self, blob):
        """Consume chrome preferences lines; pass everything else through unchanged."""
        if blob.get('osxcollector_section') != 'chrome' or blob.get('osxcollector_subsection') != 'preferences':
            return blob

        settings_by_id = DictUtils.get_deep(blob, 'contents.extensions.settings', {})
        for setting in settings_by_id.values():
            record = {
                'osxcollector_section': 'chrome',
                'osxcollector_subsection': 'extensions',
                'osxcollector_incident_id': blob['osxcollector_incident_id'],
                'state': setting.get('state'),
                'was_installed_by_default': setting.get('was_installed_by_default'),
                'name': DictUtils.get_deep(setting, 'manifest.name'),
                'description': DictUtils.get_deep(setting, 'manifest.description'),
                'path': setting.get('path'),
            }
            if blob.get('osxcollector_username'):
                record['osxcollector_username'] = blob['osxcollector_username']

            self._new_lines.append(record)

        # The source line is consumed; its extensions are emitted at end_of_lines.
        return None

    def end_of_lines(self):
        """Emit one record per extension found while filtering."""
        return self._new_lines
These can be of the form "a.b" to look at "b" in {"a": {"b": "foo"}} 12 | # - blacklist_file_path, the path to a file containing values considered blacklisted. Any line starting with # is skipped 13 | # - blacklist_is_regex, should values in the file be treated as Python regex 14 | # - blacklist_is_domains, should values in the file be treated as domains and analyzed with some smart regex to retrieve subdomain etc. 15 | blacklists: 16 | - blacklist_name: "hashes" 17 | blacklist_keys: 18 | - "md5" 19 | - "sha1" 20 | - "sha2" 21 | blacklist_file_path: "/tmp/hash_blacklist.txt" 22 | blacklist_is_regex: False 23 | - blacklist_name: "domains" 24 | blacklist_keys: 25 | - "osxcollector_domains" 26 | blacklist_file_path: "/tmp/domain_blacklist.txt" 27 | blacklist_is_regex: False 28 | blacklist_is_domains: True 29 | 30 | # domain_whitelist is a special blacklist entry. Anything on this list won't be looked up with 31 | # OpenDNS or VirusTotal 32 | domain_whitelist: 33 | blacklist_name: "Ignore Domains" 34 | blacklist_keys: 35 | - "osxcollector_domains" 36 | blacklist_file_path: "/tmp/domain_whitelist.txt" 37 | blacklist_is_domains: True 38 | blacklist_is_regex: True 39 | 40 | opendns: 41 | LookupDomainsFilter: 42 | cache_file_name: "/tmp/cache.opendns.LookupDomainsFilter.json" 43 | RelatedDomainsFilter: 44 | cache_file_name: "/tmp/cache.opendns.RelatedDomainsFilter.json" 45 | 46 | shadowserver: 47 | LookupHashesFilter: 48 | cache_file_name: "/tmp/cache.shadowserver.LookupHashesFilter.json" 49 | 50 | virustotal: 51 | LookupHashesFilter: 52 | cache_file_name: "/tmp/cache.virustotal.LookupHashesFilter.json" 53 | LookupDomainsFilter: 54 | cache_file_name: "/tmp/cache.virustotal.LookupDomainsFilter.json" 55 | LookupURLsFilter: 56 | cache_file_name: "/tmp/cache.virustotal.LookupURLsFilter.json" 57 | resources_per_req: 4 58 | 59 | alexa: 60 | LookupRankingsFilter: 61 | cache_file_name: "/tmp/cache.alexa.LookupRankingsFilter.json" 62 | 
class LookupRankingsFilter(ThreatFeedFilter):

    """A class to lookup traffic rankings using AWIS API."""

    def __init__(self, lookup_when=None, **kwargs):
        """Looks up 'osxcollector_domains' values and adds 'osxcollector_alexa_rank'.

        Args:
            lookup_when - Optional predicate of a blob deciding whether to perform the lookup.
        """
        super(LookupRankingsFilter, self).__init__(
            'osxcollector_domains',
            'osxcollector_alexa_rank',
            lookup_when=lookup_when,
            name_of_api_key=None,
            **kwargs
        )

    def _lookup_iocs(self, domains, resource_per_req=25):
        """Caches the Alexa ranking info for a set of domains.

        Args:
            domains - a list of domains.
            resource_per_req - number of resources to batch into a single API request.
        Returns:
            A dict with domain as key and threat info as value
        """
        traffic_info = {}

        cache_file_name = config_get_deep('alexa.LookupRankingsFilter.cache_file_name', None)
        ar = AlexaRankingApi(resource_per_req, cache_file_name=cache_file_name)

        # Iterate key/value pairs directly; the old `iocs = domains` alias
        # and `reports[domain]` re-lookup added nothing.
        reports = ar.get_alexa_rankings(domains)
        for domain, report in reports.items():
            if report and self._should_store_ioc_info(report):
                traffic_info[domain] = report

        return traffic_info

    def _should_store_ioc_info(self, report):
        """Only store if traffic ranking passes a certain threshold.

        Args:
            report - a dict from get_alexa_rankings
        Returns:
            booleans
        """
        # Always include Alexa ranking information since we do not yet
        # categorize by it for the output summaries, rather just use it as an
        # additional source of threat intel.
        return True
class TestCleanDomain:

    """Tests for clean_domain."""

    def _assert_cleaned(self, dirty_domain, expected):
        assert clean_domain(dirty_domain) == expected

    def test_trailing_and_leading_dots(self):
        self._assert_cleaned('.www.example.com.', 'www.example.com')

    def test_trailing_and_leading_slashes(self):
        self._assert_cleaned('//www.example.com//', 'www.example.com')

    def test_unicode_prefix(self):
        self._assert_cleaned('\xadwww.example.com', 'www.example.com')

    def test_unicode_prefix2(self):
        self._assert_cleaned(u'\xadwww.example.com', 'www.example.com')

    def test_unicode_mid(self):
        self._assert_cleaned('stinkum.\xadexample.com', 'stinkum.example.com')

    def test_unicode_mid2(self):
        self._assert_cleaned(u'stinkum.\xadexample.com', 'stinkum.example.com')

    def test_punicoded(self):
        # TODO: OSXCollector is confused by stuff that ought to be punycode... or something
        self._assert_cleaned('hotmaıll.com', 'hotmall.com')

    def test_unicode_punicoded(self):
        self._assert_cleaned(u'hotmaıll.com', 'hotmall.com')

    def test_single_word(self):
        with pytest.raises(BadDomainError):
            clean_domain('oneword')


class TestExpandDomain:

    """Tests for expand_domain."""

    def _assert_expansion(self, initial_domain, expected):
        actual = sorted(expand_domain(initial_domain))
        assert actual == sorted(expected)

    def test_simple_subdomain(self):
        self._assert_expansion('www.example.com', ['example.com', 'www.example.com'])

    def test_no_subdomain(self):
        self._assert_expansion('example.com', ['example.com'])

    def test_complex_subdomain(self):
        self._assert_expansion('www.foo.bar.whiz.example.com', ['example.com', 'www.foo.bar.whiz.example.com'])

    def test_unicode_subdomain(self):
        self._assert_expansion('www.jobbörse.com', ['www.jobbörse.com', 'jobbörse.com'])
class FindBlacklistedFilter(OutputFilter):

    """Adds 'osxcollector_blacklist' key to lines matching a blacklist.

    This filter compares each line to a set of blacklists and marks lines that match the blacklist.
    This is proving useful for filtering known hashes, known bad filenames, known bad domains, etc.

    Configuration Keys:
        blacklist_name - [REQUIRED] the name of the blacklist
        blacklist_keys - [REQUIRED] get the value of these keys and compare against the blacklist
        blacklist_is_regex - [REQUIRED] should the values in the blacklist file be treated as regex
        blacklist_file_path - [REQUIRED if no blacklist_data_feed] path to a file with the actual values to blacklist
        blacklist_data_feed - [REQUIRED if no blacklist_file_path] name of the data feed from which data is read
        blacklist_is_domains - [OPTIONAL] interpret values as domains and do some smart regex and subdomain stuff with them
    """

    def __init__(self, **kwargs):
        super(FindBlacklistedFilter, self).__init__(**kwargs)
        data_feeds = kwargs.get('data_feeds', {})
        self._blacklists = [
            create_blacklist(config_chunk, data_feeds)
            for config_chunk in config_get_deep('blacklists')
        ]

    def filter_line(self, blob):
        """Find blacklisted values in a line.

        Lines are never cached; every line in produces a line out. Only the
        first blacklist that matches is recorded (the loop breaks on a hit).

        Args:
            blob - A dict of data representing a line of output from OSXCollector
        Returns:
            The blob, annotated with 'osxcollector_blacklist' on a match.
        """
        for blacklist in self._blacklists:
            matching_term = blacklist.match_line(blob)
            if matching_term:
                # Chained setdefault replaces the previous three-step
                # setdefault / setdefault / append sequence.
                blob.setdefault('osxcollector_blacklist', {}).setdefault(blacklist.name, []).append(matching_term)
                break

        return blob
class TestLookupDomainsFilter(RunFilterTest):

    """Tests for the VirusTotal LookupDomainsFilter."""

    @staticmethod
    def _expected_report(domain, downloaded, referrer, communicating, urls):
        """Build the expected 'osxcollector_vtdomain' entry for one domain."""
        return {
            'domain': domain,
            'response_code': 1,
            'detections': {
                'undetected_referrer_samples': 0,
                'undetected_communicating_samples': 0,
                'detected_downloaded_samples': downloaded,
                'detected_referrer_samples': referrer,
                'detected_communicating_samples': communicating,
                'detected_urls': urls,
            },
            'categorization': {},
        }

    def test_no_domains(self):
        blobs_in = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]

        self.run_test(LookupDomainsFilter, input_blobs=blobs_in, expected_output_blobs=blobs_in)

    def test_benign_domains(self):
        blobs_in = [
            {'osxcollector_domains': ['good.example.com'], 'dingo': 'bingo', 'apple': [3, 14]},
            {'osxcollector_domains': ['good.example.co.uk'], 'bingo': 'bongo', 'orange': 'banana'},
        ]

        self.run_test(LookupDomainsFilter, input_blobs=blobs_in, expected_output_blobs=blobs_in)

    def test_suspicious_domains(self):
        blobs_in = [
            {'osxcollector_domains': ['evil.example.com'], 'dingo': 'bingo', 'apple': [3, 14]},
            {'osxcollector_domains': ['evil.example.co.uk'], 'bingo': 'bongo', 'orange': 'banana'},
        ]
        expected_vtdomains = [
            [self._expected_report('evil.example.com', 5, 5, 5, 5)],
            [self._expected_report('evil.example.co.uk', 4, 5, 5, 6)],
        ]
        blobs_out = self.run_test(LookupDomainsFilter, input_blobs=blobs_in)
        self.assert_key_added_to_blob('osxcollector_vtdomain', expected_vtdomains, blobs_in, blobs_out)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# LookupHashesFilter uses VirusTotal to lookup the values in 'sha2' and add 'osxcollector_vthash' key.
#
from __future__ import absolute_import
from __future__ import unicode_literals

from threat_intel.virustotal import VirusTotalApi

from osxcollector.output_filters.base_filters.output_filter import run_filter_main
from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter
from osxcollector.output_filters.util.config import config_get_deep


class LookupHashesFilter(ThreatFeedFilter):

    """A class to lookup hashes using VirusTotal API."""

    def __init__(self, lookup_when=None, **kwargs):
        super(LookupHashesFilter, self).__init__(
            'sha2',
            'osxcollector_vthash', lookup_when=lookup_when,
            name_of_api_key='virustotal', **kwargs
        )

    def _lookup_iocs(self, all_iocs, resource_per_req=25):
        """Caches the VirusTotal info for a set of hashes.

        Args:
            all_iocs - a list of hashes.
            resource_per_req - number of hashes to send per API request.
        Returns:
            A dict with hash as key and threat info as value
        """
        cache_file_name = config_get_deep('virustotal.LookupHashesFilter.cache_file_name', None)
        vt = VirusTotalApi(self._api_key, resource_per_req, cache_file_name=cache_file_name)
        reports = vt.get_file_reports(all_iocs)

        # Keep only non-empty reports that clear the detection threshold.
        return {
            hash_val: self._trim_hash_report(report)
            for hash_val, report in reports.items()
            if report and self._should_store_ioc_info(report)
        }

    def _should_store_ioc_info(self, report, min_hits=1):
        """Only store if the hash has > min_hits positive detections.

        Args:
            report - A dict response from get_file_reports
            min_hits - Minimum number of VT positives
        Returns:
            boolean
        """
        return 1 == report.get('response_code') and min_hits < report.get('positives', 0)

    def _trim_hash_report(self, report):
        """Copy just the required keys from the report into a new report.

        Args:
            report - A dict response from get_file_reports
        Returns:
            A smaller dict
        """
        copy_keys = [
            'scan_id',
            'sha1',
            'sha256',
            'md5',
            'scan_date',
            'permalink',
            'positives',
            'total',
            'response_code',
        ]
        # Dict comprehension replaces the older dict([(k, v) for ...]) construction.
        return {key: report.get(key) for key in copy_keys}


def main():
    run_filter_main(LookupHashesFilter)


if __name__ == '__main__':
    main()
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from osxcollector.output_filters.virustotal.lookup_hashes import LookupHashesFilter
from tests.output_filters.run_filter_test import RunFilterTest


class TestLookupHashesFilter(RunFilterTest):

    """Runs LookupHashesFilter and checks the 'osxcollector_vthash' annotations."""

    def test_no_hashes(self):
        """Lines without a 'sha2' key pass through unchanged."""
        lines = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]
        self.run_test(LookupHashesFilter, input_blobs=lines, expected_output_blobs=lines)

    def test_benign_hashes(self):
        """Hashes with clean reports are not annotated."""
        lines = [
            {'sha2': 'b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7', 'dingo': 'bingo', 'apple': [3, 14]},
            {'sha2': '52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c', 'bingo': 'bongo', 'orange': 'banana'},
        ]
        self.run_test(LookupHashesFilter, input_blobs=lines, expected_output_blobs=lines)

    def test_suspicious_hashes(self):
        """Hashes with positive detections get the trimmed VT report attached."""
        lines = [
            {'sha2': 'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302', 'dingo': 'bingo', 'apple': [3, 14]},
            {'sha2': '6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360', 'bingo': 'bongo', 'orange': 'banana'},
        ]
        expected = [
            [{
                'scan_id': 'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302-1273894724',
                'sha1': 'da9b79f2fd33d002033b69a9a346af4671a9e16b',
                'sha256': 'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302',
                'md5': '0c71d8cedc8bbb2b619a76d1478c4348',
                'scan_date': '2015-01-15 16:42:01',
                'permalink': 'https://www.virustotal.com/file/'
                             'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302/analysis/1273894724/',
                'total': 40,
                'positives': 40,
                'response_code': 1,
            }],
            [{
                'scan_id': '52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c-1273894724',
                'sha1': '92e3750a9f0eef6290dd83867eff88064e9c01bb',
                'sha256': '6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360',
                'md5': '06506cc06cf0167ea583de62c98eae2c',
                'scan_date': '2010-05-15 03:38:44',
                'permalink': 'https://www.virustotal.com/file/'
                             '6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360/analysis/1273894724/',
                'total': 40,
                'positives': 40,
                'response_code': 1,
            }],
        ]
        results = self.run_test(LookupHashesFilter, input_blobs=lines)
        self.assert_key_added_to_blob('osxcollector_vthash', expected, lines, results)
| import six 12 | 13 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 14 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 15 | 16 | 17 | class SortHistoryFilter(OutputFilter): 18 | 19 | """Joins Firefox browser history 'visits' and 'urls' tables, producing a time sorted browser history. 20 | 21 | In the output look for lines where: 22 | ('osxcollector_section' == 'chrome' and 'osxcollector_subsection' == 'history' and 'osxcollector_table_name' == 'visits') 23 | for some snazzy browser history stuff. 24 | """ 25 | 26 | def __init__(self, **kwargs): 27 | super(SortHistoryFilter, self).__init__(**kwargs) 28 | 29 | self._visits_table = dict() 30 | self._places_table = dict() 31 | 32 | def filter_line(self, blob): 33 | """Cache the 'visits' and 'urls' tables.""" 34 | if 'firefox' == blob.get('osxcollector_section') and 'history' == blob.get('osxcollector_subsection'): 35 | table = blob.get('osxcollector_table_name') 36 | 37 | if 'moz_historyvisits' == table: 38 | if self._validate_visit(blob): 39 | self._visits_table[blob['place_id']] = blob 40 | blob = None # Consume the line 41 | elif 'moz_places' == table: 42 | if self._validate_places(blob): 43 | self._places_table[blob['id']] = blob 44 | blob = None # Consume the line 45 | 46 | return blob 47 | 48 | def end_of_lines(self): 49 | """Join the 'visits' and 'urls' tables into a single browser history and timeline.""" 50 | history = list() 51 | 52 | for visit in six.itervalues(self._visits_table): 53 | place = self._places_table.get(visit.get('place_id')) 54 | if place: 55 | add_keys = [key for key in visit if key not in place] 56 | record = copy.deepcopy(place) 57 | for key in add_keys: 58 | record[key] = visit[key] 59 | record['osxcollector_browser_history'] = 'firefox' 60 | history.append(record) 61 | 62 | return sorted(history, key=lambda x: x['last_visit_date'], reverse=True) 63 | 64 | @classmethod 65 | def _validate_visit(cls, blob): 66 | """Does 
the visit dict have the required fields? 67 | 68 | Args: 69 | blob: a visit dict 70 | Returns: 71 | boolean 72 | """ 73 | required_fields = ['place_id'] 74 | return all([field in blob for field in required_fields]) 75 | 76 | @classmethod 77 | def _validate_places(cls, blob): 78 | """Does the place dict have the required fields? 79 | 80 | Args: 81 | blob: a place dict 82 | Returns: 83 | boolean 84 | """ 85 | required_fields = ['id'] 86 | return all([field in blob for field in required_fields]) 87 | 88 | 89 | def main(): 90 | run_filter_main(SortHistoryFilter) 91 | 92 | 93 | if __name__ == '__main__': 94 | main() 95 | -------------------------------------------------------------------------------- /tests/output_filters/shadowserver/lookup_hashes_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from osxcollector.output_filters.shadowserver.lookup_hashes import LookupHashesFilter 6 | from tests.output_filters.run_filter_test import RunFilterTest 7 | 8 | 9 | class TestLookupHashesFilter(RunFilterTest): 10 | 11 | def setup_method(self, method): 12 | self._known_sha1_input = [ 13 | { 14 | 'sha2': '1fafe48f626fdc030b0a0efc1008d51cd3078d1b3ec95f808d12afbfef458b23', 15 | 'sha1': '5d87de61cb368c93325dd910c202b8647f8e90ed', 16 | 'ctime': '2014-12-05 16:50:48', 17 | 'osxcollector_plist_path': '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Info.plist', 18 | 'mtime': '2014-09-19 00:42:35', 19 | 'osxcollector_incident_id': 'RecalibratedTurnip-2014_12_21-18_49_52', 20 | 'osxcollector_section': 'kext', 21 | 'osxcollector_bundle_id': 'com.apple.kpi.libkern', 22 | 'file_path': '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Libkern', 23 | 'md5': '6746005c822ceb6737b871698d3ed22f', 24 | }, 25 | ] 26 | self._unknown_sha1_input = [ 27 | { 28 | 'sha2': 
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from osxcollector.output_filters.shadowserver.lookup_hashes import LookupHashesFilter
from tests.output_filters.run_filter_test import RunFilterTest


class TestLookupHashesFilter(RunFilterTest):

    """Runs the ShadowServer LookupHashesFilter against kext records with known and unknown hashes."""

    def setup_method(self, method):
        """Fixtures: one record whose hash is in the bin-test data, one that is not."""
        self._known_sha1_input = [
            {
                'sha2': '1fafe48f626fdc030b0a0efc1008d51cd3078d1b3ec95f808d12afbfef458b23',
                'sha1': '5d87de61cb368c93325dd910c202b8647f8e90ed',
                'ctime': '2014-12-05 16:50:48',
                'osxcollector_plist_path': '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Info.plist',
                'mtime': '2014-09-19 00:42:35',
                'osxcollector_incident_id': 'RecalibratedTurnip-2014_12_21-18_49_52',
                'osxcollector_section': 'kext',
                'osxcollector_bundle_id': 'com.apple.kpi.libkern',
                'file_path': '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Libkern',
                'md5': '6746005c822ceb6737b871698d3ed22f',
            },
        ]
        self._unknown_sha1_input = [
            {
                'sha2': '5148211a7bc4a5d02913b0037805f20704f329e1739b5a6d2338fc84c1780b71',
                'sha1': '816a85d89ae34d2dc73b8c768eecb03935c568ba',
                'ctime': '2014-12-05 16:53:07',
                'osxcollector_plist_path': '/System/Library/Extensions/AMDRadeonX3000GLDriver.bundle/Contents/Info.plist',
                'mtime': '2014-09-28 22:34:42',
                'osxcollector_incident_id': 'RecalibratedTurnip-2014_12_21-18_49_52',
                'osxcollector_section': 'kext',
                'osxcollector_bundle_id': 'com.apple.AMDRadeonX3000GLDriver',
                'file_path': '/System/Library/Extensions/AMDRadeonX3000GLDriver.bundle/Contents/MacOS/AMDRadeonX3000GLDriver',
                'md5': '967698d9ad4171bed991df85e1c72e56',
            },
        ]

    def _filter_single_record(self, blobs):
        """Run the filter over a one-record input and return that record."""
        results = self.run_test(LookupHashesFilter, blobs)
        assert len(results) == 1
        return results[0]

    def test_no_match(self):
        record = self._filter_single_record(self._unknown_sha1_input)
        assert 'osxcollector_shadowserver' not in record

    def test_known_match(self):
        record = self._filter_single_record(self._known_sha1_input)
        assert 'osxcollector_shadowserver' in record

    def test_known_match_different_path_prefix(self):
        self._known_sha1_input[0]['file_path'] = '/new_path/Libkern'
        record = self._filter_single_record(self._known_sha1_input)
        assert 'osxcollector_shadowserver' in record

    def test_wrong_filename(self):
        """Change the filename and don't match"""
        self._known_sha1_input[0]['file_path'] = 'wrong_name'
        record = self._filter_single_record(self._known_sha1_input)
        assert 'osxcollector_shadowserver' not in record

    def test_partial_filename(self):
        """Change the filename and don't match"""
        self._known_sha1_input[0]['file_path'] = '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Not_Quite_Libkern'
        record = self._filter_single_record(self._known_sha1_input)
        assert 'osxcollector_shadowserver' not in record
self._known_sha1_input) 68 | assert len(output_blobs) == 1 69 | assert 'osxcollector_shadowserver' not in output_blobs[0] 70 | -------------------------------------------------------------------------------- /osxcollector/output_filters/virustotal/lookup_urls.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # LookupURLsFilter uses VirusTotal to lookup the URLs in 'LSQuarantineDataURLString' and add 'osxcollector_vturl' key. 5 | # 6 | from __future__ import absolute_import 7 | from __future__ import unicode_literals 8 | 9 | import re 10 | 11 | from threat_intel import VirusTotalApi 12 | 13 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 14 | from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter 15 | from osxcollector.output_filters.util.config import config_get_deep 16 | 17 | 18 | class LookupURLsFilter(ThreatFeedFilter): 19 | 20 | """A class to find suspicious URLs using VirusTotal API.""" 21 | 22 | SCHEMES = re.compile('https?') 23 | 24 | def __init__(self, lookup_when=None, **kwargs): 25 | lookup_when_url_scheme_matches = self._generate_lookup_when(lookup_when) 26 | super(LookupURLsFilter, self).__init__( 27 | 'LSQuarantineDataURLString', 'osxcollector_vturl', 28 | lookup_when=lookup_when_url_scheme_matches, 29 | name_of_api_key='virustotal', **kwargs 30 | ) 31 | 32 | def _generate_lookup_when(self, only_lookup_when): 33 | """Generates functions that checks whether the blob contains a valid URL 34 | in LSQuarantineDataURLString field. 35 | """ 36 | def check_url_scheme(blob): 37 | return self.SCHEMES.match(blob['LSQuarantineDataURLString']) and (not only_lookup_when or only_lookup_when(blob)) 38 | return check_url_scheme 39 | 40 | def _lookup_iocs(self, all_iocs, resource_per_req=25): 41 | """Caches the VirusTotal info for a set of URLs. 42 | 43 | Args: 44 | all_iocs - a list of URLs. 
45 | Returns: 46 | A dict with URL as key and threat info as value 47 | """ 48 | threat_info = {} 49 | 50 | cache_file_name = config_get_deep('virustotal.LookupURLsFilter.cache_file_name', None) 51 | vt = VirusTotalApi(self._api_key, resource_per_req, cache_file_name=cache_file_name) 52 | reports = vt.get_url_reports(all_iocs) 53 | 54 | for url in reports: 55 | report = reports[url] 56 | if not report: 57 | continue 58 | if self._should_store_ioc_info(report): 59 | threat_info[url] = self._trim_url_report(report) 60 | 61 | return threat_info 62 | 63 | def _should_store_ioc_info(self, report, min_hits=1): 64 | """Only store if the hash has > min_hits positive detections. 65 | 66 | Args: 67 | report - A dict response from get_url_reports 68 | min_hits - Minimum number of VT positives 69 | Returns: 70 | boolean 71 | """ 72 | return 1 == report.get('response_code') and min_hits < report.get('positives', 0) 73 | 74 | def _trim_url_report(self, report): 75 | """Copy just the required keys from the report into a new report. 
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from osxcollector.output_filters.opendns.related_domains import RelatedDomainsFilter
from tests.output_filters.run_filter_test import RunFilterTest


class TestRelatedDomainsFilter(RunFilterTest):

    """Runs RelatedDomainsFilter seeded with fixture domains/IPs and checks 'osxcollector_related'."""

    def setup_method(self, method):
        self._initial_domains = ['zendesk.com', 'jpmorganaccess.com', 'opendns.zendesk.com', 'yelp.com']
        self._initial_ips = ['159.53.60.177']

    def _run_test(self, input_blobs, expected_relateddomains):
        """Run the filter with the fixture seeds and assert the added key's value."""
        def build_filter():
            return RelatedDomainsFilter(initial_domains=self._initial_domains, initial_ips=self._initial_ips)
        results = self.run_test(build_filter, input_blobs=input_blobs)
        self.assert_key_added_to_blob('osxcollector_related', expected_relateddomains, input_blobs, results)

    def test_no_domains(self):
        blobs = [
            {'tater': 'tots'},
        ]
        self._run_test(blobs, None)

    def test_direct_domain_match(self):
        # Direct meaning the domain in the input is an initial domain
        blobs = [
            {'osxcollector_domains': ['opendns.zendesk.com']},
        ]
        expected = [
            {
                'domains': {'opendns.zendesk.com': ['opendns.zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_related_domain_match(self):
        blobs = [
            {'osxcollector_domains': ['webmd.com']},
        ]
        expected = [
            {
                'domains': {'webmd.com': ['opendns.zendesk.com', 'zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_multiple_related_domain_match(self):
        blobs = [
            {'osxcollector_domains': ['webmd.com', 'hushmail.zendesk.com']},
        ]
        expected = [
            {
                'domains': {
                    'webmd.com': ['opendns.zendesk.com', 'zendesk.com'],
                    'hushmail.zendesk.com': ['opendns.zendesk.com'],
                },
            },
        ]
        self._run_test(blobs, expected)

    def test_direct_and_related_domain_match(self):
        blobs = [
            {'osxcollector_domains': ['zendesk.com']},
        ]
        expected = [
            {
                'domains': {'zendesk.com': ['opendns.zendesk.com', 'zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_direct_ip_match(self):
        blobs = [
            {'osxcollector_domains': ['jpmorganaccess.com']},
        ]
        expected = [
            {
                'domains': {'jpmorganaccess.com': ['159.53.60.177', 'jpmorganaccess.com', 'opendns.zendesk.com', 'zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_whitelist_domain(self):
        blobs = [
            {'osxcollector_domains': ['yelp.com']},
        ]
        self._run_test(blobs, [None])
python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # RelatedFilesFilter finds files related to specific terms or file names. 5 | # 6 | from __future__ import absolute_import 7 | from __future__ import unicode_literals 8 | 9 | import os.path 10 | from argparse import ArgumentParser 11 | 12 | import simplejson 13 | 14 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 15 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 16 | from osxcollector.output_filters.util.dict_utils import DictUtils 17 | 18 | 19 | class RelatedFilesFilter(OutputFilter): 20 | 21 | """RelatedFilesFilter finds files related to specific terms or file names. 22 | 23 | The file paths passed to the filter during creation are split into arrays of 24 | directory or file names. Anything matching a stop list of common directory names 25 | is discarded. 26 | """ 27 | 28 | def __init__(self, when=None, file_terms=None, **kwargs): 29 | super(RelatedFilesFilter, self).__init__() 30 | self._all_blobs = list() 31 | self._terms = set() 32 | self._usernames = set() 33 | 34 | self._when = when 35 | 36 | if file_terms: 37 | for val in file_terms: 38 | self._create_terms(val) 39 | 40 | def _create_terms(self, val): 41 | for term in os.path.normpath(val.lower()).split(os.path.sep): 42 | if len(term) > 1 and term not in self.STOP_WORDS: 43 | self._terms.add(term) 44 | 45 | def filter_line(self, blob): 46 | self._all_blobs.append(blob) 47 | 48 | if self._when and self._when(blob): 49 | for key in self.FILE_NAME_KEYS: 50 | val = DictUtils.get_deep(blob, key) 51 | if val: 52 | self._create_terms(val) 53 | if 'osxcollector_username' in blob: 54 | self._usernames.add(blob['osxcollector_username'].lower()) 55 | 56 | return None 57 | 58 | def end_of_lines(self): 59 | self._terms = self._terms - self._usernames 60 | 61 | for blob in self._all_blobs: 62 | line = simplejson.dumps(blob).lower() 63 | for term in self._terms: 64 | if term in line: 65 | 
blob.setdefault('osxcollector_related', {}) 66 | blob['osxcollector_related'].setdefault('files', []) 67 | blob['osxcollector_related']['files'].append(term) 68 | 69 | return self._all_blobs 70 | 71 | def get_argument_parser(self): 72 | parser = ArgumentParser() 73 | group = parser.add_argument_group('RelatedFilesFilter') 74 | group.add_argument( 75 | '-f', '--file-term', dest='file_terms', default=[], action='append', 76 | help='[OPTIONAL] Suspicious terms to use in pivoting through file names. May be specified more than once.', 77 | ) 78 | return parser 79 | 80 | @property 81 | def terms(self): 82 | return self._terms 83 | 84 | @property 85 | def usernames(self): 86 | return self._usernames 87 | 88 | # Keys to look in to find file paths 89 | FILE_NAME_KEYS = [ 90 | 'file_path', 91 | 'osxcollector_plist_path', 92 | ] 93 | 94 | # Words that can never be terms 95 | STOP_WORDS = [ 96 | 'applications', 97 | 'bin', 98 | 'contents', 99 | 'cores', 100 | 'coreservices', 101 | 'dev', 102 | 'downloads', 103 | 'extensions', 104 | 'frameworks', 105 | 'helpers', 106 | 'home', 107 | 'information', 108 | 'libexec', 109 | 'libraries', 110 | 'library', 111 | 'macos', 112 | 'malware', 113 | 'net', 114 | 'network', 115 | 'opt', 116 | 'plugins', 117 | 'private', 118 | 'privateframeworks', 119 | 'python', 120 | 'resources', 121 | 'sbin', 122 | 'support', 123 | 'system', 124 | 'tmp', 125 | 'user', 126 | 'users', 127 | 'usr', 128 | 'utilities', 129 | 'versions', 130 | 'var', 131 | ] 132 | 133 | 134 | def main(): 135 | run_filter_main(RelatedFilesFilter) 136 | 137 | 138 | if __name__ == '__main__': 139 | main() 140 | -------------------------------------------------------------------------------- /osxcollector/output_filters/find_domains.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # FindDomainsFilter looks for domains in all input lines and adds those domains into the 
'osxcollector_domains' key. 5 | # 6 | from __future__ import absolute_import 7 | from __future__ import unicode_literals 8 | 9 | import logging 10 | import re 11 | 12 | import six 13 | from six.moves.urllib.parse import unquote_plus 14 | from six.moves.urllib.parse import urlsplit 15 | 16 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 17 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 18 | from osxcollector.output_filters.exceptions import BadDomainError 19 | from osxcollector.output_filters.util.domains import clean_domain 20 | from osxcollector.output_filters.util.domains import expand_domain 21 | 22 | 23 | class FindDomainsFilter(OutputFilter): 24 | 25 | """Adds 'osxcollector_domains' key to output lines. 26 | 27 | This filters parses domains out of anywhere in an output line and adds them to a clean array in the line. 28 | This is helpful as a pre-processing step before sending the domains off to threat APIs or matching against 29 | threat feeds. 30 | """ 31 | 32 | def __init__(self, **kwargs): 33 | super(FindDomainsFilter, self).__init__(**kwargs) 34 | self._domains = set() 35 | 36 | def filter_line(self, blob): 37 | """Find domains in a line.""" 38 | self._domains = set() 39 | self._look_for_domains(blob) 40 | 41 | # self._domains accumulates domains during calls to _look_for_domains 42 | if len(self._domains): 43 | blob['osxcollector_domains'] = sorted(list(self._domains)) 44 | 45 | return blob 46 | 47 | def _look_for_domains(self, val, key=None): 48 | """Given a value and perhaps a key, look for domains. 49 | 50 | Args: 51 | val: The value, could be of any type 52 | key: A string key associated with the value. 53 | """ 54 | if isinstance(val, six.string_types): 55 | if key in self.HOST_KEYS: 56 | self._add_domain(val) 57 | return 58 | if -1 != self.SCHEMES.search(val): 59 | # Sometimes values are complex strings, like JSON or pickle encoded stuff. 
60 | # Try splitting the string on non-URL related punctuation 61 | for maybe_url in re.split(r'[ \'\(\)\"\[\]\{\}\;\n\t#@\^&\*=]+', val): 62 | domain = self._url_to_domain(maybe_url) 63 | self._add_domain(domain) 64 | elif isinstance(val, list): 65 | for elem in val: 66 | self._look_for_domains(elem) 67 | elif isinstance(val, dict): 68 | for key, elem in six.iteritems(val): 69 | self._look_for_domains(elem, key) 70 | self._look_for_domains(key) 71 | 72 | def _url_to_domain(self, maybe_url): 73 | """Converts an URL to a domain. 74 | 75 | The code deals with eccentricities of both unquote_plus and split_url. 76 | 77 | Args: 78 | maybe_url - a string that might be an URL. 79 | Returns: 80 | a string representing the domain or None 81 | """ 82 | if self.SCHEMES.match(maybe_url): 83 | url = unquote_plus(maybe_url) 84 | 85 | try: 86 | split_url = urlsplit(url) 87 | if split_url.hostname: 88 | return split_url.hostname 89 | # in case "url" is not a valid URL, just log a message 90 | except ValueError as e: 91 | logging.info('Cannot split the URL: {0}. 
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

import simplejson
from mock import patch
from six import StringIO

from osxcollector.output_filters.base_filters.output_filter import _run_filter


class RunFilterTest:

    """Base class giving filter tests a mocked stdin/stdout/config harness."""

    def run_test(self, create_filter, input_blobs=None, expected_output_blobs=None):
        """Mocks out stdin, stdout, and config then runs input lines through an OutputFilter.

        Args:
            create_filter: A callable that returns an OutputFilter.
            input_blobs: An array of dicts to pass to OutputFilter. These will be serialized into strings and passed as stdin.
            expected_output_blobs: An array of dicts the output of the OutputFilter must match.
        Returns:
            The list of dicts the filter wrote to stdout.
        """
        if not input_blobs:
            input_blobs = []
        input_lines = '\n'.join([simplejson.dumps(blob) for blob in input_blobs])

        with patch(
            'sys.stdin', StringIO(input_lines),
        ), patch(
            'sys.stdout', new_callable=StringIO,
        ) as mock_stdout, patch(
            'osxcollector.output_filters.util.config._config_file_path',
            return_value='./tests/output_filters/data/test_osxcollector_config.yaml',
        ):
            output_filter = create_filter()
            _run_filter(output_filter)
            output_lines = [line for line in mock_stdout.getvalue().split('\n') if len(line)]
            output_blobs = [simplejson.loads(line) for line in output_lines]

        if expected_output_blobs:
            assert len(output_blobs) == len(expected_output_blobs)

            # NOTE(review): index 0 is skipped here - presumably the first line
            # may carry filter-added metadata; confirm whether this is intended.
            for expected_blob, actual_blob in zip(expected_output_blobs[1:], output_blobs[1:]):
                assert_equal_sorted(expected_blob, actual_blob)

        return output_blobs

    def assert_key_added_to_blob(self, added_key, expected_values, input_blobs, output_blobs):
        """Verifies that a single key has been added to each input_blob with an expected value.

        Asserts that effectively:
        output_blobs = [input_blob.update(key=expected_value) for expected_value, input_blob in zip(expected_values, input_blobs)]

        Args:
            added_key: The name of the key that should have been added.
            expected_values: A list containing the expected value of the key for each input_blob
            input_blobs: A list of dicts that were the initial input.
            output_blobs: A list of dicts that are the output. NOTE: the added key is
                removed from these dicts as a side effect of the comparison.
        """

        if expected_values:
            actual_values = list(blob.get(added_key, None) for blob in output_blobs)
            for actual, expected in zip(actual_values, expected_values):
                assert_equal_sorted(actual, expected)

        # Minus the added key, the input should be unchanged
        for input_blob, output_blob in zip(input_blobs, output_blobs):
            if added_key in output_blob:
                del output_blob[added_key]
            assert_equal_sorted(input_blob, output_blob)

    def load_reports(self, filename):
        """Read a JSON fixture file and return the parsed object."""
        with open(filename, 'r') as fp:
            file_contents = fp.read()
        reports = simplejson.loads(file_contents)
        return reports


def assert_equal_sorted(a, b):
    """A version of T.assert_equal that ignores the ordering of lists or sets.

    Args:
        a: first item to compare
        b: next time to compare
    Raises:
        assert when items don't match
    """
    assert sort_for_comparison(a) == sort_for_comparison(b)


def sort_for_comparison(val):
    """Return a canonically ordered version of a list, set, or dict; other values unchanged.

    Args:
        val: A value of any type
    Returns:
        A more easily comparable version of the input
    """
    if isinstance(val, list):
        try:
            return sorted(val)
        except Exception:
            # Unorderable/mixed element types; compare as-is.
            return val
    elif isinstance(val, set):
        return sort_for_comparison(list(val))
    elif isinstance(val, dict):
        # BUG FIX: build a new dict rather than mutating the caller's dict
        # in place; the equality result is unchanged.
        return dict((key, sort_for_comparison(val[key])) for key in val)
    else:
        return val
58 | """ 59 | 60 | if expected_values: 61 | actual_values = list(blob.get(added_key, None) for blob in output_blobs) 62 | for actual, expected in zip(actual_values, expected_values): 63 | assert_equal_sorted(actual, expected) 64 | 65 | # Minus the added key, the input should be unchanged 66 | for input_blob, output_blob in zip(input_blobs, output_blobs): 67 | if added_key in output_blob: 68 | del output_blob[added_key] 69 | assert_equal_sorted(input_blob, output_blob) 70 | 71 | def load_reports(self, filename): 72 | with open(filename, 'r') as fp: 73 | file_contents = fp.read() 74 | reports = simplejson.loads(file_contents) 75 | return reports 76 | 77 | 78 | def assert_equal_sorted(a, b): 79 | """A version of T.assert_equal that ignores the ordering of lists or sets. 80 | 81 | Args: 82 | a: first item to compare 83 | b: next time to compare 84 | Raises: 85 | assert when items don't match 86 | """ 87 | assert sort_for_comparison(a) == sort_for_comparison(b) 88 | 89 | 90 | def sort_for_comparison(val): 91 | """Sort the input if it is a list or dict or set, return it unchanged otherwise. 92 | 93 | Args: 94 | val: A value of any type 95 | Returns: 96 | A more easily comparable version of the input 97 | """ 98 | if isinstance(val, list): 99 | try: 100 | return sorted(val) 101 | except Exception: 102 | return val 103 | elif isinstance(val, set): 104 | return sort_for_comparison(list(val)) 105 | elif isinstance(val, dict): 106 | for key in val: 107 | val[key] = sort_for_comparison(val[key]) 108 | return val 109 | else: 110 | return val 111 | -------------------------------------------------------------------------------- /osxcollector/output_filters/base_filters/chain.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # ChainFilter is a base class that passes each line through a chain of OutputFilters. 
class ChainFilter(OutputFilter):

    """ChainFilter is a base class that passes each line through a chain of OutputFilters.

    This is useful for constructing a single OutputFilter that does multiple things without
    having to run `python -m FilterOne | python -m FilterTwo | python -m FilterThree`.
    """

    def __init__(self, chain, **kwargs):
        """Wires up the `_next_link` property on every OutputFilter in the chain.

        Viewing the chain as a singly linked list makes it trivial to find the filter that
        runs after any given filter. The final link's `_next_link` is set to None.

        Args:
            chain: An enumerable of OutputFilters.
        """
        super(ChainFilter, self).__init__(**kwargs)

        for index, cur_link in enumerate(chain):
            follower_index = index + 1
            cur_link._next_link = chain[follower_index] if follower_index < len(chain) else None

        self._head_of_chain = chain[0]

    def filter_line(self, blob):
        """Each Line of OSXCollector output will be passed to filter_line.

        Hands the line to the first filter in the chain; whatever each filter emits
        flows on to its `_next_link`.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            A dict or None
        """
        return self._on_filter_line(blob, self._head_of_chain)

    def _on_filter_line(self, blob, link):
        """Run a blob through `link` and every filter after it.

        Stops as soon as a filter swallows the line or the chain is exhausted.

        Args:
            blob: A dict representing one line of output from OSXCollector.
            link: An OutputFilter
        Returns:
            A dict or None
        """
        while link and blob:
            blob = link.filter_line(blob)
            link = link._next_link
        return blob

    def end_of_lines(self):
        """Pass end_of_lines to the filter at the head of the chain.

        Returns:
            An enumerable of dicts
        """
        return self._on_end_of_lines(self._head_of_chain)

    def _on_end_of_lines(self, link):
        """Flush `link`, pushing everything it emits through the remainder of the chain.

        Args:
            link: An OutputFilter
        Returns:
            An enumerable of dicts
        """
        tail = link._next_link
        if not tail:
            return link.end_of_lines()

        flushed = []
        for blob in link.end_of_lines():
            pushed = self._on_filter_line(blob, tail)
            if pushed:
                flushed.append(pushed)

        trailing = self._on_end_of_lines(tail)
        if trailing:
            flushed.extend(trailing)

        return flushed

    def get_argument_parser(self):
        """Collects the ArgumentParsers from every OutputFilter in the chain.

        Returns:
            An `argparse.ArgumentParser`
        """
        collected = [
            parser for parser in (
                link.get_argument_parser() for link in self._iter_links()
            ) if parser
        ]

        own_parser = self._on_get_argument_parser()
        if own_parser:
            collected.append(own_parser)

        if not collected:
            return None
        return ArgumentParser(parents=collected, conflict_handler='resolve')

    def _iter_links(self):
        """Yield every OutputFilter in the chain, head first."""
        link = self._head_of_chain
        while link:
            yield link
            link = link._next_link

    def _on_get_argument_parser(self):
        """Returns an ArgumentParser with arguments for just this OutputFilter (not the contained chained OutputFilters).

        Returns:
            An `argparse.ArgumentParser`
        """
        return None
class LookupDomainsFilter(ThreatFeedFilter):

    """A class to lookup hashes using VirusTotal API."""

    def __init__(self, lookup_when=None, **kwargs):
        """Reads 'osxcollector_domains' from each line and writes results to 'osxcollector_vtdomain'.

        Args:
            lookup_when: An optional boolean predicate limiting which lines get lookups.
        """
        super(LookupDomainsFilter, self).__init__(
            'osxcollector_domains', 'osxcollector_vtdomain',
            lookup_when=lookup_when, name_of_api_key='virustotal', **kwargs
        )
        # Built with the blacklist machinery but used as a whitelist: matched domains are skipped.
        self._whitelist = create_blacklist(
            config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}),
        )

    def _lookup_iocs(self, all_iocs, resource_per_req=25):
        """Caches the VirusTotal info for a set of domains.

        Domains on a whitelist will be ignored.

        Args:
            all_iocs - a list of domains.
        Returns:
            A dict with domain as key and threat info as value
        """
        cache_file_name = config_get_deep('virustotal.LookupDomainsFilter.cache_file_name', None)
        vt = VirusTotalApi(self._api_key, resource_per_req, cache_file_name=cache_file_name)

        domains_to_lookup = [domain for domain in all_iocs if not self._whitelist.match_values(domain)]
        reports = vt.get_domain_reports(domains_to_lookup)

        threat_info = {}
        for domain, report in reports.items():
            if not report:
                continue
            trimmed_report = self._trim_domain_report(domain, report)
            if self._should_store_ioc_info(trimmed_report):
                threat_info[domain] = trimmed_report
        return threat_info

    def _should_store_ioc_info(self, trimmed_report):
        """Decide whether a report from VT is interesting enough to store in the output.

        A report is kept when any detection category reaches its threshold.

        Args:
            trimmed_report: A dict of data from VT
        Returns:
            boolean
        """
        thresholds = [
            ('detected_downloaded_samples', 3),
            ('detected_referrer_samples', 3),
            ('detected_communicating_samples', 3),
            ('detected_urls', 3),
        ]
        detections = trimmed_report.get('detections', {})
        return any(detections.get(key, 0) >= minimum for key, minimum in thresholds)

    def _trim_domain_report(self, domain, initial_report):
        """Reorganizes and compacts a VT domain report.

        Args:
            domain - string domain name
            initial_report - dict result of calling VirusTotalApi.get_domain_reports for the domain

        Returns:
            A reorganized and compacted dict.
        """
        sample_keys = [
            'undetected_referrer_samples',
            'undetected_communicating_samples',
            'detected_downloaded_samples',
            'detected_referrer_samples',
            'detected_communicating_samples',
            'detected_urls',
        ]
        categorization_keys = [
            'categories',
            'BitDefender category',
            'BitDefender domain info',
            'Websense ThreatSeeker category',
            'Webutation domain info',
            'WOT domain info',
            'TrendMicro category',
        ]
        just_copy_keys = [
            'response_code',
        ]

        trimmed_report = {
            # Collapse each sample list down to a simple count.
            'detections': {key: len(initial_report.get(key, [])) for key in sample_keys},
            'categorization': {key: initial_report[key] for key in categorization_keys if key in initial_report},
            'domain': domain,
        }
        for copy_key in just_copy_keys:
            if copy_key in initial_report:
                trimmed_report[copy_key] = initial_report[copy_key]

        return trimmed_report


def main():
    run_filter_main(LookupDomainsFilter)


if __name__ == '__main__':
    main()
class ThreatFeedFilter(OutputFilter):

    """A base class to find info on IOCs using some random API.

    Derived classes need only to implement _lookup_iocs()
    If necessary, they should implement _should_add_threat_info_to_blob.

    It is assumed that the API uses an api_key stored in the config.
    """

    # Blobs carrying more IOCs than this are passed through without lookup.
    # NOTE(review): presumably a guard against flooding the API with noisy blobs -- confirm.
    _MAX_IOCS_PER_BLOB = 10

    def __init__(self, ioc_key, output_key, lookup_when=None, name_of_api_key=None, **kwargs):
        """Configure the ThreatFeedFilter.

        Args:
            ioc_key: A string key to look for in each line of OSXCollector output.
                The value of this key is the potential IOC to lookup in a threat feed.
            output_key: A string key which is added to output lines and contains the result of threat feed lookups.
            lookup_when: A boolean function to call to decide whether to perform a lookup on a line.
                Use lookup_when to limit which IOCs are looked up.
            name_of_api_key: A string name of the key in the 'api_key' section of config.
        """
        super(ThreatFeedFilter, self).__init__(**kwargs)

        if name_of_api_key:
            self._api_key = config_get_deep('api_key.{0}'.format(name_of_api_key))

        self._lookup_when = lookup_when
        # Blobs held back until end_of_lines so lookups can be batched.
        self._blobs_with_iocs = []
        # Every distinct IOC seen across all held blobs.
        self.ioc_set = set()

        self._ioc_key = ioc_key
        self._output_key = output_key

    def _lookup_iocs(self, all_iocs):
        """Looks up threat info for IOCs.

        This is the only method a derived class needs to implement.

        Args:
            all_iocs: An enumerable of strings representing all IOCs to lookup.
        Returns:
            A dict of the form {ioc_value: threat_info}
        """
        raise NotImplementedError('Derived classes must implement _lookup_iocs')

    def _should_add_threat_info_to_blob(self, blob, threat_info):
        """Threat info is only added to a blob if this returns True.

        Override this method in derived classes to correlate threat_info and blob data.

        For example, the ShadowServer filter looks up SHA1 hashes. Since SHA1 hashes for different files collide, the ShadowServer
        filter overrides _should_add_threat_info_to_blob and verifies that the filename in the blob matches the filename in the threat
        info.

        Args:
            blob: A dict of data representing a line of output from OSXCollector
            threat_info: A dict of threat info.
        Returns:
            boolean
        """
        return True

    def filter_line(self, blob):
        """Accumulate IOCs to lookup with the ThreatFeed.

        Lines without the IOC key, rejected by lookup_when, or carrying too many IOCs
        pass through unchanged; all others are held until end_of_lines.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            A dict or None
        """
        if self._ioc_key not in blob or (self._lookup_when and not self._lookup_when(blob)):
            return blob

        ioc_list = blob[self._ioc_key]
        if isinstance(ioc_list, six.string_types):
            ioc_list = [ioc_list]

        if len(ioc_list) > self._MAX_IOCS_PER_BLOB:
            return blob

        for ioc in ioc_list:
            if ioc:
                self.ioc_set.add(ioc)

        self._blobs_with_iocs.append(blob)
        return None

    def end_of_lines(self):
        """Performs threat feed lookup on the IOCs and adds output to the stored Lines.

        Returns:
            An enumerable of dicts
        """
        if self.ioc_set:
            self._add_threat_info_to_blobs()
        return self._blobs_with_iocs

    def _add_threat_info_to_blobs(self):
        """Looks up all accumulated IOCs and attaches threat info to the held blobs.

        Results are appended under self._output_key, but only for (blob, info) pairs
        approved by _should_add_threat_info_to_blob.
        """
        # Sort for a deterministic lookup order before handing off to the feed.
        self.ioc_set = sorted(self.ioc_set)
        all_threat_info = self._lookup_iocs(self.ioc_set)
        for blob in self._blobs_with_iocs:
            ioc_list = blob[self._ioc_key]
            if isinstance(ioc_list, six.string_types):
                ioc_list = [ioc_list]

            for ioc in ioc_list:
                info = all_threat_info.get(ioc)
                if not info:
                    continue

                if self._should_add_threat_info_to_blob(blob, info):
                    blob.setdefault(self._output_key, []).append(info)
"b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7", "total": 54}], "response_code": 1, "detected_communicating_samples": [{"positives": 24, "date": "2014-05-30 02:31:16", "sha256": "31ce992017ae628e59fb0599330ba18483777f6f281c660b036649825296a3cb", "total": 52}, {"positives": 34, "date": "2014-04-29 23:16:50", "sha256": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302", "total": 51}, {"positives": 15, "date": "2013-07-09 06:23:40", "sha256": "6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360", "total": 46}, {"positives": 34, "date": "2014-04-29 23:16:46", "sha256": "7db46a7eb4baeeb342d37a6fc05910adeed339450701c600ad973a77aa28b121", "total": 51}, {"positives": 15, "date": "2013-07-09 06:23:00", "sha256": "f33c27745f2bd87344be790465ef984a972fd539dc83bd4f61d4242c607ef1ee", "total": 46}], "undetected_communicating_samples": [], "detected_urls": [{"total": 23, "positives": 6, "url": "www.example.com/bingo", "scan_date": "2015-01-23 14:27"}, {"total": 25, "positives": 23, "url": "www.example.com/bongo", "scan_date": "2015-01-23 14:30"}, {"total": 14, "positives": 12, "url": "www.example.com/dingo", "scan_date": "2015-01-23 14:20"}, {"total": 20, "positives": 18, "url": "www.example.com/dongo", "scan_date": "2015-01-23 14:22"}, {"total": 20, "positives": 19, "url": "www.example.com/orange", "scan_date": "2015-01-23 14:28"}], "undetected_referrer_samples": []}, "good.example.com": {"detected_downloaded_samples": [], "detected_referrer_samples": [], "response_code": 0, "detected_communicating_samples": [], "undetected_communicating_samples": [], "detected_urls": [], "undetected_referrer_samples": []}, "good.example.co.uk": {"detected_downloaded_samples": [], "detected_referrer_samples": [], "response_code": 0, "detected_communicating_samples": [], "undetected_communicating_samples": [], "detected_urls": [], "undetected_referrer_samples": []}, "evil.example.co.uk": {"detected_downloaded_samples": [{"positives": 0, "date": 
"2014-07-29 09:46:22", "sha256": "3044d232d1815c9e1584f406b67c3331c0eaebd304cd280d578e75368e5b0c3a", "total": 54}, {"positives": 0, "date": "2013-05-16 08:57:51", "sha256": "f33c27745f2bd87344be790465ef984a972fd539dc83bd4f61d4242c607ef1ee", "total": 46}, {"positives": 0, "date": "2013-05-03 01:16:53", "sha256": "841f2c2faadf6a28aeb4fd29d7cd3a4156af20a68bd9ad7c2f41db64db06015f", "total": 46}, {"positives": 0, "date": "2013-04-27 20:50:18", "sha256": "02900e181b1941c79c73dadddbd03a8f6f974ca884baf5860cd5a54ac4fb97e1", "total": 46}], "detected_referrer_samples": [{"positives": 16, "sha256": "0de277bca1df07e691c865c84a0dfd849ac0124fab8f9ccde9c28fb3abe24abc", "total": 54}, {"positives": 16, "sha256": "ca34d60f2c1dc20932f2fb8adce1be2a8b9389054d67343ea4c86b9cc9ffabb0", "total": 54}, {"positives": 16, "sha256": "aefddcb96b75fe89195dbbfdd2c373f72492d5f71903eeeaae4afa1f71865515", "total": 54}, {"positives": 15, "sha256": "bee4aecc415e23328b9139b9abdb34a22b36e28da13c75aa8699e07fd7b10307", "total": 53}, {"positives": 16, "sha256": "c6065d39610471d970242a303c3e8905a7200c8aa2fd4bc69a95a3b618df4cb9", "total": 54}], "response_code": 1, "detected_communicating_samples": [{"positives": 24, "date": "2014-05-30 02:31:16", "sha256": "31ce992017ae628e59fb0599330ba18483777f6f281c660b036649825296a3cb", "total": 52}, {"positives": 34, "date": "2014-04-29 23:16:50", "sha256": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302", "total": 51}, {"positives": 15, "date": "2013-07-09 06:23:30", "sha256": "6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360", "total": 46}, {"positives": 39, "date": "2014-04-29 23:16:50", "sha256": "b22eb5ff3793b551470f4758e4cff656b5168e10cfce24ac51ebd5a8a7fffe4a", "total": 51}, {"positives": 35, "date": "2013-07-09 06:23:30", "sha256": "26692ab17432ad292810c31ce4cee9e43c0166e23c2b05950751d52fc1decbd2", "total": 46}], "undetected_communicating_samples": [], "detected_urls": [{"total": 61, "positives": 3, "url": 
class OutputFilter(object):

    """Base class for transforming OSXCollector output; every filter must derive from it.

    Data flows through an OutputFilter like so:
    - Every line of OSXCollector output is handed to OutputFilter.filter_line
    - Once all lines have been fed through, OutputFilter.end_of_lines is called exactly once

    Filters typically work in one of two ways:
    - A line-at-a-time filter transforms and returns each blob directly from filter_line.
    - A whole-input filter stashes blobs during filter_line, then bulk operates on all of
      them when end_of_lines fires.

    OutputFilters use the words 'line' or 'blob' to refer to OSXCollector output.
    """

    def __init__(self, **kwargs):
        """Skeleton constructor.

        Args:
            kwargs: Variable arguments are used to pass filter specific args to OutputFilters.
        """
        pass

    def filter_line(self, blob):
        """Handle a single line of OSXCollector output.

        Return the blob (modified or unmodified) to pass it along, or return
        nothing to swallow the line entirely.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            A dict or None
        """
        return blob

    def end_of_lines(self):
        """Hook invoked once after every line has been fed to filter_line.

        Gives the filter a chance to do processing that requires having seen
        the complete input.

        Returns:
            An enumerable of dicts
        """
        return []

    def get_argument_parser(self):
        """Describes commandline arguments for this OutputFilter.

        The names of the `dest` param for the argument in the ArgumentParser must match the name of positional or
        named arguments in `__init__`

        Returns:
            An `argparse.ArgumentParser`
        """
        return None
def _run_filter(output_filter, input_stream=None, output_stream=None, *args, **kwargs):
    """Feeds stdin to an instance of OutputFilter and spews to stdout.

    Args:
        output_filter: An instance of OutputFilter.
        input_stream: Where to read input from. Defaults to sys.stdin.
        output_stream: Where to write output to. Defaults to sys.stdout.
    """
    input_stream = input_stream or sys.stdin
    output_stream = output_stream or sys.stdout

    def _emit(blob):
        # Serialize one line of output, newline terminated.
        output_stream.write(simplejson.dumps(blob))
        output_stream.write('\n')

    for json_string in _unbuffered_input(input_stream):
        try:
            blob = simplejson.loads(json_string)
        except simplejson.JSONDecodeError as e:
            # Malformed input lines are reported and skipped, never fatal.
            write_exception(e)
            continue

        filtered = output_filter.filter_line(blob)
        if filtered:
            _emit(filtered)

    for blob in output_filter.end_of_lines():
        _emit(blob)

    output_stream.flush()
def create_blacklist(config_chunk, data_feeds=None):
    """Reads the config and builds a Blacklist.

    The blacklist config is sufficiently complex that much of this method deals with simply validating config

    Args:
        config_chunk: A dict of config for building the blacklist
        data_feeds: Dict of generator functions returning the blacklist data
    Returns:
        A Blacklist
    Raises:
        MissingConfigError - when required key does not exist.
    """
    # A mutable default argument would be shared across calls; normalize None instead.
    if data_feeds is None:
        data_feeds = {}

    required_keys = ['blacklist_name', 'blacklist_keys']
    if not all(key in config_chunk for key in required_keys):
        raise MissingConfigError('Blacklist config is missing a required key.\nRequired keys are: {0}'.format(repr(required_keys)))

    if not isinstance(config_chunk['blacklist_keys'], list):
        raise MissingConfigError('The value of \'blacklist_keys\' in Blacklist config must be a list')

    blacklist_name = config_chunk.get('blacklist_name')
    blacklist_keys = config_chunk.get('blacklist_keys')
    blacklist_file_path = config_chunk.get('blacklist_file_path')
    blacklist_data_feed = config_chunk.get('blacklist_data_feed')
    # The blacklist data comes from exactly one of two sources: a file on disk
    # or a named generator supplied through data_feeds.
    if blacklist_file_path:
        if not os.path.exists(blacklist_file_path):
            raise MissingConfigError('The blacklist file {} does not exist'.format(blacklist_file_path))
        blacklist_data_generator = _read_blacklist_file(blacklist_file_path)
    elif blacklist_data_feed:
        if blacklist_data_feed not in data_feeds:
            raise MissingConfigError('Data feed {} not found among provided generators'.format(blacklist_data_feed))
        blacklist_data_generator = data_feeds[blacklist_data_feed]()
    else:
        raise MissingConfigError('Blacklist config is missing a data input.\nEither select a file or a generator object')
    blacklist_is_regex = config_chunk.get('blacklist_is_regex', False)
    blacklist_is_domains = config_chunk.get('blacklist_is_domains', False)
    return Blacklist(
        blacklist_name, blacklist_keys, blacklist_data_generator,
        blacklist_is_regex, blacklist_is_domains,
    )
def _read_blacklist_file(filepath):
    """Yield non-empty, non-comment lines from a blacklist file."""
    with open(filepath, 'r') as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith('#'):
                yield line


class Blacklist(object):

    def __init__(self, name, blacklisted_keys, input_generator, is_regex=False, is_domains=False):
        """Build a blacklist from the data in the blacklist file.

        Built in smarts make it easy to build a blacklist of domains

        Args:
            name: A string display name for this blacklist.
            blacklisted_keys: A list of string keys to inspect in each blob.
            input_generator: An iterable of blacklisted values.
            is_regex: When True, values are treated as regular expressions.
            is_domains: When True, values are treated as domain names (implies regex matching).
        Raises:
            MissingConfigError - when required config key does not exist.
        """
        self._name = name
        self._blacklisted_keys = blacklisted_keys
        self._is_domains = is_domains
        self._is_regex = is_regex or self._is_domains
        # Maps the matching term (plain string or compiled regex) to the
        # original blacklisted value, used as the display name on a match.
        self._blacklisted_values = dict(
            self._convert_to_matching_term(val) for val in input_generator if val
        )
        # _convert_to_matching_term returns (None, None) for unusable values; drop that entry.
        self._blacklisted_values.pop(None, None)

    def _convert_to_matching_term(self, blacklisted_value):
        """Convert a blacklisted_value to a matching term.

        Args:
            blacklisted_value - string of value on a blacklist
        Returns:
            A (matching term, display name) tuple. The matching term is a compiled regex
            when this blacklist matches domains or regexes, the plain string otherwise.
            Returns (None, None) when the value cannot be used.
        """
        display_name = blacklisted_value

        if self._is_domains:
            try:
                domain = clean_domain(blacklisted_value)
            except BadDomainError:
                if not isinstance(blacklisted_value, six.text_type):
                    blacklisted_value = blacklisted_value.decode('utf8')
                logging.warning(
                    u'Blacklisted value "{0}" cannot be resolved as a domain name'
                    .format(blacklisted_value),
                )
                return None, None

            # Match the domain itself and any of its subdomains.
            blacklisted_value = re.compile(r'^(.+\.)*{0}$'.format(re.escape(domain)))

        elif self._is_regex:
            blacklisted_value = re.compile(blacklisted_value)

        return blacklisted_value, display_name

    def match_line(self, blob):
        """Determines whether a line matches the blacklist.

        Returns:
            String of the matched term if any value matches, None otherwise
        """
        for key in self._blacklisted_keys:
            values = DictUtils.get_deep(blob, key)
            if not values:
                continue

            matching_term = self.match_values(values)
            if matching_term:
                return matching_term

        return None

    def match_values(self, values):
        """Determines whether an array of values match the blacklist.

        Returns:
            String of the matched term if any value matches, None otherwise
        """
        if not isinstance(values, list):
            values = [values]

        for val in values:
            # BUGFIX: the old implementation returned unconditionally inside the loop,
            # so only the first value was ever checked. Now every value is tried until
            # one matches.
            if self._is_regex or self._is_domains:
                matched = next(
                    (
                        name for term, name in self._blacklisted_values.items() if term.search(val)
                    ), None,
                )
            else:
                matched = self._blacklisted_values.get(val, None)
            if matched:
                return matched
        return None

    @property
    def name(self):
        # Display name given to this blacklist at construction time.
        return self._name
        Returns:
            String of matched term if the value matches, None otherwise
        """
        if not isinstance(values, list):
            values = [values]

        # NOTE(review): both branches return on the first iteration, so only
        # values[0] is ever consulted — confirm whether later values should
        # also be checked.
        for val in values:
            if self._is_regex or self._is_domains:
                return next(
                    (
                        name for term, name in six.iteritems(self._blacklisted_values) if term.search(val)
                    ), None,
                )
            else:
                return self._blacklisted_values.get(val, None)
        return None

    @property
    def name(self):
        # Human-readable blacklist name from the config chunk.
        return self._name
-------------------------------------------------------------------------------- /tests/output_filters/find_domains_test.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from osxcollector.output_filters.find_domains import FindDomainsFilter
from tests.output_filters.run_filter_test import RunFilterTest


class TestFindDomainsFilter(RunFilterTest):

    """Tests many variants of blobs with a domain in them."""

    def _run_test(self, input_blob, expected_domains):
        # Runs FindDomainsFilter over a single blob and checks the
        # 'osxcollector_domains' key it adds (None means no key expected).
        output_blobs = self.run_test(FindDomainsFilter, [input_blob])
        self.assert_key_added_to_blob('osxcollector_domains', [expected_domains], [input_blob], output_blobs)

    def test_no_domain(self):
        input_blob = {'fungo': 'kidney'}
        self._run_test(input_blob, None)

    def test_tld(self):
        # Both the registered domain and the full hostname are reported.
        input_blob = {'fungo': 'http://www.example.com'}
        expected_domains = ['example.com', 'www.example.com']
        self._run_test(input_blob, expected_domains)

    def test_bare_domain(self):
        input_blob = {'fungo': 'http://example.com'}
        expected_domains = ['example.com']
        self._run_test(input_blob, expected_domains)

    def test_uk_domain(self):
        # Multi-label public suffix (co.uk) is kept with the registered domain.
        input_blob = {'fungo': 'http://www.example.co.uk'}
        expected_domains = ['example.co.uk', 'www.example.co.uk']
        self._run_test(input_blob, expected_domains)

    def test_info_domain(self):
        input_blob = {'fungo': 'http://www.example.info'}
        expected_domains = ['example.info', 'www.example.info']
        self._run_test(input_blob, expected_domains)

    def test_ftp_scheme(self):
        # Non-HTTP schemes still yield domains.
        input_blob = {'fungo': 'ftp://example.com'}
        expected_domains = ['example.com']
        self._run_test(input_blob, expected_domains)

    def test_domain_in_path(self):
        # A URL embedded in another URL's query string is extracted too.
        input_blob = {'fungo': 'http://www.example.com/bango?p=http://www.dingo.com'}
        expected_domains = [
            'dingo.com',
            'example.com',
            'www.dingo.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_quoted_domain(self):
        # URL-encoded scheme separator ('%3A' == ':') is handled.
        input_blob = {'fungo': 'http%3A//www.example.com'}
        expected_domains = [
            'example.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_quoted_in_path(self):
        input_blob = {'fungo': 'http://www.example.com/bango?p=http%3A//www.dingo.co.uk'}
        expected_domains = [
            'dingo.co.uk',
            'example.com',
            'www.dingo.co.uk',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_domain_in_key(self):
        # Domains are found in dict keys, not just values.
        input_blob = {'http://www.example.com': 'zungo'}
        expected_domains = [
            'example.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_list(self):
        input_blob = {'fungo': ['http://www.example.com', 'https://www.zzz.sample.org']}
        expected_domains = [
            'example.com',
            'sample.org',
            'www.example.com',
            'www.zzz.sample.org',
        ]
        self._run_test(input_blob, expected_domains)

    def test_dict(self):
        input_blob = {'fungo': {'http://www.example.com': 'https://www.zzz.sample.org'}}
        expected_domains = [
            'example.com',
            'sample.org',
            'www.example.com',
            'www.zzz.sample.org',
        ]

        self._run_test(input_blob, expected_domains)

    def test_list_of_dict(self):
        # Nested containers (list of dicts) are walked recursively.
        input_blob = {
            'fungo': [
                {'http://www.example.com': 'https://www.zzz.sample.org'},
                {'a': 'https://www.dingo.co.uk'},
            ],
        }
        expected_domains = [
            'dingo.co.uk',
            'example.com',
            'sample.org',
            'www.dingo.co.uk',
            'www.example.com',
            'www.zzz.sample.org',
        ]
        self._run_test(input_blob, expected_domains)

    def test_tokenizing(self):
        # URLs wrapped in punctuation, JSON, quotes, or trailing dots are
        # still tokenized out of the surrounding noise.
        input_blob = {
            'fungo': [
                '{"bar":\'http://www.example.com\'}',
                '(http://www.example2.com)',
                ';http://www.example3.com\n',
                'http://example4.com.',
                '#@^%$*http://www.xxx.yyy.zzz.example.com/fungo/digno',
            ],
        }
        expected_domains = [
            'example.com',
            'example2.com',
            'example3.com',
            'example4.com',
            'www.example.com',
            'www.example2.com',
            'www.example3.com',
            'www.xxx.yyy.zzz.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_special_keys_domain(self):
        # Values under special keys like 'host' are treated as domains even
        # without a URL scheme.
        input_blob = {'host': 'www.example.com'}
        expected_domains = [
            'example.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_no_dupes(self):
        # The same domain found in many places is reported only once.
        input_blob = {
            'host': 'www.example.com',
            'another_thing': 'http://www.example.com',
            'https://www.example.com': True,
            'dictation': {'threepete': ['ftp://example.com', 'http://example.com', 'https://www.example.com']},
        }
        expected_domains = [
            'example.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_special_keys_url(self):
        input_blob = {'host': 'https://www.example.com'}
        expected_domains = [
            'example.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_not_valid_url(self):
        # Garbage that merely resembles a URL must not produce domains.
        input_blob = {'term': 'https://blah.bork.yarn.dorn-duh-%5DYYYY.WW,boo:bloom,fizz:(ault:(akh_ugh:!it,pook:NOOM)),rort:!(\'@tuht\',dort))'}
        expected_domains = None
        self._run_test(input_blob, expected_domains)
-------------------------------------------------------------------------------- /tests/output_filters/related_files_test.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from osxcollector.output_filters.related_files import RelatedFilesFilter
from tests.output_filters.run_filter_test import assert_equal_sorted
from tests.output_filters.run_filter_test import RunFilterTest


def when_anytime(blob):
    """A simple when that always returns True"""
    return True


class RelatedFilesFilterTest(RunFilterTest):

    """Creates a RelatedFilesFilter, calls run_test, and performs additional filter specific validation."""

    def teardown_method(self, method):
        # Drop the filter handle kept by _run_test so state never leaks
        # between tests.
        self._output_filter = None

    def _run_test(
        self, input_blobs=None, when=when_anytime, file_terms=None, expected_terms=None,
        expected_usernames=None, expected_is_related=None,
    ):
        """Creates a RelatedFilesFilter, calls run_test, and performs additional filter specific validation.

        Args:
            input_blobs: An enumerable of dicts
            when: A callable when to init the RelatedFilesFilter with
            file_terms: An enumerable of strings to init the RelatedFilesFilter with
            expected_terms: The expected final value of RelatedFilesFilter.terms
            expected_usernames: The expected final value of RelatedFilesFilter.usernames
            expected_is_related: An enumerable of the expected value of 'osxcollector_related' for each output_blob
        """

        def create_related_files_filter():
            # Factory keeps a handle on the constructed filter so the
            # assertions below can inspect its .terms/.usernames afterwards.
            self._output_filter = RelatedFilesFilter(when=when, file_terms=file_terms)
            return self._output_filter

        output_blobs = self.run_test(create_related_files_filter, input_blobs=input_blobs)
        if expected_terms:
            assert_equal_sorted(expected_terms, self._output_filter.terms)
        if expected_usernames:
            assert_equal_sorted(expected_usernames, self._output_filter.usernames)
        if expected_is_related:
            self.assert_key_added_to_blob('osxcollector_related', expected_is_related, input_blobs, output_blobs)
        return output_blobs


class TestCreateTerms(RelatedFilesFilterTest):

    """Focuses on testing that terms are properly created."""

    def test_single_term(self):
        file_terms = ['one_word']
        expected = ['one_word']
        self._run_test(file_terms=file_terms, expected_terms=expected)

    def test_multi_terms(self):
        file_terms = ['one_word', 'pants', 'face']
        expected = ['one_word', 'pants', 'face']
        self._run_test(file_terms=file_terms, expected_terms=expected)

    def test_split_terms(self):
        # A path is split on '/' into individual terms.
        file_terms = ['/ivanlei/source/osxcollector']
        expected = ['ivanlei', 'source', 'osxcollector']
        self._run_test(file_terms=file_terms, expected_terms=expected)

    def test_whitelist_terms(self):
        # Common path components ('Users', 'bin', 'python') are whitelisted away.
        file_terms = ['/Users/ivanlei/source/osxcollector', '/Users/ivanlei/virtual_envs/osxcollector/bin/python']
        expected = ['ivanlei', 'source', 'osxcollector', 'virtual_envs']
        self._run_test(file_terms=file_terms, expected_terms=expected)

    def test_whitelist_username_terms(self):
        # Usernames discovered in the input blobs are excluded from terms.
        file_terms = ['/Users/ivanlei/source/osxcollector', '/Users/ivanlei/virtual_envs/osxcollector/bin/python']
        expected = ['source', 'osxcollector', 'virtual_envs']
        blob = {'osxcollector_username': 'ivanlei'}
        expected_usernames = ['ivanlei']

        self._run_test(input_blobs=[blob], file_terms=file_terms, expected_terms=expected, expected_usernames=expected_usernames)


class TestFindUserNames(RelatedFilesFilterTest):

    """Focuses on ensuring that usernames are found so they can be ignored as terms."""

    def test_find_username(self):
        blob = {'osxcollector_username': 'bob'}
        expected_usernames = ['bob']
        self._run_test(input_blobs=[blob], expected_usernames=expected_usernames)

    def test_find_multiple_username(self):
        # Duplicate usernames collapse; blobs without the key are ignored.
        blobs = [
            {'osxcollector_username': 'bob'},
            {'osxcollector_username': 'jim'},
            {'osxcollector_username': 'bob'},
            {'banana': 'pants'},
        ]
        expected_usernames = ['bob', 'jim']
        self._run_test(input_blobs=blobs, expected_usernames=expected_usernames)


class TestRelatedFilesFilter(RelatedFilesFilterTest):

    """Tests the overall functionality of the filter."""

    def test_single_term(self):
        input_blobs = [
            {'banana': '/var/bin/magic_value'},
        ]
        expected_is_related = [
            {'files': ['magic_value']},
        ]
        file_terms = ['magic_value']
        self._run_test(input_blobs=input_blobs, file_terms=file_terms, expected_is_related=expected_is_related)

    def test_multi_term(self):
        input_blobs = [
            {'avocado': '/var/bin/magic/hat'},
            {'mango': '/var/bin/value/hat'},
            {'shandy': '/var/bin/magic/value/hat'},
        ]
        expected_is_related = [
            {'files': ['magic']},
            {'files': ['value']},
            {'files': ['magic', 'value']},
        ]
        file_terms = ['magic', 'value']
        self._run_test(input_blobs=input_blobs, file_terms=file_terms, expected_is_related=expected_is_related)

    def test_split_term(self):
        # A single 'magic/value' term behaves like the two terms it splits into.
        input_blobs = [
            {'avocado': '/var/bin/magic/hat'},
            {'mango': '/var/bin/value/hat'},
            {'shandy': '/var/bin/magic/value/hat'},
        ]
        expected_is_related = [
            {'files': ['magic']},
            {'files': ['value']},
            {'files': ['magic', 'value']},
        ]
        file_terms = ['magic/value']
        self._run_test(input_blobs=input_blobs, file_terms=file_terms, expected_is_related=expected_is_related)

    def test_discover_term(self):
        # With no file_terms given, terms are discovered from 'file_path' blobs.
        input_blobs = [
            {'file_path': '/var/bin/magic/value'},
            {'carrot': '/var/bin/magic/hat'},
            {'apple': '/var/bin/value/hat'},
            {'lemmon': '/lime/rickey'},
        ]
        expected_is_related = [
            {'files': ['magic', 'value']},
            {'files': ['magic']},
            {'files': ['value']},
            None,
        ]
        self._run_test(input_blobs=input_blobs, expected_is_related=expected_is_related)

    def test_skip_username(self):
        # A discovered term that equals a username ('magic') is skipped.
        input_blobs = [
            {'file_path': '/var/bin/magic/value', 'osxcollector_username': 'magic'},
            {'carrot': '/var/bin/magic/hat'},
            {'apple': '/var/bin/value/hat'},
            {'lemmon': '/lime/rickey'},
        ]
        expected_is_related = [
            {'files': ['value']},
            None,
            {'files': ['value']},
            None,
        ]
        self._run_test(input_blobs=input_blobs, expected_is_related=expected_is_related)

    def test_when(self):
        # Only blobs matching 'when' contribute discovered terms.
        def when_binbing(blob):
            return 'bingbing' in blob

        input_blobs = [
            {'file_path': '/var/bin/magic', 'bingbing': True, 'osxcollector_username': 'hat'},
            {'file_path': '/var/bin/value'},
            {'carrot': '/var/bin/magic/hat'},
            {'apple': '/var/bin/value/hat'},
            {'lemmon': '/lime/rickey'},
        ]
        expected_is_related = [
            {'files': ['magic']},
            None,
            {'files': ['magic']},
            None,
            None,
        ]
        self._run_test(input_blobs=input_blobs, when=when_binbing, expected_is_related=expected_is_related)
-------------------------------------------------------------------------------- /tests/output_filters/util/blacklist_test.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from copy import deepcopy

import pytest
from mock import call
from mock import patch

from osxcollector.output_filters.exceptions import MissingConfigError
from osxcollector.output_filters.util.blacklist import create_blacklist


class TestCreateBlacklist:

    @pytest.fixture(scope='function', autouse=True)
    def file_contents(self):
        # Patches the blacklist file reader; yields the patch object so tests
        # can override .return_value with their own file contents.
        file_contents = [
            # Fruits
            'apple', 'banana',

            # Cars
            'corolla', 'datsun',
        ]
        with patch(
            'osxcollector.output_filters.util.blacklist._read_blacklist_file',
            return_value=file_contents,
        ) as file_contents:
            yield file_contents

    @pytest.fixture(scope='function')
    def blacklist_data(self):
        # Minimal valid config chunk: only the required keys.
        yield {
            'blacklist_name': 'only_required',
            'blacklist_keys': ['fruit_name'],
            'blacklist_file_path': '/who/cares/I/mock/this.txt',
        }

    @pytest.fixture(scope='module', autouse=True)
    def mock_exists(self):
        # The blacklist file path is never actually on disk.
        with patch('os.path.exists', return_value=True):
            yield

    def test_only_required_keys(self, blacklist_data):
        blacklist = create_blacklist(blacklist_data)
        assert blacklist.name == blacklist_data['blacklist_name']
        assert blacklist._blacklisted_keys == blacklist_data['blacklist_keys']
        assert not blacklist._is_regex
        assert not blacklist._is_domains

    def test_missing_required_keys(self, blacklist_data):
        # Removing any single required key should raise MissingConfigError.
        for key in blacklist_data:
            _blacklist_data = deepcopy(blacklist_data)
            del _blacklist_data[key]
            with pytest.raises(MissingConfigError):
                create_blacklist(_blacklist_data)

    def test_missing_data_input(self, blacklist_data):
        # With neither a file path nor a data feed there is no blacklist source.
        blacklist_data.pop('blacklist_file_path')
        with pytest.raises(MissingConfigError):
            create_blacklist(blacklist_data)

    def test_required_with_two_keys(self, blacklist_data):
        blacklist_data['blacklist_keys'] = ['fruit_name', 'car_name']
        blacklist = create_blacklist(blacklist_data)
        assert blacklist._blacklisted_keys == blacklist_data['blacklist_keys']

    def test_keys_not_list(self, blacklist_data):
        # 'blacklist_keys' must be a list, not a bare string.
        blacklist_data['blacklist_keys'] = 'fruit_name'
        with pytest.raises(MissingConfigError):
            create_blacklist(blacklist_data)

    def test_is_regex(self, blacklist_data):
        blacklist_data['blacklist_is_regex'] = True
        blacklist = create_blacklist(blacklist_data)
        assert blacklist._is_regex

    def test_is_domains(self, blacklist_data, file_contents):
        file_contents.return_value = ['apple.com', 'banana.org']
        # Setting 'blacklist_is_domains' overrides 'blacklist_is_regex'
        blacklist_data['blacklist_is_domains'] = True
        blacklist_data['blacklist_is_regex'] = False
        blacklist = create_blacklist(blacklist_data)
        assert blacklist._is_regex
        assert blacklist._is_domains

    # TODO: Refactor OSXCollector Output Filters to work with unicode-based domains
    def test_bad_domains_unicode(self, blacklist_data):
        # Each unresolvable unicode domain logs one warning.
        unicode_domain_1 = 'yelp.公司'
        unicode_domain_2 = 'www.Yülp.tld'
        unicode_domain_3 = 'иelф.р'
        unicode_domains = [unicode_domain_1, unicode_domain_2, unicode_domain_3]
        blacklist_data['blacklist_is_domains'] = True
        with patch(
            'osxcollector.output_filters.util.blacklist._read_blacklist_file',
            return_value=unicode_domains,
        ):
            with patch('logging.warning', autospec=True) as patched_logging_warning:
                create_blacklist(blacklist_data)
        assert patched_logging_warning.call_count == 3

        calls = [
            call(
                u'Blacklisted value "{0}" cannot be resolved as a domain name'
                .format(unicode_domain),
            ) for unicode_domain in unicode_domains
        ]
        assert calls == patched_logging_warning.call_args_list

    def test_bad_domains(self, blacklist_data):
        # Non-domain values in a domains blacklist log warnings and never match.
        blacklist_data['blacklist_is_domains'] = True
        with patch('logging.warning', autospec=True) as patched_logging_warning:
            blacklist = create_blacklist(blacklist_data)
        assert patched_logging_warning.call_count == 4
        calls = [
            call('Blacklisted value "apple" cannot be resolved as a domain name'),
            call('Blacklisted value "banana" cannot be resolved as a domain name'),
            call('Blacklisted value "corolla" cannot be resolved as a domain name'),
            call('Blacklisted value "datsun" cannot be resolved as a domain name'),
        ]
        assert calls == patched_logging_warning.call_args_list

        blob = {'fruit_name': 'apple.com'}
        assert not blacklist.match_line(blob)

    def test_match_fruit(self, blacklist_data):
        # Only values under a configured blacklist key can match.
        good_blobs = [
            {'fruit_name': 'apple'},
            {'fruit_name': 'banana'},
        ]
        bad_blobs = [
            {'car_name': 'corolla'},
            {'car_name': 'datsun'},
        ]

        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    def test_match_fruit_and_cars(self, blacklist_data):
        good_blobs = [
            {'fruit_name': 'apple'},
            {'fruit_name': 'banana'},
            {'car_name': 'corolla'},
            {'car_name': 'datsun'},
        ]

        blacklist_data['blacklist_keys'] = ['fruit_name', 'car_name']
        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)

    def test_match_fruit_regex(self, blacklist_data, file_contents):
        good_blobs = [
            {'fruit_name': 'apple'},
        ]

        bad_blobs = [
            {'fruit_name': 'banana'},
            {'car_name': 'corolla'},
            {'car_name': 'datsun'},
        ]

        blacklist_data['blacklist_is_regex'] = True
        file_contents.return_value = ['app.*', 'ban.+org']
        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    def test_match_domains(self, blacklist_data, file_contents):
        # A domain blacklist matches the domain and its subdomains only —
        # not lookalikes, other TLDs, or other keys.
        good_blobs = [
            {'fruit_name': 'apple.com'},
            {'fruit_name': 'www.apple.com'},
            {'fruit_name': 'www.another-thing.apple.com'},
        ]

        bad_blobs = [
            {'fruit_name': 'cran-apple.com'},
            {'fruit_name': 'apple.org'},
            {'fruit_name': 'apple.com.jp'},
            {'car_name': 'apple.com'},
        ]
        blacklist_data['blacklist_is_domains'] = True
        file_contents.return_value = ['apple.com']
        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    def test_match_domains_data_feed(self, blacklist_data):
        # Same matching behavior when domains come from a data feed generator
        # instead of a file.
        good_blobs = [
            {'fruit_name': 'apple.com'},
            {'fruit_name': 'www.apple.com'},
            {'fruit_name': 'www.another-thing.apple.com'},
        ]

        bad_blobs = [
            {'fruit_name': 'cran-apple.com'},
            {'fruit_name': 'apple.org'},
            {'fruit_name': 'apple.com.jp'},
            {'car_name': 'apple.com'},
        ]
        blacklist_data['blacklist_is_domains'] = True
        blacklist_data['blacklist_data_feed'] = 'domain_list'
        blacklist_data.pop('blacklist_file_path')

        def mock_generator():
            for domain in ['apple.com']:
                yield domain

        blacklist = create_blacklist(
            blacklist_data, {'domain_list':
            mock_generator},
        )
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    # TODO: Refactor OSXCollector Output Filters to work with unicode-based domains
    def test_log_unicode_domain(self):
        config_chunk = {
            'blacklist_name': 'Unicode domain',
            'blacklist_keys': ['visited_domain'],
            'blacklist_file_path': 'not_really_a_blacklist.txt',
            'blacklist_is_domains': True,
        }
        file_contents = ['Bücher.tld', 'yelp.公司', 'www.Yülp.tld', 'иelф.р']
        with patch(
            'osxcollector.output_filters.util.blacklist._read_blacklist_file',
            return_value=file_contents,
        ), patch('logging.warning', autospec=True) as patched_logging_warning:
            blacklist = create_blacklist(config_chunk)
        assert patched_logging_warning.call_count == 4
        calls = [
            call(
                u'Blacklisted value "{0}" cannot be resolved as a domain name'
                .format(domain),
            ) for domain in file_contents
        ]
        assert calls == patched_logging_warning.call_args_list

        # NOTE(review): key is 'visted_domain' (missing an 'i') while the
        # config uses 'visited_domain'. The no-match assertion passes either
        # way, but confirm which key was intended.
        blob = {'visted_domain': 'Bücher.tld'}
        assert not blacklist.match_line(blob)
-------------------------------------------------------------------------------- /osxcollector/output_filters/summary_filters/text.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

import sys
from numbers import Number

import six

from osxcollector.output_filters.summary_filters.summary import SummaryFilter


class TextSummaryFilter(SummaryFilter):
    """Prints the analysis summary (AKA "Very Readable Output") in plain text format."""

    # ANSI escape sequences used to colorize terminal output.
    END_COLOR = '\033[0m'
    SECTION_COLOR = '\033[1m'
    BOT_COLOR = '\033[93m\033[1m'
    KEY_COLOR = '\033[94m'
    VAL_COLOR = '\033[32m'

    def __init__(self, monochrome=False, text_output_file=None, **kwargs):
        # monochrome: when True, suppress all ANSI color codes.
        super(TextSummaryFilter, self).__init__(summary_output_file=text_output_file, **kwargs)
        self._monochrome = monochrome

    def filter_line(self, blob):
        """Each Line of OSXCollector output will be passed to filter_line.

        Buckets each interesting line (VT hash/domain hits, OpenDNS hits,
        blacklist hits, related files, untrusted signature chains, browser
        extensions) for the summary written in end_of_lines.

        Args:
            blob: A dict

        Returns:
            A dict or None
        """
        if 'osxcollector_vthash' in blob:
            self._vthash.append(blob)

        if 'osxcollector_vtdomain' in blob:
            self._vtdomain.append(blob)

        if 'osxcollector_opendns' in blob:
            self._opendns.append(blob)

        if 'osxcollector_blacklist' in blob:
            self._blacklist.append(blob)

        if 'osxcollector_related' in blob:
            self._related.append(blob)

        if self._show_signature_chain:
            # Flag startup/kext binaries whose cert chain doesn't end at Apple.
            if 'signature_chain' in blob and blob['osxcollector_section'] in ['startup', 'kext']:
                signature_chain = blob['signature_chain']
                if not len(signature_chain) or 'Apple Root CA' != signature_chain[-1]:
                    self._signature_chain.append(blob)

        if self._show_browser_ext:
            if blob['osxcollector_section'] in ['firefox', 'chrome'] and blob.get('osxcollector_subsection') == 'extensions':
                self._extensions.append(blob)

        return blob

    def _write(self, msg, color=END_COLOR):
        # Writes msg to the output stream, wrapped in ANSI color codes unless
        # monochrome mode is on.
        # NOTE(review): on Python 3, .encode() returns bytes and writing bytes
        # to a text stream raises TypeError, not the UnicodeDecodeError caught
        # here — confirm the PY3 path.
        if not self._monochrome:
            self._output_stream.write(color)
        try:
            self._output_stream.write(msg.encode('utf-8', errors='ignore'))
        except UnicodeDecodeError as err:
            self._output_stream.write(msg)
            sys.stderr.write('Unicode decode error: {0}'.format(err))
        if not self._monochrome:
            self._output_stream.write(self.END_COLOR)

    def end_of_lines(self):
        """Called after all lines have been fed to filter_output_line.

        Writes one summary section per non-empty bucket gathered by
        filter_line, then suggests blacklist additions.

        Returns:
            An array of dicts (empty array if no lines remain)
        """
        self._write('== Very Readable Output Bot ==\n', self.BOT_COLOR)
        self._write('Let\'s see what\'s up with this machine.\n\n', self.BOT_COLOR)

        if len(self._vthash):
            self._write('Dang! You\'ve got known malware on this machine. Hope it\'s commodity stuff\n', self.BOT_COLOR)
            self._summarize_blobs(self._vthash)
            self._write('Sheesh! This is why we can\'t have nice things!\n\n', self.BOT_COLOR)

        if len(self._vtdomain):
            self._write('I see you\'ve been visiting some \'questionable\' sites. If you trust VirusTotal that is.\n', self.BOT_COLOR)
            self._summarize_blobs(self._vtdomain)
            self._write('I hope it was worth it!\n\n', self.BOT_COLOR)

        if len(self._opendns):
            self._write('Well, here\'s some domains OpenDNS wouldn\'t recommend.\n', self.BOT_COLOR)
            self._summarize_blobs(self._opendns)
            self._write('You know you shouldn\'t just click every link you see? #truth\n\n', self.BOT_COLOR)

        if len(self._blacklist):
            self._write('We put stuff on a blacklist for a reason. Mostly so you don\'t do this.\n', self.BOT_COLOR)
            self._summarize_blobs(self._blacklist)
            self._write('SMH\n\n', self.BOT_COLOR)

        if len(self._related):
            self._write('This whole things started with just a few clues. Now look what I found.\n', self.BOT_COLOR)
            self._summarize_blobs(self._related)
            self._write('Nothing hides from Very Readable Output Bot\n\n', self.BOT_COLOR)

        if len(self._signature_chain):
            self._write('If these binaries were signed by \'Apple Root CA\' I\'d trust them more.\n', self.BOT_COLOR)
            self._summarize_blobs(self._signature_chain)
            self._write('Let\'s just try and stick with some safe software\n\n', self.BOT_COLOR)

        if len(self._extensions):
            self._write('Let\'s see what\'s hiding in the browser, shall we.\n', self.BOT_COLOR)
            self._summarize_blobs(self._extensions)
            self._write('You know these things have privileges galore.\n\n', self.BOT_COLOR)

        if len(self._add_to_blacklist):
            # De-dupe the (key, value) suggestions gathered in _summarize_blobs.
            self._add_to_blacklist = list(set(self._add_to_blacklist))
            self._write('If I were you, I\'d probably update my blacklists to include:\n', self.BOT_COLOR)
            for key, val in self._add_to_blacklist:
                self._summarize_val(key, val)
            self._write('That might just help things, Skippy!\n\n', self.BOT_COLOR)

        self._write('== Very Readable Output Bot ==\n', self.BOT_COLOR)
        self._write('#kaythanksbye', self.BOT_COLOR)

        return []

    def _summarize_blobs(self, blobs):
        # Prints each blob plus per-source details, and collects hash/domain
        # values worth adding to a blacklist.
        for blob in blobs:
            self._summarize_line(blob)

            add_to_blacklist = False

            if 'osxcollector_vthash' in blob:
                self._summarize_vthash(blob)
                add_to_blacklist = True

            if 'osxcollector_vtdomain' in blob:
                self._summarize_vtdomain(blob)

            if 'osxcollector_opendns' in blob:
                self._summarize_opendns(blob)

            if 'osxcollector_blacklist' in blob:
                for key in blob['osxcollector_blacklist']:
                    self._summarize_val('blacklist-{0}'.format(key), blob['osxcollector_blacklist'][key])

            if 'osxcollector_related' in blob:
                for key in blob['osxcollector_related']:
                    self._summarize_val('related-{0}'.format(key), blob['osxcollector_related'][key])

            if 'md5' in blob and '' == blob['md5']:
                add_to_blacklist = True

            if add_to_blacklist:
                # Suggest any hashes/domains on this line that aren't already
                # on the corresponding blacklist.
                blacklists = blob.get('osxcollector_blacklist', {})
                values_on_blacklist = blacklists.get('hashes', [])
                for key in ['md5', 'sha1', 'sha2']:
                    val = blob.get(key, '')
                    if len(val) and val not in values_on_blacklist:
                        self._add_to_blacklist.append((key, val))

                values_on_blacklist = blacklists.get('domains', [])
                for domain in blob.get('osxcollector_domains', []):
                    if domain not in values_on_blacklist:
                        self._add_to_blacklist.append(('domain', domain))

    def _summarize_line(self, blob):
        # One section header plus every non-empty, non-osxcollector key.
        section = blob.get('osxcollector_section')
        subsection = blob.get('osxcollector_subsection', '')

        self._write('- {0} {1}\n'.format(section, subsection), self.SECTION_COLOR)
        for key in sorted(blob.keys()):
            if not key.startswith('osxcollector') and blob.get(key):
                val = blob.get(key)
                self._summarize_val(key, val)

    def _summarize_vthash(self, blob):
        # NOTE: the loop variable deliberately shadows 'blob' — each element
        # of 'osxcollector_vthash' is itself a result dict.
        for blob in blob['osxcollector_vthash']:
            for key in ['positives', 'total', 'scan_date', 'permalink']:
                val = blob.get(key)
                self._summarize_val(key, val, 'vthash')

    def _summarize_vtdomain(self, blob):
        for blob in blob['osxcollector_vtdomain']:
            for key in ['domain', 'detections']:
                val = blob.get(key)
                self._summarize_val(key, val, 'vtdomain')

    def _summarize_opendns(self, blob):
        for blob in blob['osxcollector_opendns']:
            for key in ['domain', 'categorization', 'security', 'link']:
                val = blob.get(key)
                self._summarize_val(key, val, 'opendns')

    def _summarize_val(self, key, val, prefix=None):
        # Writes a single 'key: value' line, optionally prefixed (e.g. 'vthash-').
        self._print_key(key, prefix)
        self._print_val(val)
        self._write('\n')

    def _print_key(self, key, prefix):
        if
not prefix:
            prefix = ''
        else:
            prefix += '-'

        self._write(' {0}{1}'.format(prefix, key), self.KEY_COLOR)
        self._write(': ')

    def _print_val(self, val):
        # Recursively pretty-prints a value in a JSON-like colorized form;
        # strings are truncated to 480 chars.
        if isinstance(val, list):
            self._write('[')
            for index, elem in enumerate(val):
                self._print_val(elem)
                if index != len(val) - 1:
                    self._write(', ')
            self._write(']')
        elif isinstance(val, dict):
            self._write('{')
            for index, key in enumerate(val):
                self._write('"')
                self._write(key, self.VAL_COLOR)
                self._write('": ')
                self._print_val(val[key])
                if index != len(val) - 1:
                    self._write(', ')
            self._write('}')
        elif isinstance(val, six.string_types):
            val = val[:480]
            self._write('"')
            self._write(val, self.VAL_COLOR)
            self._write('"')
        elif isinstance(val, Number):
            self._write('{0}'.format(val), self.VAL_COLOR)
-------------------------------------------------------------------------------- /osxcollector/output_filters/opendns/lookup_domains.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# LookupDomainsFilter uses OpenDNS to lookup the values in 'osxcollector_domains' and adds the 'osxcollector_opendns' key.
#
from __future__ import absolute_import
from __future__ import unicode_literals

import logging
from collections import namedtuple

import six
from threat_intel.opendns import InvestigateApi

from osxcollector.output_filters.base_filters.output_filter import run_filter_main
from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter
from osxcollector.output_filters.util.blacklist import create_blacklist
from osxcollector.output_filters.util.config import config_get_deep


class LookupDomainsFilter(ThreatFeedFilter):

    """Uses OpenDNS to lookup the values in 'osxcollector_domains' and adds the 'osxcollector_opendns' key."""

    # Domain categories to consider suspicious
    SUSPICIOUS_CATEGORIES = [
        'Adware',
        'Botnet',
        'Typo Squatting',
        'Drive-by Downloads/Exploits',
        'Mobile Threats',
        'High Risk Sites and Locations',
        'Malware',
        'Phishing',
    ]

    # Each check flags a domain when its score is at or below 'threshold'.
    SecurityCheck = namedtuple('SecurityCheck', ['key', 'min', 'max', 'threshold'])
    SECURITY_CHECKS = [
        # Domain Generation Algorithm. This score is generated based on the likeliness of the domain name being
        # generated by an algorithm rather than a human. This algorithm is designed to identify domains which have
        # been created using an automated randomization strategy, which is a common evasion technique in malware kits
        # or botnets. This score ranges from -100 (suspicious) to 0 (benign)
        #
        SecurityCheck('dga_score', -100, 0, -70),

        # Suspicious rank for a domain that reviews based on the lookup behavior of client IP for the domain.
        # Securerank is designed to identify hostnames requested by known infected clients but never requested
        # by clean clients, assuming these domains are more likely to be bad.
        # Scores returned range from -100 (suspicious) to 100 (benign).
        #
        SecurityCheck('securerank2', -100, 100, -10),

        # ASN reputation score, ranges from -100 to 0 with -100 being very suspicious
        SecurityCheck('asn_score', -100, 0, -3),

        # Prefix ranks domains given their IP prefixes (An IP prefix is the first three octets in an IP address)
        # and the reputation score of these prefixes.
        # Ranges from -100 to 0, -100 being very suspicious
        SecurityCheck('prefix_score', -100, 0, -12),

        # RIP ranks domains given their IP addresses and the reputation score of these IP addresses.
        # Ranges from -100 to 0, -100 being very suspicious
        SecurityCheck('rip_score', -100, 0, -25),
    ]

    SECURITY_BAD_KEYS = [
        # The name of any known attacks associated with this domain.
        # Returns blank if no known threat associated with domain.
        'attack',

        # The type of the known attack, such as botnet or APT.
        # Returns blank if no known threat associated with domain.
        'threat_type',
    ]

    def __init__(self, lookup_when=None, **kwargs):
        super(LookupDomainsFilter, self).__init__(
            'osxcollector_domains', 'osxcollector_opendns',
            lookup_when=lookup_when, name_of_api_key='opendns', **kwargs
        )
        # The whitelist reuses the blacklist machinery; whitelisted domains
        # are skipped in _lookup_iocs.
        self._whitelist = create_blacklist(
            config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}),
        )

    def _lookup_iocs(self, all_iocs):
        """Caches the OpenDNS info for a set of domains.

        Domains on a whitelist will be ignored.
        First, lookup the categorization details for each domain.
        Next, if the categorization seems suspicious or unknown, lookup detailed security info.
        Finally, if the categorization or security info is suspicious, save the threat info.

        Args:
            all_iocs: an enumerable of string domain names.
        Returns:
            A dict {domain: opendns_info}
        """
        threat_info = {}

        cache_file_name = config_get_deep('opendns.LookupDomainsFilter.cache_file_name', None)
        investigate = InvestigateApi(self._api_key, cache_file_name=cache_file_name)

        # Skip whitelisted domains entirely.
        iocs = [x for x in all_iocs if not self._whitelist.match_values(x)]

        categorization = investigate.categorization(iocs)

        # Mark the categorization as suspicious
        for domain, categorization_info in six.iteritems(categorization):
            if categorization_info:
                categorization_info['suspicious'] = \
                    self._is_category_info_suspicious(categorization_info)
            else:
                logging.warning(
                    'No categorization for domain {0}'.format(domain),
                )
                categorization[domain] = {'suspicious': False}

        # Decide which values to lookup security info for
        iocs = [domain for domain in categorization if self._should_get_security_info(categorization[domain])]

        security = investigate.security(iocs)

        for domain, security_info in six.iteritems(security):
            if security_info:
                security_info['suspicious'] = \
                    self._is_security_info_suspicious(security_info)
            else:
                logging.warning(
                    'No security information for domain {0}'.format(domain),
                )
                security[domain] = {'suspicious': False}

        for domain in security:
            if self._should_store_ioc_info(categorization[domain], security[domain]):
                threat_info[domain] = {
                    'domain': domain,
                    'categorization': categorization[domain],
                    'security': self._trim_security_result(security[domain]),
                    # On PY2 the domain is byte-encoded for the Investigate URL.
                    'link': 'https://investigate.opendns.com/domain-view/name/{0}/view'.format(
                        domain.encode('utf-8', errors='ignore') if six.PY2 else domain,
                    ),
                }

        return threat_info

    def _is_category_info_suspicious(self, category_info):
        """Figure out whether the categorization info is suspicious.

        Suspicious when status is -1, any security category exists, or any
        content category is in SUSPICIOUS_CATEGORIES.

        Args:
            category_info: A dict of info returned by the OpenDNS categorization call
        Returns:
            boolean
        """
        status = category_info['status']
        content_categories = category_info['content_categories']
        security_categories = category_info['security_categories']

        return -1 == status or len(security_categories) or any([cat in self.SUSPICIOUS_CATEGORIES for cat in content_categories])

    def _should_get_security_info(self, categorization_info):
        """Figure out whether the categorization info on the domain is interesting enough to gather more data.

        If the domain isn't categorized, or is categorized as suspicious, get security info.

        Args:
            categorization_info: A dict of info returned by the OpenDNS categorization call
        Returns:
            boolean
        """
        status = categorization_info.get('status', 0)
        content_categories = categorization_info.get('content_categories', [])
        security_categories = categorization_info.get('security_categories', [])

        # Uncategorized means status 0 with no categories at all.
        return categorization_info['suspicious'] or \
            (0 == status and 0 == len(content_categories) and 0 == len(security_categories))

    def _is_security_info_suspicious(self, security_info):
        """Analyzes info from OpenDNS and makes a boolean determination of suspicious or not.

        Either looks for low values for a specific set of properties, looks for known participation in
        a threat campaign, or looks for unknown domains.
216 | Returns: 217 | A dict 218 | """ 219 | # dga_score sometimes has the wrong sign, fix that please 220 | dga_score = security_info.get('dga_score', 0) 221 | if dga_score > 0: 222 | security_info['dga_score'] = -1 * dga_score 223 | 224 | # There's a lot of info in the security_info, trim it 225 | result = {} 226 | for security_check in self.SECURITY_CHECKS: 227 | if security_check.key in security_info: 228 | result[security_check.key] = security_info[security_check.key] 229 | for key in self.SECURITY_BAD_KEYS: 230 | if key in security_info: 231 | result[key] = security_info[key] 232 | 233 | result['found'] = security_info.get('found', False) 234 | 235 | return result 236 | 237 | 238 | def main(): 239 | run_filter_main(LookupDomainsFilter) 240 | 241 | 242 | if __name__ == '__main__': 243 | main() 244 | -------------------------------------------------------------------------------- /osxcollector/output_filters/opendns/related_domains.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # RelatedDomains uses OpenDNS to find domains related to input domains or IPs. 
4 | # Adds 'osxcollector_related' key to the output: 5 | # { 6 | # 'osxcollector_related': { 7 | # 'domains': { 8 | # 'domain_in_line.com': ['related_domain.com'], 9 | # 'another.com': ['1.2.3.4'] 10 | # } 11 | # } 12 | # } 13 | # 14 | from __future__ import absolute_import 15 | from __future__ import unicode_literals 16 | 17 | from argparse import ArgumentParser 18 | 19 | import six 20 | from threat_intel.opendns import InvestigateApi 21 | 22 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 23 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 24 | from osxcollector.output_filters.util.blacklist import create_blacklist 25 | from osxcollector.output_filters.util.config import config_get_deep 26 | from osxcollector.output_filters.util.domains import expand_domain 27 | 28 | 29 | DEFAULT_RELATED_DOMAINS_GENERATIONS = 2 30 | 31 | 32 | class RelatedDomainsFilter(OutputFilter): 33 | 34 | """Uses OpenDNS to find domains related to input domains or IPs. 35 | 36 | A whitelist of domains to ignore is read during initialization. 37 | Adds 'osxcollector_related' key to the output: 38 | ```python 39 | { 40 | 'osxcollector_related': { 41 | 'domains': { 42 | 'domain_in_line.com': ['related_domain.com'], 43 | 'another.com': ['1.2.3.4'] 44 | } 45 | } 46 | } 47 | ``` 48 | """ 49 | 50 | def __init__( 51 | self, 52 | initial_domains=None, 53 | initial_ips=None, 54 | generations=DEFAULT_RELATED_DOMAINS_GENERATIONS, 55 | related_when=None, 56 | **kwargs 57 | ): 58 | """Initializes the RelatedDomainsFilter. 59 | 60 | Args: 61 | initial_domains: an enumerable of string domain names 62 | initial_ips: an enumerable of string IPs in the form '' 63 | generations: How many generations of related domains to retrieve. Passing 1 64 | means just find the domains related to the initial input. Passing 2 means also find the 65 | domains related to the domains related to the initial input. 
66 | related_when: A boolean function to call to decide whether to add the domains from a line to 67 | the list of related domains. 68 | """ 69 | super(RelatedDomainsFilter, self).__init__(**kwargs) 70 | self._whitelist = create_blacklist( 71 | config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}), 72 | ) 73 | 74 | cache_file_name = config_get_deep('opendns.RelatedDomainsFilter.cache_file_name', None) 75 | self._investigate = InvestigateApi(config_get_deep('api_key.opendns'), cache_file_name=cache_file_name) 76 | 77 | self._domains_to_lookup = set(initial_domains) if initial_domains else set() 78 | self._ips_to_lookup = set(initial_ips) if initial_ips else set() 79 | 80 | self._related_when = related_when 81 | self._generation_count = generations 82 | 83 | self._all_blobs = list() 84 | 85 | def filter_line(self, blob): 86 | """Accumulate a set of all domains. 87 | 88 | Args: 89 | blob: A dict representing one line of output from OSXCollector. 90 | Returns: 91 | A dict or None 92 | """ 93 | self._all_blobs.append(blob) 94 | 95 | if 'osxcollector_domains' in blob and self._related_when and self._related_when(blob): 96 | for domain in blob.get('osxcollector_domains'): 97 | self._domains_to_lookup.add(domain) 98 | 99 | return None 100 | 101 | def end_of_lines(self): 102 | """Called after all lines have been fed to filter_output_line. 103 | 104 | The OutputFilter performs any processing that requires the complete input to have already been fed. 
105 | 106 | Returns: 107 | An enumerable of dicts 108 | """ 109 | domains_to_related = self._perform_lookup_for_all_domains(self._domains_to_lookup, self._ips_to_lookup) 110 | 111 | if domains_to_related: 112 | for blob in self._all_blobs: 113 | for domain in blob.get('osxcollector_domains', []): 114 | add_related_domains = False 115 | if domain in domains_to_related: 116 | blob.setdefault('osxcollector_related', {}) 117 | blob['osxcollector_related'].setdefault('domains', {}) 118 | blob['osxcollector_related']['domains'].setdefault(domain, []) 119 | blob['osxcollector_related']['domains'][domain] += domains_to_related[domain] 120 | add_related_domains = True 121 | 122 | # Unique the related domains 123 | if add_related_domains: 124 | blob['osxcollector_related']['domains'][domain] = list(set(blob['osxcollector_related']['domains'][domain])) 125 | 126 | return self._all_blobs 127 | 128 | def get_argument_parser(self): 129 | parser = ArgumentParser() 130 | group = parser.add_argument_group('opendns.RelatedDomainsFilter') 131 | group.add_argument( 132 | '-d', '--domain', dest='initial_domains', default=[], action='append', 133 | help='[OPTIONAL] Suspicious domains to use in pivoting. May be specified more than once.', 134 | ) 135 | group.add_argument( 136 | '-i', '--ip', dest='initial_ips', default=[], action='append', 137 | help='[OPTIONAL] Suspicious IP to use in pivoting. May be specified more than once.', 138 | ) 139 | group.add_argument( 140 | '--related-domains-generations', dest='generations', default=DEFAULT_RELATED_DOMAINS_GENERATIONS, 141 | help='[OPTIONAL] How many generations of related domains to lookup with OpenDNS', 142 | ) 143 | return parser 144 | 145 | def _filter_domains_by_whitelist(self, domains): 146 | """Remove all domains that are on the whitelist. 
147 | 148 | Args: 149 | domains: An enumerable of domains 150 | Returns: 151 | An enumerable of domains 152 | """ 153 | return [x for x in list(domains) if not self._whitelist.match_values(x)] 154 | 155 | def _perform_lookup_for_all_domains(self, domains_to_lookup, ips_to_lookup): 156 | """Lookup all the domains related to the input domains or IPs. 157 | 158 | Args: 159 | domains_to_lookup: Enumerable of domains 160 | ips_to_lookup: Enumerable of IPs 161 | Returns: 162 | A dict mapping {'related_domain': ['initial_domainA', 'initial_domainB']} 163 | """ 164 | self._domains_to_lookup = self._filter_domains_by_whitelist(self._domains_to_lookup) 165 | 166 | domains_to_related = {} 167 | 168 | what_to_lookup = [(domain, True) for domain in domains_to_lookup] + [(ip, False) for ip in ips_to_lookup] 169 | 170 | for domain_or_ip, is_domain in what_to_lookup: 171 | related_domains = self._perform_lookup_for_single_domain(domain_or_ip, is_domain, self._generation_count) 172 | related_domains = self._filter_domains_by_whitelist(related_domains) 173 | for related_domain in related_domains: 174 | domains_to_related.setdefault(related_domain, set()) 175 | domains_to_related[related_domain].add(domain_or_ip) 176 | 177 | return domains_to_related 178 | 179 | def _perform_lookup_for_single_domain(self, domain_or_ip, is_domain, generation_count): 180 | """Given a domain or IP, lookup the Nth related domains. 
181 | 182 | Args: 183 | domain_or_ip: A string domain name or IP 184 | is_domain: A boolean of whether the previous arg is a domain or IP 185 | generation_count: A count of generations to lookup 186 | Returns: 187 | set of related domains 188 | """ 189 | domains_found = set([domain_or_ip]) if is_domain else set() 190 | generation_results = set([domain_or_ip]) 191 | 192 | # For IPs, do one IP specific lookup then switch to domain lookups 193 | if not is_domain: 194 | generation_results = self._find_related_domains(None, generation_results) 195 | domains_found |= generation_results 196 | generation_count -= 1 197 | 198 | while generation_count > 0: 199 | if len(generation_results): 200 | generation_results = self._find_related_domains(generation_results, None) 201 | domains_found |= generation_results 202 | 203 | generation_count -= 1 204 | 205 | return domains_found 206 | 207 | def _find_related_domains(self, domains, ips): 208 | """Calls OpenDNS to find related domains and normalizes the responses. 209 | 210 | Args: 211 | domains: An enumerable of domains 212 | ips: An enumerable of IPs 213 | Returns: 214 | An enumerable of domains 215 | """ 216 | related_domains = set() 217 | 218 | if domains: 219 | domains = self._filter_domains_by_whitelist(domains) 220 | cooccurrence_info = self._investigate.cooccurrences(domains) 221 | cooccurrence_domains = self._cooccurrences_to_domains(cooccurrence_info) 222 | related_domains.update(cooccurrence_domains) 223 | 224 | if ips: 225 | rr_history_info = self._investigate.rr_history(ips) 226 | related_domains.update(self._rr_history_to_domains(rr_history_info)) 227 | 228 | return related_domains 229 | 230 | def _cooccurrences_to_domains(self, cooccurrence_info): 231 | """Parse the results of a call to the OpenDNS cooccurrences endpoint. 
232 | 233 | Args: 234 | cooccurrence_info: Result of a call to cooccurrences 235 | Returns: 236 | An enumerable of domains 237 | """ 238 | domains = set() 239 | 240 | for domain, cooccurence in six.iteritems(cooccurrence_info): 241 | for occur_domain in cooccurence.get('pfs2', []): 242 | for elem in expand_domain(occur_domain[0]): 243 | domains.add(elem) 244 | 245 | return domains 246 | 247 | def _rr_history_to_domains(self, rr_history_info): 248 | """Parse the results of a call to the OpenDNS rr_history endpoint. 249 | 250 | Args: 251 | rr_history_info: Result of a call to rr_history 252 | Returns: 253 | An enumerable of domains 254 | """ 255 | domains = set() 256 | 257 | for ip, rr_history in six.iteritems(rr_history_info): 258 | for rr_domain in rr_history.get('rrs', []): 259 | for elem in expand_domain(rr_domain['rr']): 260 | domains.add(elem) 261 | 262 | return domains 263 | 264 | 265 | def main(): 266 | run_filter_main(RelatedDomainsFilter) 267 | 268 | 269 | if __name__ == '__main__': 270 | main() 271 | -------------------------------------------------------------------------------- /osxcollector/output_filters/analyze.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # The AnalyzeFilter is a handy little tool that ties together many filters to attempt to 5 | # enhance the output of OSXCollector with data from threat APIs, compare against blacklists, 6 | # search for lines related to suspicious domains, ips, or files, and generally figure shit out. 7 | # 8 | # The more detailed description of what goes on: 9 | # 1. Parse out browser extension information. 10 | # 2. Find all the domains in every line. Add them to the output lines. 11 | # 3. Find any file hashes or domains that are on blacklists. Mark those lines. 12 | # 4. Take any filepaths from the command line and mark all lines related to those. 13 | # 5. 
Take any domain or IP from the command line and use OpenDNS Investigate API to find all the domains 14 | # related to those domains and all the domains related to those related domains - basically the 1st and 2nd 15 | # generation related domains. Mark any lines where these domains appear. 16 | # 6. Lookup all sha1 hashes in ShadowServer's bin-test whitelist. 17 | # Files that match both hash and filename are ignored by further filters. 18 | # 7. Lookup file hashes in VirusTotal and mark any lines with suspicious files hashes. 19 | # 8. Lookup all the domains in the file with OpenDNS Investigate. Categorize and score the domains. 20 | # Mark all the lines that contain domains that were scored as "suspicious". 21 | # 9. Lookup suspicious domains, those domains on a blacklist, or those related to the initial input in VirusTotal. 22 | # 10. Cleanup the browser history and sort it in descending time order. 23 | # 11. Save all the enhanced output to a new file. 24 | # 12. Look at all the interesting lines in the file and try to summarize them in some very human readable output. 25 | # 13. Party! 
26 | # 27 | from __future__ import absolute_import 28 | from __future__ import unicode_literals 29 | 30 | from argparse import ArgumentParser 31 | 32 | from osxcollector.output_filters.alexa.lookup_rankings import LookupRankingsFilter as ArLookupRankingsFilter 33 | from osxcollector.output_filters.base_filters.chain import ChainFilter 34 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 35 | from osxcollector.output_filters.chrome.find_extensions import FindExtensionsFilter as ChromeExtensionsFilter 36 | from osxcollector.output_filters.chrome.sort_history import SortHistoryFilter as ChromeHistoryFilter 37 | from osxcollector.output_filters.find_blacklisted import FindBlacklistedFilter 38 | from osxcollector.output_filters.find_domains import FindDomainsFilter 39 | from osxcollector.output_filters.firefox.find_extensions import FindExtensionsFilter as FirefoxExtensionsFilter 40 | from osxcollector.output_filters.firefox.sort_history import SortHistoryFilter as FirefoxHistoryFilter 41 | from osxcollector.output_filters.opendns.lookup_domains import LookupDomainsFilter as OpenDnsLookupDomainsFilter 42 | from osxcollector.output_filters.opendns.related_domains import RelatedDomainsFilter as OpenDnsRelatedDomainsFilter 43 | from osxcollector.output_filters.related_files import RelatedFilesFilter 44 | from osxcollector.output_filters.shadowserver.lookup_hashes import LookupHashesFilter as ShadowServerLookupHashesFilter 45 | from osxcollector.output_filters.summary_filters.html import HtmlSummaryFilter 46 | from osxcollector.output_filters.summary_filters.text import TextSummaryFilter 47 | from osxcollector.output_filters.virustotal.lookup_domains import LookupDomainsFilter as VtLookupDomainsFilter 48 | from osxcollector.output_filters.virustotal.lookup_hashes import LookupHashesFilter as VtLookupHashesFilter 49 | 50 | 51 | class AnalyzeFilter(ChainFilter): 52 | 53 | """AnalyzeFilter chains all the other filters to produce maximum 
effect. 54 | 55 | A lot of the smarts of AnalyzeFilter are around what filters to run in which order and how results of one filter should 56 | effect the operations of the next filter. 57 | """ 58 | 59 | def __init__( 60 | self, no_opendns=False, no_virustotal=False, no_shadowserver=False, 61 | no_alexa=False, readout=False, **kwargs 62 | ): 63 | 64 | filter_chain = [] 65 | 66 | if not readout: 67 | filter_chain.append(ChromeExtensionsFilter(**kwargs)) 68 | filter_chain.append(FirefoxExtensionsFilter(**kwargs)) 69 | 70 | filter_chain.append(FindDomainsFilter(**kwargs)) 71 | 72 | # Do Alexa ranking lookups first since they are dependent only on FindDomainsFilter 73 | if not no_alexa: 74 | filter_chain.append(ArLookupRankingsFilter(**kwargs)) 75 | 76 | # Do hash related lookups first. This is done first since hash lookup is not influenced 77 | # by anything but other hash lookups. 78 | if not no_shadowserver: 79 | filter_chain.append(ShadowServerLookupHashesFilter(**kwargs)) 80 | if not no_virustotal: 81 | filter_chain.append( 82 | VtLookupHashesFilter(lookup_when=AnalyzeFilter.lookup_when_not_in_shadowserver, **kwargs), 83 | ) 84 | 85 | # Find blacklisted stuff next. Finding blacklisted domains requires running FindDomainsFilter first. 86 | filter_chain.append(FindBlacklistedFilter(**kwargs)) 87 | 88 | # RelatedFilesFilter and OpenDnsRelatedDomainsFilter use command line args in addition to previous filter 89 | # results to find lines of interest. 
90 | filter_chain.append(RelatedFilesFilter(when=AnalyzeFilter.find_related_when, **kwargs)) 91 | if not no_opendns: 92 | filter_chain.append( 93 | OpenDnsRelatedDomainsFilter(related_when=AnalyzeFilter.find_related_when, **kwargs), 94 | ) 95 | 96 | # Lookup threat info on suspicious and related stuff 97 | if not no_opendns: 98 | filter_chain.append( 99 | OpenDnsLookupDomainsFilter(lookup_when=AnalyzeFilter.lookup_when_not_in_shadowserver, **kwargs), 100 | ) 101 | if not no_virustotal: 102 | filter_chain.append( 103 | VtLookupDomainsFilter(lookup_when=AnalyzeFilter.lookup_domains_in_vt_when, **kwargs), 104 | ) 105 | 106 | # Sort browser history for maximum pretty 107 | filter_chain.append(FirefoxHistoryFilter(**kwargs)) 108 | filter_chain.append(ChromeHistoryFilter(**kwargs)) 109 | 110 | filter_chain.append(TextSummaryFilter(**kwargs)) 111 | filter_chain.append(HtmlSummaryFilter(**kwargs)) 112 | 113 | super(AnalyzeFilter, self).__init__(filter_chain, **kwargs) 114 | 115 | def _on_get_argument_parser(self): 116 | """Returns an ArgumentParser with arguments for just this OutputFilter (not the contained chained OutputFilters). 
117 | 118 | Returns: 119 | An `argparse.ArgumentParser` 120 | """ 121 | parser = ArgumentParser() 122 | group = parser.add_argument_group('AnalyzeFilter') 123 | group.add_argument( 124 | '--readout', dest='readout', action='store_true', default=False, 125 | help='[OPTIONAL] Skip the analysis and just output really readable analysis', 126 | ) 127 | group.add_argument( 128 | '--no-opendns', dest='no_opendns', action='store_true', default=False, 129 | help='[OPTIONAL] Don\'t run OpenDNS filters', 130 | ) 131 | group.add_argument( 132 | '--no-virustotal', dest='no_virustotal', action='store_true', default=False, 133 | help='[OPTIONAL] Don\'t run VirusTotal filters', 134 | ) 135 | group.add_argument( 136 | '--no-shadowserver', dest='no_shadowserver', action='store_true', default=False, 137 | help='[OPTIONAL] Don\'t run ShadowServer filters', 138 | ) 139 | group.add_argument( 140 | '--no-alexa', dest='no_alexa', action='store_true', default=False, 141 | help='[OPTIONAL] Don\'t run AlexaRanking filters', 142 | ) 143 | group.add_argument( 144 | '-M', '--monochrome', dest='monochrome', action='store_true', default=False, 145 | help='[OPTIONAL] Output monochrome analysis', 146 | ) 147 | group.add_argument( 148 | '--show-signature-chain', dest='show_signature_chain', action='store_true', default=False, 149 | help='[OPTIONAL] Output unsigned startup items and kexts.', 150 | ) 151 | group.add_argument( 152 | '--show-browser-ext', dest='show_browser_ext', action='store_true', default=False, 153 | help='[OPTIONAL] Output the list of installed browser extensions.', 154 | ) 155 | group.add_argument( 156 | '-t', '--text', dest='text_output_file', default=None, 157 | help='[OPTIONAL] Path to the output file where summary in plain text format will be written to.', 158 | ) 159 | group.add_argument( 160 | '-w', '--html', dest='html_output_file', default=None, 161 | help='[OPTIONAL] Path to the output file where summary in HTML format will be written to.', 162 | ) 163 | 
group.add_argument( 164 | '-c', '--group-by-iocs', dest='group_by_iocs', action='store_true', default=False, 165 | help='[OPTIONAL] Summarize the output grouped by IOCs instead of by threat indicators.', 166 | ) 167 | group.add_argument( 168 | '-k', '--group-key', dest='group_key', default=None, 169 | help='[OPTIONAL] If sorting by IOCs, select which key to group by (sha1/sha2/domain)', 170 | ) 171 | return parser 172 | 173 | @staticmethod 174 | def include_in_summary(blob): 175 | _KEYS_FOR_SUMMARY = [ 176 | 'osxcollector_vthash', 177 | 'osxcollector_vtdomain', 178 | 'osxcollector_opendns', 179 | 'osxcollector_blacklist', 180 | 'osxcollector_related', 181 | ] 182 | 183 | return any([key in blob for key in _KEYS_FOR_SUMMARY]) 184 | 185 | @staticmethod 186 | def lookup_when_not_in_shadowserver(blob): 187 | """ShadowServer whitelists blobs that can be ignored.""" 188 | return 'osxcollector_shadowserver' not in blob 189 | 190 | @staticmethod 191 | def lookup_domains_in_vt_when(blob): 192 | """VT domain lookup is a final step and what to lookup is dependent upon what has been found so far.""" 193 | return AnalyzeFilter.lookup_when_not_in_shadowserver(blob) and AnalyzeFilter.include_in_summary(blob) 194 | 195 | @staticmethod 196 | def find_related_when(blob): 197 | """When to find related terms or domains. 198 | 199 | Stuff in ShadowServer is not interesting. 200 | Blacklisted file paths are worth investigating. 201 | Files where the md5 could not be calculated are also interesting. Root should be able to read files. 202 | Files with a bad hash in VT are obviously malware, go find related bad stuff. 
203 | 204 | Args: 205 | blob - a line of output from OSXCollector 206 | Returns: 207 | boolean 208 | """ 209 | if 'osxcollector_shadowserver' in blob: 210 | return False 211 | if '' == blob.get('md5', None): 212 | return True 213 | return any([key in blob for key in ['osxcollector_vthash', 'osxcollector_related']]) 214 | 215 | 216 | def main(): 217 | run_filter_main(AnalyzeFilter) 218 | 219 | 220 | if __name__ == '__main__': 221 | main() 222 | -------------------------------------------------------------------------------- /tests/output_filters/data/cache.virustotal.LookupHashesFilter.json: -------------------------------------------------------------------------------- 1 | {"virustotal-file-reports": {"b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7": {"scan_id": "b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7-1273894724", "sha256": "b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": false, "result": null, "version": "7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": false, "result": null, "version": "10.00", "update": "20100514"}, "nProtect": {"detected": false, "result": null, "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": false, "result": null, "version": "5.0.27.0", "update": "20100514"}, "NOD32": {"detected": false, "result": null, "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": false, "result": null, "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": false, "result": null, "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": false, "result": null, "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": false, "result": null, "version": "6.04.12", "update": 
"20100514"}, "Avast": {"detected": false, "result": null, "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": false, "result": null, "version": "4842", "update": "20100515"}, "DrWeb": {"detected": false, "result": null, "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": false, "result": null, "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": false, "result": null, "version": "7.0.17.0", "update": "20100513"}, "Sophos": {"detected": false, "result": null, "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": false, "result": null, "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": false, "result": null, "version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": false, "result": null, "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": false, "result": null, "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": false, "result": null, "version": "20101.1.0.89", "update": "20100515"}}, "response_code": 0, "total": 40, "resource": "bd34339415ce6a7d692c90779993dd6f", "scan_date": "2015-01-23 16:23:00", "md5": "bd34339415ce6a7d692c90779993dd6f", "permalink": "https://www.virustotal.com/file/b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7/analysis/1273894724/", "sha1": "2a27c19560f7ad8017d79c1eb8eb2c91fffb9291", "positives": 0, "verbose_msg": "Scan finished, scan information embedded in this object"}, "6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360": {"scan_id": "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c-1273894724", "sha256": 
"6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": true, "result": "Trojan.Generic.3611249", "version": "7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": true, "result": "Trojan.VB.acgy", "version": "10.00", "update": "20100514"}, "nProtect": {"detected": true, "result": "Trojan.Generic.3611249", "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": true, "result": "Trojan.VB.JFDE", "version": "5.0.27.0", "update": "20100514"}, "NOD32": {"detected": true, "result": "a variant of Win32/Qhost.NTY", "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": true, "result": "Win32/ASuspect.HDBBD", "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": true, "result": "Generic.dx!rkx", "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": true, "result": "TR/VB.acgy.1", "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": true, "result": "W32/Smalltroj.YFHZ", "version": "6.04.12", "update": "20100514"}, "Avast": {"detected": true, "result": "Win32:Malware-gen", "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": true, "result": "Heur.Suspicious", "version": "4842", "update": "20100515"}, "DrWeb": {"detected": true, "result": "Trojan.Hosts.37", "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": true, "result": "Trojan/VB.gen", "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": true, "result": "Win32.TRVB.Acgy", "version": "7.0.17.0", "update": "20100513"}, "Sophos": 
{"detected": true, "result": "Troj/VBHost-A", "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": true, "result": "Trojan.Win32.VB.acgy", "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": true, "result": "Generic.dx!rkx", "version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": true, "result": "Trojan/VB.yqh", "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": true, "result": "Trojan.Generic.3611249", "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": true, "result": "Trojan.KillAV", "version": "20101.1.0.89", "update": "20100515"}}, "response_code": 1, "total": 40, "resource": "06506cc06cf0167ea583de62c98eae2c", "scan_date": "2010-05-15 03:38:44", "md5": "06506cc06cf0167ea583de62c98eae2c", "permalink": "https://www.virustotal.com/file/6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360/analysis/1273894724/", "sha1": "92e3750a9f0eef6290dd83867eff88064e9c01bb", "positives": 40, "verbose_msg": "Scan finished, scan information embedded in this object"}, "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c": {"scan_id": "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c-1273894724", "sha256": "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": false, "result": null, "version": "7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": false, "result": null, "version": "10.00", "update": "20100514"}, "nProtect": {"detected": false, "result": null, "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": false, "result": null, "version": "5.0.27.0", 
"update": "20100514"}, "NOD32": {"detected": false, "result": "a variant of Win32/Qhost.NTY", "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": false, "result": null, "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": false, "result": null, "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": false, "result": null, "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": false, "result": null, "version": "6.04.12", "update": "20100514"}, "Avast": {"detected": false, "result": null, "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": false, "result": null, "version": "4842", "update": "20100515"}, "DrWeb": {"detected": false, "result": null, "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": false, "result": null, "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": false, "result": null, "version": "7.0.17.0", "update": "20100513"}, "Sophos": {"detected": false, "result": null, "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": false, "result": null, "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": false, "result": null, "version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": false, "result": null, "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": false, "result": null, "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": false, "result": null, "version": "20101.1.0.89", "update": "20100515"}}, "response_code": 0, "total": 40, "resource": "99017f6eebbac24f351415dd410d522d", "scan_date": "2010-05-15 03:38:44", "md5": 
"99017f6eebbac24f351415dd410d522d", "permalink": "https://www.virustotal.com/file/52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c/analysis/1273894724/", "sha1": "4d1740485713a2ab3a4f5822a01f645fe8387f92", "positives": 0, "verbose_msg": "Scan finished, scan information embedded in this object"}, "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302": {"scan_id": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302-1273894724", "sha256": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": true, "result": "Trojan.Generic.3611249", "version": "7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": true, "result": "Trojan.VB.acgy", "version": "10.00", "update": "20100514"}, "nProtect": {"detected": true, "result": "Trojan.Generic.3611249", "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": true, "result": "Trojan.VB.JFDE", "version": "5.0.27.0", "update": "20100514"}, "NOD32": {"detected": true, "result": "a variant of Win32/Qhost.NTY", "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": true, "result": "Win32/ASuspect.HDBBD", "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": true, "result": "Generic.dx!rkx", "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": true, "result": "TR/VB.acgy.1", "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": true, "result": "W32/Smalltroj.YFHZ", "version": "6.04.12", "update": "20100514"}, "Avast": {"detected": true, "result": "Win32:Malware-gen", "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": true, "result": "Heur.Suspicious", "version": "4842", "update": "20100515"}, "DrWeb": {"detected": true, "result": 
"Trojan.Hosts.37", "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": true, "result": "Trojan/VB.gen", "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": true, "result": "Win32.TRVB.Acgy", "version": "7.0.17.0", "update": "20100513"}, "Sophos": {"detected": true, "result": "Troj/VBHost-A", "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": true, "result": "Trojan.Win32.VB.acgy", "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": true, "result": "Generic.dx!rkx", "version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": true, "result": "Trojan/VB.yqh", "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": true, "result": "Trojan.Generic.3611249", "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": true, "result": "Trojan.KillAV", "version": "20101.1.0.89", "update": "20100515"}}, "response_code": 1, "total": 40, "resource": "0c71d8cedc8bbb2b619a76d1478c4348", "scan_date": "2015-01-15 16:42:01", "md5": "0c71d8cedc8bbb2b619a76d1478c4348", "permalink": "https://www.virustotal.com/file/b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302/analysis/1273894724/", "sha1": "da9b79f2fd33d002033b69a9a346af4671a9e16b", "positives": 40, "verbose_msg": "Scan finished, scan information embedded in this object"}}} -------------------------------------------------------------------------------- /osxcollector/output_filters/chrome/sort_history.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # SortHistoryFilter creates a clean 
# sorted Chrome browser history and tags lines with {'osxcollector_browser_history': 'chrome'}
#
from __future__ import absolute_import
from __future__ import unicode_literals

import six

from osxcollector.output_filters.base_filters.output_filter import OutputFilter
from osxcollector.output_filters.base_filters.output_filter import run_filter_main


class SortHistoryFilter(OutputFilter):

    """Joins Chrome browser history 'visits' and 'urls' tables, producing a time sorted browser history.

    In the output look for lines where:
    ('osxcollector_section' == 'chrome' and 'osxcollector_subsection' == 'history' and 'osxcollector_table_name' == 'visits')
    for some snazzy browser history stuff.
    """

    def __init__(self, **kwargs):
        super(SortHistoryFilter, self).__init__(**kwargs)

        # Rows cached by their 'id' column until end_of_lines() joins the two tables.
        self._visits_table = dict()
        self._urls_table = dict()

    def filter_line(self, blob):
        """Cache the 'visits' and 'urls' tables.

        Args:
            blob: A dict representing one line of OSXCollector output.
        Returns:
            The blob unchanged, or None when the line was consumed into a cache.
        """
        if 'chrome' == blob.get('osxcollector_section') and 'history' == blob.get('osxcollector_subsection'):
            table = blob.get('osxcollector_table_name')

            if 'visits' == table:
                if self._validate_visit(blob):
                    self._visits_table[blob['id']] = blob
                    blob = None  # Consume the line
            elif 'urls' == table:
                if self._validate_urls(blob):
                    self._urls_table[blob['id']] = blob
                    blob = None  # Consume the line

        return blob

    def end_of_lines(self):
        """Join the 'visits' and 'urls' tables into a single browser history and timeline.

        Returns:
            A list of record dicts sorted by 'last_visit_time', newest first.
        """
        history = list()

        for visit in six.itervalues(self._visits_table):
            # A visit's 'url' column is a foreign key into the 'urls' table's 'id' column.
            url = self._urls_table.get(visit.get('url'))
            if url:
                record = {
                    'url': url['url'].encode('utf-8') if six.PY2 else url['url'],
                    # BUGFIX: the Python 3 branch previously read url['url'] here,
                    # replacing every page title with the page URL.
                    'title': url['title'].encode('utf-8') if six.PY2 else url['title'],
                    'last_visit_time': url['last_visit_time'],
                    'visit_time': visit['visit_time'],
                    'core_transition': self.PAGE_TRANSITION.get_core_transition(visit['transition']),
                    'page_transition': self.PAGE_TRANSITION.get_qualifier_transitions(visit['transition']),
                    'osxcollector_browser_history': 'chrome',
                }

                # Add all the OSXCollector specific keys to the record
                for key in visit:
                    if key.startswith('osxcollector_'):
                        record[key] = visit[key]
                for key in url:
                    if key.startswith('osxcollector_') and key not in record:
                        record[key] = url[key]

                history.append(record)

        return sorted(history, key=lambda x: x['last_visit_time'], reverse=True)

    @classmethod
    def _validate_visit(cls, blob):
        """Does the visit dict have the required fields?

        Args:
            blob: a visit dict
        Returns:
            boolean
        """
        required_fields = ['id', 'url', 'visit_time', 'transition']
        return all(field in blob for field in required_fields)

    @classmethod
    def _validate_urls(cls, blob):
        """Does the url dict have the required fields?

        Args:
            blob: a url dict
        Returns:
            boolean
        """
        required_fields = ['id', 'url', 'title', 'last_visit_time']
        return all(field in blob for field in required_fields)

    class PAGE_TRANSITION:

        """Constants that detail page transitions in the Chrome 'visits' table.

        These constants comes from:
        _<https://chromium.googlesource.com/chromium/src/+/HEAD/ui/base/page_transition_types.h>
        """
        # User got to this page by clicking a link on another page.
        CORE_LINK = 0

        # User got this page by typing the URL in the URL bar. This should not be
        # used for cases where the user selected a choice that didn't look at all
        # like a URL; see GENERATED below.
        #
        # We also use this for other "explicit" navigation actions.
        CORE_TYPED = 1

        # User got to this page through a suggestion in the UI, for example,
        # through the destinations page.
        CORE_AUTO_BOOKMARK = 2

        # This is a subframe navigation. This is any content that is automatically
        # loaded in a non-toplevel frame. For example, if a page consists of
        # several frames containing ads, those ad URLs will have this transition
        # type. The user may not even realize the content in these pages is a
        # separate frame, so may not care about the URL (see MANUAL below).
        CORE_AUTO_SUBFRAME = 3

        # For subframe navigations that are explicitly requested by the user and
        # generate new navigation entries in the back/forward list. These are
        # probably more important than frames that were automatically loaded in
        # the background because the user probably cares about the fact that this
        # link was loaded.
        CORE_MANUAL_SUBFRAME = 4

        # User got to this page by typing in the URL bar and selecting an entry
        # that did not look like a URL. For example, a match might have the URL
        # of a Google search result page, but appear like "Search Google for ...".
        # These are not quite the same as TYPED navigations because the user
        # didn't type or see the destination URL.
        # See also KEYWORD.
        CORE_GENERATED = 5

        # The page was specified in the command line or is the start page.
        CORE_START_PAGE = 6

        # The user filled out values in a form and submitted it. NOTE that in
        # some situations submitting a form does not result in this transition
        # type. This can happen if the form uses script to submit the contents.
        CORE_FORM_SUBMIT = 7

        # The user "reloaded" the page, either by hitting the reload button or by
        # hitting enter in the address bar. NOTE: This is distinct from the
        # concept of whether a particular load uses "reload semantics" (i.e.
        # bypasses cached data). For this reason, lots of code needs to pass
        # around the concept of whether a load should be treated as a "reload"
        # separately from their tracking of this transition type, which is mainly
        # used for proper scoring for consumers who care about how frequently a
        # user typed/visited a particular URL.
        #
        # SessionRestore and undo tab close use this transition type too.
        CORE_RELOAD = 8

        # The url was generated from a replaceable keyword other than the default
        # search provider. If the user types a keyword (which also applies to
        # tab-to-search) in the omnibox this qualifier is applied to the transition
        # type of the generated url. TemplateURLModel then may generate an
        # additional visit with a transition type of KEYWORD_GENERATED against the
        # url 'http:#' + keyword. For example, if you do a tab-to-search against
        # wikipedia the generated url has a transition qualifer of KEYWORD, and
        # TemplateURLModel generates a visit for 'wikipedia.org' with a transition
        # type of KEYWORD_GENERATED.
        CORE_KEYWORD = 9

        # Corresponds to a visit generated for a keyword. See description of
        # KEYWORD for more details.
        CORE_KEYWORD_GENERATED = 10

        # The core transition lives in the low byte of the 'transition' value.
        CORE_MASK = 0xFF

        @classmethod
        def get_core_transition(cls, value):
            """Translates a numeric page transition into a human readable description.

            Args:
                value: A numeric value represented as a Number or String

            Returns:
                A string
            """
            try:
                value = int(value) & cls.CORE_MASK
            except (TypeError, ValueError):
                # TypeError added so a None/non-numeric transition degrades to the
                # existing 'ERROR' sentinel instead of raising mid-pipeline.
                return 'ERROR'

            if cls.CORE_LINK == value:
                return 'link'
            elif cls.CORE_TYPED == value:
                return 'typed'
            elif cls.CORE_AUTO_BOOKMARK == value:
                return 'auto_bookmark'
            elif cls.CORE_AUTO_SUBFRAME == value:
                return 'auto_subframe'
            elif cls.CORE_MANUAL_SUBFRAME == value:
                return 'manual_subframe'
            elif cls.CORE_GENERATED == value:
                return 'generated'
            elif cls.CORE_START_PAGE == value:
                return 'start_page'
            elif cls.CORE_FORM_SUBMIT == value:
                return 'form_submit'
            elif cls.CORE_RELOAD == value:
                return 'reload'
            elif cls.CORE_KEYWORD == value:
                return 'keyword'
            elif cls.CORE_KEYWORD_GENERATED == value:
                # NOTE(review): maps to the same label as CORE_GENERATED —
                # presumably intentional; confirm before changing.
                return 'generated'
            return 'UNKNOWN'

        # A managed user attempted to visit a URL but was blocked.
        QUALIFIER_BLOCKED = 0x00800000

        # User used the Forward or Back button to navigate among browsing history.
        QUALIFIER_FORWARD_BACK = 0x01000000

        # User used the address bar to trigger this navigation.
        QUALIFIER_FROM_ADDRESS_BAR = 0x02000000

        # User is navigating to the home page.
        QUALIFIER_HOME_PAGE = 0x04000000

        # The beginning of a navigation chain.
        QUALIFIER_CHAIN_START = 0x10000000

        # The last transition in a redirect chain.
        QUALIFIER_CHAIN_END = 0x20000000

        # Redirects caused by JavaScript or a meta refresh tag on the page.
        QUALIFIER_CLIENT_REDIRECT = 0x40000000

        # Redirects sent from the server by HTTP headers. It might be nice to
        # break this out into 2 types in the future, permanent or temporary, if we
        # can get that information from WebKit.
        QUALIFIER_SERVER_REDIRECT = 0x80000000

        # Qualifier flags occupy the bits above the core-transition byte.
        QUALIFIER_MASK = 0xFFFFFF00

        @classmethod
        def get_qualifier_transitions(cls, value):
            """Translates the qualifier bits of a page transition into labels.

            Args:
                value: A numeric value represented as a Number or String

            Returns:
                A list of strings, one per qualifier flag set; empty on bad input.
            """
            qualifiers = []

            try:
                value = int(value) & cls.QUALIFIER_MASK
            except (TypeError, ValueError):
                # Mirror get_core_transition: degrade gracefully on bad input.
                return qualifiers

            if cls.QUALIFIER_BLOCKED & value:
                qualifiers.append('blocked')

            if cls.QUALIFIER_FORWARD_BACK & value:
                qualifiers.append('forward_back')

            if cls.QUALIFIER_FROM_ADDRESS_BAR & value:
                qualifiers.append('from_address_bar')

            if cls.QUALIFIER_HOME_PAGE & value:
                qualifiers.append('home_page')

            if cls.QUALIFIER_CHAIN_START & value:
                qualifiers.append('chain_start')

            if cls.QUALIFIER_CHAIN_END & value:
                qualifiers.append('chain_end')

            if cls.QUALIFIER_CLIENT_REDIRECT & value:
                qualifiers.append('client_redirect')

            if cls.QUALIFIER_SERVER_REDIRECT & value:
                qualifiers.append('server_redirect')

            return qualifiers


def main():
    run_filter_main(SortHistoryFilter)


if __name__ == '__main__':
    main()