├── tests
├── __init__.py
└── output_filters
│ ├── __init__.py
│ ├── alexa
│ ├── __init__.py
│ └── lookup_domains_test.py
│ ├── util
│ ├── __init__.py
│ ├── config_test.py
│ ├── error_messages_test.py
│ ├── domains_test.py
│ └── blacklist_test.py
│ ├── opendns
│ ├── __init__.py
│ ├── lookup_domains_test.py
│ └── related_domains_test.py
│ ├── virustotal
│ ├── __init__.py
│ ├── lookup_domains_test.py
│ └── lookup_hashes_test.py
│ ├── base_filters
│ ├── __init__.py
│ ├── threat_feed_test.py
│ ├── chain_test.py
│ └── output_filter_test.py
│ ├── shadowserver
│ ├── __init__.py
│ └── lookup_hashes_test.py
│ ├── data
│ ├── domains_whitelist.txt
│ ├── domains_blacklist.txt
│ ├── hashes_blacklist.txt
│ ├── opendns
│ │ └── lookup_domains
│ │ │ ├── categorization.json
│ │ │ ├── expected.json
│ │ │ └── security.json
│ ├── cache.shadowserver.LookupHashesFilter.json
│ ├── test_osxcollector_config.yaml
│ ├── cache.virustotal.LookupDomainsFilter.json
│ └── cache.virustotal.LookupHashesFilter.json
│ ├── find_blacklisted_test.py
│ ├── run_filter_test.py
│ ├── find_domains_test.py
│ └── related_files_test.py
├── .deactivate.sh
├── .activate.sh
├── osxcollector
├── output_filters
│ ├── alexa
│ │ ├── __init__.py
│ │ └── lookup_rankings.py
│ ├── chrome
│ │ ├── __init__.py
│ │ ├── find_extensions.py
│ │ └── sort_history.py
│ ├── firefox
│ │ ├── __init__.py
│ │ ├── find_extensions.py
│ │ └── sort_history.py
│ ├── opendns
│ │ ├── __init__.py
│ │ ├── lookup_domains.py
│ │ └── related_domains.py
│ ├── util
│ │ ├── __init__.py
│ │ ├── error_messages.py
│ │ ├── config.py
│ │ ├── domains.py
│ │ ├── dict_utils.py
│ │ └── blacklist.py
│ ├── base_filters
│ │ ├── __init__.py
│ │ ├── chain.py
│ │ ├── threat_feed.py
│ │ └── output_filter.py
│ ├── shadowserver
│ │ ├── __init__.py
│ │ └── lookup_hashes.py
│ ├── virustotal
│ │ ├── __init__.py
│ │ ├── lookup_hashes.py
│ │ ├── lookup_urls.py
│ │ └── lookup_domains.py
│ ├── summary_filters
│ │ ├── __init__.py
│ │ ├── summary.py
│ │ └── text.py
│ ├── __init__.py
│ ├── exceptions.py
│ ├── find_blacklisted.py
│ ├── related_files.py
│ ├── find_domains.py
│ └── analyze.py
└── __init__.py
├── MANIFEST.in
├── requirements.txt
├── requirements-dev.txt
├── .gitignore
├── Makefile
├── LICENSE.md
├── .coveragerc
├── .secrets.baseline
├── tox.ini
├── setup.py
├── .travis.yml
├── .pre-commit-config.yaml
└── osxcollector.yaml.example
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.deactivate.sh:
--------------------------------------------------------------------------------
1 | deactivate
2 |
--------------------------------------------------------------------------------
/tests/output_filters/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/output_filters/alexa/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/output_filters/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.activate.sh:
--------------------------------------------------------------------------------
# Sourced (e.g. by aactivator) to enter the project virtualenv.
# The leading '.' runs activate in the *current* shell so the PATH and
# VIRTUAL_ENV changes persist; invoking it as a plain command would run it
# in a subprocess and have no effect.
. virtualenv_run/bin/activate
--------------------------------------------------------------------------------
/tests/output_filters/opendns/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/output_filters/virustotal/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/alexa/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/chrome/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/firefox/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/opendns/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/output_filters/base_filters/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/output_filters/shadowserver/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/base_filters/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/shadowserver/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/virustotal/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE.md
3 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/summary_filters/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/output_filters/data/domains_whitelist.txt:
--------------------------------------------------------------------------------
1 | yelp.com
2 | yelp.co.uk
3 |
--------------------------------------------------------------------------------
/tests/output_filters/data/domains_blacklist.txt:
--------------------------------------------------------------------------------
1 | example.com
2 | example.co.uk
3 | example.org
4 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | PyYAML==5.1
2 | simplejson==3.10.0
3 | six==1.12.0
4 | threat_intel==0.1.29
5 | tldextract==2.0.2
6 |
--------------------------------------------------------------------------------
/osxcollector/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | coverage==4.5.3
3 | flake8==3.7.7
4 | mock==2.0.0
5 | pre-commit>=1.0.0
6 | pytest==4.4.0
7 | tox==3.8.6
8 |
--------------------------------------------------------------------------------
/tests/output_filters/data/hashes_blacklist.txt:
--------------------------------------------------------------------------------
1 | ffff5f60462c38b1d235cb3509876543
2 | ffff234d2a50a42a87389f1234561a21
3 | ffff51e77b442ee23188d87e4abcdef0
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .tox/
3 | *.egg-info/
4 | .DS_Store
5 | .idea/
6 | config.yaml
7 | osxcollector.yaml
8 | virtualenv_run/
9 | .coverage
10 | dist/
11 | sdist/
12 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | import logging
6 |
# Suppress output from tldextract module.
# Attaching a NullHandler ensures tldextract's logger always has at least one
# handler, which silences the "No handlers could be found for logger" warning
# without forcing any log output to stderr.
logging.getLogger('tldextract').addHandler(logging.NullHandler())
9 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .DELETE_ON_ERROR:
2 |
3 | all:
4 | echo >&2 "Must specify target."
5 |
6 | test:
7 | tox
8 |
9 | venv:
10 | tox -evenv
11 |
12 | install-hooks:
13 | tox -e pre-commit -- install -f --install-hooks
14 |
15 | clean:
16 | rm -rf build/ dist/ osxcollector_output_filters.egg-info/ .tox/ virtualenv_run/
17 | find . -name '*.pyc' -delete
18 | find . -name '__pycache__' -delete
19 |
20 | .PHONY: all test venv install-hooks clean
21 |
--------------------------------------------------------------------------------
/tests/output_filters/data/opendns/lookup_domains/categorization.json:
--------------------------------------------------------------------------------
1 | {
2 | "bango.com": {
3 | "status": 1,
4 | "content_categories": [
5 | "Search Engines",
6 | "Business Services",
7 | "Research/Reference"
8 | ],
9 | "security_categories": []
10 | },
11 | "dango.com": {
12 | "status": 0,
13 | "content_categories": [
14 | "Phishing"
15 | ],
16 | "security_categories": []
17 | },
18 | "dingo.com": null,
19 | "bingo.com": null
20 | }
21 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # All exceptions thrown by the osxcollector.output_filters module
4 | #
5 | from __future__ import absolute_import
6 | from __future__ import unicode_literals
7 |
8 |
class OutputFilterError(Exception):

    """Base class for all errors raised by osxcollector.output_filters."""


class MissingConfigError(OutputFilterError):

    """An error to throw when configuration is missing"""


class BadDomainError(OutputFilterError):

    """An error to throw when a domain is invalid."""
23 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
2 |
3 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
4 |
5 | You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.
6 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | source =
4 | .
5 | omit =
6 | .tox/*
7 | virtualenv_run/*
8 | setup.py
9 |
10 | [report]
11 | show_missing = True
12 | skip_covered = True
13 |
14 | exclude_lines =
15 | # Have to re-enable the standard pragma
16 | \#\s*pragma: no cover
17 |
18 | # Don't complain if tests don't hit defensive assertion code:
19 | ^\s*raise AssertionError\b
20 | ^\s*raise NotImplementedError\b
21 | ^\s*return NotImplemented\b
22 | ^\s*raise$
23 |
24 | # Don't complain if non-runnable code isn't run:
25 | ^if __name__ == ['"]__main__['"]:$
26 |
27 | [html]
28 | directory = coverage-html
29 |
30 | # vim:ft=dosini
31 |
--------------------------------------------------------------------------------
/tests/output_filters/data/opendns/lookup_domains/expected.json:
--------------------------------------------------------------------------------
1 | [[{
2 | "domain": "dango.com",
3 | "categorization": {
4 | "status": 0,
5 | "content_categories": [
6 | "Phishing"
7 | ],
8 | "security_categories": [],
9 | "suspicious": true
10 | },
11 | "link": "https://investigate.opendns.com/domain-view/name/dango.com/view",
12 | "security": {
13 | "dga_score": 0,
14 | "asn_score": -0.1608560065526172,
15 | "rip_score": 0,
16 | "securerank2": 0.04721624022600212,
17 | "prefix_score": 0,
18 | "attack": "",
19 | "found": true,
20 | "threat_type": ""
21 | }
22 | }]]
23 |
--------------------------------------------------------------------------------
/.secrets.baseline:
--------------------------------------------------------------------------------
1 | {
2 | "exclude": {
3 | "files": ".*tests/.*|\\\\.pre-commit-config\\\\.yaml",
4 | "lines": null
5 | },
6 | "generated_at": "2019-04-05T11:02:14Z",
7 | "plugins_used": [
8 | {
9 | "base64_limit": 4.5,
10 | "name": "Base64HighEntropyString"
11 | },
12 | {
13 | "hex_limit": 3,
14 | "name": "HexHighEntropyString"
15 | },
16 | {
17 | "name": "PrivateKeyDetector"
18 | }
19 | ],
20 | "results": {
21 | ".travis.yml": [
22 | {
23 | "hashed_secret": "468b2bbbf250e477bd35e81cafa3bf8c7ce36285",
24 | "line_number": 19,
25 | "type": "Base64 High Entropy String"
26 | }
27 | ]
28 | },
29 | "version": "0.12.2"
30 | }
31 |
--------------------------------------------------------------------------------
/tests/output_filters/base_filters/threat_feed_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter
6 | from tests.output_filters.run_filter_test import RunFilterTest
7 |
8 |
class TestThreatFeedFilter(RunFilterTest):
    """Tests that the base ThreatFeedFilter passes blobs through unchanged."""

    def test_run_threat_feed_filter(self):
        # expected_output_blobs == input_blobs: the base feed filter is a pass-through.
        input_blobs = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]
        self.run_test(lambda: ThreatFeedFilter('dinky', 'feed_test'), input_blobs, expected_output_blobs=input_blobs)
17 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | project = osxcollector_output_filters
3 | envlist = py27,py36
4 | tox_pip_extensions_ext_pip_custom_platform = true
5 | tox_pip_extensions_ext_venv_update = true
6 |
7 | [testenv]
8 | deps = -rrequirements-dev.txt
9 | commands =
10 | flake8 .
11 | {envpython} --version
12 | coverage --version
13 | coverage run -m pytest --strict {posargs:tests}
   14 |     coverage report --show-missing
15 |
16 | [testenv:pre-commit]
17 | deps = pre-commit>=1.0.0
18 | commands = pre-commit run --all-files
19 |
20 | [testenv:venv]
21 | envdir = virtualenv_run
22 | basepython = python3.6
23 | commands =
24 |
25 | [flake8]
26 | exclude = .git,__pycache__,.tox,virtualenv_run
27 | # E501 - long lines
28 | ignore = E501
29 | max_line_length = 140
30 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/util/error_messages.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # A set of simple methods for writing messages to stderr
4 | #
5 | from __future__ import absolute_import
6 | from __future__ import unicode_literals
7 |
8 | import sys
9 | from traceback import extract_tb
10 | from traceback import format_list
11 |
12 |
def write_exception(e):
    """Write an exception's type, message, and traceback to stderr.

    Must be called from inside an ``except`` block: the traceback comes from
    ``sys.exc_info()``, not from the exception object itself.

    Args:
        e: the exception instance currently being handled
    """
    exc_type, _, exc_traceback = sys.exc_info()
    details = ', '.join(str(arg) for arg in e.args)
    sys.stderr.write('[ERROR] {0} {1}\n'.format(exc_type.__name__, details))
    sys.stderr.writelines(format_list(extract_tb(exc_traceback)))
19 |
20 |
def write_error_message(message):
    """Write a single '[ERROR] <message>' line to stderr.

    Args:
        message: string to write after the '[ERROR] ' prefix
    """
    sys.stderr.write('[ERROR] {0}\n'.format(message))
--------------------------------------------------------------------------------
/tests/output_filters/base_filters/chain_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.base_filters.chain import ChainFilter
6 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
7 | from tests.output_filters.run_filter_test import RunFilterTest
8 |
9 |
class TestChainFilter(RunFilterTest):
    """Tests that chaining two pass-through OutputFilters leaves blobs unchanged."""

    def test_run_chain_filter(self):
        # Two no-op filters chained together should still be a no-op overall.
        input_blobs = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]
        self.run_test(lambda: ChainFilter([OutputFilter(), OutputFilter()]), input_blobs=input_blobs, expected_output_blobs=input_blobs)
18 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from setuptools import find_packages
3 | from setuptools import setup
4 |
5 |
# The long description shown on PyPI is taken verbatim from the README.
with open('README.md', 'r') as fh:
    long_description = fh.read()

setup(
    name='osxcollector_output_filters',
    version='1.1.1',
    author='Yelp Security',
    author_email='opensource@yelp.com',
    description='Filters that process and transform the output of OSXCollector',
    long_description=long_description,
    long_description_content_type='text/markdown',
    license='GNU General Public License',
    url='https://github.com/Yelp/osxcollector_output_filters',
    setup_requires='setuptools',
    # 'tests' alone only excludes the top-level package; 'tests.*' is needed
    # so sub-packages (tests.output_filters, ...) don't ship in the wheel.
    packages=find_packages(exclude=['tests', 'tests.*']),
    provides=['osxcollector'],
    install_requires=[
        'PyYAML>=5.0',
        'threat_intel',
        'tldextract',
        'simplejson',
        'six',
    ],
)
30 |
--------------------------------------------------------------------------------
/tests/output_filters/data/cache.shadowserver.LookupHashesFilter.json:
--------------------------------------------------------------------------------
1 | {"shadowserver-bin-test": {"5d87de61cb368c93325dd910c202b8647f8e90ed": {"os_version": "10.10", "filesize": "48976", "reference": "os_all", "sha1": "5D87DE61CB368C93325DD910C202B8647F8E90ED", "dirname": "/System/Library/Extensions/System.kext/PlugIns/Libkern.kext", "binary": "1", "sha256": "1FAFE48F626FDC030B0A0EFC1008D51CD3078D1B3EC95F808D12AFBFEF458B23", "filetimestamp": "09/19/2014 00:42:35", "source": "MacAppInfo", "sha512": "C1CAEB26F892FE3C00B3B6BAB462058C772F91824092BF9B2E183F66D885278B6F0C6DA65D06994A45166501F1A889E38D5D234AE18ECBD2EF3CFD9F4388DC8F", "language": "English", "md5": "6746005C822CEB6737B871698D3ED22F", "bit": "64", "filename": "Libkern", "os_name": "Mac OS X 10.10 (build 14A389)", "application_type": "Mach-O 64-bit kext bundle x86_64", "crc32": "5332564F", "os_mfg": "Apple Inc.", "source_version": "1.1"}, "816a85d89ae34d2dc73b8c768eecb03935c568ba": {"sha1": "816a85d89ae34d2dc73b8c768eecb03935c568ba"}}}
--------------------------------------------------------------------------------
/tests/output_filters/util/config_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | import pytest
6 | from mock import patch
7 |
8 | from osxcollector.output_filters.util.config import config_get_deep
9 |
10 |
class TestCreateBlacklist:
    # NOTE(review): these tests exercise config_get_deep, not blacklists --
    # the class name looks copy-pasted; consider renaming to TestConfigGetDeep.

    @pytest.fixture(scope='module', autouse=True)
    def patched_config(self):
        """Patch _read_config so every test in this module reads a fixed in-memory dict."""
        config_initial_contents = {
            'a': 'b',
            'c': {'d': 'e'},
            'f': 1,
            'g': ['apple', 'banana'],
        }
        with patch('osxcollector.output_filters.util.config._read_config', return_value=config_initial_contents):
            yield

    def test_read_top_level_key(self):
        assert config_get_deep('a') == 'b'

    def test_read_multi_level_key(self):
        # Dotted keys traverse nested dicts.
        assert config_get_deep('c.d') == 'e'

    def test_numeric_val(self):
        assert config_get_deep('f') == 1

    def test_list_val(self):
        assert config_get_deep('g') == ['apple', 'banana']
35 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | matrix:
3 | include:
4 | - env: TOXENV=py27
5 | python: '2.7'
6 | - env: TOXENV=py36
7 | python: '3.6'
8 | - env: TOXENV=pre-commit
9 |
10 | install: sudo pip install tox-travis
11 |
12 | script: tox
13 | deploy:
14 | provider: pypi
15 | user: yelplabs
16 | skip_existing: true
17 | distributions: "sdist bdist_wheel"
18 | password:
19 | secure: WMygawcYeJGbe4cJlQECKKEZJLYsirgutlihp8Yn4iAKRjpDFmOTwB0B8NjaYsB9pBvz7MLz913ukIhzsHhZLMYE6GRpwjiGfaSXupC4zDVkdi14KPJIo7dff/1p0rGGtZmYa/iohC/HDgbF4iXcBcwdzrvDBDqwPFaM/5J4LxF+KunXCVopsmQTkBEsMNz/K55By3xCO3qxupixTDYy+VOVv3F6Bs8hChqKmql9vvi2ZZPVq9y7io13T7JREKKv8ZOyIq+AGXtrZvnzVuNPfW6PE3eBUv2BUy0xEuwsqjX4goQ0bzRiVKS4XdIl8HmZD/aj2mkaXSw6HLST4+/+im2uNVIPHTEfyqDgUwtZInQF9zML3wGANGIfS+z1ZKfirSO0DiTKpFMkbiM5K0D+VRKFChblCOqQ5WiU9jhcLrHDHED7aLT7pIAlEcizeTpTwF9ZR5Eg48wB59A3q5b6aTsw1t9Q2kIyEnXHgf3JUGtRu0BO2ATGL324Dkzoa8DN2CeZ0F3fWCaI7gqFssrytDPwG+ct17yJkLQXnRTemFm/fCw9YC+onSufa+pV2qlRZRFywifgwIaAwL7hXC4w2qF4d5RzEOtCZjlmOBCLZ6/r/dwhnS+bBdHSuxxcfV1WB5RZLemqNJfbRdQheM0Ld6tzqZZRi5SB7gohnLEJWlc=
20 | on:
21 | tags: true
22 | repo: Yelp/osxcollector_output_filters
23 | branch: master
24 |
--------------------------------------------------------------------------------
/tests/output_filters/base_filters/output_filter_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
6 | from tests.output_filters.run_filter_test import RunFilterTest
7 |
8 |
class TestOutputFilter(RunFilterTest):
    """Tests the base OutputFilter, which passes data through unmodified."""

    def test_filter_line(self):
        # filter_line on the base class returns each blob as-is.
        input_blobs = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]
        output_filter = OutputFilter()
        for blob in input_blobs:
            output = output_filter.filter_line(blob)
            assert output == blob

    def test_end_of_lines(self):
        # The base class has nothing buffered, so end_of_lines yields no extra blobs.
        output_filter = OutputFilter()
        output = output_filter.end_of_lines()
        assert output == []

    def test_run_output_filter(self):
        # Running the whole filter end-to-end yields the input unchanged.
        input_blobs = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]
        self.run_test(OutputFilter, input_blobs, expected_output_blobs=input_blobs)
32 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | exclude: '^tests/output_filters/data/.*$'
2 | repos:
    3 | -   repo: https://github.com/pre-commit/pre-commit-hooks
4 | rev: v2.1.0
5 | hooks:
6 | - id: check-json
7 | - id: check-yaml
8 | - id: end-of-file-fixer
9 | - id: trailing-whitespace
10 | - id: name-tests-test
11 | - id: requirements-txt-fixer
12 | - id: double-quote-string-fixer
13 | - id: flake8
14 | - id: fix-encoding-pragma
   15 | -   repo: https://github.com/pre-commit/mirrors-autopep8
16 | rev: v1.4.3
17 | hooks:
18 | - id: autopep8
   19 | -   repo: https://github.com/asottile/reorder_python_imports
20 | rev: v1.4.0
21 | hooks:
22 | - id: reorder-python-imports
23 | args: [
24 | '--add-import', 'from __future__ import absolute_import',
25 | '--add-import', 'from __future__ import unicode_literals',
26 | ]
27 | exclude: setup.py
   28 | -   repo: https://github.com/asottile/add-trailing-comma
29 | rev: v1.0.0
30 | hooks:
31 | - id: add-trailing-comma
32 | - repo: https://github.com/Yelp/detect-secrets
33 | rev: v0.12.2
34 | hooks:
35 | - id: detect-secrets
36 | args: ['--baseline', '.secrets.baseline']
37 | exclude: .*tests/.*|\.pre-commit-config\.yaml
38 | language_version: python2.7
39 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/util/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Config is a very simplistic class for reading YAML config.
4 | #
5 | from __future__ import absolute_import
6 | from __future__ import unicode_literals
7 |
8 | import os
9 |
10 | import yaml
11 | try:
12 | from yaml import CSafeLoader as SafeLoader
13 | except ImportError:
14 | from yaml import SafeLoader
15 |
16 | from osxcollector.output_filters.exceptions import MissingConfigError
17 | from osxcollector.output_filters.util.dict_utils import DictUtils
18 |
19 |
def config_get_deep(key, default=None):
    """Look up a (possibly dotted) key in the YAML config.

    Args:
        key: Dictionary key to lookup in config
        default: Value to return if key is not found
    Returns:
        Value from config or default if not found otherwise
    """
    config = _read_config()
    return DictUtils.get_deep(config, key, default)
30 |
31 |
def _read_config():
    """Read and parse the osxcollector YAML config file.

    Returns:
        dict of config
    """
    with open(_config_file_path()) as source:
        raw_yaml = source.read()
    # SafeLoader (C-accelerated when available) refuses arbitrary object construction.
    return yaml.load(raw_yaml, Loader=SafeLoader)
40 |
41 |
def _config_file_path():
    """Locate 'osxcollector.yaml' on disk.

    The current directory is tried first, then the user's home directory,
    then the directory named by the OSXCOLLECTOR_CONF environment variable.

    Returns:
        String file path
    Raises:
        MissingConfigError if no config file is found
    """
    search_dirs = (
        os.curdir,
        os.path.expanduser('~'),
        os.environ.get('OSXCOLLECTOR_CONF', ''),
    )
    for directory in search_dirs:
        candidate = os.path.join(directory, 'osxcollector.yaml')
        if os.path.exists(candidate):
            return candidate
    raise MissingConfigError()
55 |
--------------------------------------------------------------------------------
/tests/output_filters/util/error_messages_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.exceptions import BadDomainError
6 | from osxcollector.output_filters.util.error_messages import write_error_message
7 | from osxcollector.output_filters.util.error_messages import write_exception
8 |
9 |
class TestWriteException:
    """Tests the '[ERROR] <type> <message>' stderr format of write_exception."""

    def test_simple_exception(self, capsys):
        # Even a bare Exception gets the '[ERROR]' prefix at the start of stderr.
        try:
            raise Exception()
        except Exception as e:
            write_exception(e)

        output = capsys.readouterr().err
        assert 0 == output.find('[ERROR]')

    def test_specific_exception(self, capsys):
        # The concrete exception class name follows the prefix.
        try:
            raise BadDomainError()
        except Exception as e:
            write_exception(e)

        output = capsys.readouterr().err
        assert output.find('[ERROR] BadDomainError') == 0

    def test_exception_message(self, capsys):
        # The exception's message is appended after the class name.
        try:
            raise BadDomainError('Look for me in validation')
        except Exception as e:
            write_exception(e)

        output = capsys.readouterr().err
        assert output.find('[ERROR] BadDomainError Look for me in validation') == 0
38 |
39 |
class TestWriteErrorMessage:
    """Tests that write_error_message emits exactly one '[ERROR] <message>' line on stderr."""

    def test_write_error_message(self, capsys):
        message = 'Look for me in validation'
        expected = '[ERROR] Look for me in validation\n'

        write_error_message(message)

        output = capsys.readouterr().err
        assert output == expected
50 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/summary_filters/summary.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | import sys
6 | from collections import defaultdict
7 |
8 | import six
9 |
10 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
11 |
12 |
class SummaryFilter(OutputFilter):
    """Base class for summary filters.

    Holds the accumulator lists shared by summary subclasses and manages the
    stream the summary is written to (a path, an already-open stream, or
    stdout by default).
    """

    def __init__(self, show_signature_chain=False, show_browser_ext=False, summary_output_file=None, group_by_iocs=False, group_key=None, **kwargs):
        super(SummaryFilter, self).__init__(**kwargs)
        # Accumulators; initialized empty here, not touched by this base class.
        self._iocs = []
        self._iocs_by_key = defaultdict(list)
        self._vthash = []
        self._vtdomain = []
        self._opendns = []
        self._alexarank = []
        self._blacklist = []
        self._related = []
        self._signature_chain = []
        self._extensions = []
        self._show_signature_chain = show_signature_chain
        self._show_browser_ext = show_browser_ext
        self._group_by_iocs = group_by_iocs
        self._group_key = group_key

        self._add_to_blacklist = []

        # True only when this filter opened the file itself and so owns closing it.
        self._close_file = False

        self._open_output_stream(summary_output_file)

    def _open_output_stream(self, summary_output_file):
        """Resolve summary_output_file to an open stream on self._output_stream.

        Args:
            summary_output_file: a file path (string), an already-open stream,
                or None/falsy to write to stdout.
        """
        if summary_output_file:
            if isinstance(summary_output_file, six.string_types):
                self._output_stream = open(summary_output_file, 'w')
                self._close_file = True
            else:
                # not a string, most likely already opened output stream
                self._output_stream = summary_output_file
        else:
            self._output_stream = sys.stdout

    def __del__(self):
        self._close_output_stream()

    def _close_output_stream(self):
        """Close the output stream, but only if this filter opened it."""
        # getattr guards against partially-constructed instances: __del__ may
        # run even when __init__ raised before _close_file was ever assigned,
        # which previously produced a spurious AttributeError at teardown.
        if getattr(self, '_close_file', False):
            self._output_stream.close()
56 |
--------------------------------------------------------------------------------
/tests/output_filters/opendns/lookup_domains_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | import simplejson
6 | from mock import patch
7 | from threat_intel.opendns import InvestigateApi
8 |
9 | from osxcollector.output_filters.opendns.lookup_domains import LookupDomainsFilter
10 | from tests.output_filters.run_filter_test import RunFilterTest
11 |
12 |
class TestLookupDomainsFilter(RunFilterTest):
    """Tests the OpenDNS LookupDomainsFilter against canned API fixture data."""

    def test_no_domains(self):
        # Blobs without an 'osxcollector_domains' key pass through unchanged.
        input_blobs = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]

        self.run_test(LookupDomainsFilter, input_blobs=input_blobs, expected_output_blobs=input_blobs)

    def _read_json(self, file_name):
        """Load and parse a JSON fixture file, returning the decoded object."""
        with(open(file_name, 'r')) as fp:
            contents = fp.read()
        return simplejson.loads(contents)

    def test_no_security_information(self):
        # Patch InvestigateApi so categorization/security answers come from
        # fixture files instead of the network, then check the filter attaches
        # the expected 'osxcollector_opendns' key to the blob.
        input_blobs = [
            {'osxcollector_domains': ['bingo.com', 'dingo.com', 'bango.com', 'dango.com'], 'banana': {'a': 11}},
        ]
        file_name_pattern = 'tests/output_filters/data/opendns/lookup_domains/{0}'
        categorization = self._read_json(file_name_pattern.format('categorization.json'))
        security = self._read_json(file_name_pattern.format('security.json'))

        with patch.object(
            InvestigateApi, 'categorization', autospec=True,
            return_value=categorization,
        ), patch.object(
            InvestigateApi, 'security', autospec=True, return_value=security,
        ):
            output_blobs = self.run_test(LookupDomainsFilter, input_blobs=input_blobs)

        expected_categorization = self._read_json(file_name_pattern.format('expected.json'))
        self.assert_key_added_to_blob('osxcollector_opendns', expected_categorization, input_blobs, output_blobs)
46 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/util/domains.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Utilities for dealing with domain names
4 | #
5 | from __future__ import absolute_import
6 | from __future__ import unicode_literals
7 |
8 | import re
9 |
10 | import six
11 | import tldextract
12 |
13 | from osxcollector.output_filters.exceptions import BadDomainError
14 |
15 |
def expand_domain(domain):
    """Yield the domain both with and without its subdomain part.

    Args:
        domain - string
    Yields:
        string - first the full subdomain.domain.suffix form (only when a
        subdomain is present), then always the registered domain.suffix form.
    """
    parts = tldextract.extract(domain)

    if parts.subdomain:
        yield '.'.join(parts)

    # parts[1:] drops the (possibly empty) subdomain component.
    yield '.'.join(parts[1:])
32 |
33 |
def clean_domain(unclean_domain):
    """Removing errant characters and stuff from a domain name.

    A bit of careful dancing with character encodings. Eventually, some consumer of the domain string is gonna
    deal with it as ASCII. Make sure to encode as ASCII explicitly, so ASCII encoding errors can be ignored.

    Args:
        unclean_domain: string
    Returns:
        string domain name
    Raises:
        BadDomainError - when a clean domain can't be made
    """
    # Normalize to unicode text first so the regex below runs on text, not bytes.
    if not isinstance(unclean_domain, six.text_type):
        unclean_domain = unclean_domain.decode('utf-8', errors='ignore')

    # Strip leading/trailing non-alphanumeric junk (dots, slashes, soft hyphens, ...).
    unclean_domain = re.sub(r'^[^a-zA-Z0-9]*(.*?)[^a-zA-Z0-9]*$', r'\1', unclean_domain)

    extracted = tldextract.extract(unclean_domain)
    if bool(extracted.domain and extracted.suffix):
        # Skip an empty subdomain component so the join doesn't produce a leading dot.
        start_index = 1 if not extracted.subdomain else 0
        domain = '.'.join(extracted[start_index:]).lstrip('.')
        # Force ASCII, silently dropping any characters that can't be represented.
        return domain.encode('ascii', errors='ignore') if six.PY2 else \
            domain.encode('utf8').decode('ascii', errors='ignore')

    raise BadDomainError(u'Can not clean {0} {1}'.format(unclean_domain, repr(extracted)))
60 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/shadowserver/lookup_hashes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # LookupHashesFilter uses ShadowServer to lookup the values in 'sha1' and add 'osxcollector_shadowserver' key.
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | import os.path
10 |
11 | from threat_intel.shadowserver import ShadowServerApi
12 |
13 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
14 | from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter
15 | from osxcollector.output_filters.util.config import config_get_deep
16 |
17 |
class LookupHashesFilter(ThreatFeedFilter):

    """A class to lookup hashes using ShadowServer API."""

    def __init__(self, lookup_when=None, **kwargs):
        super(LookupHashesFilter, self).__init__('sha1', 'osxcollector_shadowserver', lookup_when=lookup_when, **kwargs)

    def _lookup_iocs(self, all_iocs):
        """Looks up the ShadowServer info for a set of hashes.

        Args:
            all_iocs - a list of hashes.
        Returns:
            A dict with hash as key and threat info as value
        """
        api = ShadowServerApi(
            cache_file_name=config_get_deep('shadowserver.LookupHashesFilter.cache_file_name', None),
        )
        return api.get_bin_test(all_iocs)

    def _should_add_threat_info_to_blob(self, blob, threat_info):
        """Only add info from ShadowServer if the hash and the filename match.

        Args:
            blob - A dict of data representing a line of output from OSXCollector
            threat_info - The threat info from ShadowServer
        Returns:
            boolean
        """
        name_from_blob = os.path.split(blob.get('file_path', ''))[-1]
        name_from_feed = threat_info.get('filename', '')
        return name_from_blob == name_from_feed
48 |
49 |
def main():
    # Entry point: hand this filter class to the shared filter runner.
    run_filter_main(LookupHashesFilter)


if __name__ == '__main__':
    main()
56 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/util/dict_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 |
class DictUtils(object):

    """A set of methods for manipulating dictionaries."""

    @classmethod
    def _link_path_to_chain(cls, path):
        """Helper method for get_deep

        Args:
            path: A str representing a chain of keys separated '.' or an enumerable set of strings
        Returns:
            an enumerable set of strings
        """
        if path == '':
            return []
        elif isinstance(path, (list, tuple, set)):
            return path
        else:
            return path.split('.')

    @classmethod
    def _get_deep_by_chain(cls, x, chain, default=None):
        """Grab data from a dict using a ['key1', 'key2', 'key3'] chain param to do deep traversal.

        Args:
            x: A dict
            chain: an enumerable set of strings
            default: A value to return if the path can not be found
        Returns:
            The value of the key or default
        """
        # `not chain` (rather than == []) also treats an empty tuple/set as empty.
        if not chain:
            return default
        try:
            for link in chain:
                try:
                    x = x[link]
                except (KeyError, TypeError, IndexError):
                    # The container may be a list and the link a numeric index.
                    x = x[int(link)]
        except (KeyError, TypeError, ValueError, IndexError):
            # IndexError included so an out-of-range list index returns
            # default instead of propagating.
            x = default
        return x

    @classmethod
    def get_deep(cls, x, path='', default=None):
        """Grab data from a dict using a 'key1.key2.key3' path param to do deep traversal.

        Args:
            x: A dict
            path: A 'deep path' to retrieve in the dict
            default: A value to return if the path can not be found
        Returns:
            The value of the key or default
        """
        chain = cls._link_path_to_chain(path)
        return cls._get_deep_by_chain(x, chain, default=default)
62 |
--------------------------------------------------------------------------------
/tests/output_filters/data/test_osxcollector_config.yaml:
--------------------------------------------------------------------------------
1 | # The OpenDNSFilter requires an API key for OpenDNS Investigate
2 | api_key:
3 | opendns: "00FAABADF00D"
4 | virustotal: "00FAABADF00D"
5 |
6 | # The BlacklistFilter allows for multiple blacklists to be compared against at once
7 | # Each blacklists requires:
8 | # - blacklist_name, A name
9 | # - blacklist_keys, JSON paths. These can be of the form "a.b" to look at "b" in {"a": {"b": "foo"}}
# - blacklist_file_path, the path to a file containing values considered blacklisted. Any line starting with # is skipped
11 | # - blacklist_is_regex, should values in the file be treated as Python regex
12 | blacklists:
13 | - blacklist_name: "hashes"
14 | blacklist_keys:
15 | - "md5"
16 | - "sha1"
17 | - "sha2"
18 | blacklist_file_path: "./tests/output_filters/data/hashes_blacklist.txt"
19 | blacklist_is_regex: False
20 | - blacklist_name: "domains"
21 | blacklist_keys:
22 | - "osxcollector_domains"
23 | blacklist_file_path: "./tests/output_filters/data/domains_blacklist.txt"
24 | blacklist_is_domains: True
25 | blacklist_is_regex: True
26 |
27 | domain_whitelist:
28 | blacklist_name: "domain_whitelist"
29 | blacklist_keys:
30 | - "osxcollector_domains"
31 | blacklist_file_path: "./tests/output_filters/data/domains_whitelist.txt"
32 | blacklist_is_domains: True
33 | blacklist_is_regex: True
34 |
35 | shadowserver:
36 | LookupHashesFilter:
37 | cache_file_name: "./tests/output_filters/data/cache.shadowserver.LookupHashesFilter.json"
38 |
39 | virustotal:
40 | LookupHashesFilter:
41 | cache_file_name: "./tests/output_filters/data/cache.virustotal.LookupHashesFilter.json"
42 | LookupDomainsFilter:
43 | cache_file_name: "./tests/output_filters/data/cache.virustotal.LookupDomainsFilter.json"
44 |
# No cache data is available for opendns yet
46 | opendns:
47 | # LookupDomainsFilter:
48 | # cache_file_name: "./tests/output_filters/data/cache.opendns.LookupDomainsFilter.json"
49 | RelatedDomainsFilter:
50 | cache_file_name: "./tests/output_filters/data/cache.opendns.RelatedDomainsFilter.json"
51 |
--------------------------------------------------------------------------------
/tests/output_filters/data/opendns/lookup_domains/security.json:
--------------------------------------------------------------------------------
1 | {
2 | "bango.com": null,
3 | "dango.com": {
4 | "dga_score": 0,
5 | "geodiversity_normalized": [
6 | [
7 | "IS",
8 | 0.9996267573230843
9 | ],
10 | [
11 | "US",
12 | 0.0003732426769157135
13 | ]
14 | ],
15 | "asn_score": -0.1608560065526172,
16 | "rip_score": 0,
17 | "securerank2": 0.04721624022600212,
18 | "popularity": 0,
19 | "geoscore": 0,
20 | "ks_test": 0,
21 | "prefix_score": 0,
22 | "attack": "",
23 | "pagerank": 0,
24 | "geodiversity": [
25 | [
26 | "IS",
27 | 0.5
28 | ],
29 | [
30 | "US",
31 | 0.5
32 | ]
33 | ],
34 | "found": true,
35 | "perplexity": 0.3866686930931377,
36 | "entropy": 3.5351745656359026,
37 | "fastflux": false,
38 | "threat_type": "",
39 | "tld_geodiversity": []
40 | },
41 | "dingo.com": {
42 | "dga_score": 0,
43 | "geodiversity_normalized": [
44 | [
45 | "IS",
46 | 0.9996267573230843
47 | ],
48 | [
49 | "US",
50 | 0.0003732426769157135
51 | ]
52 | ],
53 | "asn_score": -0.1608560065526172,
54 | "rip_score": 0,
55 | "securerank2": 0.04721624022600212,
56 | "popularity": 0,
57 | "geoscore": 0,
58 | "ks_test": 0,
59 | "prefix_score": 0,
60 | "attack": "",
61 | "pagerank": 0,
62 | "geodiversity": [
63 | [
64 | "IS",
65 | 0.5
66 | ],
67 | [
68 | "US",
69 | 0.5
70 | ]
71 | ],
72 | "found": true,
73 | "perplexity": 0.3866686930931377,
74 | "entropy": 3.5351745656359026,
75 | "fastflux": false,
76 | "threat_type": "",
77 | "tld_geodiversity": []
78 | },
79 | "bingo.com": null
80 | }
81 |
--------------------------------------------------------------------------------
/tests/output_filters/find_blacklisted_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.find_blacklisted import FindBlacklistedFilter
6 | from tests.output_filters.run_filter_test import RunFilterTest
7 |
8 |
class TestFindBlacklistedFilter(RunFilterTest):

    """Checks that FindBlacklistedFilter tags lines matching the configured blacklists."""

    def _assert_matches(self, input_blobs, expected_blacklists):
        """Run the filter over input_blobs and verify the blacklist hits added to each blob."""
        output_blobs = self.run_test(FindBlacklistedFilter, input_blobs)
        self.assert_key_added_to_blob('osxcollector_blacklist', expected_blacklists, input_blobs, output_blobs)

    def test_simple_hashes(self):
        blobs = [
            {'md5': 'ffff5f60462c38b1d235cb3509876543'},
            {'sha1': 'ffff234d2a50a42a87389f1234561a21'},
            {'sha2': 'ffff51e77b442ee23188d87e4abcdef0'},
        ]
        expected = [
            {'hashes': ['ffff5f60462c38b1d235cb3509876543']},
            {'hashes': ['ffff234d2a50a42a87389f1234561a21']},
            {'hashes': ['ffff51e77b442ee23188d87e4abcdef0']},
        ]
        self._assert_matches(blobs, expected)

    def test_no_hashes(self):
        blobs = [
            # Key is not one of the blacklisted keys
            {'apple': 'ffff5f60462c38b1d235cb3509876543'},
            # Key matches but the value is not on the blacklist
            {'sha1': 'aaaa234d2a50a42a87389f1234561a21'},
        ]
        self._assert_matches(blobs, [None, None])

    def test_simple_domains(self):
        blobs = [
            {'osxcollector_domains': ['biz.example.com']},
            {'osxcollector_domains': ['www.example.co.uk']},
            {'osxcollector_domains': ['example.org']},
        ]
        expected = [
            {'domains': ['example.com']},
            {'domains': ['example.co.uk']},
            {'domains': ['example.org']},
        ]
        self._assert_matches(blobs, expected)
56 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/firefox/find_extensions.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # FindExtensionsFilter reads the Firefox JSON blobs and creates records about the extensions and plugins.
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
10 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
11 | from osxcollector.output_filters.util.dict_utils import DictUtils
12 |
13 |
class FindExtensionsFilter(OutputFilter):

    """Reads the Firefox JSON blobs and creates records about the extensions and plugins.

    In the output look a line where:
        ('osxcollector_section' == 'firefox' and 'osxcollector_subsection' == 'json_files')
    and then parse the heck out of the extensions.
    """

    def __init__(self, **kwargs):
        super(FindExtensionsFilter, self).__init__(**kwargs)
        # Extension records gathered while filtering; flushed by end_of_lines.
        self._new_lines = []

    def filter_line(self, blob):
        """Consume firefox addon/extension JSON lines; emit the records at end of input."""
        is_extension_json = (
            'firefox' == blob.get('osxcollector_section')
            and 'json_files' == blob.get('osxcollector_subsection')
            and blob.get('osxcollector_json_file') in ('addons.json', 'extensions.json')
        )
        if not is_extension_json:
            return blob

        for addon in DictUtils.get_deep(blob, 'contents.addons', []):
            record = {
                'osxcollector_section': 'firefox',
                'osxcollector_subsection': 'extensions',
                'osxcollector_incident_id': blob['osxcollector_incident_id'],
                'name': DictUtils.get_deep(addon, 'defaultLocale.name', addon.get('name')),
                'description': DictUtils.get_deep(addon, 'defaultLocale.description', addon.get('description')),
                'path': addon.get('id'),
            }
            if blob.get('osxcollector_username'):
                record['osxcollector_username'] = blob['osxcollector_username']
            self._new_lines.append(record)

        return None

    def end_of_lines(self):
        """Return the extension records collected from the consumed lines."""
        return self._new_lines
53 |
54 |
def main():
    # Entry point: hand this filter class to the shared filter runner.
    run_filter_main(FindExtensionsFilter)


if __name__ == '__main__':
    main()
61 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/chrome/find_extensions.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # FindExtensionsFilter reads the Chrome preferences JSON blob and creates records about the extensions and plugins.
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
10 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
11 | from osxcollector.output_filters.util.dict_utils import DictUtils
12 |
13 |
class FindExtensionsFilter(OutputFilter):

    """Reads the Chrome preferences JSON blob and creates records about the extensions and plugins.

    In the output look a line where:
        ('osxcollector_section' == 'chrome' and 'osxcollector_subsection' == 'preferences')
    and then parse the heck out of the extensions.
    """

    def __init__(self, **kwargs):
        super(FindExtensionsFilter, self).__init__(**kwargs)
        # Extension records built up while scanning; flushed by end_of_lines.
        self._new_lines = []

    def filter_line(self, blob):
        """Consume the Chrome preferences line and collect one record per extension."""
        if blob.get('osxcollector_section') != 'chrome' or blob.get('osxcollector_subsection') != 'preferences':
            return blob

        settings_by_key = DictUtils.get_deep(blob, 'contents.extensions.settings', {})
        for setting in settings_by_key.values():
            record = {
                'osxcollector_section': 'chrome',
                'osxcollector_subsection': 'extensions',
                'osxcollector_incident_id': blob['osxcollector_incident_id'],
                'state': setting.get('state'),
                'was_installed_by_default': setting.get('was_installed_by_default'),
                'name': DictUtils.get_deep(setting, 'manifest.name'),
                'description': DictUtils.get_deep(setting, 'manifest.description'),
                'path': setting.get('path'),
            }
            if blob.get('osxcollector_username'):
                record['osxcollector_username'] = blob['osxcollector_username']
            self._new_lines.append(record)

        return None

    def end_of_lines(self):
        """Return the collected extension records."""
        return self._new_lines
53 |
54 |
def main():
    # Entry point: hand this filter class to the shared filter runner.
    run_filter_main(FindExtensionsFilter)


if __name__ == '__main__':
    main()
61 |
--------------------------------------------------------------------------------
/osxcollector.yaml.example:
--------------------------------------------------------------------------------
1 | api_key:
2 | # The OpenDNSFilter requires an API key for OpenDNS Investigate
3 | opendns: "ADD YOUR KEY"
4 |
  # The VirusTotal LookupHashesFilter requires an API key for VirusTotal
6 | virustotal: "ADD YOUR KEY"
7 |
8 | # The BlacklistFilter allows for multiple blacklists to be compared against at once
9 | # Each blacklists requires:
10 | # - blacklist_name, A name
11 | # - blacklist_keys, JSON paths. These can be of the form "a.b" to look at "b" in {"a": {"b": "foo"}}
12 | # - blacklist_file_path, the path to a file containing values considered blacklisted. Any line starting with # is skipped
13 | # - blacklist_is_regex, should values in the file be treated as Python regex
14 | # - blacklist_is_domains, should values in the file be treated as domains and analyzed with some smart regex to retrieve subdomain etc.
15 | blacklists:
16 | - blacklist_name: "hashes"
17 | blacklist_keys:
18 | - "md5"
19 | - "sha1"
20 | - "sha2"
21 | blacklist_file_path: "/tmp/hash_blacklist.txt"
22 | blacklist_is_regex: False
23 | - blacklist_name: "domains"
24 | blacklist_keys:
25 | - "osxcollector_domains"
26 | blacklist_file_path: "/tmp/domain_blacklist.txt"
27 | blacklist_is_regex: False
28 | blacklist_is_domains: True
29 |
30 | # domain_whitelist is a special blacklist entry. Anything on this list won't be looked up with
31 | # OpenDNS or VirusTotal
32 | domain_whitelist:
33 | blacklist_name: "Ignore Domains"
34 | blacklist_keys:
35 | - "osxcollector_domains"
36 | blacklist_file_path: "/tmp/domain_whitelist.txt"
37 | blacklist_is_domains: True
38 | blacklist_is_regex: True
39 |
40 | opendns:
41 | LookupDomainsFilter:
42 | cache_file_name: "/tmp/cache.opendns.LookupDomainsFilter.json"
43 | RelatedDomainsFilter:
44 | cache_file_name: "/tmp/cache.opendns.RelatedDomainsFilter.json"
45 |
46 | shadowserver:
47 | LookupHashesFilter:
48 | cache_file_name: "/tmp/cache.shadowserver.LookupHashesFilter.json"
49 |
50 | virustotal:
51 | LookupHashesFilter:
52 | cache_file_name: "/tmp/cache.virustotal.LookupHashesFilter.json"
53 | LookupDomainsFilter:
54 | cache_file_name: "/tmp/cache.virustotal.LookupDomainsFilter.json"
55 | LookupURLsFilter:
56 | cache_file_name: "/tmp/cache.virustotal.LookupURLsFilter.json"
57 | resources_per_req: 4
58 |
59 | alexa:
60 | LookupRankingsFilter:
61 | cache_file_name: "/tmp/cache.alexa.LookupRankingsFilter.json"
62 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/alexa/lookup_rankings.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # AlexaRankingFilter uses the AWIS API to lookup Alexa traffic rankings.
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | from threat_intel.alexaranking import AlexaRankingApi
10 |
11 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
12 | from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter
13 | from osxcollector.output_filters.util.config import config_get_deep
14 |
15 |
class LookupRankingsFilter(ThreatFeedFilter):

    """A class to lookup traffic rankings using AWIS API."""

    def __init__(self, lookup_when=None, **kwargs):
        super(LookupRankingsFilter, self).__init__(
            'osxcollector_domains',
            'osxcollector_alexa_rank',
            lookup_when=lookup_when,
            name_of_api_key=None,
            **kwargs
        )

    def _lookup_iocs(self, domains, resource_per_req=25):
        """Caches the Alexa ranking info for a set of domains.

        Args:
            domains - a list of domains.
        Returns:
            A dict with domain as key and threat info as value
        """
        cache_file_name = config_get_deep('alexa.LookupRankingsFilter.cache_file_name', None)
        api = AlexaRankingApi(resource_per_req, cache_file_name=cache_file_name)

        reports = api.get_alexa_rankings(domains)
        # Keep only the reports the filter decides are worth storing.
        return {
            domain: report
            for domain, report in reports.items()
            if report and self._should_store_ioc_info(report)
        }

    def _should_store_ioc_info(self, report):
        """Decide whether a ranking report should be attached to the line.

        Args:
            report - a dict from get_alexa_rankings
        Returns:
            boolean
        """
        # Always include Alexa ranking information since we do not yet
        # categorize by it for the output summaries, rather just use it as an
        # additional source of threat intel.
        return True
63 |
64 |
def main():
    # Entry point: hand this filter class to the shared filter runner.
    run_filter_main(LookupRankingsFilter)


if __name__ == '__main__':
    main()
71 |
--------------------------------------------------------------------------------
/tests/output_filters/util/domains_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | import pytest
6 |
7 | from osxcollector.output_filters.exceptions import BadDomainError
8 | from osxcollector.output_filters.util.domains import clean_domain
9 | from osxcollector.output_filters.util.domains import expand_domain
10 |
11 |
class TestCleanDomain:

    """Exercises clean_domain against assorted messy inputs."""

    def _check(self, dirty_domain, expected):
        assert clean_domain(dirty_domain) == expected

    def test_trailing_and_leading_dots(self):
        self._check('.www.example.com.', 'www.example.com')

    def test_trailing_and_leading_slashes(self):
        self._check('//www.example.com//', 'www.example.com')

    def test_unicode_prefix(self):
        self._check('\xadwww.example.com', 'www.example.com')

    def test_unicode_prefix2(self):
        self._check(u'\xadwww.example.com', 'www.example.com')

    def test_unicode_mid(self):
        self._check('stinkum.\xadexample.com', 'stinkum.example.com')

    def test_unicode_mid2(self):
        self._check(u'stinkum.\xadexample.com', 'stinkum.example.com')

    def test_punicoded(self):
        # TODO: OSXCollector is confused by stuff that ought to be punycode... or something
        self._check('hotmaıll.com', 'hotmall.com')

    def test_unicode_punicoded(self):
        self._check(u'hotmaıll.com', 'hotmall.com')

    def test_single_word(self):
        # A bare word with no suffix can never become a clean domain.
        with pytest.raises(BadDomainError):
            clean_domain('oneword')
46 |
47 |
class TestExpandDomain:

    """Exercises expand_domain's with/without-subdomain expansion."""

    def _check(self, initial_domain, expected):
        # Order-insensitive comparison of the generated domains.
        assert sorted(expand_domain(initial_domain)) == sorted(expected)

    def test_simple_subdomain(self):
        self._check('www.example.com', ['example.com', 'www.example.com'])

    def test_no_subdomain(self):
        self._check('example.com', ['example.com'])

    def test_complex_subdomain(self):
        self._check('www.foo.bar.whiz.example.com', ['example.com', 'www.foo.bar.whiz.example.com'])

    def test_unicode_subdomain(self):
        self._check('www.jobbörse.com', ['www.jobbörse.com', 'jobbörse.com'])
65 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/find_blacklisted.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # FindBlacklistedFilter adds 'osxcollector_blacklist' key to lines matching a blacklist.
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
10 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
11 | from osxcollector.output_filters.util.blacklist import create_blacklist
12 | from osxcollector.output_filters.util.config import config_get_deep
13 |
14 |
class FindBlacklistedFilter(OutputFilter):

    """Adds 'osxcollector_blacklist' key to lines matching a blacklist.

    This filters compares each line to a set of blacklists and marks lines that match the blacklist.
    This is proving useful for filtering known hashes, known bad filenames, known bad domains, etc.

    Configuration Keys:
        blacklist_name       - [REQUIRED] the name of the blacklist
        blacklist_keys       - [REQUIRED] get the value of these keys and compare against the blacklist
        blacklist_is_regex   - [REQUIRED] should the values in the blacklist file be treated as regex
        blacklist_file_path  - [REQUIRED if no blacklist_data_feed] path to a file with the actual values to blacklist
        blacklist_data_feed  - [REQUIRED if no blacklist_file_path] name of the data feed from which data is read
        blacklist_is_domains - [OPTIONAL] interpret values as domains and do some smart regex and subdomain stuff with them
    """

    def __init__(self, **kwargs):
        super(FindBlacklistedFilter, self).__init__(**kwargs)
        feeds = kwargs.get('data_feeds', {})
        self._blacklists = [
            create_blacklist(chunk, feeds)
            for chunk in config_get_deep('blacklists')
        ]

    def filter_line(self, blob):
        """Find blacklisted values in a line.

        Lines are never cached, every line in produces a line out.
        """
        for blacklist in self._blacklists:
            hit = blacklist.match_line(blob)
            if not hit:
                continue
            # Record the match under the blacklist's name; first match wins.
            matches = blob.setdefault('osxcollector_blacklist', {})
            matches.setdefault(blacklist.name, []).append(hit)
            break

        return blob
53 |
54 |
def main():
    # Entry point: hand this filter class to the shared filter runner.
    run_filter_main(FindBlacklistedFilter)


if __name__ == '__main__':
    main()
61 |
--------------------------------------------------------------------------------
/tests/output_filters/alexa/lookup_domains_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.virustotal.lookup_domains import LookupDomainsFilter
6 | from tests.output_filters.run_filter_test import RunFilterTest
7 |
8 |
class TestLookupDomainsFilter(RunFilterTest):

    # NOTE(review): this file lives under tests/output_filters/alexa/ but it
    # imports and exercises the VirusTotal LookupDomainsFilter and asserts on
    # the 'osxcollector_vtdomain' key -- it looks like a verbatim copy of
    # tests/output_filters/virustotal/lookup_domains_test.py rather than a
    # test of the alexa LookupRankingsFilter. Confirm intent: either retarget
    # this at the alexa filter or remove the duplicate.

    def test_no_domains(self):
        input_blobs = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]

        self.run_test(LookupDomainsFilter, input_blobs=input_blobs, expected_output_blobs=input_blobs)

    def test_benign_domains(self):
        input_blobs = [
            {'osxcollector_domains': ['good.example.com'], 'dingo': 'bingo', 'apple': [3, 14]},
            {'osxcollector_domains': ['good.example.co.uk'], 'bingo': 'bongo', 'orange': 'banana'},
        ]

        self.run_test(LookupDomainsFilter, input_blobs=input_blobs, expected_output_blobs=input_blobs)

    def test_suspicious_domains(self):
        input_blobs = [
            {'osxcollector_domains': ['evil.example.com'], 'dingo': 'bingo', 'apple': [3, 14]},
            {'osxcollector_domains': ['evil.example.co.uk'], 'bingo': 'bongo', 'orange': 'banana'},
        ]
        expected_vtdomains = [
            [
                {
                    'domain': 'evil.example.com',
                    'response_code': 1,
                    'detections': {
                        'undetected_referrer_samples': 0,
                        'undetected_communicating_samples': 0,
                        'detected_downloaded_samples': 5,
                        'detected_referrer_samples': 5,
                        'detected_communicating_samples': 5,
                        'detected_urls': 5,
                    },
                    'categorization': {},
                },
            ],
            [
                {
                    'domain': 'evil.example.co.uk',
                    'response_code': 1,
                    'detections': {
                        'undetected_referrer_samples': 0,
                        'undetected_communicating_samples': 0,
                        'detected_downloaded_samples': 4,
                        'detected_referrer_samples': 5,
                        'detected_communicating_samples': 5,
                        'detected_urls': 6,
                    },
                    'categorization': {},
                },
            ],
        ]
        output_blobs = self.run_test(LookupDomainsFilter, input_blobs=input_blobs)
        self.assert_key_added_to_blob('osxcollector_vtdomain', expected_vtdomains, input_blobs, output_blobs)
66 |
--------------------------------------------------------------------------------
/tests/output_filters/virustotal/lookup_domains_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.virustotal.lookup_domains import LookupDomainsFilter
6 | from tests.output_filters.run_filter_test import RunFilterTest
7 |
8 |
class TestLookupDomainsFilter(RunFilterTest):

    """Tests for the VirusTotal LookupDomainsFilter."""

    def _expected_vtdomain(self, domain, detected_downloaded_samples, detected_urls):
        """Build the expected threat-info record for a single flagged domain."""
        return {
            'domain': domain,
            'response_code': 1,
            'detections': {
                'undetected_referrer_samples': 0,
                'undetected_communicating_samples': 0,
                'detected_downloaded_samples': detected_downloaded_samples,
                'detected_referrer_samples': 5,
                'detected_communicating_samples': 5,
                'detected_urls': detected_urls,
            },
            'categorization': {},
        }

    def test_no_domains(self):
        """Blobs without an 'osxcollector_domains' key pass through unchanged."""
        input_blobs = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]
        self.run_test(LookupDomainsFilter, input_blobs=input_blobs, expected_output_blobs=input_blobs)

    def test_benign_domains(self):
        """Benign domains do not get an 'osxcollector_vtdomain' key added."""
        input_blobs = [
            {'osxcollector_domains': ['good.example.com'], 'dingo': 'bingo', 'apple': [3, 14]},
            {'osxcollector_domains': ['good.example.co.uk'], 'bingo': 'bongo', 'orange': 'banana'},
        ]
        self.run_test(LookupDomainsFilter, input_blobs=input_blobs, expected_output_blobs=input_blobs)

    def test_suspicious_domains(self):
        """Suspicious domains get an 'osxcollector_vtdomain' key with detection counts."""
        input_blobs = [
            {'osxcollector_domains': ['evil.example.com'], 'dingo': 'bingo', 'apple': [3, 14]},
            {'osxcollector_domains': ['evil.example.co.uk'], 'bingo': 'bongo', 'orange': 'banana'},
        ]
        expected_vtdomains = [
            [self._expected_vtdomain('evil.example.com', 5, 5)],
            [self._expected_vtdomain('evil.example.co.uk', 4, 6)],
        ]
        output_blobs = self.run_test(LookupDomainsFilter, input_blobs=input_blobs)
        self.assert_key_added_to_blob('osxcollector_vtdomain', expected_vtdomains, input_blobs, output_blobs)
66 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/virustotal/lookup_hashes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # LookupHashesFilter uses VirusTotal to lookup the values in 'sha2' and add 'osxcollector_vthash' key.
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | from threat_intel.virustotal import VirusTotalApi
10 |
11 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
12 | from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter
13 | from osxcollector.output_filters.util.config import config_get_deep
14 |
15 |
class LookupHashesFilter(ThreatFeedFilter):

    """A class to lookup hashes using VirusTotal API.

    Reads the hash from the 'sha2' key of each line and adds an
    'osxcollector_vthash' key for hashes with enough positive detections.
    """

    def __init__(self, lookup_when=None, **kwargs):
        """Args:
            lookup_when: An optional boolean function of a blob deciding whether to lookup the line.
        """
        super(LookupHashesFilter, self).__init__(
            'sha2',
            'osxcollector_vthash', lookup_when=lookup_when,
            name_of_api_key='virustotal', **kwargs
        )

    def _lookup_iocs(self, all_iocs, resource_per_req=25):
        """Caches the VirusTotal info for a set of hashes.

        Args:
            all_iocs - a list of hashes.
            resource_per_req - how many hashes to batch into a single API request.
        Returns:
            A dict with hash as key and threat info as value
        """
        cache_file_name = config_get_deep('virustotal.LookupHashesFilter.cache_file_name', None)
        vt = VirusTotalApi(self._api_key, resource_per_req, cache_file_name=cache_file_name)
        reports = vt.get_file_reports(all_iocs)

        # Keep only non-empty reports interesting enough to store.
        return {
            hash_val: self._trim_hash_report(report)
            for hash_val, report in reports.items()
            if report and self._should_store_ioc_info(report)
        }

    def _should_store_ioc_info(self, report, min_hits=1):
        """Only store if the hash has > min_hits positive detections.

        Args:
            report - A dict response from get_file_reports
            min_hits - Minimum number of VT positives
        Returns:
            boolean
        """
        return 1 == report.get('response_code') and min_hits < report.get('positives', 0)

    def _trim_hash_report(self, report):
        """Copy just the required keys from the report into a new report.

        Args:
            report - A dict response from get_file_reports
        Returns:
            A smaller dict
        """
        copy_keys = [
            'scan_id',
            'sha1',
            'sha256',
            'md5',
            'scan_date',
            'permalink',
            'positives',
            'total',
            'response_code',
        ]

        return {key: report.get(key) for key in copy_keys}
82 |
83 |
def main():
    """Command-line entry point: stream stdin lines through LookupHashesFilter to stdout."""
    run_filter_main(LookupHashesFilter)


if __name__ == '__main__':
    main()
90 |
--------------------------------------------------------------------------------
/tests/output_filters/virustotal/lookup_hashes_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.virustotal.lookup_hashes import LookupHashesFilter
6 | from tests.output_filters.run_filter_test import RunFilterTest
7 |
8 |
class TestLookupHashesFilter(RunFilterTest):

    """Exercises the VirusTotal LookupHashesFilter end to end.

    Expected values presumably mirror
    tests/output_filters/data/cache.virustotal.LookupHashesFilter.json — verify against that fixture.
    """

    def test_no_hashes(self):
        # Blobs without a 'sha2' key should pass through unchanged.
        input_blobs = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]
        self.run_test(LookupHashesFilter, input_blobs=input_blobs, expected_output_blobs=input_blobs)

    def test_benign_hashes(self):
        # Hashes without enough positives gain no 'osxcollector_vthash' key.
        input_blobs = [
            {'sha2': 'b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7', 'dingo': 'bingo', 'apple': [3, 14]},
            {'sha2': '52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c', 'bingo': 'bongo', 'orange': 'banana'},
        ]
        self.run_test(LookupHashesFilter, input_blobs=input_blobs, expected_output_blobs=input_blobs)

    def test_suspicious_hashes(self):
        # Hashes with positives gain an 'osxcollector_vthash' key holding the trimmed VT report.
        input_blobs = [
            {'sha2': 'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302', 'dingo': 'bingo', 'apple': [3, 14]},
            {'sha2': '6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360', 'bingo': 'bongo', 'orange': 'banana'},
        ]
        expected_vthashes = [
            [
                {
                    'scan_id': 'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302-1273894724',
                    'sha1': 'da9b79f2fd33d002033b69a9a346af4671a9e16b',
                    'sha256': 'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302',
                    'md5': '0c71d8cedc8bbb2b619a76d1478c4348',
                    'scan_date': '2015-01-15 16:42:01',
                    'permalink': 'https://www.virustotal.com/file/'
                    'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302/analysis/1273894724/',
                    'total': 40,
                    'positives': 40,
                    'response_code': 1,
                },
            ],
            [
                {
                    # NOTE(review): 'scan_id' begins with 52d3df0e... while 'sha256' is
                    # 6e878553... — presumably copied from the cache fixture; confirm the
                    # fixture really pairs these values this way.
                    'scan_id': '52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c-1273894724',
                    'sha1': '92e3750a9f0eef6290dd83867eff88064e9c01bb',
                    'sha256': '6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360',
                    'md5': '06506cc06cf0167ea583de62c98eae2c',
                    'scan_date': '2010-05-15 03:38:44',
                    'permalink': 'https://www.virustotal.com/file/'
                    '6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360/analysis/1273894724/',
                    'total': 40,
                    'positives': 40,
                    'response_code': 1,
                },
            ],
        ]
        output_blobs = self.run_test(LookupHashesFilter, input_blobs=input_blobs)
        self.assert_key_added_to_blob('osxcollector_vthash', expected_vthashes, input_blobs, output_blobs)
62 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/firefox/sort_history.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # SortHistoryFilter creates a clean sorted Firefox browser history and tags lines with {'osxcollector_browser_history': 'firefox'}
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | import copy
10 |
11 | import six
12 |
13 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
14 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
15 |
16 |
class SortHistoryFilter(OutputFilter):

    """Joins Firefox browser history 'moz_historyvisits' and 'moz_places' tables, producing a time sorted browser history.

    In the output look for lines where:
        'osxcollector_browser_history' == 'firefox'
    for some snazzy browser history stuff.
    """

    def __init__(self, **kwargs):
        super(SortHistoryFilter, self).__init__(**kwargs)

        # Rows cached while streaming: visit rows keyed by 'place_id', place rows keyed by 'id'.
        self._visits_table = dict()
        self._places_table = dict()

    def filter_line(self, blob):
        """Cache the 'moz_historyvisits' and 'moz_places' rows, consuming them from the stream.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            The blob unchanged, or None when the line was cached for the later join.
        """
        if 'firefox' == blob.get('osxcollector_section') and 'history' == blob.get('osxcollector_subsection'):
            table = blob.get('osxcollector_table_name')

            if 'moz_historyvisits' == table:
                if self._validate_visit(blob):
                    self._visits_table[blob['place_id']] = blob
                blob = None  # Consume the line
            elif 'moz_places' == table:
                if self._validate_places(blob):
                    self._places_table[blob['id']] = blob
                blob = None  # Consume the line

        return blob

    def end_of_lines(self):
        """Join the 'visits' and 'urls' tables into a single browser history and timeline.

        Returns:
            A list of merged place/visit dicts, newest first.
        """
        history = list()

        for visit in six.itervalues(self._visits_table):
            place = self._places_table.get(visit.get('place_id'))
            if place:
                # Merge visit-only keys into a copy of the place record; the
                # place's values win for any key present in both rows.
                add_keys = [key for key in visit if key not in place]
                record = copy.deepcopy(place)
                for key in add_keys:
                    record[key] = visit[key]
                record['osxcollector_browser_history'] = 'firefox'
                history.append(record)

        # NOTE(review): assumes every joined record carries 'last_visit_date' —
        # _validate_places only requires 'id'; confirm moz_places rows always have it.
        return sorted(history, key=lambda x: x['last_visit_date'], reverse=True)

    @classmethod
    def _validate_visit(cls, blob):
        """Does the visit dict have the required fields?

        Args:
            blob: a visit dict
        Returns:
            boolean
        """
        required_fields = ['place_id']
        return all([field in blob for field in required_fields])

    @classmethod
    def _validate_places(cls, blob):
        """Does the place dict have the required fields?

        Args:
            blob: a place dict
        Returns:
            boolean
        """
        required_fields = ['id']
        return all([field in blob for field in required_fields])
87 |
88 |
def main():
    """Command-line entry point: stream stdin lines through SortHistoryFilter to stdout."""
    run_filter_main(SortHistoryFilter)


if __name__ == '__main__':
    main()
95 |
--------------------------------------------------------------------------------
/tests/output_filters/shadowserver/lookup_hashes_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.shadowserver.lookup_hashes import LookupHashesFilter
6 | from tests.output_filters.run_filter_test import RunFilterTest
7 |
8 |
class TestLookupHashesFilter(RunFilterTest):

    """Exercises the shadowserver LookupHashesFilter end to end.

    Expected behavior presumably mirrors
    tests/output_filters/data/cache.shadowserver.LookupHashesFilter.json — verify against that fixture.
    """

    def setup_method(self, method):
        # A kext line whose sha1 is present in the shadowserver known-good data.
        self._known_sha1_input = [
            {
                'sha2': '1fafe48f626fdc030b0a0efc1008d51cd3078d1b3ec95f808d12afbfef458b23',
                'sha1': '5d87de61cb368c93325dd910c202b8647f8e90ed',
                'ctime': '2014-12-05 16:50:48',
                'osxcollector_plist_path': '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Info.plist',
                'mtime': '2014-09-19 00:42:35',
                'osxcollector_incident_id': 'RecalibratedTurnip-2014_12_21-18_49_52',
                'osxcollector_section': 'kext',
                'osxcollector_bundle_id': 'com.apple.kpi.libkern',
                'file_path': '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Libkern',
                'md5': '6746005c822ceb6737b871698d3ed22f',
            },
        ]
        # A kext line whose sha1 is absent from the shadowserver data.
        self._unknown_sha1_input = [
            {
                'sha2': '5148211a7bc4a5d02913b0037805f20704f329e1739b5a6d2338fc84c1780b71',
                'sha1': '816a85d89ae34d2dc73b8c768eecb03935c568ba',
                'ctime': '2014-12-05 16:53:07',
                'osxcollector_plist_path': '/System/Library/Extensions/AMDRadeonX3000GLDriver.bundle/Contents/Info.plist',
                'mtime': '2014-09-28 22:34:42',
                'osxcollector_incident_id': 'RecalibratedTurnip-2014_12_21-18_49_52',
                'osxcollector_section': 'kext',
                'osxcollector_bundle_id': 'com.apple.AMDRadeonX3000GLDriver',
                'file_path': '/System/Library/Extensions/AMDRadeonX3000GLDriver.bundle/Contents/MacOS/AMDRadeonX3000GLDriver',
                'md5': '967698d9ad4171bed991df85e1c72e56',
            },
        ]

    def test_no_match(self):
        """An unknown hash gains no 'osxcollector_shadowserver' key."""
        output_blobs = self.run_test(LookupHashesFilter, self._unknown_sha1_input)
        assert len(output_blobs) == 1
        assert 'osxcollector_shadowserver' not in output_blobs[0]

    def test_known_match(self):
        """A known hash with a matching filename gains an 'osxcollector_shadowserver' key."""
        output_blobs = self.run_test(LookupHashesFilter, self._known_sha1_input)
        assert len(output_blobs) == 1
        assert 'osxcollector_shadowserver' in output_blobs[0]

    def test_known_match_different_path_prefix(self):
        """Only the basename matters: a different directory prefix still matches."""
        self._known_sha1_input[0]['file_path'] = '/new_path/Libkern'
        output_blobs = self.run_test(LookupHashesFilter, self._known_sha1_input)
        assert len(output_blobs) == 1
        assert 'osxcollector_shadowserver' in output_blobs[0]

    def test_wrong_filename(self):
        """Change the filename and don't match"""
        self._known_sha1_input[0]['file_path'] = 'wrong_name'
        output_blobs = self.run_test(LookupHashesFilter, self._known_sha1_input)
        assert len(output_blobs) == 1
        assert 'osxcollector_shadowserver' not in output_blobs[0]

    def test_partial_filename(self):
        """Change the filename so it merely contains the known name as a substring and don't match"""
        self._known_sha1_input[0]['file_path'] = '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Not_Quite_Libkern'
        output_blobs = self.run_test(LookupHashesFilter, self._known_sha1_input)
        assert len(output_blobs) == 1
        assert 'osxcollector_shadowserver' not in output_blobs[0]
70 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/virustotal/lookup_urls.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # LookupURLsFilter uses VirusTotal to lookup the URLs in 'LSQuarantineDataURLString' and add 'osxcollector_vturl' key.
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | import re
10 |
11 | from threat_intel import VirusTotalApi
12 |
13 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
14 | from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter
15 | from osxcollector.output_filters.util.config import config_get_deep
16 |
17 |
class LookupURLsFilter(ThreatFeedFilter):

    """A class to find suspicious URLs using VirusTotal API.

    NOTE(review): this module imports VirusTotalApi from the threat_intel package
    root, while lookup_hashes.py imports it from threat_intel.virustotal — confirm
    the top-level export exists or make the two imports consistent.
    """

    # Only http/https URLs are submitted for lookup.
    SCHEMES = re.compile('https?')

    def __init__(self, lookup_when=None, **kwargs):
        """Args:
            lookup_when: An optional boolean function of a blob deciding whether to lookup the line.
        """
        lookup_when_url_scheme_matches = self._generate_lookup_when(lookup_when)
        super(LookupURLsFilter, self).__init__(
            'LSQuarantineDataURLString', 'osxcollector_vturl',
            lookup_when=lookup_when_url_scheme_matches,
            name_of_api_key='virustotal', **kwargs
        )

    def _generate_lookup_when(self, only_lookup_when):
        """Generates a function that checks whether the blob contains a valid URL
        in the LSQuarantineDataURLString field.
        """
        def check_url_scheme(blob):
            # Use .get so a blob missing the key is skipped instead of raising KeyError.
            url = blob.get('LSQuarantineDataURLString', '')
            return self.SCHEMES.match(url) and (not only_lookup_when or only_lookup_when(blob))
        return check_url_scheme

    def _lookup_iocs(self, all_iocs, resource_per_req=25):
        """Caches the VirusTotal info for a set of URLs.

        Args:
            all_iocs - a list of URLs.
            resource_per_req - how many URLs to batch into a single API request.
        Returns:
            A dict with URL as key and threat info as value
        """
        cache_file_name = config_get_deep('virustotal.LookupURLsFilter.cache_file_name', None)
        vt = VirusTotalApi(self._api_key, resource_per_req, cache_file_name=cache_file_name)
        reports = vt.get_url_reports(all_iocs)

        # Keep only non-empty reports interesting enough to store.
        return {
            url: self._trim_url_report(report)
            for url, report in reports.items()
            if report and self._should_store_ioc_info(report)
        }

    def _should_store_ioc_info(self, report, min_hits=1):
        """Only store if the URL has > min_hits positive detections.

        Args:
            report - A dict response from get_url_reports
            min_hits - Minimum number of VT positives
        Returns:
            boolean
        """
        return 1 == report.get('response_code') and min_hits < report.get('positives', 0)

    def _trim_url_report(self, report):
        """Copy just the required keys from the report into a new report.

        Args:
            report - A dict response from get_url_reports
        Returns:
            A smaller dict
        """
        copy_keys = [
            'scan_id',
            'resource',
            'url',
            'scan_date',
            'permalink',
            'positives',
            'total',
            'response_code',
        ]

        return {key: report.get(key) for key in copy_keys}
94 |
95 |
def main():
    """Command-line entry point: stream stdin lines through LookupURLsFilter to stdout."""
    run_filter_main(LookupURLsFilter)


if __name__ == '__main__':
    main()
102 |
--------------------------------------------------------------------------------
/tests/output_filters/opendns/related_domains_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.opendns.related_domains import RelatedDomainsFilter
6 | from tests.output_filters.run_filter_test import RunFilterTest
7 |
8 |
class TestRelatedDomainsFilter(RunFilterTest):

    """Exercises RelatedDomainsFilter against a fixed set of initial domains and IPs."""

    def setup_method(self, method):
        self._initial_domains = ['zendesk.com', 'jpmorganaccess.com', 'opendns.zendesk.com', 'yelp.com']
        self._initial_ips = ['159.53.60.177']

    def _run_test(self, blobs, expected):
        """Run the filter over blobs and verify the 'osxcollector_related' values added."""
        def make_filter():
            return RelatedDomainsFilter(initial_domains=self._initial_domains, initial_ips=self._initial_ips)

        results = self.run_test(make_filter, input_blobs=blobs)
        self.assert_key_added_to_blob('osxcollector_related', expected, blobs, results)

    def test_no_domains(self):
        # Lines with no 'osxcollector_domains' key should not be tagged at all.
        blobs = [
            {'tater': 'tots'},
        ]
        self._run_test(blobs, None)

    def test_direct_domain_match(self):
        # Direct meaning the domain in the input is an initial domain
        blobs = [
            {'osxcollector_domains': ['opendns.zendesk.com']},
        ]
        expected = [
            {
                'domains': {'opendns.zendesk.com': ['opendns.zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_related_domain_match(self):
        # The input domain is only related to initial domains, not one of them.
        blobs = [
            {'osxcollector_domains': ['webmd.com']},
        ]
        expected = [
            {
                'domains': {'webmd.com': ['opendns.zendesk.com', 'zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_multiple_related_domain_match(self):
        # Each related domain in the line gets its own entry.
        blobs = [
            {'osxcollector_domains': ['webmd.com', 'hushmail.zendesk.com']},
        ]
        expected = [
            {
                'domains':
                {
                    'webmd.com': ['opendns.zendesk.com', 'zendesk.com'],
                    'hushmail.zendesk.com': ['opendns.zendesk.com'],
                },
            },
        ]
        self._run_test(blobs, expected)

    def test_direct_and_related_domain_match(self):
        # A domain that is both initial and related accumulates both kinds of matches.
        blobs = [
            {'osxcollector_domains': ['zendesk.com']},
        ]
        expected = [
            {
                'domains': {'zendesk.com': ['opendns.zendesk.com', 'zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_direct_ip_match(self):
        # Initial IPs participate in the relation as well as initial domains.
        blobs = [
            {'osxcollector_domains': ['jpmorganaccess.com']},
        ]
        expected = [
            {
                'domains': {'jpmorganaccess.com': ['159.53.60.177', 'jpmorganaccess.com', 'opendns.zendesk.com', 'zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_whitelist_domain(self):
        # Whitelisted domains are never tagged.
        blobs = [
            {'osxcollector_domains': ['yelp.com']},
        ]
        expected = [
            None,
        ]
        self._run_test(blobs, expected)
96 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/related_files.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # RelatedFilesFilter finds files related to specific terms or file names.
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | import os.path
10 | from argparse import ArgumentParser
11 |
12 | import simplejson
13 |
14 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
15 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
16 | from osxcollector.output_filters.util.dict_utils import DictUtils
17 |
18 |
class RelatedFilesFilter(OutputFilter):

    """RelatedFilesFilter finds files related to specific terms or file names.

    The file paths passed to the filter during creation are split into arrays of
    directory or file names. Anything matching a stop list of common directory names
    is discarded.
    """

    def __init__(self, when=None, file_terms=None, **kwargs):
        """Args:
            when: An optional boolean function of a blob; matching blobs contribute their file paths as terms.
            file_terms: An optional list of file paths or terms to pivot on.
        """
        # Forward **kwargs so base-class options reach OutputFilter, matching
        # the other filter subclasses (previously the kwargs were dropped).
        super(RelatedFilesFilter, self).__init__(**kwargs)
        self._all_blobs = list()
        self._terms = set()
        self._usernames = set()

        self._when = when

        if file_terms:
            for val in file_terms:
                self._create_terms(val)

    def _create_terms(self, val):
        """Split a path into lowercased components and keep the useful ones as terms."""
        for term in os.path.normpath(val.lower()).split(os.path.sep):
            # Single characters and common directory names make poor pivot terms.
            if len(term) > 1 and term not in self.STOP_WORDS:
                self._terms.add(term)

    def filter_line(self, blob):
        """Cache every line; harvest terms and usernames from lines matching 'when'.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            None - every line is consumed here and re-emitted by end_of_lines.
        """
        self._all_blobs.append(blob)

        if self._when and self._when(blob):
            for key in self.FILE_NAME_KEYS:
                val = DictUtils.get_deep(blob, key)
                if val:
                    self._create_terms(val)
            if 'osxcollector_username' in blob:
                self._usernames.add(blob['osxcollector_username'].lower())

        return None

    def end_of_lines(self):
        """Tag every cached line mentioning a term with 'osxcollector_related'.

        Returns:
            The full list of cached blobs, annotated in place.
        """
        # Usernames appear in most home-directory paths and would match nearly every line.
        self._terms = self._terms - self._usernames

        for blob in self._all_blobs:
            line = simplejson.dumps(blob).lower()
            for term in self._terms:
                if term in line:
                    blob.setdefault('osxcollector_related', {})
                    blob['osxcollector_related'].setdefault('files', [])
                    blob['osxcollector_related']['files'].append(term)

        return self._all_blobs

    def get_argument_parser(self):
        """Describe the command-line arguments this filter understands."""
        parser = ArgumentParser()
        group = parser.add_argument_group('RelatedFilesFilter')
        group.add_argument(
            '-f', '--file-term', dest='file_terms', default=[], action='append',
            help='[OPTIONAL] Suspicious terms to use in pivoting through file names. May be specified more than once.',
        )
        return parser

    @property
    def terms(self):
        # The current set of pivot terms (usernames are only removed at end_of_lines).
        return self._terms

    @property
    def usernames(self):
        # Usernames harvested from 'osxcollector_username' keys.
        return self._usernames

    # Keys to look in to find file paths
    FILE_NAME_KEYS = [
        'file_path',
        'osxcollector_plist_path',
    ]

    # Words that can never be terms
    STOP_WORDS = [
        'applications',
        'bin',
        'contents',
        'cores',
        'coreservices',
        'dev',
        'downloads',
        'extensions',
        'frameworks',
        'helpers',
        'home',
        'information',
        'libexec',
        'libraries',
        'library',
        'macos',
        'malware',
        'net',
        'network',
        'opt',
        'plugins',
        'private',
        'privateframeworks',
        'python',
        'resources',
        'sbin',
        'support',
        'system',
        'tmp',
        'user',
        'users',
        'usr',
        'utilities',
        'versions',
        'var',
    ]
132 |
133 |
def main():
    """Command-line entry point: stream stdin lines through RelatedFilesFilter to stdout."""
    run_filter_main(RelatedFilesFilter)


if __name__ == '__main__':
    main()
140 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/find_domains.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # FindDomainsFilter looks for domains in all input lines and adds those domains into the 'osxcollector_domains' key.
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | import logging
10 | import re
11 |
12 | import six
13 | from six.moves.urllib.parse import unquote_plus
14 | from six.moves.urllib.parse import urlsplit
15 |
16 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
17 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
18 | from osxcollector.output_filters.exceptions import BadDomainError
19 | from osxcollector.output_filters.util.domains import clean_domain
20 | from osxcollector.output_filters.util.domains import expand_domain
21 |
22 |
class FindDomainsFilter(OutputFilter):

    """Adds 'osxcollector_domains' key to output lines.

    This filter parses domains out of anywhere in an output line and adds them to a clean array in the line.
    This is helpful as a pre-processing step before sending the domains off to threat APIs or matching against
    threat feeds.
    """

    def __init__(self, **kwargs):
        super(FindDomainsFilter, self).__init__(**kwargs)
        # Accumulates domains found in the line currently being filtered.
        self._domains = set()

    def filter_line(self, blob):
        """Find domains in a line.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            The blob, with an 'osxcollector_domains' key added when any domain was found.
        """
        self._domains = set()
        self._look_for_domains(blob)

        # self._domains accumulates domains during calls to _look_for_domains
        if len(self._domains):
            blob['osxcollector_domains'] = sorted(self._domains)

        return blob

    def _look_for_domains(self, val, key=None):
        """Given a value and perhaps a key, look for domains.

        Args:
            val: The value, could be of any type
            key: A string key associated with the value.
        """
        if isinstance(val, six.string_types):
            if key in self.HOST_KEYS:
                self._add_domain(val)
                return
            # re.search returns a Match or None, never -1; the previous
            # `-1 != self.SCHEMES.search(val)` check was always true.
            if self.SCHEMES.search(val):
                # Sometimes values are complex strings, like JSON or pickle encoded stuff.
                # Try splitting the string on non-URL related punctuation
                for maybe_url in re.split(r'[ \'\(\)\"\[\]\{\}\;\n\t#@\^&\*=]+', val):
                    domain = self._url_to_domain(maybe_url)
                    self._add_domain(domain)
        elif isinstance(val, list):
            for elem in val:
                self._look_for_domains(elem)
        elif isinstance(val, dict):
            for key, elem in six.iteritems(val):
                self._look_for_domains(elem, key)
                self._look_for_domains(key)

    def _url_to_domain(self, maybe_url):
        """Converts an URL to a domain.

        The code deals with eccentricities of both unquote_plus and split_url.

        Args:
            maybe_url - a string that might be an URL.
        Returns:
            a string representing the domain or None
        """
        if self.SCHEMES.match(maybe_url):
            url = unquote_plus(maybe_url)

            try:
                split_url = urlsplit(url)
                if split_url.hostname:
                    return split_url.hostname
            # in case "url" is not a valid URL, just log a message
            except ValueError as e:
                logging.info('Cannot split the URL: {0}. Hint: {1}'.format(url, e))

        return None

    def _add_domain(self, domain):
        """Clean a domain and store it internally"""
        if not domain:
            return

        try:
            domain = clean_domain(domain)
            for extracted in expand_domain(domain):
                self._domains.add(extracted)
        except BadDomainError:
            pass

    # Strings that suggest a value may contain URLs worth parsing.
    SCHEMES = re.compile('((https?)|ftp)')
    # Keys whose string values are treated directly as host names.
    HOST_KEYS = ['host', 'host_key', 'baseDomain']
109 |
110 |
def main():
    """Command-line entry point: stream stdin lines through FindDomainsFilter to stdout."""
    run_filter_main(FindDomainsFilter)


if __name__ == '__main__':
    main()
117 |
--------------------------------------------------------------------------------
/tests/output_filters/run_filter_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | import simplejson
6 | from mock import patch
7 | from six import StringIO
8 |
9 | from osxcollector.output_filters.base_filters.output_filter import _run_filter
10 |
11 |
class RunFilterTest:

    """Base class for tests that run an OutputFilter over canned input lines."""

    def run_test(self, create_filter, input_blobs=None, expected_output_blobs=None):
        """Mocks out stdin, stdout, and config then runs input lines through an OutputFilter.

        Args:
            create_filter: A callable that returns an OutputFilter.
            input_blobs: An array of dicts to pass to OutputFilter. These will be serialized into strings and passed as stdin.
            expected_output_blobs: An array of dicts the output of the OutputFilter must match.
        Returns:
            The list of dicts the filter wrote to stdout.
        """
        if not input_blobs:
            input_blobs = []
        input_lines = '\n'.join([simplejson.dumps(blob) for blob in input_blobs])

        with patch(
            'sys.stdin', StringIO(input_lines),
        ), patch(
            'sys.stdout', new_callable=StringIO,
        ) as mock_stdout, patch(
            'osxcollector.output_filters.util.config._config_file_path',
            return_value='./tests/output_filters/data/test_osxcollector_config.yaml',
        ):
            output_filter = create_filter()
            _run_filter(output_filter)
            output_lines = [line for line in mock_stdout.getvalue().split('\n') if len(line)]
            output_blobs = [simplejson.loads(line) for line in output_lines]

        if expected_output_blobs:
            assert len(output_blobs) == len(expected_output_blobs)

            # Compare every blob; the previous [1:] slices silently skipped
            # verification of the first output line.
            for expected_blob, actual_blob in zip(expected_output_blobs, output_blobs):
                assert_equal_sorted(expected_blob, actual_blob)

        return output_blobs

    def assert_key_added_to_blob(self, added_key, expected_values, input_blobs, output_blobs):
        """Verifies that a single key has been added to each input_blob with an expected value.

        Asserts that effectively:
            output_blobs = [input_blob.update(key=expected_value) for expected_value, input_blob in zip(expected_values, input_blobs)]

        Args:
            added_key: The name of the key that should have been added.
            expected_values: A list containing the expected value of the key for each input_blob
            input_blobs: A list of dicts that were the initial input.
            output_blobs: A list of dicts that are the output.
        """

        if expected_values:
            actual_values = list(blob.get(added_key, None) for blob in output_blobs)
            for actual, expected in zip(actual_values, expected_values):
                assert_equal_sorted(actual, expected)

        # Minus the added key, the input should be unchanged
        for input_blob, output_blob in zip(input_blobs, output_blobs):
            if added_key in output_blob:
                del output_blob[added_key]
            assert_equal_sorted(input_blob, output_blob)

    def load_reports(self, filename):
        """Load a JSON fixture file and return the parsed object."""
        with open(filename, 'r') as fp:
            return simplejson.loads(fp.read())
76 |
77 |
def assert_equal_sorted(a, b):
    """Assert equality while ignoring the ordering of lists and sets.

    Args:
        a: first item to compare
        b: next item to compare
    Raises:
        AssertionError when the items don't match
    """
    normalized_a = sort_for_comparison(a)
    normalized_b = sort_for_comparison(b)
    assert normalized_a == normalized_b
88 |
89 |
def sort_for_comparison(val):
    """Return an order-insensitive stand-in for val, suitable for equality checks.

    Lists and sets become sorted lists (when their elements are orderable);
    dicts are rebuilt with each value normalized recursively. Unlike the
    previous version, the input is never mutated.

    Args:
        val: A value of any type
    Returns:
        A more easily comparable version of the input
    """
    if isinstance(val, list):
        try:
            return sorted(val)
        except Exception:
            # Unorderable elements (e.g. mixed types) - compare as-is.
            return val
    elif isinstance(val, set):
        return sort_for_comparison(list(val))
    elif isinstance(val, dict):
        # Build a new dict instead of rewriting the caller's values in place.
        return {key: sort_for_comparison(subval) for key, subval in val.items()}
    else:
        return val
111 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/base_filters/chain.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # ChainFilter is a base class that passes each line through a chain of OutputFilters.
4 | #
5 | from __future__ import absolute_import
6 | from __future__ import unicode_literals
7 |
8 | from argparse import ArgumentParser
9 |
10 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
11 |
12 |
class ChainFilter(OutputFilter):

    """ChainFilter is a base class that passes each line through a chain of OutputFilters.

    This is useful for constructing a single OutputFilter that does multiple things without
    having to run `python -m FilterOne | python -m FilterTwo | python -m FilterThree`.
    """

    def __init__(self, chain, **kwargs):
        """Adds the property _next_link to each OutputFilter in the chain.

        Treating the chain as a linked list makes it easy to know which filter runs after the current filter.
        _next_link should be present and have a value of None for the final link in the chain.

        Args:
            chain: An enumerable of OutputFilters. Must yield at least one filter.
        Raises:
            ValueError: when the chain is empty.
        """
        super(ChainFilter, self).__init__(**kwargs)

        # Materialize the chain so any enumerable (including a generator) works,
        # matching the docstring's promise; the original required an indexable sequence.
        chain = list(chain)
        if not chain:
            raise ValueError('ChainFilter requires at least one OutputFilter in the chain')

        prev_link = None
        for cur_link in chain:
            if prev_link:
                prev_link._next_link = cur_link
            cur_link._next_link = None
            prev_link = cur_link

        self._head_of_chain = chain[0]

    def filter_line(self, blob):
        """Each Line of OSXCollector output will be passed to filter_line.

        Passes the line to the filter at the head of the chain. Output from each filter flows to its _next_link.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            A dict or None
        """
        return self._on_filter_line(blob, self._head_of_chain)

    def _on_filter_line(self, blob, link):
        """Pass the line to a link in the chain and pass any output to the next link.

        Args:
            blob: A dict representing one line of output from OSXCollector.
            link: An OutputFilter
        Returns:
            A dict or None
        """
        # Stop when the chain is exhausted or when a filter swallowed the line.
        if not link or not blob:
            return blob
        return self._on_filter_line(link.filter_line(blob), link._next_link)

    def end_of_lines(self):
        """Pass end_of_lines to the filter at the head of the chain.

        Returns:
            An enumerable of dicts
        """
        return self._on_end_of_lines(self._head_of_chain)

    def _on_end_of_lines(self, link):
        """Pass end_of_lines to a link in the chain and pass any output to the next link.

        Args:
            link: An OutputFilter
        Returns:
            An enumerable of dicts
        """
        if not link._next_link:
            return link.end_of_lines()

        # Lines this link emits at end-of-input must still flow through the
        # remainder of the chain before being returned.
        filtered_lines = []
        for blob in link.end_of_lines():
            filtered_line = self._on_filter_line(blob, link._next_link)
            if filtered_line:
                filtered_lines.append(filtered_line)

        final_lines = self._on_end_of_lines(link._next_link)
        if final_lines:
            filtered_lines.extend(final_lines)

        return filtered_lines

    def get_argument_parser(self):
        """Collects the ArgumentParsers from every OutputFilter in the chain.

        Returns:
            An `argparse.ArgumentParser`, or None when no filter has arguments.
        """
        parsers_to_chain = []

        cur_link = self._head_of_chain
        while cur_link:
            arg_parser = cur_link.get_argument_parser()
            if arg_parser:
                parsers_to_chain.append(arg_parser)
            cur_link = cur_link._next_link

        parser = self._on_get_argument_parser()
        if parser:
            parsers_to_chain.append(parser)

        if parsers_to_chain:
            return ArgumentParser(parents=parsers_to_chain, conflict_handler='resolve')

        return None

    def _on_get_argument_parser(self):
        """Returns an ArgumentParser with arguments for just this OutputFilter (not the contained chained OutputFilters).

        Returns:
            An `argparse.ArgumentParser`
        """
        return None
128 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/virustotal/lookup_domains.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # LookupDomainsFilter uses VirusTotal to lookup the values in 'osxcollector_domains' and add 'osxcollector_vtdomain' key.
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | from threat_intel.virustotal import VirusTotalApi
10 |
11 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
12 | from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter
13 | from osxcollector.output_filters.util.blacklist import create_blacklist
14 | from osxcollector.output_filters.util.config import config_get_deep
15 |
16 |
class LookupDomainsFilter(ThreatFeedFilter):

    """A class to lookup domains using the VirusTotal API."""

    def __init__(self, lookup_when=None, **kwargs):
        """Configure the filter to read 'osxcollector_domains' and write 'osxcollector_vtdomain'.

        Args:
            lookup_when: An optional boolean function deciding whether a line's domains are looked up.
        """
        super(LookupDomainsFilter, self).__init__(
            'osxcollector_domains', 'osxcollector_vtdomain',
            lookup_when=lookup_when, name_of_api_key='virustotal', **kwargs
        )
        # Whitelisted domains are skipped during lookup (see _lookup_iocs).
        self._whitelist = create_blacklist(
            config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}),
        )

    def _lookup_iocs(self, all_iocs, resource_per_req=25):
        """Caches the VirusTotal info for a set of domains.

        Domains on a whitelist will be ignored.

        Args:
            all_iocs - a list of domains.
            resource_per_req - number of domains batched into a single API request.
        Returns:
            A dict with domain as key and threat info as value
        """
        threat_info = {}

        cache_file_name = config_get_deep('virustotal.LookupDomainsFilter.cache_file_name', None)
        vt = VirusTotalApi(self._api_key, resource_per_req, cache_file_name=cache_file_name)

        iocs = [x for x in all_iocs if not self._whitelist.match_values(x)]
        reports = vt.get_domain_reports(iocs)
        for domain in reports:
            if not reports[domain]:
                continue

            trimmed_report = self._trim_domain_report(domain, reports[domain])
            if self._should_store_ioc_info(trimmed_report):
                threat_info[domain] = trimmed_report

        return threat_info

    def _should_store_ioc_info(self, trimmed_report):
        """Decide whether a report from VT is interesting enough to store in the output.

        Args:
            trimmed_report: A dict of data from VT
        Returns:
            boolean
        """
        sample_keys = [
            ('detected_downloaded_samples', 3),
            ('detected_referrer_samples', 3),
            ('detected_communicating_samples', 3),
            ('detected_urls', 3),
        ]
        detections = trimmed_report.get('detections', {})
        # Interesting when any detection category meets its threshold.
        return any(
            detections.get(sample_key, 0) >= threshold
            for sample_key, threshold in sample_keys
        )

    def _trim_domain_report(self, domain, initial_report):
        """Reorganizes and compacts a VT domain report.

        Args:
            domain - string domain name
            initial_report - dict result of calling VirusTotalApi.get_domain_reports for the domain

        Returns:
            A reorganized and compacted dict.
        """
        trimmed_report = {}

        # Collapse sample lists down to counts; only the volume matters downstream.
        sample_keys = [
            'undetected_referrer_samples',
            'undetected_communicating_samples',
            'detected_downloaded_samples',
            'detected_referrer_samples',
            'detected_communicating_samples',
            'detected_urls',
        ]
        detections = {}
        for sample_key in sample_keys:
            detections[sample_key] = len(initial_report.get(sample_key, []))
        trimmed_report['detections'] = detections

        categorization_keys = [
            'categories',
            'BitDefender category',
            'BitDefender domain info',
            'Websense ThreatSeeker category',
            'Webutation domain info',
            'WOT domain info',
            'TrendMicro category',
        ]
        categorization = {}
        for copy_key in categorization_keys:
            if copy_key in initial_report:
                categorization[copy_key] = initial_report[copy_key]
        trimmed_report['categorization'] = categorization

        just_copy_keys = [
            'response_code',
        ]
        for copy_key in just_copy_keys:
            if copy_key in initial_report:
                trimmed_report[copy_key] = initial_report[copy_key]

        trimmed_report['domain'] = domain

        return trimmed_report
127 |
128 |
def main():
    """Run LookupDomainsFilter as a standalone command-line filter (see run_filter_main)."""
    run_filter_main(LookupDomainsFilter)


if __name__ == '__main__':
    main()
135 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/base_filters/threat_feed.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # ThreatFeedFilter is a base class to find info on IOCs using some random API.
4 | #
5 | from __future__ import absolute_import
6 | from __future__ import unicode_literals
7 |
8 | import six
9 |
10 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
11 | from osxcollector.output_filters.util.config import config_get_deep
12 |
13 |
class ThreatFeedFilter(OutputFilter):

    """A base class to find info on IOCs using some random API.

    Derived classes need only to implement _lookup_iocs()
    If necessary, they should implement _should_add_threat_info_to_blob.

    It is assumed that the API uses an api_key stored in the config.
    """

    # Lines carrying more than this many IOCs are passed through without lookup
    # to avoid flooding the threat feed. Subclasses may override.
    MAX_IOCS_PER_LINE = 10

    def __init__(self, ioc_key, output_key, lookup_when=None, name_of_api_key=None, **kwargs):
        """Configure the ThreatFeedFilter.

        Args:
            ioc_key: A string key to look for in each line of OSXCollector output.
                The value of this key is the potential IOC to lookup in a threat feed.
            output_key: A string key which is added to output lines and contains the result of threat feed lookups.
            lookup_when: A boolean function to call to decide whether to perform a lookup on a line.
                Use lookup_when to limit which IOCs are looked up.
            name_of_api_key: A string name of the key in the 'api_key' section of config.
        """
        super(ThreatFeedFilter, self).__init__(**kwargs)

        if name_of_api_key:
            self._api_key = config_get_deep('api_key.{0}'.format(name_of_api_key))

        self._lookup_when = lookup_when
        # Lines with IOCs are held here until end_of_lines runs the lookup.
        self._blobs_with_iocs = list()
        # The set of unique IOC values accumulated across all lines.
        self.ioc_set = set()

        self._ioc_key = ioc_key
        self._output_key = output_key

    def _lookup_iocs(self, all_iocs):
        """Looks up threat info for IOCs.

        This is the only method a derived class needs to implement.

        Args:
            all_iocs: An enumerable of strings representing all IOCs to lookup.
        Returns:
            A dict of the form {ioc_value: threat_info}
        """
        raise NotImplementedError('Derived classes must implement _lookup_iocs')

    def _should_add_threat_info_to_blob(self, blob, threat_info):
        """Threat info is only added to a blob if this returns True.

        Override this method in derived classes to correlate threat_info and blob data.

        For example, the ShadowServer filter looks up SHA1 hashes. Since SHA1 hashes for different files collide, the ShadowServer
        filter overrides _should_add_threat_info_to_blob and verifies that the filename in the blob matches the filename in the threat
        info.

        Args:
            blob: A dict of data representing a line of output from OSXCollector
            threat_info: A dict of threat info.
        Returns:
            boolean
        """
        return True

    def filter_line(self, blob):
        """Accumulate IOCs to lookup with the ThreatFeed.

        Lines with IOCs are held until end_of_lines; lines without IOCs pass straight through.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            A dict or None
        """
        if self._ioc_key in blob and (not self._lookup_when or self._lookup_when(blob)):
            ioc_list = blob[self._ioc_key]
            if isinstance(ioc_list, six.string_types):
                ioc_list = [ioc_list]

            # A line with an excessive number of IOCs is passed through unexamined.
            if len(ioc_list) > self.MAX_IOCS_PER_LINE:
                return blob

            for ioc in ioc_list:
                if not ioc:
                    continue

                self.ioc_set.add(ioc)

            self._blobs_with_iocs.append(blob)
            return None
        else:
            return blob

    def end_of_lines(self):
        """Performs threat feed lookup on the IOCs and adds output to the stored Lines.

        Returns:
            An enumerable of dicts
        """
        if self.ioc_set:
            self._add_threat_info_to_blobs()
        return self._blobs_with_iocs

    def _add_threat_info_to_blobs(self):
        """Looks up all accumulated IOCs and attaches their threat info to the stored blobs."""
        # Sort for deterministic lookup order (and deterministic caching).
        self.ioc_set = sorted(list(self.ioc_set))
        all_threat_info = self._lookup_iocs(self.ioc_set)
        for blob in self._blobs_with_iocs:
            ioc_list = blob[self._ioc_key]
            if isinstance(ioc_list, six.string_types):
                ioc_list = [ioc_list]

            for ioc in ioc_list:
                info = all_threat_info.get(ioc)
                if not info:
                    continue

                if self._should_add_threat_info_to_blob(blob, info):
                    blob.setdefault(self._output_key, [])
                    blob[self._output_key].append(info)
--------------------------------------------------------------------------------
/tests/output_filters/data/cache.virustotal.LookupDomainsFilter.json:
--------------------------------------------------------------------------------
1 | {"virustotal-domain-reports": {"evil.example.com": {"detected_downloaded_samples": [{"positives": 10, "sha256": "c66e5a89051acf14fcec03618e4c00e9bfc095352bdd94ffa216a16041010aab", "total": 12}, {"positives": 9, "sha256": "4725839ffd5fd40205f8e255864031016a001cca8ff3574ddd2c6fd7ac6a23e8", "total": 10}, {"positives": 34, "sha256": "07598d5335710987284370cf9ce4a5a4a6bcc46b06429f0f7cc93714c73e7785", "total": 40}, {"positives": 10, "sha256": "1b7caa3073b83dadd52e38e5e833fbac4ab57253b2945f2e699ac253db2b4300", "total": 12}, {"positives": 88, "sha256": "8e6338540084c2118d1d032b83f0488bdb9a933f615c0d3c0e85863027072c92", "total": 100}], "detected_referrer_samples": [{"positives": 11, "sha256": "71e9d3cafeaca051deca9e1040d40af97850aec19ee1047f4ed7ff5ebb057247", "total": 53}, {"positives": 15, "sha256": "de46b5cbd9dc1446ade90b30524fc903a13c99b30bf534aca4ab4b07ad83943a", "total": 53}, {"positives": 16, "sha256": "c19fb354851a1ca670f374e1e60d50531dc44b78d5621732092946870ac79edc", "total": 53}, {"positives": 16, "sha256": "72c399c1ccd4597d77f2017aa38b094d098c2e3db97ff4f46244a0178a17030e", "total": 54}, {"positives": 14, "sha256": "b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7", "total": 54}], "response_code": 1, "detected_communicating_samples": [{"positives": 24, "date": "2014-05-30 02:31:16", "sha256": "31ce992017ae628e59fb0599330ba18483777f6f281c660b036649825296a3cb", "total": 52}, {"positives": 34, "date": "2014-04-29 23:16:50", "sha256": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302", "total": 51}, {"positives": 15, "date": "2013-07-09 06:23:40", "sha256": "6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360", "total": 46}, {"positives": 34, "date": "2014-04-29 23:16:46", "sha256": "7db46a7eb4baeeb342d37a6fc05910adeed339450701c600ad973a77aa28b121", "total": 51}, {"positives": 15, "date": "2013-07-09 06:23:00", "sha256": "f33c27745f2bd87344be790465ef984a972fd539dc83bd4f61d4242c607ef1ee", "total": 46}], 
"undetected_communicating_samples": [], "detected_urls": [{"total": 23, "positives": 6, "url": "www.example.com/bingo", "scan_date": "2015-01-23 14:27"}, {"total": 25, "positives": 23, "url": "www.example.com/bongo", "scan_date": "2015-01-23 14:30"}, {"total": 14, "positives": 12, "url": "www.example.com/dingo", "scan_date": "2015-01-23 14:20"}, {"total": 20, "positives": 18, "url": "www.example.com/dongo", "scan_date": "2015-01-23 14:22"}, {"total": 20, "positives": 19, "url": "www.example.com/orange", "scan_date": "2015-01-23 14:28"}], "undetected_referrer_samples": []}, "good.example.com": {"detected_downloaded_samples": [], "detected_referrer_samples": [], "response_code": 0, "detected_communicating_samples": [], "undetected_communicating_samples": [], "detected_urls": [], "undetected_referrer_samples": []}, "good.example.co.uk": {"detected_downloaded_samples": [], "detected_referrer_samples": [], "response_code": 0, "detected_communicating_samples": [], "undetected_communicating_samples": [], "detected_urls": [], "undetected_referrer_samples": []}, "evil.example.co.uk": {"detected_downloaded_samples": [{"positives": 0, "date": "2014-07-29 09:46:22", "sha256": "3044d232d1815c9e1584f406b67c3331c0eaebd304cd280d578e75368e5b0c3a", "total": 54}, {"positives": 0, "date": "2013-05-16 08:57:51", "sha256": "f33c27745f2bd87344be790465ef984a972fd539dc83bd4f61d4242c607ef1ee", "total": 46}, {"positives": 0, "date": "2013-05-03 01:16:53", "sha256": "841f2c2faadf6a28aeb4fd29d7cd3a4156af20a68bd9ad7c2f41db64db06015f", "total": 46}, {"positives": 0, "date": "2013-04-27 20:50:18", "sha256": "02900e181b1941c79c73dadddbd03a8f6f974ca884baf5860cd5a54ac4fb97e1", "total": 46}], "detected_referrer_samples": [{"positives": 16, "sha256": "0de277bca1df07e691c865c84a0dfd849ac0124fab8f9ccde9c28fb3abe24abc", "total": 54}, {"positives": 16, "sha256": "ca34d60f2c1dc20932f2fb8adce1be2a8b9389054d67343ea4c86b9cc9ffabb0", "total": 54}, {"positives": 16, "sha256": 
"aefddcb96b75fe89195dbbfdd2c373f72492d5f71903eeeaae4afa1f71865515", "total": 54}, {"positives": 15, "sha256": "bee4aecc415e23328b9139b9abdb34a22b36e28da13c75aa8699e07fd7b10307", "total": 53}, {"positives": 16, "sha256": "c6065d39610471d970242a303c3e8905a7200c8aa2fd4bc69a95a3b618df4cb9", "total": 54}], "response_code": 1, "detected_communicating_samples": [{"positives": 24, "date": "2014-05-30 02:31:16", "sha256": "31ce992017ae628e59fb0599330ba18483777f6f281c660b036649825296a3cb", "total": 52}, {"positives": 34, "date": "2014-04-29 23:16:50", "sha256": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302", "total": 51}, {"positives": 15, "date": "2013-07-09 06:23:30", "sha256": "6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360", "total": 46}, {"positives": 39, "date": "2014-04-29 23:16:50", "sha256": "b22eb5ff3793b551470f4758e4cff656b5168e10cfce24ac51ebd5a8a7fffe4a", "total": 51}, {"positives": 35, "date": "2013-07-09 06:23:30", "sha256": "26692ab17432ad292810c31ce4cee9e43c0166e23c2b05950751d52fc1decbd2", "total": 46}], "undetected_communicating_samples": [], "detected_urls": [{"total": 61, "positives": 3, "url": "http://www.example.co.uk/regents-park", "scan_date": "2014-11-27 13:56:37"}, {"total": 59, "positives": 3, "url": "http://www.example.co.uk/hyde-park", "scan_date": "2014-10-07 01:04:17"}, {"total": 59, "positives": 2, "url": "http://www.example.co.uk/st-jamess-park", "scan_date": "2014-10-07 00:22:03"}, {"total": 59, "positives": 2, "url": "http://www.example.co.uk/green-park", "scan_date": "2014-09-30 16:27:23"}, {"total": 58, "positives": 3, "url": "http://www.example.co.uk/the-royal-parks", "scan_date": "2014-08-08 18:23:25"}, {"total": 58, "positives": 3, "url": "http://www.example.co.uk/hampstead-heath", "scan_date": "2014-08-06 11:53:22"}], "undetected_referrer_samples": []}}}
--------------------------------------------------------------------------------
/osxcollector/output_filters/base_filters/output_filter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # An OutputFilter transforms the output from OSXCollector. Every filter must derive from OutputFilter.
4 | #
5 | # _run_filter is a default implementation of a main that reads input from stdin, feeds it to an OutputFilter, and
6 | # spits the output to stdout.
7 | #
8 | from __future__ import absolute_import
9 | from __future__ import unicode_literals
10 |
11 | import sys
12 | from argparse import ArgumentParser
13 |
14 | import simplejson
15 | import six
16 |
17 | from osxcollector.output_filters.util.error_messages import write_exception
18 |
19 |
class OutputFilter(object):

    """An OutputFilter transforms the output from OSXCollector. Every filter must derive from OutputFilter.

    The basic flow of data through an OutputFilter:
    - Each line of OSXCollector output is passed to OutputFilter.filter_line
    - After all lines have been passed to filter_line, a final call is made to OutputFilter.end_of_lines

    There are two common ways a filter deals with lines:
    - A filter that modifies each line independent of other lines could simply implement filter_line.
    - A filter that modifies each line based on other lines may want to accumulate lines until end_of_lines is called,
      then bulk operate on all lines at once.

    OutputFilters use the words 'line' or 'blob' to refer to OSXCollector output.
    """

    def __init__(self, **kwargs):
        """Skeleton for __init__

        Args:
            kwargs: Variable arguments are used to pass filter specific args to OutputFilters.
        """
        pass

    def filter_line(self, blob):
        """Each Line of OSXCollector output will be passed to filter_line.

        The OutputFilter should return the line, either modified or unmodified.
        The OutputFilter can also choose to return nothing, effectively swallowing the line.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            A dict or None
        """
        return blob

    def end_of_lines(self):
        """Called after all lines have been fed to filter_line.

        The OutputFilter performs any processing that requires the complete input to have already been fed.

        Returns:
            An enumerable of dicts
        """
        return []

    def get_argument_parser(self):
        """Describes commandline arguments for this OutputFilter.

        The names of the `dest` param for the argument in the ArgumentParser must match the name of positional or
        named arguments in `__init__`

        Returns:
            An `argparse.ArgumentParser`
        """
        return None
77 |
78 |
def _unbuffered_input(read_from):
    """A generator to allow lines to be read before EOF is reached.

    Args:
        read_from: A stream to read from
    Returns:
        yields strings
    """
    while True:
        line = read_from.readline()
        if not line:
            break
        if isinstance(line, six.binary_type):
            line = line.decode('latin-1', errors='ignore')
        if six.PY2:
            yield line.encode('utf-8', errors='ignore')
        else:
            yield line
93 |
94 |
def _run_filter(output_filter, input_stream=None, output_stream=None, *args, **kwargs):
    """Feeds stdin to an instance of OutputFilter and spews to stdout.

    Args:
        output_filter: An instance of OutputFilter.
        input_stream: Where to read input from.
        output_stream: Where to write output to.
    """
    input_stream = input_stream or sys.stdin
    output_stream = output_stream or sys.stdout

    def _emit(line_blob):
        # Serialize a blob as a single JSON line.
        output_stream.write(simplejson.dumps(line_blob))
        output_stream.write('\n')

    for json_string in _unbuffered_input(input_stream):
        try:
            blob = simplejson.loads(json_string)
        except simplejson.JSONDecodeError as e:
            write_exception(e)
            continue

        filtered_blob = output_filter.filter_line(blob)
        if filtered_blob:
            _emit(filtered_blob)

    for blob in output_filter.end_of_lines():
        _emit(blob)

    output_stream.flush()
126 |
127 |
def run_filter_main(output_filter_cls):
    """A `main` method which runs an OutputFilter.

    Parses the commandline (including any filter-specific arguments), builds
    the filter, and streams input through it to output.

    Args:
        output_filter_cls: Class name of the OutputFilter
    """
    filter_arguments = output_filter_cls().get_argument_parser()
    argument_parents = [filter_arguments] if filter_arguments else []

    parser = ArgumentParser(parents=argument_parents, conflict_handler='resolve')
    parser.add_argument(
        '-i', '--input-file', dest='input_file', default=None,
        help='[OPTIONAL] Path to OSXCollector output to read. Defaults to stdin otherwise.',
    )
    parser.add_argument(
        '-o', '--output-file', dest='output_file', default=None,
        help='[OPTIONAL] Path where OSXCollector output augmented with the analysis results will be written to. '
        'Defaults to stdout otherwise.',
    )
    args = parser.parse_args()

    output_filter = output_filter_cls(**vars(args))

    fp_in = open(args.input_file, 'r') if args.input_file else None
    fp_out = open(args.output_file, 'w') if args.output_file else None
    # Ensure the files are closed even when the filter raises part way through.
    try:
        _run_filter(output_filter, input_stream=fp_in, output_stream=fp_out)
    finally:
        if fp_in:
            fp_in.close()
        if fp_out:
            fp_out.close()
161 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/util/blacklist.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Utilities for dealing with blacklists
4 | #
5 | from __future__ import absolute_import
6 | from __future__ import unicode_literals
7 |
8 | import logging
9 | import os
10 | import re
11 |
12 | import six
13 |
14 | from osxcollector.output_filters.exceptions import BadDomainError
15 | from osxcollector.output_filters.exceptions import MissingConfigError
16 | from osxcollector.output_filters.util.dict_utils import DictUtils
17 | from osxcollector.output_filters.util.domains import clean_domain
18 |
19 |
def create_blacklist(config_chunk, data_feeds=None):
    """Reads the config and builds a Blacklist.

    The blacklist config is sufficiently complex that much of this method deals with simply validating config

    Args:
        config_chunk: A dict of config for building the blacklist
        data_feeds: Dict of generator functions returning the blacklist data
    Returns:
        A Blacklist
    Raises:
        MissingConfigError - when required key does not exist.
    """
    # Avoid the mutable-default-argument pitfall; treat missing feeds as empty.
    if data_feeds is None:
        data_feeds = {}

    required_keys = ['blacklist_name', 'blacklist_keys']
    if not all(key in config_chunk for key in required_keys):
        raise MissingConfigError('Blacklist config is missing a required key.\nRequired keys are: {0}'.format(repr(required_keys)))

    if not isinstance(config_chunk['blacklist_keys'], list):
        raise MissingConfigError('The value of \'blacklist_keys\' in Blacklist config must be a list')

    blacklist_name = config_chunk.get('blacklist_name')
    blacklist_keys = config_chunk.get('blacklist_keys')
    blacklist_file_path = config_chunk.get('blacklist_file_path')
    blacklist_data_feed = config_chunk.get('blacklist_data_feed')
    if blacklist_file_path:
        if not os.path.exists(blacklist_file_path):
            raise MissingConfigError('The blacklist file {} does not exist'.format(blacklist_file_path))
        blacklist_data_generator = _read_blacklist_file(blacklist_file_path)
    elif blacklist_data_feed:
        if blacklist_data_feed not in data_feeds:
            raise MissingConfigError('Data feed {} not found among provided generators'.format(blacklist_data_feed))
        blacklist_data_generator = data_feeds[blacklist_data_feed]()
    else:
        raise MissingConfigError('Blacklist config is missing a data input.\nEither select a file or a generator object')
    blacklist_is_regex = config_chunk.get('blacklist_is_regex', False)
    blacklist_is_domains = config_chunk.get('blacklist_is_domains', False)
    return Blacklist(blacklist_name, blacklist_keys, blacklist_data_generator, blacklist_is_regex, blacklist_is_domains)
57 |
58 |
59 | def _read_blacklist_file(filepath):
60 | """ Parse blacklist file """
61 | with open(filepath, 'r') as f:
62 | for line in f:
63 | line = line.strip()
64 | if line and not line.startswith('#'):
65 | yield line
66 |
67 |
class Blacklist(object):

    def __init__(self, name, blacklisted_keys, input_generator, is_regex=False, is_domains=False):
        """Build a blacklist from the data in the blacklist file.

        Built in smarts make it easy to build a blacklist of domains

        Args:
            name: A string display name for the blacklist.
            blacklisted_keys: A list of string keys to examine in each blob.
            input_generator: An iterable of blacklisted values.
            is_regex: If True, values are compiled as regular expressions.
            is_domains: If True, values are treated as domain names.
        Raises:
            MissingConfigError - when required config key does not exist.
        """
        self._name = name
        self._blacklisted_keys = blacklisted_keys
        self._is_domains = is_domains
        # Domain matching is implemented with regexes, so is_domains implies is_regex.
        self._is_regex = is_regex or self._is_domains
        self._blacklisted_values = dict(
            self._convert_to_matching_term(val) for val in input_generator if val
        )
        # _convert_to_matching_term returns (None, None) for unusable domain values.
        self._blacklisted_values.pop(None, None)

    def _convert_to_matching_term(self, blacklisted_value):
        """Convert a blacklisted_value to a matching term.

        Args:
            blacklisted_value - string of value on a blacklist
        Returns:
            A (matching term, display name) tuple. The matching term is a compiled
            regex when the blacklist uses regexes or domains; (None, None) when a
            domain value cannot be parsed.
        """
        display_name = blacklisted_value

        if self._is_domains:
            try:
                domain = clean_domain(blacklisted_value)
            except BadDomainError:
                if not isinstance(blacklisted_value, six.text_type):
                    blacklisted_value = blacklisted_value.decode('utf8')
                logging.warning(
                    u'Blacklisted value "{0}" cannot be resolved as a domain name'
                    .format(blacklisted_value),
                )
                return None, None

            # Match the domain itself and any subdomain of it.
            blacklisted_value = re.compile(r'^(.+\.)*{0}$'.format(re.escape(domain)))

        elif self._is_regex:
            blacklisted_value = re.compile(blacklisted_value)

        return blacklisted_value, display_name

    def match_line(self, blob):
        """Determines whether a line matches the blacklist.

        Returns:
            String of the matched term if a value matches, None otherwise
        """
        for key in self._blacklisted_keys:
            values = DictUtils.get_deep(blob, key)
            if not values:
                continue

            matching_term = self.match_values(values)
            if matching_term:
                return matching_term

        return None

    def match_values(self, values):
        """Determines whether any of an array of values matches the blacklist.

        Every value is checked; previously only the first value was ever examined.

        Returns:
            String of the matched term if a value matches, None otherwise
        """
        if not isinstance(values, list):
            values = [values]

        for val in values:
            if self._is_regex or self._is_domains:
                matching_term = next(
                    (
                        name for term, name in six.iteritems(self._blacklisted_values) if term.search(val)
                    ), None,
                )
            else:
                matching_term = self._blacklisted_values.get(val, None)

            if matching_term:
                return matching_term

        return None

    @property
    def name(self):
        return self._name
157 |
--------------------------------------------------------------------------------
/tests/output_filters/find_domains_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.find_domains import FindDomainsFilter
6 | from tests.output_filters.run_filter_test import RunFilterTest
7 |
8 |
class TestFindDomainsFilter(RunFilterTest):

    """Exercises FindDomainsFilter against many shapes of blob containing domains."""

    def _run_test(self, blob, expected):
        # Run the filter over a single blob and verify that the
        # 'osxcollector_domains' key ends up holding exactly `expected`.
        out = self.run_test(FindDomainsFilter, [blob])
        self.assert_key_added_to_blob('osxcollector_domains', [expected], [blob], out)

    def test_no_domain(self):
        self._run_test({'fungo': 'kidney'}, None)

    def test_tld(self):
        self._run_test({'fungo': 'http://www.example.com'}, ['example.com', 'www.example.com'])

    def test_bare_domain(self):
        self._run_test({'fungo': 'http://example.com'}, ['example.com'])

    def test_uk_domain(self):
        self._run_test({'fungo': 'http://www.example.co.uk'}, ['example.co.uk', 'www.example.co.uk'])

    def test_info_domain(self):
        self._run_test({'fungo': 'http://www.example.info'}, ['example.info', 'www.example.info'])

    def test_ftp_scheme(self):
        self._run_test({'fungo': 'ftp://example.com'}, ['example.com'])

    def test_domain_in_path(self):
        # A URL embedded in the query string is discovered as well.
        self._run_test(
            {'fungo': 'http://www.example.com/bango?p=http://www.dingo.com'},
            [
                'dingo.com',
                'example.com',
                'www.dingo.com',
                'www.example.com',
            ],
        )

    def test_quoted_domain(self):
        # URL-encoded scheme separator ('%3A') is decoded before extraction.
        self._run_test(
            {'fungo': 'http%3A//www.example.com'},
            [
                'example.com',
                'www.example.com',
            ],
        )

    def test_quoted_in_path(self):
        self._run_test(
            {'fungo': 'http://www.example.com/bango?p=http%3A//www.dingo.co.uk'},
            [
                'dingo.co.uk',
                'example.com',
                'www.dingo.co.uk',
                'www.example.com',
            ],
        )

    def test_domain_in_key(self):
        # Dict keys are scanned, not just values.
        self._run_test(
            {'http://www.example.com': 'zungo'},
            [
                'example.com',
                'www.example.com',
            ],
        )

    def test_list(self):
        self._run_test(
            {'fungo': ['http://www.example.com', 'https://www.zzz.sample.org']},
            [
                'example.com',
                'sample.org',
                'www.example.com',
                'www.zzz.sample.org',
            ],
        )

    def test_dict(self):
        self._run_test(
            {'fungo': {'http://www.example.com': 'https://www.zzz.sample.org'}},
            [
                'example.com',
                'sample.org',
                'www.example.com',
                'www.zzz.sample.org',
            ],
        )

    def test_list_of_dict(self):
        blob = {
            'fungo': [
                {'http://www.example.com': 'https://www.zzz.sample.org'},
                {'a': 'https://www.dingo.co.uk'},
            ],
        }
        self._run_test(
            blob,
            [
                'dingo.co.uk',
                'example.com',
                'sample.org',
                'www.dingo.co.uk',
                'www.example.com',
                'www.zzz.sample.org',
            ],
        )

    def test_tokenizing(self):
        # Domains are recovered despite surrounding punctuation and noise.
        blob = {
            'fungo': [
                '{"bar":\'http://www.example.com\'}',
                '(http://www.example2.com)',
                ';http://www.example3.com\n',
                'http://example4.com.',
                '#@^%$*http://www.xxx.yyy.zzz.example.com/fungo/digno',
            ],
        }
        self._run_test(
            blob,
            [
                'example.com',
                'example2.com',
                'example3.com',
                'example4.com',
                'www.example.com',
                'www.example2.com',
                'www.example3.com',
                'www.xxx.yyy.zzz.example.com',
            ],
        )

    def test_special_keys_domain(self):
        # A bare domain (no scheme) under a special key like 'host' is found.
        self._run_test(
            {'host': 'www.example.com'},
            [
                'example.com',
                'www.example.com',
            ],
        )

    def test_no_dupes(self):
        # The same domain appearing many times is reported once.
        blob = {
            'host': 'www.example.com',
            'another_thing': 'http://www.example.com',
            'https://www.example.com': True,
            'dictation': {'threepete': ['ftp://example.com', 'http://example.com', 'https://www.example.com']},
        }
        self._run_test(
            blob,
            [
                'example.com',
                'www.example.com',
            ],
        )

    def test_special_keys_url(self):
        self._run_test(
            {'host': 'https://www.example.com'},
            [
                'example.com',
                'www.example.com',
            ],
        )

    def test_not_valid_url(self):
        # Garbage that merely resembles a URL must not yield any domains.
        blob = {'term': 'https://blah.bork.yarn.dorn-duh-%5DYYYY.WW,boo:bloom,fizz:(ault:(akh_ugh:!it,pook:NOOM)),rort:!(\'@tuht\',dort))'}
        self._run_test(blob, None)
174 |
--------------------------------------------------------------------------------
/tests/output_filters/related_files_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from osxcollector.output_filters.related_files import RelatedFilesFilter
6 | from tests.output_filters.run_filter_test import assert_equal_sorted
7 | from tests.output_filters.run_filter_test import RunFilterTest
8 |
9 |
def when_anytime(blob):
    """Default predicate for RelatedFilesFilter tests: every blob qualifies."""
    return True
13 |
14 |
class RelatedFilesFilterTest(RunFilterTest):

    """Creates a RelatedFilesFilter, calls run_test, and performs additional filter specific validation."""

    def teardown_method(self, method):
        # Drop the reference so one test's filter can never leak into the next.
        self._output_filter = None

    def _run_test(
        self, input_blobs=None, when=when_anytime, file_terms=None, expected_terms=None,
        expected_usernames=None, expected_is_related=None,
    ):
        """Creates a RelatedFilesFilter, calls run_test, and performs additional filter specific validation.

        Args:
            input_blobs: An enumerable of dicts
            when: A callable when to init the RelatedFilesFilter with
            file_terms: An enumerable of strings to init the RelatedFilesFilter with
            expected_terms: The expected final value of RelatedFilesFilter.terms
            expected_usernames: The expected final value of RelatedFilesFilter.usernames
            expected_is_related: An enumerable of the expected value of 'osxcollector_related' for each output_blob

        Returns:
            The list of output blobs produced by the filter.
        """

        def create_related_files_filter():
            # run_test wants a factory; keep a handle on the instance so the
            # assertions below can inspect its final state.
            self._output_filter = RelatedFilesFilter(when=when, file_terms=file_terms)
            return self._output_filter

        output_blobs = self.run_test(create_related_files_filter, input_blobs=input_blobs)
        # Compare against None (not truthiness) so a caller may explicitly
        # assert that an empty list of terms/usernames is expected.
        if expected_terms is not None:
            assert_equal_sorted(expected_terms, self._output_filter.terms)
        if expected_usernames is not None:
            assert_equal_sorted(expected_usernames, self._output_filter.usernames)
        if expected_is_related is not None:
            self.assert_key_added_to_blob('osxcollector_related', expected_is_related, input_blobs, output_blobs)
        return output_blobs
49 |
50 |
class TestCreateTerms(RelatedFilesFilterTest):

    """Verifies how search terms are derived from the configured file paths."""

    def test_single_term(self):
        self._run_test(file_terms=['one_word'], expected_terms=['one_word'])

    def test_multi_terms(self):
        self._run_test(
            file_terms=['one_word', 'pants', 'face'],
            expected_terms=['one_word', 'pants', 'face'],
        )

    def test_split_terms(self):
        # A path is broken apart into its individual components.
        self._run_test(
            file_terms=['/ivanlei/source/osxcollector'],
            expected_terms=['ivanlei', 'source', 'osxcollector'],
        )

    def test_whitelist_terms(self):
        # Common path components ('Users', 'bin', 'python') are not terms.
        self._run_test(
            file_terms=['/Users/ivanlei/source/osxcollector', '/Users/ivanlei/virtual_envs/osxcollector/bin/python'],
            expected_terms=['ivanlei', 'source', 'osxcollector', 'virtual_envs'],
        )

    def test_whitelist_username_terms(self):
        # A username discovered in the blob stream must be dropped from the terms.
        self._run_test(
            input_blobs=[{'osxcollector_username': 'ivanlei'}],
            file_terms=['/Users/ivanlei/source/osxcollector', '/Users/ivanlei/virtual_envs/osxcollector/bin/python'],
            expected_terms=['source', 'osxcollector', 'virtual_envs'],
            expected_usernames=['ivanlei'],
        )
82 |
83 |
class TestFindUserNames(RelatedFilesFilterTest):

    """Ensures usernames are collected so they can later be excluded as terms."""

    def test_find_username(self):
        self._run_test(
            input_blobs=[{'osxcollector_username': 'bob'}],
            expected_usernames=['bob'],
        )

    def test_find_multiple_username(self):
        # Duplicate usernames collapse to one; blobs without the key are ignored.
        blobs = [
            {'osxcollector_username': 'bob'},
            {'osxcollector_username': 'jim'},
            {'osxcollector_username': 'bob'},
            {'banana': 'pants'},
        ]
        self._run_test(input_blobs=blobs, expected_usernames=['bob', 'jim'])
102 |
103 |
class TestRelatedFilesFilter(RelatedFilesFilterTest):

    """End-to-end behavior of RelatedFilesFilter over streams of blobs."""

    def test_single_term(self):
        blobs = [{'banana': '/var/bin/magic_value'}]
        expected = [{'files': ['magic_value']}]
        self._run_test(input_blobs=blobs, file_terms=['magic_value'], expected_is_related=expected)

    def test_multi_term(self):
        blobs = [
            {'avocado': '/var/bin/magic/hat'},
            {'mango': '/var/bin/value/hat'},
            {'shandy': '/var/bin/magic/value/hat'},
        ]
        expected = [
            {'files': ['magic']},
            {'files': ['value']},
            {'files': ['magic', 'value']},
        ]
        self._run_test(input_blobs=blobs, file_terms=['magic', 'value'], expected_is_related=expected)

    def test_split_term(self):
        # A single term containing '/' behaves like two independent terms.
        blobs = [
            {'avocado': '/var/bin/magic/hat'},
            {'mango': '/var/bin/value/hat'},
            {'shandy': '/var/bin/magic/value/hat'},
        ]
        expected = [
            {'files': ['magic']},
            {'files': ['value']},
            {'files': ['magic', 'value']},
        ]
        self._run_test(input_blobs=blobs, file_terms=['magic/value'], expected_is_related=expected)

    def test_discover_term(self):
        # Without explicit file_terms, terms come from 'file_path' values seen in the stream.
        blobs = [
            {'file_path': '/var/bin/magic/value'},
            {'carrot': '/var/bin/magic/hat'},
            {'apple': '/var/bin/value/hat'},
            {'lemmon': '/lime/rickey'},
        ]
        expected = [
            {'files': ['magic', 'value']},
            {'files': ['magic']},
            {'files': ['value']},
            None,
        ]
        self._run_test(input_blobs=blobs, expected_is_related=expected)

    def test_skip_username(self):
        # A discovered username ('magic') must never be used as a term.
        blobs = [
            {'file_path': '/var/bin/magic/value', 'osxcollector_username': 'magic'},
            {'carrot': '/var/bin/magic/hat'},
            {'apple': '/var/bin/value/hat'},
            {'lemmon': '/lime/rickey'},
        ]
        expected = [
            {'files': ['value']},
            None,
            {'files': ['value']},
            None,
        ]
        self._run_test(input_blobs=blobs, expected_is_related=expected)

    def test_when(self):
        # Only blobs passing the `when` predicate contribute discovered terms.
        def only_bingbing(blob):
            return 'bingbing' in blob

        blobs = [
            {'file_path': '/var/bin/magic', 'bingbing': True, 'osxcollector_username': 'hat'},
            {'file_path': '/var/bin/value'},
            {'carrot': '/var/bin/magic/hat'},
            {'apple': '/var/bin/value/hat'},
            {'lemmon': '/lime/rickey'},
        ]
        expected = [
            {'files': ['magic']},
            None,
            {'files': ['magic']},
            None,
            None,
        ]
        self._run_test(input_blobs=blobs, when=only_bingbing, expected_is_related=expected)
195 |
--------------------------------------------------------------------------------
/tests/output_filters/util/blacklist_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | from copy import deepcopy
6 |
7 | import pytest
8 | from mock import call
9 | from mock import patch
10 |
11 | from osxcollector.output_filters.exceptions import MissingConfigError
12 | from osxcollector.output_filters.util.blacklist import create_blacklist
13 |
14 |
class TestCreateBlacklist:

    """Tests for osxcollector.output_filters.util.blacklist.create_blacklist."""

    @pytest.fixture(scope='function', autouse=True)
    def file_contents(self):
        """Patch blacklist-file reading; yield the mock so tests can swap its return_value."""
        contents = [
            # Fruits
            'apple', 'banana',

            # Cars
            'corolla', 'datsun',
        ]
        with patch(
            'osxcollector.output_filters.util.blacklist._read_blacklist_file',
            return_value=contents,
        ) as file_contents:
            yield file_contents

    @pytest.fixture(scope='function')
    def blacklist_data(self):
        """Minimal valid config chunk containing only the required keys."""
        yield {
            'blacklist_name': 'only_required',
            'blacklist_keys': ['fruit_name'],
            'blacklist_file_path': '/who/cares/I/mock/this.txt',
        }

    @pytest.fixture(scope='module', autouse=True)
    def mock_exists(self):
        """Pretend every blacklist file path exists on disk."""
        with patch('os.path.exists', return_value=True):
            yield

    def test_only_required_keys(self, blacklist_data):
        """A config with only required keys builds a plain (non-regex, non-domain) blacklist."""
        blacklist = create_blacklist(blacklist_data)
        assert blacklist.name == blacklist_data['blacklist_name']
        assert blacklist._blacklisted_keys == blacklist_data['blacklist_keys']
        assert not blacklist._is_regex
        assert not blacklist._is_domains

    def test_missing_required_keys(self, blacklist_data):
        """Dropping any single required key raises MissingConfigError."""
        for key in blacklist_data:
            _blacklist_data = deepcopy(blacklist_data)
            del _blacklist_data[key]
            with pytest.raises(MissingConfigError):
                create_blacklist(_blacklist_data)

    def test_missing_data_input(self, blacklist_data):
        """With neither a file path nor a data feed there is no data source."""
        blacklist_data.pop('blacklist_file_path')
        with pytest.raises(MissingConfigError):
            create_blacklist(blacklist_data)

    def test_required_with_two_keys(self, blacklist_data):
        """Multiple blacklist_keys are accepted and stored verbatim."""
        blacklist_data['blacklist_keys'] = ['fruit_name', 'car_name']
        blacklist = create_blacklist(blacklist_data)
        assert blacklist._blacklisted_keys == blacklist_data['blacklist_keys']

    def test_keys_not_list(self, blacklist_data):
        """blacklist_keys must be a list, not a bare string."""
        blacklist_data['blacklist_keys'] = 'fruit_name'
        with pytest.raises(MissingConfigError):
            create_blacklist(blacklist_data)

    def test_is_regex(self, blacklist_data):
        """blacklist_is_regex flips the regex flag."""
        blacklist_data['blacklist_is_regex'] = True
        blacklist = create_blacklist(blacklist_data)
        assert blacklist._is_regex

    def test_is_domains(self, blacklist_data, file_contents):
        """blacklist_is_domains implies regex matching regardless of blacklist_is_regex."""
        file_contents.return_value = ['apple.com', 'banana.org']
        # Setting 'blacklist_is_domains' overrides 'blacklist_is_regex'
        blacklist_data['blacklist_is_domains'] = True
        blacklist_data['blacklist_is_regex'] = False
        blacklist = create_blacklist(blacklist_data)
        assert blacklist._is_regex
        assert blacklist._is_domains

    # TODO: Refactor OSXCollector Output Filters to work with unicode-based domains
    def test_bad_domains_unicode(self, blacklist_data):
        """Unicode domains that cannot be resolved each log a warning."""
        unicode_domain_1 = 'yelp.公司'
        unicode_domain_2 = 'www.Yülp.tld'
        unicode_domain_3 = 'иelф.р'
        unicode_domains = [unicode_domain_1, unicode_domain_2, unicode_domain_3]
        blacklist_data['blacklist_is_domains'] = True
        with patch(
            'osxcollector.output_filters.util.blacklist._read_blacklist_file',
            return_value=unicode_domains,
        ):
            with patch('logging.warning', autospec=True) as patched_logging_warning:
                create_blacklist(blacklist_data)
                assert patched_logging_warning.call_count == 3

                calls = [
                    call(
                        u'Blacklisted value "{0}" cannot be resolved as a domain name'
                        .format(unicode_domain),
                    ) for unicode_domain in unicode_domains
                ]
                assert calls == patched_logging_warning.call_args_list

    def test_bad_domains(self, blacklist_data):
        """Non-domain file entries warn and are excluded from matching."""
        blacklist_data['blacklist_is_domains'] = True
        with patch('logging.warning', autospec=True) as patched_logging_warning:
            blacklist = create_blacklist(blacklist_data)
            assert patched_logging_warning.call_count == 4
            calls = [
                call('Blacklisted value "apple" cannot be resolved as a domain name'),
                call('Blacklisted value "banana" cannot be resolved as a domain name'),
                call('Blacklisted value "corolla" cannot be resolved as a domain name'),
                call('Blacklisted value "datsun" cannot be resolved as a domain name'),
            ]
            assert calls == patched_logging_warning.call_args_list

        blob = {'fruit_name': 'apple.com'}
        assert not blacklist.match_line(blob)

    def test_match_fruit(self, blacklist_data):
        """Only blobs whose blacklisted key holds a blacklisted value match."""
        good_blobs = [
            {'fruit_name': 'apple'},
            {'fruit_name': 'banana'},
        ]
        bad_blobs = [
            {'car_name': 'corolla'},
            {'car_name': 'datsun'},
        ]

        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    def test_match_fruit_and_cars(self, blacklist_data):
        """With both keys configured, values under either key match."""
        good_blobs = [
            {'fruit_name': 'apple'},
            {'fruit_name': 'banana'},
            {'car_name': 'corolla'},
            {'car_name': 'datsun'},
        ]

        blacklist_data['blacklist_keys'] = ['fruit_name', 'car_name']
        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)

    def test_match_fruit_regex(self, blacklist_data, file_contents):
        """Regex entries match values under the configured keys only."""
        good_blobs = [
            {'fruit_name': 'apple'},
        ]

        bad_blobs = [
            {'fruit_name': 'banana'},
            {'car_name': 'corolla'},
            {'car_name': 'datsun'},
        ]

        blacklist_data['blacklist_is_regex'] = True
        file_contents.return_value = ['app.*', 'ban.+org']
        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    def test_match_domains(self, blacklist_data, file_contents):
        """A domain entry matches itself and subdomains, not lookalikes or other TLDs."""
        good_blobs = [
            {'fruit_name': 'apple.com'},
            {'fruit_name': 'www.apple.com'},
            {'fruit_name': 'www.another-thing.apple.com'},
        ]

        bad_blobs = [
            {'fruit_name': 'cran-apple.com'},
            {'fruit_name': 'apple.org'},
            {'fruit_name': 'apple.com.jp'},
            {'car_name': 'apple.com'},
        ]
        blacklist_data['blacklist_is_domains'] = True
        file_contents.return_value = ['apple.com']
        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    def test_match_domains_data_feed(self, blacklist_data):
        """Domains may be sourced from a named data feed instead of a file."""
        good_blobs = [
            {'fruit_name': 'apple.com'},
            {'fruit_name': 'www.apple.com'},
            {'fruit_name': 'www.another-thing.apple.com'},
        ]

        bad_blobs = [
            {'fruit_name': 'cran-apple.com'},
            {'fruit_name': 'apple.org'},
            {'fruit_name': 'apple.com.jp'},
            {'car_name': 'apple.com'},
        ]
        blacklist_data['blacklist_is_domains'] = True
        blacklist_data['blacklist_data_feed'] = 'domain_list'
        blacklist_data.pop('blacklist_file_path')

        def mock_generator():
            for domain in ['apple.com']:
                yield domain

        blacklist = create_blacklist(
            blacklist_data, {'domain_list': mock_generator},
        )
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    # TODO: Refactor OSXCollector Output Filters to work with unicode-based domains
    def test_log_unicode_domain(self):
        """Every unresolvable unicode domain logs a warning and never matches."""
        config_chunk = {
            'blacklist_name': 'Unicode domain',
            'blacklist_keys': ['visited_domain'],
            'blacklist_file_path': 'not_really_a_blacklist.txt',
            'blacklist_is_domains': True,
        }
        file_contents = ['Bücher.tld', 'yelp.公司', 'www.Yülp.tld', 'иelф.р']
        with patch(
            'osxcollector.output_filters.util.blacklist._read_blacklist_file',
            return_value=file_contents,
        ), patch('logging.warning', autospec=True) as patched_logging_warning:
            blacklist = create_blacklist(config_chunk)
            assert patched_logging_warning.call_count == 4
            calls = [
                call(
                    u'Blacklisted value "{0}" cannot be resolved as a domain name'
                    .format(domain),
                ) for domain in file_contents
            ]
            assert calls == patched_logging_warning.call_args_list

        # Key fixed from the misspelled 'visted_domain': the assertion now
        # actually exercises match_line against the configured key. The
        # unicode domain failed to load, so there is still no match.
        blob = {'visited_domain': 'Bücher.tld'}
        assert not blacklist.match_line(blob)
250 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/summary_filters/text.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 |
5 | import sys
6 | from numbers import Number
7 |
8 | import six
9 |
10 | from osxcollector.output_filters.summary_filters.summary import SummaryFilter
11 |
12 |
class TextSummaryFilter(SummaryFilter):
    """Prints the analysis summary (AKA "Very Readable Output") in plain text format."""

    # ANSI escape sequences used to colorize terminal output.
    END_COLOR = '\033[0m'
    SECTION_COLOR = '\033[1m'
    BOT_COLOR = '\033[93m\033[1m'
    KEY_COLOR = '\033[94m'
    VAL_COLOR = '\033[32m'

    def __init__(self, monochrome=False, text_output_file=None, **kwargs):
        """Initializes the text summary filter.

        Args:
            monochrome: When True, no ANSI color codes are written.
            text_output_file: Passed to SummaryFilter as summary_output_file.
            **kwargs: Forwarded to SummaryFilter.
        """
        super(TextSummaryFilter, self).__init__(summary_output_file=text_output_file, **kwargs)
        self._monochrome = monochrome

    def filter_line(self, blob):
        """Each Line of OSXCollector output will be passed to filter_line.

        The OutputFilter should return the line, either modified or unmodified.
        The OutputFilter can also choose to return nothing, effectively swallowing the line.

        Args:
            output_line: A dict

        Returns:
            A dict or None
        """
        # Bucket the blob into the per-category lists that end_of_lines summarizes.
        if 'osxcollector_vthash' in blob:
            self._vthash.append(blob)

        if 'osxcollector_vtdomain' in blob:
            self._vtdomain.append(blob)

        if 'osxcollector_opendns' in blob:
            self._opendns.append(blob)

        if 'osxcollector_blacklist' in blob:
            self._blacklist.append(blob)

        if 'osxcollector_related' in blob:
            self._related.append(blob)

        if self._show_signature_chain:
            # Flag startup/kext binaries whose signature chain is empty or
            # does not terminate at 'Apple Root CA'.
            if 'signature_chain' in blob and blob['osxcollector_section'] in ['startup', 'kext']:
                signature_chain = blob['signature_chain']
                if not len(signature_chain) or 'Apple Root CA' != signature_chain[-1]:
                    self._signature_chain.append(blob)

        if self._show_browser_ext:
            # NOTE(review): assumes every blob carries 'osxcollector_section';
            # a blob without it would raise KeyError here — confirm upstream
            # guarantees that key is always present.
            if blob['osxcollector_section'] in ['firefox', 'chrome'] and blob.get('osxcollector_subsection') == 'extensions':
                self._extensions.append(blob)

        return blob

    def _write(self, msg, color=END_COLOR):
        """Writes msg to the output stream, wrapped in ANSI color codes unless monochrome."""
        if not self._monochrome:
            self._output_stream.write(color)
        try:
            # NOTE(review): writing encoded bytes suits a Python 2 byte stream;
            # on a Python 3 text stream this raises TypeError (not the
            # UnicodeDecodeError caught below) — confirm the runtime/stream.
            self._output_stream.write(msg.encode('utf-8', errors='ignore'))
        except UnicodeDecodeError as err:
            self._output_stream.write(msg)
            sys.stderr.write('Unicode decode error: {0}'.format(err))
        if not self._monochrome:
            self._output_stream.write(self.END_COLOR)

    def end_of_lines(self):
        """Called after all lines have been fed to filter_output_line.

        The OutputFilter can do any batch processing on that requires the complete input.

        Returns:
            An array of dicts (empty array if no lines remain)
        """
        # Emit one chatty section per non-empty category collected in filter_line.
        self._write('== Very Readable Output Bot ==\n', self.BOT_COLOR)
        self._write('Let\'s see what\'s up with this machine.\n\n', self.BOT_COLOR)

        if len(self._vthash):
            self._write('Dang! You\'ve got known malware on this machine. Hope it\'s commodity stuff\n', self.BOT_COLOR)
            self._summarize_blobs(self._vthash)
            self._write('Sheesh! This is why we can\'t have nice things!\n\n', self.BOT_COLOR)

        if len(self._vtdomain):
            self._write('I see you\'ve been visiting some \'questionable\' sites. If you trust VirusTotal that is.\n', self.BOT_COLOR)
            self._summarize_blobs(self._vtdomain)
            self._write('I hope it was worth it!\n\n', self.BOT_COLOR)

        if len(self._opendns):
            self._write('Well, here\'s some domains OpenDNS wouldn\'t recommend.\n', self.BOT_COLOR)
            self._summarize_blobs(self._opendns)
            self._write('You know you shouldn\'t just click every link you see? #truth\n\n', self.BOT_COLOR)

        if len(self._blacklist):
            self._write('We put stuff on a blacklist for a reason. Mostly so you don\'t do this.\n', self.BOT_COLOR)
            self._summarize_blobs(self._blacklist)
            self._write('SMH\n\n', self.BOT_COLOR)

        if len(self._related):
            self._write('This whole things started with just a few clues. Now look what I found.\n', self.BOT_COLOR)
            self._summarize_blobs(self._related)
            self._write('Nothing hides from Very Readable Output Bot\n\n', self.BOT_COLOR)

        if len(self._signature_chain):
            self._write('If these binaries were signed by \'Apple Root CA\' I\'d trust them more.\n', self.BOT_COLOR)
            self._summarize_blobs(self._signature_chain)
            self._write('Let\'s just try and stick with some safe software\n\n', self.BOT_COLOR)

        if len(self._extensions):
            self._write('Let\'s see what\'s hiding in the browser, shall we.\n', self.BOT_COLOR)
            self._summarize_blobs(self._extensions)
            self._write('You know these things have privileges galore.\n\n', self.BOT_COLOR)

        if len(self._add_to_blacklist):
            # Deduplicate the (key, value) suggestions gathered by _summarize_blobs.
            self._add_to_blacklist = list(set(self._add_to_blacklist))
            self._write('If I were you, I\'d probably update my blacklists to include:\n', self.BOT_COLOR)
            for key, val in self._add_to_blacklist:
                self._summarize_val(key, val)
            self._write('That might just help things, Skippy!\n\n', self.BOT_COLOR)

        self._write('== Very Readable Output Bot ==\n', self.BOT_COLOR)
        self._write('#kaythanksbye', self.BOT_COLOR)

        return []

    def _summarize_blobs(self, blobs):
        """Writes a detailed summary for each blob and collects blacklist suggestions."""
        for blob in blobs:
            self._summarize_line(blob)

            add_to_blacklist = False

            if 'osxcollector_vthash' in blob:
                self._summarize_vthash(blob)
                add_to_blacklist = True

            if 'osxcollector_vtdomain' in blob:
                self._summarize_vtdomain(blob)

            if 'osxcollector_opendns' in blob:
                self._summarize_opendns(blob)

            if 'osxcollector_blacklist' in blob:
                for key in blob['osxcollector_blacklist']:
                    self._summarize_val('blacklist-{0}'.format(key), blob['osxcollector_blacklist'][key])

            if 'osxcollector_related' in blob:
                for key in blob['osxcollector_related']:
                    self._summarize_val('related-{0}'.format(key), blob['osxcollector_related'][key])

            # An empty 'md5' also flags the blob for blacklist suggestions.
            if 'md5' in blob and '' == blob['md5']:
                add_to_blacklist = True

            if add_to_blacklist:
                # Suggest any hash or domain not already on a blacklist.
                blacklists = blob.get('osxcollector_blacklist', {})
                values_on_blacklist = blacklists.get('hashes', [])
                for key in ['md5', 'sha1', 'sha2']:
                    val = blob.get(key, '')
                    if len(val) and val not in values_on_blacklist:
                        self._add_to_blacklist.append((key, val))

                values_on_blacklist = blacklists.get('domains', [])
                for domain in blob.get('osxcollector_domains', []):
                    if domain not in values_on_blacklist:
                        self._add_to_blacklist.append(('domain', domain))

    def _summarize_line(self, blob):
        """Writes the blob's section header and every non-osxcollector key/value pair."""
        section = blob.get('osxcollector_section')
        subsection = blob.get('osxcollector_subsection', '')

        self._write('- {0} {1}\n'.format(section, subsection), self.SECTION_COLOR)
        for key in sorted(blob.keys()):
            if not key.startswith('osxcollector') and blob.get(key):
                val = blob.get(key)
                self._summarize_val(key, val)

    def _summarize_vthash(self, blob):
        """Writes selected fields of each VirusTotal hash report on the blob."""
        # Deliberately shadows the parameter: iterates the report dicts.
        for blob in blob['osxcollector_vthash']:
            for key in ['positives', 'total', 'scan_date', 'permalink']:
                val = blob.get(key)
                self._summarize_val(key, val, 'vthash')

    def _summarize_vtdomain(self, blob):
        """Writes selected fields of each VirusTotal domain report on the blob."""
        for blob in blob['osxcollector_vtdomain']:
            for key in ['domain', 'detections']:
                val = blob.get(key)
                self._summarize_val(key, val, 'vtdomain')

    def _summarize_opendns(self, blob):
        """Writes selected fields of each OpenDNS report on the blob."""
        for blob in blob['osxcollector_opendns']:
            for key in ['domain', 'categorization', 'security', 'link']:
                val = blob.get(key)
                self._summarize_val(key, val, 'opendns')

    def _summarize_val(self, key, val, prefix=None):
        """Writes a single 'key: value' line, optionally namespaced by prefix."""
        self._print_key(key, prefix)
        self._print_val(val)
        self._write('\n')

    def _print_key(self, key, prefix):
        """Writes the colorized key, as ' prefix-key: ' when a prefix is given."""
        if not prefix:
            prefix = ''
        else:
            prefix += '-'

        self._write('  {0}{1}'.format(prefix, key), self.KEY_COLOR)
        self._write(': ')

    def _print_val(self, val):
        """Recursively writes val in a JSON-like colorized form; strings are truncated to 480 chars."""
        if isinstance(val, list):
            self._write('[')
            for index, elem in enumerate(val):
                self._print_val(elem)
                if index != len(val) - 1:
                    self._write(', ')
            self._write(']')
        elif isinstance(val, dict):
            self._write('{')
            for index, key in enumerate(val):
                self._write('"')
                self._write(key, self.VAL_COLOR)
                self._write('": ')
                self._print_val(val[key])
                if index != len(val) - 1:
                    self._write(', ')
            self._write('}')
        elif isinstance(val, six.string_types):
            # Cap very long strings so one value cannot flood the summary.
            val = val[:480]
            self._write('"')
            self._write(val, self.VAL_COLOR)
            self._write('"')
        elif isinstance(val, Number):
            self._write('{0}'.format(val), self.VAL_COLOR)
241 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/opendns/lookup_domains.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # LookupDomainsFilter uses OpenDNS to lookup the values in 'osxcollector_domains' and adds the 'osxcollector_opendns' key.
4 | #
5 | from __future__ import absolute_import
6 | from __future__ import unicode_literals
7 |
8 | import logging
9 | from collections import namedtuple
10 |
11 | import six
12 | from threat_intel.opendns import InvestigateApi
13 |
14 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
15 | from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter
16 | from osxcollector.output_filters.util.blacklist import create_blacklist
17 | from osxcollector.output_filters.util.config import config_get_deep
18 |
19 |
class LookupDomainsFilter(ThreatFeedFilter):

    """Uses OpenDNS to lookup the values in 'osxcollector_domains' and adds the 'osxcollector_opendns' key."""

    # Domain categories to consider suspicious
    SUSPICIOUS_CATEGORIES = [
        'Adware',
        'Botnet',
        'Typo Squatting',
        'Drive-by Downloads/Exploits',
        'Mobile Threats',
        'High Risk Sites and Locations',
        'Malware',
        'Phishing',
    ]

    # Each check marks a domain suspicious when security_info[key] <= threshold.
    # 'min' and 'max' record the score range documented by OpenDNS for that key.
    SecurityCheck = namedtuple('SecurityCheck', ['key', 'min', 'max', 'threshold'])
    SECURITY_CHECKS = [
        # Domain Generation Algorithm. This score is generated based on the likeliness of the domain name being
        # generated by an algorithm rather than a human. This algorithm is designed to identify domains which have
        # been created using an automated randomization strategy, which is a common evasion technique in malware kits
        # or botnets. This score ranges from -100 (suspicious) to 0 (benign)
        #
        SecurityCheck('dga_score', -100, 0, -70),

        # Suspicious rank for a domain that reviews based on the lookup behavior of client IP for the domain.
        # Securerank is designed to identify hostnames requested by known infected clients but never requested
        # by clean clients, assuming these domains are more likely to be bad.
        # Scores returned range from -100 (suspicious) to 100 (benign).
        #
        SecurityCheck('securerank2', -100, 100, -10),

        # ASN reputation score, ranges from -100 to 0 with -100 being very suspicious
        SecurityCheck('asn_score', -100, 0, -3),

        # Prefix ranks domains given their IP prefixes (An IP prefix is the first three octets in an IP address)
        # and the reputation score of these prefixes.
        # Ranges from -100 to 0, -100 being very suspicious
        SecurityCheck('prefix_score', -100, 0, -12),

        # RIP ranks domains given their IP addresses and the reputation score of these IP addresses.
        # Ranges from -100 to 0, -100 being very suspicious
        SecurityCheck('rip_score', -100, 0, -25),
    ]

    SECURITY_BAD_KEYS = [
        # The name of any known attacks associated with this domain.
        # Returns blank if no known threat associated with domain.
        'attack',

        # The type of the known attack, such as botnet or APT.
        # Returns blank if no known threat associated with domain.
        'threat_type',
    ]

    def __init__(self, lookup_when=None, **kwargs):
        """Initializes the LookupDomainsFilter.

        Args:
            lookup_when: An optional boolean function deciding per-line whether to lookup domains.
            **kwargs: Passed through to ThreatFeedFilter; 'data_feeds' is also used for the whitelist.
        """
        super(LookupDomainsFilter, self).__init__(
            'osxcollector_domains', 'osxcollector_opendns',
            lookup_when=lookup_when, name_of_api_key='opendns', **kwargs
        )
        # Despite the name, create_blacklist builds the matcher used here as a domain whitelist.
        self._whitelist = create_blacklist(
            config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}),
        )

    def _lookup_iocs(self, all_iocs):
        """Caches the OpenDNS info for a set of domains.

        Domains on a whitelist will be ignored.
        First, lookup the categorization details for each domain.
        Next, if the categorization seems suspicious or unknown, lookup detailed security info.
        Finally, if the categorization or security info is suspicious, save the threat info.

        Args:
            all_iocs: an enumerable of string domain names.
        Returns:
            A dict {domain: opendns_info}
        """
        threat_info = {}

        cache_file_name = config_get_deep('opendns.LookupDomainsFilter.cache_file_name', None)
        investigate = InvestigateApi(self._api_key, cache_file_name=cache_file_name)

        # Whitelisted domains are never sent to OpenDNS.
        iocs = [x for x in all_iocs if not self._whitelist.match_values(x)]

        categorization = investigate.categorization(iocs)

        # Mark the categorization as suspicious
        for domain, categorization_info in six.iteritems(categorization):
            if categorization_info:
                categorization_info['suspicious'] = \
                    self._is_category_info_suspicious(categorization_info)
            else:
                logging.warning(
                    'No categorization for domain {0}'.format(domain),
                )
                categorization[domain] = {'suspicious': False}

        # Decide which values to lookup security info for
        iocs = [domain for domain in categorization if self._should_get_security_info(categorization[domain])]

        security = investigate.security(iocs)

        for domain, security_info in six.iteritems(security):
            if security_info:
                security_info['suspicious'] = \
                    self._is_security_info_suspicious(security_info)
            else:
                logging.warning(
                    'No security information for domain {0}'.format(domain),
                )
                security[domain] = {'suspicious': False}

        # Only domains judged suspicious by either lookup make it into the output.
        for domain in security:
            if self._should_store_ioc_info(categorization[domain], security[domain]):
                threat_info[domain] = {
                    'domain': domain,
                    'categorization': categorization[domain],
                    'security': self._trim_security_result(security[domain]),
                    'link': 'https://investigate.opendns.com/domain-view/name/{0}/view'.format(
                        domain.encode('utf-8', errors='ignore') if six.PY2 else domain,
                    ),
                }

        return threat_info

    def _is_category_info_suspicious(self, category_info):
        """Figure out whether the categorization info is suspicious.

        A domain is suspicious when its status is -1, when it has any security categories,
        or when any of its content categories is in SUSPICIOUS_CATEGORIES.

        Args:
            category_info: A dict of info returned by the OpenDNS categorization call
        Returns:
            boolean
        """
        status = category_info['status']
        content_categories = category_info['content_categories']
        security_categories = category_info['security_categories']

        # bool() guarantees a true boolean is stored and serialized; previously
        # len(security_categories) could leak an int into the 'suspicious' field.
        return bool(
            -1 == status
            or security_categories
            or any(cat in self.SUSPICIOUS_CATEGORIES for cat in content_categories)
        )

    def _should_get_security_info(self, categorization_info):
        """Figure out whether the categorization info on the domain is interesting enough to gather more data.

        If the domain isn't categorized, or is categorized as suspicious, get security info.

        Args:
            categorization_info: A dict of info returned by the OpenDNS categorization call
        Returns:
            boolean
        """
        status = categorization_info.get('status', 0)
        content_categories = categorization_info.get('content_categories', [])
        security_categories = categorization_info.get('security_categories', [])

        return categorization_info['suspicious'] or \
            (0 == status and 0 == len(content_categories) and 0 == len(security_categories))

    def _is_security_info_suspicious(self, security_info):
        """Analyzes info from OpenDNS and makes a boolean determination of suspicious or not.

        Either looks for low values for a specific set of properties, looks for known participation in
        a threat campaign, or looks for unknown domains.

        Args:
            security_info: The result of a call to the security endpoint
        Returns:
            boolean
        """
        # Any non-blank known attack or threat type is enough by itself.
        if any(security_info.get(key, None) for key in self.SECURITY_BAD_KEYS):
            return True

        # A single score at or below its threshold flags the domain.
        for security_check in self.SECURITY_CHECKS:
            if security_info.get(security_check.key, security_check.max) <= security_check.threshold:
                return True

        # Domains unknown to OpenDNS are treated as suspicious.
        if not security_info.get('found', False):
            return True

        return False

    def _should_store_ioc_info(self, category_info, security_info):
        """Figure out whether the data gathered is interesting enough to store in the output.

        Args:
            category_info: A dict of info returned by the OpenDNS categorization call
            security_info: A dict of info returned by the OpenDNS security call
        Returns:
            boolean
        """
        return category_info['suspicious'] or security_info['suspicious']

    def _trim_security_result(self, security_info):
        """Converts the results of a security call into a smaller dict.

        Args:
            security_info: The result of a call to the security endpoint.
        Returns:
            A dict
        """
        # dga_score sometimes has the wrong sign, fix that please
        dga_score = security_info.get('dga_score', 0)
        if dga_score > 0:
            security_info['dga_score'] = -1 * dga_score

        # There's a lot of info in the security_info, trim it
        result = {}
        for security_check in self.SECURITY_CHECKS:
            if security_check.key in security_info:
                result[security_check.key] = security_info[security_check.key]
        for key in self.SECURITY_BAD_KEYS:
            if key in security_info:
                result[key] = security_info[key]

        result['found'] = security_info.get('found', False)

        return result
236 |
237 |
def main():
    """Entry point: run LookupDomainsFilter via the standard filter runner."""
    run_filter_main(LookupDomainsFilter)


if __name__ == '__main__':
    main()
244 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/opendns/related_domains.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # RelatedDomains uses OpenDNS to find domains related to input domains or IPs.
4 | # Adds 'osxcollector_related' key to the output:
5 | # {
6 | # 'osxcollector_related': {
7 | # 'domains': {
8 | # 'domain_in_line.com': ['related_domain.com'],
9 | # 'another.com': ['1.2.3.4']
10 | # }
11 | # }
12 | # }
13 | #
14 | from __future__ import absolute_import
15 | from __future__ import unicode_literals
16 |
17 | from argparse import ArgumentParser
18 |
19 | import six
20 | from threat_intel.opendns import InvestigateApi
21 |
22 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
23 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
24 | from osxcollector.output_filters.util.blacklist import create_blacklist
25 | from osxcollector.output_filters.util.config import config_get_deep
26 | from osxcollector.output_filters.util.domains import expand_domain
27 |
28 |
# Default number of generations of related domains to walk:
# 1 = domains related to the initial input, 2 = also domains related to those.
DEFAULT_RELATED_DOMAINS_GENERATIONS = 2
30 |
31 |
class RelatedDomainsFilter(OutputFilter):

    """Uses OpenDNS to find domains related to input domains or IPs.

    A whitelist of domains to ignore is read during initialization.
    Adds 'osxcollector_related' key to the output:
    ```python
    {
        'osxcollector_related': {
            'domains': {
                'domain_in_line.com': ['related_domain.com'],
                'another.com': ['1.2.3.4']
            }
        }
    }
    ```
    """

    def __init__(
        self,
        initial_domains=None,
        initial_ips=None,
        generations=DEFAULT_RELATED_DOMAINS_GENERATIONS,
        related_when=None,
        **kwargs
    ):
        """Initializes the RelatedDomainsFilter.

        Args:
            initial_domains: an enumerable of string domain names
            initial_ips: an enumerable of string IPs in the form ''
            generations: How many generations of related domains to retrieve. Passing 1
                means just find the domains related to the initial input. Passing 2 means also find the
                domains related to the domains related to the initial input.
            related_when: A boolean function to call to decide whether to add the domains from a line to
                the list of related domains.
        """
        super(RelatedDomainsFilter, self).__init__(**kwargs)
        # Despite the name, create_blacklist builds the matcher used here as a domain whitelist.
        self._whitelist = create_blacklist(
            config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}),
        )

        cache_file_name = config_get_deep('opendns.RelatedDomainsFilter.cache_file_name', None)
        self._investigate = InvestigateApi(config_get_deep('api_key.opendns'), cache_file_name=cache_file_name)

        self._domains_to_lookup = set(initial_domains) if initial_domains else set()
        self._ips_to_lookup = set(initial_ips) if initial_ips else set()

        self._related_when = related_when
        self._generation_count = generations

        # Every line is retained so annotations can be added in end_of_lines.
        self._all_blobs = list()

    def filter_line(self, blob):
        """Accumulate a set of all domains.

        Lines are held until end_of_lines; this always returns None.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            A dict or None
        """
        self._all_blobs.append(blob)

        if 'osxcollector_domains' in blob and self._related_when and self._related_when(blob):
            for domain in blob.get('osxcollector_domains'):
                self._domains_to_lookup.add(domain)

        return None

    def end_of_lines(self):
        """Called after all lines have been fed to filter_output_line.

        The OutputFilter performs any processing that requires the complete input to have already been fed.

        Returns:
            An enumerable of dicts
        """
        domains_to_related = self._perform_lookup_for_all_domains(self._domains_to_lookup, self._ips_to_lookup)

        if domains_to_related:
            for blob in self._all_blobs:
                for domain in blob.get('osxcollector_domains', []):
                    add_related_domains = False
                    if domain in domains_to_related:
                        blob.setdefault('osxcollector_related', {})
                        blob['osxcollector_related'].setdefault('domains', {})
                        blob['osxcollector_related']['domains'].setdefault(domain, [])
                        blob['osxcollector_related']['domains'][domain] += domains_to_related[domain]
                        add_related_domains = True

                    # Unique the related domains
                    if add_related_domains:
                        blob['osxcollector_related']['domains'][domain] = list(set(blob['osxcollector_related']['domains'][domain]))

        return self._all_blobs

    def get_argument_parser(self):
        """Returns an ArgumentParser for this filter's command line arguments."""
        parser = ArgumentParser()
        group = parser.add_argument_group('opendns.RelatedDomainsFilter')
        group.add_argument(
            '-d', '--domain', dest='initial_domains', default=[], action='append',
            help='[OPTIONAL] Suspicious domains to use in pivoting. May be specified more than once.',
        )
        group.add_argument(
            '-i', '--ip', dest='initial_ips', default=[], action='append',
            help='[OPTIONAL] Suspicious IP to use in pivoting. May be specified more than once.',
        )
        # type=int is required: without it a command line value arrives as a str and the
        # generation countdown arithmetic/comparisons in _perform_lookup_for_single_domain fail.
        group.add_argument(
            '--related-domains-generations', dest='generations', default=DEFAULT_RELATED_DOMAINS_GENERATIONS, type=int,
            help='[OPTIONAL] How many generations of related domains to lookup with OpenDNS',
        )
        return parser

    def _filter_domains_by_whitelist(self, domains):
        """Remove all domains that are on the whitelist.

        Args:
            domains: An enumerable of domains
        Returns:
            An enumerable of domains
        """
        return [x for x in list(domains) if not self._whitelist.match_values(x)]

    def _perform_lookup_for_all_domains(self, domains_to_lookup, ips_to_lookup):
        """Lookup all the domains related to the input domains or IPs.

        Args:
            domains_to_lookup: Enumerable of domains
            ips_to_lookup: Enumerable of IPs
        Returns:
            A dict mapping {'related_domain': ['initial_domainA', 'initial_domainB']}
        """
        # Fix: filter the parameter that is actually iterated below. Previously the filtered
        # result was assigned to self._domains_to_lookup while the loop still walked the
        # unfiltered argument, so whitelisted domains were looked up anyway.
        domains_to_lookup = self._filter_domains_by_whitelist(domains_to_lookup)

        domains_to_related = {}

        what_to_lookup = [(domain, True) for domain in domains_to_lookup] + [(ip, False) for ip in ips_to_lookup]

        for domain_or_ip, is_domain in what_to_lookup:
            related_domains = self._perform_lookup_for_single_domain(domain_or_ip, is_domain, self._generation_count)
            related_domains = self._filter_domains_by_whitelist(related_domains)
            for related_domain in related_domains:
                domains_to_related.setdefault(related_domain, set())
                domains_to_related[related_domain].add(domain_or_ip)

        return domains_to_related

    def _perform_lookup_for_single_domain(self, domain_or_ip, is_domain, generation_count):
        """Given a domain or IP, lookup the Nth related domains.

        Args:
            domain_or_ip: A string domain name or IP
            is_domain: A boolean of whether the previous arg is a domain or IP
            generation_count: A count of generations to lookup
        Returns:
            set of related domains
        """
        domains_found = set([domain_or_ip]) if is_domain else set()
        generation_results = set([domain_or_ip])

        # For IPs, do one IP specific lookup then switch to domain lookups
        if not is_domain:
            generation_results = self._find_related_domains(None, generation_results)
            domains_found |= generation_results
            generation_count -= 1

        while generation_count > 0:
            if len(generation_results):
                generation_results = self._find_related_domains(generation_results, None)
                domains_found |= generation_results

            generation_count -= 1

        return domains_found

    def _find_related_domains(self, domains, ips):
        """Calls OpenDNS to find related domains and normalizes the responses.

        Args:
            domains: An enumerable of domains
            ips: An enumerable of IPs
        Returns:
            An enumerable of domains
        """
        related_domains = set()

        if domains:
            domains = self._filter_domains_by_whitelist(domains)
            cooccurrence_info = self._investigate.cooccurrences(domains)
            cooccurrence_domains = self._cooccurrences_to_domains(cooccurrence_info)
            related_domains.update(cooccurrence_domains)

        if ips:
            rr_history_info = self._investigate.rr_history(ips)
            related_domains.update(self._rr_history_to_domains(rr_history_info))

        return related_domains

    def _cooccurrences_to_domains(self, cooccurrence_info):
        """Parse the results of a call to the OpenDNS cooccurrences endpoint.

        Args:
            cooccurrence_info: Result of a call to cooccurrences
        Returns:
            An enumerable of domains
        """
        domains = set()

        for domain, cooccurence in six.iteritems(cooccurrence_info):
            for occur_domain in cooccurence.get('pfs2', []):
                # Each pfs2 entry is a pair; element 0 is the co-occurring domain name.
                for elem in expand_domain(occur_domain[0]):
                    domains.add(elem)

        return domains

    def _rr_history_to_domains(self, rr_history_info):
        """Parse the results of a call to the OpenDNS rr_history endpoint.

        Args:
            rr_history_info: Result of a call to rr_history
        Returns:
            An enumerable of domains
        """
        domains = set()

        for ip, rr_history in six.iteritems(rr_history_info):
            for rr_domain in rr_history.get('rrs', []):
                for elem in expand_domain(rr_domain['rr']):
                    domains.add(elem)

        return domains
263 |
264 |
def main():
    """Entry point: run RelatedDomainsFilter via the standard filter runner."""
    run_filter_main(RelatedDomainsFilter)


if __name__ == '__main__':
    main()
271 |
--------------------------------------------------------------------------------
/osxcollector/output_filters/analyze.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # The AnalyzeFilter is a handy little tool that ties together many filters to attempt to
5 | # enhance the output of OSXCollector with data from threat APIs, compare against blacklists,
6 | # search for lines related to suspicious domains, ips, or files, and generally figure shit out.
7 | #
8 | # The more detailed description of what goes on:
9 | # 1. Parse out browser extension information.
10 | # 2. Find all the domains in every line. Add them to the output lines.
11 | # 3. Find any file hashes or domains that are on blacklists. Mark those lines.
12 | # 4. Take any filepaths from the command line and mark all lines related to those.
13 | # 5. Take any domain or IP from the command line and use OpenDNS Investigate API to find all the domains
14 | # related to those domains and all the domains related to those related domains - basically the 1st and 2nd
15 | # generation related domains. Mark any lines where these domains appear.
16 | # 6. Lookup all sha1 hashes in ShadowServer's bin-test whitelist.
17 | # Files that match both hash and filename are ignored by further filters.
18 | # 7. Lookup file hashes in VirusTotal and mark any lines with suspicious files hashes.
19 | # 8. Lookup all the domains in the file with OpenDNS Investigate. Categorize and score the domains.
20 | # Mark all the lines that contain domains that were scored as "suspicious".
21 | # 9. Lookup suspicious domains, those domains on a blacklist, or those related to the initial input in VirusTotal.
22 | # 10. Cleanup the browser history and sort it in descending time order.
23 | # 11. Save all the enhanced output to a new file.
24 | # 12. Look at all the interesting lines in the file and try to summarize them in some very human readable output.
25 | # 13. Party!
26 | #
27 | from __future__ import absolute_import
28 | from __future__ import unicode_literals
29 |
30 | from argparse import ArgumentParser
31 |
32 | from osxcollector.output_filters.alexa.lookup_rankings import LookupRankingsFilter as ArLookupRankingsFilter
33 | from osxcollector.output_filters.base_filters.chain import ChainFilter
34 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
35 | from osxcollector.output_filters.chrome.find_extensions import FindExtensionsFilter as ChromeExtensionsFilter
36 | from osxcollector.output_filters.chrome.sort_history import SortHistoryFilter as ChromeHistoryFilter
37 | from osxcollector.output_filters.find_blacklisted import FindBlacklistedFilter
38 | from osxcollector.output_filters.find_domains import FindDomainsFilter
39 | from osxcollector.output_filters.firefox.find_extensions import FindExtensionsFilter as FirefoxExtensionsFilter
40 | from osxcollector.output_filters.firefox.sort_history import SortHistoryFilter as FirefoxHistoryFilter
41 | from osxcollector.output_filters.opendns.lookup_domains import LookupDomainsFilter as OpenDnsLookupDomainsFilter
42 | from osxcollector.output_filters.opendns.related_domains import RelatedDomainsFilter as OpenDnsRelatedDomainsFilter
43 | from osxcollector.output_filters.related_files import RelatedFilesFilter
44 | from osxcollector.output_filters.shadowserver.lookup_hashes import LookupHashesFilter as ShadowServerLookupHashesFilter
45 | from osxcollector.output_filters.summary_filters.html import HtmlSummaryFilter
46 | from osxcollector.output_filters.summary_filters.text import TextSummaryFilter
47 | from osxcollector.output_filters.virustotal.lookup_domains import LookupDomainsFilter as VtLookupDomainsFilter
48 | from osxcollector.output_filters.virustotal.lookup_hashes import LookupHashesFilter as VtLookupHashesFilter
49 |
50 |
class AnalyzeFilter(ChainFilter):

    """AnalyzeFilter chains all the other filters to produce maximum effect.

    A lot of the smarts of AnalyzeFilter are around what filters to run in which order and how results of one filter should
    effect the operations of the next filter.
    """

    def __init__(
        self, no_opendns=False, no_virustotal=False, no_shadowserver=False,
        no_alexa=False, readout=False, **kwargs
    ):
        """Builds the ordered filter chain and hands it to ChainFilter.

        Args:
            no_opendns: When True, skip the OpenDNS related-domains and lookup filters.
            no_virustotal: When True, skip the VirusTotal hash and domain lookup filters.
            no_shadowserver: When True, skip the ShadowServer hash lookup filter.
            no_alexa: When True, skip the Alexa ranking lookup filter.
            readout: When True, skip analysis (extensions parsing and extraction) and just summarize.
            **kwargs: Passed through to every filter in the chain and to ChainFilter.
        """

        filter_chain = []

        if not readout:
            filter_chain.append(ChromeExtensionsFilter(**kwargs))
            filter_chain.append(FirefoxExtensionsFilter(**kwargs))

        # FindDomainsFilter must run before anything that consumes 'osxcollector_domains'.
        filter_chain.append(FindDomainsFilter(**kwargs))

        # Do Alexa ranking lookups first since they are dependent only on FindDomainsFilter
        if not no_alexa:
            filter_chain.append(ArLookupRankingsFilter(**kwargs))

        # Do hash related lookups first. This is done first since hash lookup is not influenced
        # by anything but other hash lookups.
        if not no_shadowserver:
            filter_chain.append(ShadowServerLookupHashesFilter(**kwargs))
        if not no_virustotal:
            filter_chain.append(
                VtLookupHashesFilter(lookup_when=AnalyzeFilter.lookup_when_not_in_shadowserver, **kwargs),
            )

        # Find blacklisted stuff next. Finding blacklisted domains requires running FindDomainsFilter first.
        filter_chain.append(FindBlacklistedFilter(**kwargs))

        # RelatedFilesFilter and OpenDnsRelatedDomainsFilter use command line args in addition to previous filter
        # results to find lines of interest.
        filter_chain.append(RelatedFilesFilter(when=AnalyzeFilter.find_related_when, **kwargs))
        if not no_opendns:
            filter_chain.append(
                OpenDnsRelatedDomainsFilter(related_when=AnalyzeFilter.find_related_when, **kwargs),
            )

        # Lookup threat info on suspicious and related stuff
        if not no_opendns:
            filter_chain.append(
                OpenDnsLookupDomainsFilter(lookup_when=AnalyzeFilter.lookup_when_not_in_shadowserver, **kwargs),
            )
        if not no_virustotal:
            filter_chain.append(
                VtLookupDomainsFilter(lookup_when=AnalyzeFilter.lookup_domains_in_vt_when, **kwargs),
            )

        # Sort browser history for maximum pretty
        filter_chain.append(FirefoxHistoryFilter(**kwargs))
        filter_chain.append(ChromeHistoryFilter(**kwargs))

        # Summaries run last so every annotation added above is available to them.
        filter_chain.append(TextSummaryFilter(**kwargs))
        filter_chain.append(HtmlSummaryFilter(**kwargs))

        super(AnalyzeFilter, self).__init__(filter_chain, **kwargs)

    def _on_get_argument_parser(self):
        """Returns an ArgumentParser with arguments for just this OutputFilter (not the contained chained OutputFilters).

        Returns:
            An `argparse.ArgumentParser`
        """
        parser = ArgumentParser()
        group = parser.add_argument_group('AnalyzeFilter')
        group.add_argument(
            '--readout', dest='readout', action='store_true', default=False,
            help='[OPTIONAL] Skip the analysis and just output really readable analysis',
        )
        group.add_argument(
            '--no-opendns', dest='no_opendns', action='store_true', default=False,
            help='[OPTIONAL] Don\'t run OpenDNS filters',
        )
        group.add_argument(
            '--no-virustotal', dest='no_virustotal', action='store_true', default=False,
            help='[OPTIONAL] Don\'t run VirusTotal filters',
        )
        group.add_argument(
            '--no-shadowserver', dest='no_shadowserver', action='store_true', default=False,
            help='[OPTIONAL] Don\'t run ShadowServer filters',
        )
        group.add_argument(
            '--no-alexa', dest='no_alexa', action='store_true', default=False,
            help='[OPTIONAL] Don\'t run AlexaRanking filters',
        )
        group.add_argument(
            '-M', '--monochrome', dest='monochrome', action='store_true', default=False,
            help='[OPTIONAL] Output monochrome analysis',
        )
        group.add_argument(
            '--show-signature-chain', dest='show_signature_chain', action='store_true', default=False,
            help='[OPTIONAL] Output unsigned startup items and kexts.',
        )
        group.add_argument(
            '--show-browser-ext', dest='show_browser_ext', action='store_true', default=False,
            help='[OPTIONAL] Output the list of installed browser extensions.',
        )
        group.add_argument(
            '-t', '--text', dest='text_output_file', default=None,
            help='[OPTIONAL] Path to the output file where summary in plain text format will be written to.',
        )
        group.add_argument(
            '-w', '--html', dest='html_output_file', default=None,
            help='[OPTIONAL] Path to the output file where summary in HTML format will be written to.',
        )
        group.add_argument(
            '-c', '--group-by-iocs', dest='group_by_iocs', action='store_true', default=False,
            help='[OPTIONAL] Summarize the output grouped by IOCs instead of by threat indicators.',
        )
        group.add_argument(
            '-k', '--group-key', dest='group_key', default=None,
            help='[OPTIONAL] If sorting by IOCs, select which key to group by (sha1/sha2/domain)',
        )
        return parser

    @staticmethod
    def include_in_summary(blob):
        """Whether a line carries any annotation worth summarizing.

        Args:
            blob - a line of output from OSXCollector
        Returns:
            boolean
        """
        _KEYS_FOR_SUMMARY = [
            'osxcollector_vthash',
            'osxcollector_vtdomain',
            'osxcollector_opendns',
            'osxcollector_blacklist',
            'osxcollector_related',
        ]

        return any([key in blob for key in _KEYS_FOR_SUMMARY])

    @staticmethod
    def lookup_when_not_in_shadowserver(blob):
        """ShadowServer whitelists blobs that can be ignored."""
        return 'osxcollector_shadowserver' not in blob

    @staticmethod
    def lookup_domains_in_vt_when(blob):
        """VT domain lookup is a final step and what to lookup is dependent upon what has been found so far."""
        return AnalyzeFilter.lookup_when_not_in_shadowserver(blob) and AnalyzeFilter.include_in_summary(blob)

    @staticmethod
    def find_related_when(blob):
        """When to find related terms or domains.

        Stuff in ShadowServer is not interesting.
        Blacklisted file paths are worth investigating.
        Files where the md5 could not be calculated are also interesting. Root should be able to read files.
        Files with a bad hash in VT are obviously malware, go find related bad stuff.

        Args:
            blob - a line of output from OSXCollector
        Returns:
            boolean
        """
        if 'osxcollector_shadowserver' in blob:
            return False
        # An empty-string md5 means hashing failed; None/missing means the line isn't a file at all.
        if '' == blob.get('md5', None):
            return True
        return any([key in blob for key in ['osxcollector_vthash', 'osxcollector_related']])
214 |
215 |
def main():
    """Entry point: run AnalyzeFilter via the standard filter runner."""
    run_filter_main(AnalyzeFilter)


if __name__ == '__main__':
    main()
222 |
--------------------------------------------------------------------------------
/tests/output_filters/data/cache.virustotal.LookupHashesFilter.json:
--------------------------------------------------------------------------------
1 | {"virustotal-file-reports": {"b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7": {"scan_id": "b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7-1273894724", "sha256": "b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": false, "result": null, "version": "7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": false, "result": null, "version": "10.00", "update": "20100514"}, "nProtect": {"detected": false, "result": null, "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": false, "result": null, "version": "5.0.27.0", "update": "20100514"}, "NOD32": {"detected": false, "result": null, "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": false, "result": null, "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": false, "result": null, "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": false, "result": null, "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": false, "result": null, "version": "6.04.12", "update": "20100514"}, "Avast": {"detected": false, "result": null, "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": false, "result": null, "version": "4842", "update": "20100515"}, "DrWeb": {"detected": false, "result": null, "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": false, "result": null, "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": false, "result": null, "version": "7.0.17.0", "update": "20100513"}, "Sophos": 
{"detected": false, "result": null, "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": false, "result": null, "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": false, "result": null, "version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": false, "result": null, "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": false, "result": null, "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": false, "result": null, "version": "20101.1.0.89", "update": "20100515"}}, "response_code": 0, "total": 40, "resource": "bd34339415ce6a7d692c90779993dd6f", "scan_date": "2015-01-23 16:23:00", "md5": "bd34339415ce6a7d692c90779993dd6f", "permalink": "https://www.virustotal.com/file/b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7/analysis/1273894724/", "sha1": "2a27c19560f7ad8017d79c1eb8eb2c91fffb9291", "positives": 0, "verbose_msg": "Scan finished, scan information embedded in this object"}, "6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360": {"scan_id": "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c-1273894724", "sha256": "6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": true, "result": "Trojan.Generic.3611249", "version": "7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": true, "result": "Trojan.VB.acgy", "version": "10.00", "update": "20100514"}, "nProtect": {"detected": true, "result": "Trojan.Generic.3611249", "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": true, "result": "Trojan.VB.JFDE", "version": "5.0.27.0", "update": "20100514"}, "NOD32": 
{"detected": true, "result": "a variant of Win32/Qhost.NTY", "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": true, "result": "Win32/ASuspect.HDBBD", "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": true, "result": "Generic.dx!rkx", "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": true, "result": "TR/VB.acgy.1", "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": true, "result": "W32/Smalltroj.YFHZ", "version": "6.04.12", "update": "20100514"}, "Avast": {"detected": true, "result": "Win32:Malware-gen", "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": true, "result": "Heur.Suspicious", "version": "4842", "update": "20100515"}, "DrWeb": {"detected": true, "result": "Trojan.Hosts.37", "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": true, "result": "Trojan/VB.gen", "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": true, "result": "Win32.TRVB.Acgy", "version": "7.0.17.0", "update": "20100513"}, "Sophos": {"detected": true, "result": "Troj/VBHost-A", "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": true, "result": "Trojan.Win32.VB.acgy", "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": true, "result": "Generic.dx!rkx", "version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": true, "result": "Trojan/VB.yqh", "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": true, "result": "Trojan.Generic.3611249", "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": true, "result": "Trojan.KillAV", "version": 
"20101.1.0.89", "update": "20100515"}}, "response_code": 1, "total": 40, "resource": "06506cc06cf0167ea583de62c98eae2c", "scan_date": "2010-05-15 03:38:44", "md5": "06506cc06cf0167ea583de62c98eae2c", "permalink": "https://www.virustotal.com/file/6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360/analysis/1273894724/", "sha1": "92e3750a9f0eef6290dd83867eff88064e9c01bb", "positives": 40, "verbose_msg": "Scan finished, scan information embedded in this object"}, "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c": {"scan_id": "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c-1273894724", "sha256": "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": false, "result": null, "version": "7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": false, "result": null, "version": "10.00", "update": "20100514"}, "nProtect": {"detected": false, "result": null, "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": false, "result": null, "version": "5.0.27.0", "update": "20100514"}, "NOD32": {"detected": false, "result": "a variant of Win32/Qhost.NTY", "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": false, "result": null, "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": false, "result": null, "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": false, "result": null, "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": false, "result": null, "version": "6.04.12", "update": "20100514"}, "Avast": {"detected": false, "result": null, "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": false, "result": null, "version": "4842", "update": "20100515"}, "DrWeb": {"detected": 
false, "result": null, "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": false, "result": null, "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": false, "result": null, "version": "7.0.17.0", "update": "20100513"}, "Sophos": {"detected": false, "result": null, "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": false, "result": null, "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": false, "result": null, "version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": false, "result": null, "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": false, "result": null, "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": false, "result": null, "version": "20101.1.0.89", "update": "20100515"}}, "response_code": 0, "total": 40, "resource": "99017f6eebbac24f351415dd410d522d", "scan_date": "2010-05-15 03:38:44", "md5": "99017f6eebbac24f351415dd410d522d", "permalink": "https://www.virustotal.com/file/52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c/analysis/1273894724/", "sha1": "4d1740485713a2ab3a4f5822a01f645fe8387f92", "positives": 0, "verbose_msg": "Scan finished, scan information embedded in this object"}, "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302": {"scan_id": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302-1273894724", "sha256": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": true, "result": "Trojan.Generic.3611249", "version": 
"7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": true, "result": "Trojan.VB.acgy", "version": "10.00", "update": "20100514"}, "nProtect": {"detected": true, "result": "Trojan.Generic.3611249", "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": true, "result": "Trojan.VB.JFDE", "version": "5.0.27.0", "update": "20100514"}, "NOD32": {"detected": true, "result": "a variant of Win32/Qhost.NTY", "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": true, "result": "Win32/ASuspect.HDBBD", "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": true, "result": "Generic.dx!rkx", "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": true, "result": "TR/VB.acgy.1", "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": true, "result": "W32/Smalltroj.YFHZ", "version": "6.04.12", "update": "20100514"}, "Avast": {"detected": true, "result": "Win32:Malware-gen", "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": true, "result": "Heur.Suspicious", "version": "4842", "update": "20100515"}, "DrWeb": {"detected": true, "result": "Trojan.Hosts.37", "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": true, "result": "Trojan/VB.gen", "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": true, "result": "Win32.TRVB.Acgy", "version": "7.0.17.0", "update": "20100513"}, "Sophos": {"detected": true, "result": "Troj/VBHost-A", "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": true, "result": "Trojan.Win32.VB.acgy", "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": true, "result": "Generic.dx!rkx", 
"version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": true, "result": "Trojan/VB.yqh", "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": true, "result": "Trojan.Generic.3611249", "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": true, "result": "Trojan.KillAV", "version": "20101.1.0.89", "update": "20100515"}}, "response_code": 1, "total": 40, "resource": "0c71d8cedc8bbb2b619a76d1478c4348", "scan_date": "2015-01-15 16:42:01", "md5": "0c71d8cedc8bbb2b619a76d1478c4348", "permalink": "https://www.virustotal.com/file/b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302/analysis/1273894724/", "sha1": "da9b79f2fd33d002033b69a9a346af4671a9e16b", "positives": 40, "verbose_msg": "Scan finished, scan information embedded in this object"}}}
--------------------------------------------------------------------------------
/osxcollector/output_filters/chrome/sort_history.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # SortHistoryFilter creates a clean sorted Chrome browser history and tags lines with {'osxcollector_browser_history': 'chrome'}
5 | #
6 | from __future__ import absolute_import
7 | from __future__ import unicode_literals
8 |
9 | import six
10 |
11 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter
12 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main
13 |
14 |
class SortHistoryFilter(OutputFilter):

    """Joins Chrome browser history 'visits' and 'urls' tables, producing a time sorted browser history.

    In the output look for lines where:
    ('osxcollector_section' == 'chrome' and 'osxcollector_subsection' == 'history' and 'osxcollector_table_name' == 'visits')
    for some snazzy browser history stuff.
    """

    def __init__(self, **kwargs):
        super(SortHistoryFilter, self).__init__(**kwargs)

        # Rows cached by their SQLite 'id' column; joined together in end_of_lines()
        self._visits_table = dict()
        self._urls_table = dict()

    def filter_line(self, blob):
        """Cache the 'visits' and 'urls' tables.

        Lines from the Chrome history 'visits' and 'urls' tables that pass
        validation are consumed (None is returned); everything else passes
        through unchanged.

        Args:
            blob: A dict representing a line of output.
        Returns:
            The unmodified blob, or None when the line was cached.
        """
        if 'chrome' == blob.get('osxcollector_section') and 'history' == blob.get('osxcollector_subsection'):
            table = blob.get('osxcollector_table_name')

            if 'visits' == table:
                if self._validate_visit(blob):
                    self._visits_table[blob['id']] = blob
                    blob = None  # Consume the line
            elif 'urls' == table:
                if self._validate_urls(blob):
                    self._urls_table[blob['id']] = blob
                    blob = None  # Consume the line

        return blob

    def end_of_lines(self):
        """Join the 'visits' and 'urls' tables into a single browser history and timeline.

        A visit's 'url' column is a foreign key into the 'urls' table's 'id'
        column; visits without a matching url row are dropped.

        Returns:
            A list of history record dicts sorted by 'last_visit_time', newest first.
        """
        history = list()

        for visit in six.itervalues(self._visits_table):
            url = self._urls_table.get(visit.get('url'))
            if url:
                record = {
                    'url': url['url'].encode('utf-8') if six.PY2 else url['url'],
                    # Bug fix: the Python 3 branch previously read url['url'] here,
                    # so every record's title silently duplicated its URL.
                    'title': url['title'].encode('utf-8') if six.PY2 else url['title'],
                    'last_visit_time': url['last_visit_time'],
                    'visit_time': visit['visit_time'],
                    'core_transition': self.PAGE_TRANSITION.get_core_transition(visit['transition']),
                    'page_transition': self.PAGE_TRANSITION.get_qualifier_transitions(visit['transition']),
                    'osxcollector_browser_history': 'chrome',
                }

                # Add all the OSXCollector specific keys to the record;
                # the visit's keys win when both rows carry the same key.
                for key in visit:
                    if key.startswith('osxcollector_'):
                        record[key] = visit[key]
                for key in url:
                    if key.startswith('osxcollector_') and key not in record:
                        record[key] = url[key]

                history.append(record)

        return sorted(history, key=lambda x: x['last_visit_time'], reverse=True)

    @classmethod
    def _validate_visit(cls, blob):
        """Does the visit dict have the required fields?

        Args:
            blob: a visit dict
        Returns:
            boolean
        """
        required_fields = ['id', 'url', 'visit_time', 'transition']
        return all(field in blob for field in required_fields)

    @classmethod
    def _validate_urls(cls, blob):
        """Does the url dict have the required fields?

        Args:
            blob: a url dict
        Returns:
            boolean
        """
        required_fields = ['id', 'url', 'title', 'last_visit_time']
        return all(field in blob for field in required_fields)

    class PAGE_TRANSITION:

        """Constants that detail page transitions in the Chrome 'visits' table.

        These constants come from the Chromium source's PageTransition enum
        (ui/base/page_transition_types.h). The low byte is the core transition
        type; the high bits are qualifier flags.
        """
        # User got to this page by clicking a link on another page.
        CORE_LINK = 0

        # User got this page by typing the URL in the URL bar. This should not be
        # used for cases where the user selected a choice that didn't look at all
        # like a URL; see GENERATED below.
        #
        # We also use this for other "explicit" navigation actions.
        CORE_TYPED = 1

        # User got to this page through a suggestion in the UI, for example,
        # through the destinations page.
        CORE_AUTO_BOOKMARK = 2

        # This is a subframe navigation. This is any content that is automatically
        # loaded in a non-toplevel frame. For example, if a page consists of
        # several frames containing ads, those ad URLs will have this transition
        # type. The user may not even realize the content in these pages is a
        # separate frame, so may not care about the URL (see MANUAL below).
        CORE_AUTO_SUBFRAME = 3

        # For subframe navigations that are explicitly requested by the user and
        # generate new navigation entries in the back/forward list. These are
        # probably more important than frames that were automatically loaded in
        # the background because the user probably cares about the fact that this
        # link was loaded.
        CORE_MANUAL_SUBFRAME = 4

        # User got to this page by typing in the URL bar and selecting an entry
        # that did not look like a URL. For example, a match might have the URL
        # of a Google search result page, but appear like "Search Google for ...".
        # These are not quite the same as TYPED navigations because the user
        # didn't type or see the destination URL.
        # See also KEYWORD.
        CORE_GENERATED = 5

        # The page was specified in the command line or is the start page.
        CORE_START_PAGE = 6

        # The user filled out values in a form and submitted it. NOTE that in
        # some situations submitting a form does not result in this transition
        # type. This can happen if the form uses script to submit the contents.
        CORE_FORM_SUBMIT = 7

        # The user "reloaded" the page, either by hitting the reload button or by
        # hitting enter in the address bar. NOTE: This is distinct from the
        # concept of whether a particular load uses "reload semantics" (i.e.
        # bypasses cached data). For this reason, lots of code needs to pass
        # around the concept of whether a load should be treated as a "reload"
        # separately from their tracking of this transition type, which is mainly
        # used for proper scoring for consumers who care about how frequently a
        # user typed/visited a particular URL.
        #
        # SessionRestore and undo tab close use this transition type too.
        CORE_RELOAD = 8

        # The url was generated from a replaceable keyword other than the default
        # search provider. If the user types a keyword (which also applies to
        # tab-to-search) in the omnibox this qualifier is applied to the transition
        # type of the generated url. TemplateURLModel then may generate an
        # additional visit with a transition type of KEYWORD_GENERATED against the
        # url 'http:#' + keyword. For example, if you do a tab-to-search against
        # wikipedia the generated url has a transition qualifer of KEYWORD, and
        # TemplateURLModel generates a visit for 'wikipedia.org' with a transition
        # type of KEYWORD_GENERATED.
        CORE_KEYWORD = 9

        # Corresponds to a visit generated for a keyword. See description of
        # KEYWORD for more details.
        CORE_KEYWORD_GENERATED = 10

        # The core transition type lives in the low byte of the transition value.
        CORE_MASK = 0xFF

        @classmethod
        def get_core_transition(cls, value):
            """Translates a numeric page transition into a human readable description.

            Args:
                value: A numeric value represented as a Number or String

            Returns:
                A string
            """
            try:
                value = int(value) & cls.CORE_MASK
            # int() raises TypeError for None and non-numeric objects,
            # ValueError for non-numeric strings; treat both as bad input.
            except (TypeError, ValueError):
                return 'ERROR'

            if cls.CORE_LINK == value:
                return 'link'
            elif cls.CORE_TYPED == value:
                return 'typed'
            elif cls.CORE_AUTO_BOOKMARK == value:
                return 'auto_bookmark'
            elif cls.CORE_AUTO_SUBFRAME == value:
                return 'auto_subframe'
            elif cls.CORE_MANUAL_SUBFRAME == value:
                return 'manual_subframe'
            elif cls.CORE_GENERATED == value:
                return 'generated'
            elif cls.CORE_START_PAGE == value:
                return 'start_page'
            elif cls.CORE_FORM_SUBMIT == value:
                return 'form_submit'
            elif cls.CORE_RELOAD == value:
                return 'reload'
            elif cls.CORE_KEYWORD == value:
                return 'keyword'
            elif cls.CORE_KEYWORD_GENERATED == value:
                # Bug fix: previously returned 'generated', making this value
                # indistinguishable from CORE_GENERATED in the output.
                return 'keyword_generated'
            return 'UNKNOWN'

        # A managed user attempted to visit a URL but was blocked.
        QUALIFIER_BLOCKED = 0x00800000

        # User used the Forward or Back button to navigate among browsing history.
        QUALIFIER_FORWARD_BACK = 0x01000000

        # User used the address bar to trigger this navigation.
        QUALIFIER_FROM_ADDRESS_BAR = 0x02000000

        # User is navigating to the home page.
        QUALIFIER_HOME_PAGE = 0x04000000

        # The beginning of a navigation chain.
        QUALIFIER_CHAIN_START = 0x10000000

        # The last transition in a redirect chain.
        QUALIFIER_CHAIN_END = 0x20000000

        # Redirects caused by JavaScript or a meta refresh tag on the page.
        QUALIFIER_CLIENT_REDIRECT = 0x40000000

        # Redirects sent from the server by HTTP headers. It might be nice to
        # break this out into 2 types in the future, permanent or temporary, if we
        # can get that information from WebKit.
        QUALIFIER_SERVER_REDIRECT = 0x80000000

        # Qualifier flags live in the high bits of the transition value.
        QUALIFIER_MASK = 0xFFFFFF00

        @classmethod
        def get_qualifier_transitions(cls, value):
            """Translates the qualifier bits of a page transition into a list of descriptions.

            Args:
                value: A numeric value represented as a Number or String

            Returns:
                A list of strings, empty when no qualifier bits are set or the input is bad.
            """
            qualifiers = []

            try:
                value = int(value) & cls.QUALIFIER_MASK
            # int() raises TypeError for None and non-numeric objects,
            # ValueError for non-numeric strings; treat both as bad input.
            except (TypeError, ValueError):
                return qualifiers

            if cls.QUALIFIER_BLOCKED & value:
                qualifiers.append('blocked')

            if cls.QUALIFIER_FORWARD_BACK & value:
                qualifiers.append('forward_back')

            if cls.QUALIFIER_FROM_ADDRESS_BAR & value:
                qualifiers.append('from_address_bar')

            if cls.QUALIFIER_HOME_PAGE & value:
                qualifiers.append('home_page')

            if cls.QUALIFIER_CHAIN_START & value:
                qualifiers.append('chain_start')

            if cls.QUALIFIER_CHAIN_END & value:
                qualifiers.append('chain_end')

            if cls.QUALIFIER_CLIENT_REDIRECT & value:
                qualifiers.append('client_redirect')

            if cls.QUALIFIER_SERVER_REDIRECT & value:
                qualifiers.append('server_redirect')

            return qualifiers
280 |
281 |
def main():
    """Entry point: drive SortHistoryFilter with the standard filter runner."""
    run_filter_main(SortHistoryFilter)
285 |
# Allow this filter to be run as a stand-alone script.
if __name__ == '__main__':
    main()
288 |
--------------------------------------------------------------------------------