├── tests ├── __init__.py └── output_filters │ ├── __init__.py │ ├── alexa │ ├── __init__.py │ └── lookup_domains_test.py │ ├── util │ ├── __init__.py │ ├── config_test.py │ ├── error_messages_test.py │ ├── domains_test.py │ └── blacklist_test.py │ ├── opendns │ ├── __init__.py │ ├── lookup_domains_test.py │ └── related_domains_test.py │ ├── virustotal │ ├── __init__.py │ ├── lookup_domains_test.py │ └── lookup_hashes_test.py │ ├── base_filters │ ├── __init__.py │ ├── threat_feed_test.py │ ├── chain_test.py │ └── output_filter_test.py │ ├── shadowserver │ ├── __init__.py │ └── lookup_hashes_test.py │ ├── data │ ├── domains_whitelist.txt │ ├── domains_blacklist.txt │ ├── hashes_blacklist.txt │ ├── opendns │ │ └── lookup_domains │ │ │ ├── categorization.json │ │ │ ├── expected.json │ │ │ └── security.json │ ├── cache.shadowserver.LookupHashesFilter.json │ ├── test_osxcollector_config.yaml │ ├── cache.virustotal.LookupDomainsFilter.json │ └── cache.virustotal.LookupHashesFilter.json │ ├── find_blacklisted_test.py │ ├── run_filter_test.py │ ├── find_domains_test.py │ └── related_files_test.py ├── .deactivate.sh ├── .activate.sh ├── osxcollector ├── output_filters │ ├── alexa │ │ ├── __init__.py │ │ └── lookup_rankings.py │ ├── chrome │ │ ├── __init__.py │ │ ├── find_extensions.py │ │ └── sort_history.py │ ├── firefox │ │ ├── __init__.py │ │ ├── find_extensions.py │ │ └── sort_history.py │ ├── opendns │ │ ├── __init__.py │ │ ├── lookup_domains.py │ │ └── related_domains.py │ ├── util │ │ ├── __init__.py │ │ ├── error_messages.py │ │ ├── config.py │ │ ├── domains.py │ │ ├── dict_utils.py │ │ └── blacklist.py │ ├── base_filters │ │ ├── __init__.py │ │ ├── chain.py │ │ ├── threat_feed.py │ │ └── output_filter.py │ ├── shadowserver │ │ ├── __init__.py │ │ └── lookup_hashes.py │ ├── virustotal │ │ ├── __init__.py │ │ ├── lookup_hashes.py │ │ ├── lookup_urls.py │ │ └── lookup_domains.py │ ├── summary_filters │ │ ├── __init__.py │ │ ├── summary.py │ │ └── text.py │ ├── 
__init__.py │ ├── exceptions.py │ ├── find_blacklisted.py │ ├── related_files.py │ ├── find_domains.py │ └── analyze.py └── __init__.py ├── MANIFEST.in ├── requirements.txt ├── requirements-dev.txt ├── .gitignore ├── Makefile ├── LICENSE.md ├── .coveragerc ├── .secrets.baseline ├── tox.ini ├── setup.py ├── .travis.yml ├── .pre-commit-config.yaml └── osxcollector.yaml.example /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.deactivate.sh: -------------------------------------------------------------------------------- 1 | deactivate 2 | -------------------------------------------------------------------------------- /tests/output_filters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/alexa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.activate.sh: -------------------------------------------------------------------------------- 1 | virtualenv_run/bin/activate -------------------------------------------------------------------------------- /tests/output_filters/opendns/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/virustotal/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /osxcollector/output_filters/alexa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/chrome/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/firefox/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/opendns/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/base_filters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/shadowserver/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/base_filters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /osxcollector/output_filters/shadowserver/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /osxcollector/output_filters/virustotal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.md 3 | -------------------------------------------------------------------------------- /osxcollector/output_filters/summary_filters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/output_filters/data/domains_whitelist.txt: -------------------------------------------------------------------------------- 1 | yelp.com 2 | yelp.co.uk 3 | -------------------------------------------------------------------------------- /tests/output_filters/data/domains_blacklist.txt: -------------------------------------------------------------------------------- 1 | example.com 2 | example.co.uk 3 | example.org 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyYAML==5.1 2 | simplejson==3.10.0 3 | six==1.12.0 4 | threat_intel==0.1.29 5 | tldextract==2.0.2 6 | -------------------------------------------------------------------------------- /osxcollector/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | 
coverage==4.5.3 3 | flake8==3.7.7 4 | mock==2.0.0 5 | pre-commit>=1.0.0 6 | pytest==4.4.0 7 | tox==3.8.6 8 | -------------------------------------------------------------------------------- /tests/output_filters/data/hashes_blacklist.txt: -------------------------------------------------------------------------------- 1 | ffff5f60462c38b1d235cb3509876543 2 | ffff234d2a50a42a87389f1234561a21 3 | ffff51e77b442ee23188d87e4abcdef0 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .tox/ 3 | *.egg-info/ 4 | .DS_Store 5 | .idea/ 6 | config.yaml 7 | osxcollector.yaml 8 | virtualenv_run/ 9 | .coverage 10 | dist/ 11 | sdist/ 12 | -------------------------------------------------------------------------------- /osxcollector/output_filters/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | 7 | # Suppress output from tldextract module 8 | logging.getLogger('tldextract').addHandler(logging.NullHandler()) 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DELETE_ON_ERROR: 2 | 3 | all: 4 | echo >&2 "Must specify target." 5 | 6 | test: 7 | tox 8 | 9 | venv: 10 | tox -evenv 11 | 12 | install-hooks: 13 | tox -e pre-commit -- install -f --install-hooks 14 | 15 | clean: 16 | rm -rf build/ dist/ osxcollector_output_filters.egg-info/ .tox/ virtualenv_run/ 17 | find . -name '*.pyc' -delete 18 | find . 
-name '__pycache__' -delete 19 | 20 | .PHONY: all test venv install-hooks clean 21 | -------------------------------------------------------------------------------- /tests/output_filters/data/opendns/lookup_domains/categorization.json: -------------------------------------------------------------------------------- 1 | { 2 | "bango.com": { 3 | "status": 1, 4 | "content_categories": [ 5 | "Search Engines", 6 | "Business Services", 7 | "Research/Reference" 8 | ], 9 | "security_categories": [] 10 | }, 11 | "dango.com": { 12 | "status": 0, 13 | "content_categories": [ 14 | "Phishing" 15 | ], 16 | "security_categories": [] 17 | }, 18 | "dingo.com": null, 19 | "bingo.com": null 20 | } 21 | -------------------------------------------------------------------------------- /osxcollector/output_filters/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # All exceptions thrown by the osxcollector.output_filters module 4 | # 5 | from __future__ import absolute_import 6 | from __future__ import unicode_literals 7 | 8 | 9 | class OutputFilterError(Exception): 10 | pass 11 | 12 | 13 | class MissingConfigError(OutputFilterError): 14 | 15 | """An error to throw when configuration is missing""" 16 | pass 17 | 18 | 19 | class BadDomainError(OutputFilterError): 20 | 21 | """An error to throw when a domain is invalid.""" 22 | pass 23 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
2 | 3 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 4 | 5 | You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/. 6 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = 4 | . 5 | omit = 6 | .tox/* 7 | virtualenv_run/* 8 | setup.py 9 | 10 | [report] 11 | show_missing = True 12 | skip_covered = True 13 | 14 | exclude_lines = 15 | # Have to re-enable the standard pragma 16 | \#\s*pragma: no cover 17 | 18 | # Don't complain if tests don't hit defensive assertion code: 19 | ^\s*raise AssertionError\b 20 | ^\s*raise NotImplementedError\b 21 | ^\s*return NotImplemented\b 22 | ^\s*raise$ 23 | 24 | # Don't complain if non-runnable code isn't run: 25 | ^if __name__ == ['"]__main__['"]:$ 26 | 27 | [html] 28 | directory = coverage-html 29 | 30 | # vim:ft=dosini 31 | -------------------------------------------------------------------------------- /tests/output_filters/data/opendns/lookup_domains/expected.json: -------------------------------------------------------------------------------- 1 | [[{ 2 | "domain": "dango.com", 3 | "categorization": { 4 | "status": 0, 5 | "content_categories": [ 6 | "Phishing" 7 | ], 8 | "security_categories": [], 9 | "suspicious": true 10 | }, 11 | "link": "https://investigate.opendns.com/domain-view/name/dango.com/view", 12 | "security": { 13 | "dga_score": 0, 14 | "asn_score": -0.1608560065526172, 15 | "rip_score": 0, 16 | "securerank2": 0.04721624022600212, 17 | "prefix_score": 0, 18 | "attack": "", 19 | "found": true, 20 | "threat_type": "" 21 | } 22 | }]] 23 | 
-------------------------------------------------------------------------------- /.secrets.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": { 3 | "files": ".*tests/.*|\\\\.pre-commit-config\\\\.yaml", 4 | "lines": null 5 | }, 6 | "generated_at": "2019-04-05T11:02:14Z", 7 | "plugins_used": [ 8 | { 9 | "base64_limit": 4.5, 10 | "name": "Base64HighEntropyString" 11 | }, 12 | { 13 | "hex_limit": 3, 14 | "name": "HexHighEntropyString" 15 | }, 16 | { 17 | "name": "PrivateKeyDetector" 18 | } 19 | ], 20 | "results": { 21 | ".travis.yml": [ 22 | { 23 | "hashed_secret": "468b2bbbf250e477bd35e81cafa3bf8c7ce36285", 24 | "line_number": 19, 25 | "type": "Base64 High Entropy String" 26 | } 27 | ] 28 | }, 29 | "version": "0.12.2" 30 | } 31 | -------------------------------------------------------------------------------- /tests/output_filters/base_filters/threat_feed_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter 6 | from tests.output_filters.run_filter_test import RunFilterTest 7 | 8 | 9 | class TestThreatFeedFilter(RunFilterTest): 10 | 11 | def test_run_threat_feed_filter(self): 12 | input_blobs = [ 13 | {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}}, 14 | {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}}, 15 | ] 16 | self.run_test(lambda: ThreatFeedFilter('dinky', 'feed_test'), input_blobs, expected_output_blobs=input_blobs) 17 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | project = osxcollector_output_filters 3 | envlist = py27,py36 4 | tox_pip_extensions_ext_pip_custom_platform = true 5 | 
tox_pip_extensions_ext_venv_update = true 6 | 7 | [testenv] 8 | deps = -rrequirements-dev.txt 9 | commands = 10 | flake8 . 11 | {envpython} --version 12 | coverage --version 13 | coverage run -m pytest --strict {posargs:tests} 14 | coverage report -m --show-missing 15 | 16 | [testenv:pre-commit] 17 | deps = pre-commit>=1.0.0 18 | commands = pre-commit run --all-files 19 | 20 | [testenv:venv] 21 | envdir = virtualenv_run 22 | basepython = python3.6 23 | commands = 24 | 25 | [flake8] 26 | exclude = .git,__pycache__,.tox,virtualenv_run 27 | # E501 - long lines 28 | ignore = E501 29 | max_line_length = 140 30 | -------------------------------------------------------------------------------- /osxcollector/output_filters/util/error_messages.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # A set of simple methods for writing messages to stderr 4 | # 5 | from __future__ import absolute_import 6 | from __future__ import unicode_literals 7 | 8 | import sys 9 | from traceback import extract_tb 10 | from traceback import format_list 11 | 12 | 13 | def write_exception(e): 14 | exc_type, _, exc_traceback = sys.exc_info() 15 | msg = ', '.join(str(a) for a in e.args) 16 | sys.stderr.write('[ERROR] {0} {1}\n'.format(exc_type.__name__, msg)) 17 | for line in format_list(extract_tb(exc_traceback)): 18 | sys.stderr.write(line) 19 | 20 | 21 | def write_error_message(message): 22 | sys.stderr.write('[ERROR] ') 23 | sys.stderr.write(message) 24 | sys.stderr.write('\n') 25 | -------------------------------------------------------------------------------- /tests/output_filters/base_filters/chain_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from osxcollector.output_filters.base_filters.chain import ChainFilter 6 | from 
osxcollector.output_filters.base_filters.output_filter import OutputFilter 7 | from tests.output_filters.run_filter_test import RunFilterTest 8 | 9 | 10 | class TestChainFilter(RunFilterTest): 11 | 12 | def test_run_chain_filter(self): 13 | input_blobs = [ 14 | {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}}, 15 | {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}}, 16 | ] 17 | self.run_test(lambda: ChainFilter([OutputFilter(), OutputFilter()]), input_blobs=input_blobs, expected_output_blobs=input_blobs) 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from setuptools import find_packages 3 | from setuptools import setup 4 | 5 | 6 | with open('README.md', 'r') as fh: 7 | long_description = fh.read() 8 | 9 | setup( 10 | name='osxcollector_output_filters', 11 | version='1.1.1', 12 | author='Yelp Security', 13 | author_email='opensource@yelp.com', 14 | description='Filters that process and transform the output of OSXCollector', 15 | long_description=long_description, 16 | long_description_content_type='text/markdown', 17 | license='GNU General Public License', 18 | url='https://github.com/Yelp/osxcollector_output_filters', 19 | setup_requires='setuptools', 20 | packages=find_packages(exclude=['tests']), 21 | provides=['osxcollector'], 22 | install_requires=[ 23 | 'PyYAML>=5.0', 24 | 'threat_intel', 25 | 'tldextract', 26 | 'simplejson', 27 | 'six', 28 | ], 29 | ) 30 | -------------------------------------------------------------------------------- /tests/output_filters/data/cache.shadowserver.LookupHashesFilter.json: -------------------------------------------------------------------------------- 1 | {"shadowserver-bin-test": {"5d87de61cb368c93325dd910c202b8647f8e90ed": {"os_version": "10.10", "filesize": "48976", "reference": "os_all", "sha1": 
"5D87DE61CB368C93325DD910C202B8647F8E90ED", "dirname": "/System/Library/Extensions/System.kext/PlugIns/Libkern.kext", "binary": "1", "sha256": "1FAFE48F626FDC030B0A0EFC1008D51CD3078D1B3EC95F808D12AFBFEF458B23", "filetimestamp": "09/19/2014 00:42:35", "source": "MacAppInfo", "sha512": "C1CAEB26F892FE3C00B3B6BAB462058C772F91824092BF9B2E183F66D885278B6F0C6DA65D06994A45166501F1A889E38D5D234AE18ECBD2EF3CFD9F4388DC8F", "language": "English", "md5": "6746005C822CEB6737B871698D3ED22F", "bit": "64", "filename": "Libkern", "os_name": "Mac OS X 10.10 (build 14A389)", "application_type": "Mach-O 64-bit kext bundle x86_64", "crc32": "5332564F", "os_mfg": "Apple Inc.", "source_version": "1.1"}, "816a85d89ae34d2dc73b8c768eecb03935c568ba": {"sha1": "816a85d89ae34d2dc73b8c768eecb03935c568ba"}}} -------------------------------------------------------------------------------- /tests/output_filters/util/config_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import pytest 6 | from mock import patch 7 | 8 | from osxcollector.output_filters.util.config import config_get_deep 9 | 10 | 11 | class TestCreateBlacklist: 12 | 13 | @pytest.fixture(scope='module', autouse=True) 14 | def patched_config(self): 15 | config_initial_contents = { 16 | 'a': 'b', 17 | 'c': {'d': 'e'}, 18 | 'f': 1, 19 | 'g': ['apple', 'banana'], 20 | } 21 | with patch('osxcollector.output_filters.util.config._read_config', return_value=config_initial_contents): 22 | yield 23 | 24 | def test_read_top_level_key(self): 25 | assert config_get_deep('a') == 'b' 26 | 27 | def test_read_multi_level_key(self): 28 | assert config_get_deep('c.d') == 'e' 29 | 30 | def test_numeric_val(self): 31 | assert config_get_deep('f') == 1 32 | 33 | def test_list_val(self): 34 | assert config_get_deep('g') == ['apple', 'banana'] 35 | 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | matrix: 3 | include: 4 | - env: TOXENV=py27 5 | python: '2.7' 6 | - env: TOXENV=py36 7 | python: '3.6' 8 | - env: TOXENV=pre-commit 9 | 10 | install: sudo pip install tox-travis 11 | 12 | script: tox 13 | deploy: 14 | provider: pypi 15 | user: yelplabs 16 | skip_existing: true 17 | distributions: "sdist bdist_wheel" 18 | password: 19 | secure: WMygawcYeJGbe4cJlQECKKEZJLYsirgutlihp8Yn4iAKRjpDFmOTwB0B8NjaYsB9pBvz7MLz913ukIhzsHhZLMYE6GRpwjiGfaSXupC4zDVkdi14KPJIo7dff/1p0rGGtZmYa/iohC/HDgbF4iXcBcwdzrvDBDqwPFaM/5J4LxF+KunXCVopsmQTkBEsMNz/K55By3xCO3qxupixTDYy+VOVv3F6Bs8hChqKmql9vvi2ZZPVq9y7io13T7JREKKv8ZOyIq+AGXtrZvnzVuNPfW6PE3eBUv2BUy0xEuwsqjX4goQ0bzRiVKS4XdIl8HmZD/aj2mkaXSw6HLST4+/+im2uNVIPHTEfyqDgUwtZInQF9zML3wGANGIfS+z1ZKfirSO0DiTKpFMkbiM5K0D+VRKFChblCOqQ5WiU9jhcLrHDHED7aLT7pIAlEcizeTpTwF9ZR5Eg48wB59A3q5b6aTsw1t9Q2kIyEnXHgf3JUGtRu0BO2ATGL324Dkzoa8DN2CeZ0F3fWCaI7gqFssrytDPwG+ct17yJkLQXnRTemFm/fCw9YC+onSufa+pV2qlRZRFywifgwIaAwL7hXC4w2qF4d5RzEOtCZjlmOBCLZ6/r/dwhnS+bBdHSuxxcfV1WB5RZLemqNJfbRdQheM0Ld6tzqZZRi5SB7gohnLEJWlc= 20 | on: 21 | tags: true 22 | repo: Yelp/osxcollector_output_filters 23 | branch: master 24 | -------------------------------------------------------------------------------- /tests/output_filters/base_filters/output_filter_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 6 | from tests.output_filters.run_filter_test import RunFilterTest 7 | 8 | 9 | class TestOutputFilter(RunFilterTest): 10 | 11 | def test_filter_line(self): 12 | input_blobs = [ 13 | {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}}, 14 | 
{'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}}, 15 | ] 16 | output_filter = OutputFilter() 17 | for blob in input_blobs: 18 | output = output_filter.filter_line(blob) 19 | assert output == blob 20 | 21 | def test_end_of_lines(self): 22 | output_filter = OutputFilter() 23 | output = output_filter.end_of_lines() 24 | assert output == [] 25 | 26 | def test_run_output_filter(self): 27 | input_blobs = [ 28 | {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}}, 29 | {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}}, 30 | ] 31 | self.run_test(OutputFilter, input_blobs, expected_output_blobs=input_blobs) 32 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: '^tests/output_filters/data/.*$' 2 | repos: 3 | - repo: git://github.com/pre-commit/pre-commit-hooks 4 | rev: v2.1.0 5 | hooks: 6 | - id: check-json 7 | - id: check-yaml 8 | - id: end-of-file-fixer 9 | - id: trailing-whitespace 10 | - id: name-tests-test 11 | - id: requirements-txt-fixer 12 | - id: double-quote-string-fixer 13 | - id: flake8 14 | - id: fix-encoding-pragma 15 | - repo: git://github.com/pre-commit/mirrors-autopep8 16 | rev: v1.4.3 17 | hooks: 18 | - id: autopep8 19 | - repo: git://github.com/asottile/reorder_python_imports 20 | rev: v1.4.0 21 | hooks: 22 | - id: reorder-python-imports 23 | args: [ 24 | '--add-import', 'from __future__ import absolute_import', 25 | '--add-import', 'from __future__ import unicode_literals', 26 | ] 27 | exclude: setup.py 28 | - repo: git://github.com/asottile/add-trailing-comma 29 | rev: v1.0.0 30 | hooks: 31 | - id: add-trailing-comma 32 | - repo: https://github.com/Yelp/detect-secrets 33 | rev: v0.12.2 34 | hooks: 35 | - id: detect-secrets 36 | args: ['--baseline', '.secrets.baseline'] 37 | exclude: .*tests/.*|\.pre-commit-config\.yaml 38 | language_version: python2.7 39 | 
-------------------------------------------------------------------------------- /osxcollector/output_filters/util/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Config is a very simplistic class for reading YAML config. 4 | # 5 | from __future__ import absolute_import 6 | from __future__ import unicode_literals 7 | 8 | import os 9 | 10 | import yaml 11 | try: 12 | from yaml import CSafeLoader as SafeLoader 13 | except ImportError: 14 | from yaml import SafeLoader 15 | 16 | from osxcollector.output_filters.exceptions import MissingConfigError 17 | from osxcollector.output_filters.util.dict_utils import DictUtils 18 | 19 | 20 | def config_get_deep(key, default=None): 21 | """Reads from the config. 22 | 23 | Args: 24 | key: Dictionary key to lookup in config 25 | default: Value to return if key is not found 26 | Returns: 27 | Value from config or default if not found otherwise 28 | """ 29 | return DictUtils.get_deep(_read_config(), key, default) 30 | 31 | 32 | def _read_config(): 33 | """Reads and parses the YAML file. 34 | 35 | Returns: 36 | dict of config 37 | """ 38 | with open(_config_file_path()) as source: 39 | return yaml.load(source.read(), Loader=SafeLoader) 40 | 41 | 42 | def _config_file_path(): 43 | """Find the path to the config file. 
44 | 45 | Returns: 46 | String file path 47 | Raises: 48 | MissingConfigError if no config file is found 49 | """ 50 | for loc in os.curdir, os.path.expanduser('~'), os.environ.get('OSXCOLLECTOR_CONF', ''): 51 | path = os.path.join(loc, 'osxcollector.yaml') 52 | if os.path.exists(path): 53 | return path 54 | raise MissingConfigError() 55 | -------------------------------------------------------------------------------- /tests/output_filters/util/error_messages_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from osxcollector.output_filters.exceptions import BadDomainError 6 | from osxcollector.output_filters.util.error_messages import write_error_message 7 | from osxcollector.output_filters.util.error_messages import write_exception 8 | 9 | 10 | class TestWriteException: 11 | 12 | def test_simple_exception(self, capsys): 13 | try: 14 | raise Exception() 15 | except Exception as e: 16 | write_exception(e) 17 | 18 | output = capsys.readouterr().err 19 | assert 0 == output.find('[ERROR]') 20 | 21 | def test_specific_exception(self, capsys): 22 | try: 23 | raise BadDomainError() 24 | except Exception as e: 25 | write_exception(e) 26 | 27 | output = capsys.readouterr().err 28 | assert output.find('[ERROR] BadDomainError') == 0 29 | 30 | def test_exception_message(self, capsys): 31 | try: 32 | raise BadDomainError('Look for me in validation') 33 | except Exception as e: 34 | write_exception(e) 35 | 36 | output = capsys.readouterr().err 37 | assert output.find('[ERROR] BadDomainError Look for me in validation') == 0 38 | 39 | 40 | class TestWriteErrorMessage: 41 | 42 | def test_write_error_message(self, capsys): 43 | message = 'Look for me in validation' 44 | expected = '[ERROR] Look for me in validation\n' 45 | 46 | write_error_message(message) 47 | 48 | output = capsys.readouterr().err 49 | assert output == 
expected 50 | -------------------------------------------------------------------------------- /osxcollector/output_filters/summary_filters/summary.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import sys 6 | from collections import defaultdict 7 | 8 | import six 9 | 10 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 11 | 12 | 13 | class SummaryFilter(OutputFilter): 14 | """Base class for summary filters.""" 15 | 16 | def __init__(self, show_signature_chain=False, show_browser_ext=False, summary_output_file=None, group_by_iocs=False, group_key=None, **kwargs): 17 | super(SummaryFilter, self).__init__(**kwargs) 18 | self._iocs = [] 19 | self._iocs_by_key = defaultdict(list) 20 | self._vthash = [] 21 | self._vtdomain = [] 22 | self._opendns = [] 23 | self._alexarank = [] 24 | self._blacklist = [] 25 | self._related = [] 26 | self._signature_chain = [] 27 | self._extensions = [] 28 | self._show_signature_chain = show_signature_chain 29 | self._show_browser_ext = show_browser_ext 30 | self._group_by_iocs = group_by_iocs 31 | self._group_key = group_key 32 | 33 | self._add_to_blacklist = [] 34 | 35 | self._close_file = False 36 | 37 | self._open_output_stream(summary_output_file) 38 | 39 | def _open_output_stream(self, summary_output_file): 40 | if summary_output_file: 41 | if isinstance(summary_output_file, six.string_types): 42 | self._output_stream = open(summary_output_file, 'w') 43 | self._close_file = True 44 | else: 45 | # not a string, most likely already opened output stream 46 | self._output_stream = summary_output_file 47 | else: 48 | self._output_stream = sys.stdout 49 | 50 | def __del__(self): 51 | self._close_output_stream() 52 | 53 | def _close_output_stream(self): 54 | if self._close_file: 55 | self._output_stream.close() 56 | 
-------------------------------------------------------------------------------- /tests/output_filters/opendns/lookup_domains_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import simplejson 6 | from mock import patch 7 | from threat_intel.opendns import InvestigateApi 8 | 9 | from osxcollector.output_filters.opendns.lookup_domains import LookupDomainsFilter 10 | from tests.output_filters.run_filter_test import RunFilterTest 11 | 12 | 13 | class TestLookupDomainsFilter(RunFilterTest): 14 | 15 | def test_no_domains(self): 16 | input_blobs = [ 17 | {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}}, 18 | {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}}, 19 | ] 20 | 21 | self.run_test(LookupDomainsFilter, input_blobs=input_blobs, expected_output_blobs=input_blobs) 22 | 23 | def _read_json(self, file_name): 24 | with(open(file_name, 'r')) as fp: 25 | contents = fp.read() 26 | return simplejson.loads(contents) 27 | 28 | def test_no_security_information(self): 29 | input_blobs = [ 30 | {'osxcollector_domains': ['bingo.com', 'dingo.com', 'bango.com', 'dango.com'], 'banana': {'a': 11}}, 31 | ] 32 | file_name_pattern = 'tests/output_filters/data/opendns/lookup_domains/{0}' 33 | categorization = self._read_json(file_name_pattern.format('categorization.json')) 34 | security = self._read_json(file_name_pattern.format('security.json')) 35 | 36 | with patch.object( 37 | InvestigateApi, 'categorization', autospec=True, 38 | return_value=categorization, 39 | ), patch.object( 40 | InvestigateApi, 'security', autospec=True, return_value=security, 41 | ): 42 | output_blobs = self.run_test(LookupDomainsFilter, input_blobs=input_blobs) 43 | 44 | expected_categorization = self._read_json(file_name_pattern.format('expected.json')) 45 | self.assert_key_added_to_blob('osxcollector_opendns', 
def expand_domain(domain):
    """A generator that returns the input with and without the subdomain.

    Args:
        domain - string
    Returns:
        generator that returns strings
    """
    parts = tldextract.extract(domain)

    # Yield the fully-qualified name only when a subdomain actually exists.
    if parts.subdomain:
        yield '.'.join(parts)

    # Always yield the registered domain (domain + suffix, no subdomain).
    yield '.'.join(parts[1:])


def clean_domain(unclean_domain):
    """Removing errant characters and stuff from a domain name.

    A bit of careful dancing with character encodings. Eventually, some consumer of the domain string is gonna
    deal with it as ASCII. Make sure to encode as ASCII explicitly, so ASCII encoding errors can be ignored.

    Args:
        unclean_domain: string
    Returns:
        string domain name
    Raises:
        BadDomainError - when a clean domain can't be made
    """
    if not isinstance(unclean_domain, six.text_type):
        unclean_domain = unclean_domain.decode('utf-8', errors='ignore')

    # Strip any leading/trailing non-alphanumeric junk (dots, slashes, soft hyphens, ...).
    unclean_domain = re.sub(r'^[^a-zA-Z0-9]*(.*?)[^a-zA-Z0-9]*$', r'\1', unclean_domain)

    extracted = tldextract.extract(unclean_domain)
    if extracted.domain and extracted.suffix:
        # Drop the empty subdomain slot when there is no subdomain.
        first_index = 0 if extracted.subdomain else 1
        cleaned = '.'.join(extracted[first_index:]).lstrip('.')
        if six.PY2:
            return cleaned.encode('ascii', errors='ignore')
        return cleaned.encode('utf8').decode('ascii', errors='ignore')

    raise BadDomainError(u'Can not clean {0} {1}'.format(unclean_domain, repr(extracted)))
class LookupHashesFilter(ThreatFeedFilter):

    """A class to lookup hashes using ShadowServer API."""

    def __init__(self, lookup_when=None, **kwargs):
        """Looks up the values of 'sha1' keys and adds 'osxcollector_shadowserver' on a match.

        Args:
            lookup_when - Optional predicate of a blob deciding whether to perform the lookup.
        """
        super(LookupHashesFilter, self).__init__('sha1', 'osxcollector_shadowserver', lookup_when=lookup_when, **kwargs)

    def _lookup_iocs(self, all_iocs):
        """Looks up the ShadowServer info for a set of hashes.

        Args:
            all_iocs - a list of hashes.
        Returns:
            A dict with hash as key and threat info as value
        """
        cache_file_name = config_get_deep('shadowserver.LookupHashesFilter.cache_file_name', None)
        ss = ShadowServerApi(cache_file_name=cache_file_name)
        return ss.get_bin_test(all_iocs)

    def _should_add_threat_info_to_blob(self, blob, threat_info):
        """Only add info from ShadowServer if the hash and the filename match.

        Args:
            blob - A dict of data representing a line of output from OSXCollector
            threat_info - The threat info from ShadowServer
        Returns:
            boolean
        """
        # os.path.basename is the idiomatic spelling of os.path.split(...)[-1].
        blob_filename = os.path.basename(blob.get('file_path', ''))
        return blob_filename == threat_info.get('filename', '')
class DictUtils(object):

    """A set of methods for manipulating dictionaries."""

    @classmethod
    def _link_path_to_chain(cls, path):
        """Helper method for get_deep.

        Args:
            path: A str representing a chain of keys separated by '.' or an enumerable set of strings
        Returns:
            an enumerable set of strings
        """
        if path == '':
            return []
        # isinstance also accepts subclasses, unlike the exact type() check.
        if isinstance(path, (list, tuple, set)):
            return path
        return path.split('.')

    @classmethod
    def _get_deep_by_chain(cls, x, chain, default=None):
        """Grab data from a dict using a ['key1', 'key2', 'key3'] chain param to do deep traversal.

        Args:
            x: A dict
            chain: an enumerable set of strings
            default: A value to return if the path can not be found
        Returns:
            The value of the key or default
        """
        # An empty chain means "no path" and always yields the default.
        # `not chain` also covers empty tuples/sets, which the old
        # `chain == []` comparison silently let fall through.
        if not chain:
            return default
        try:
            for link in chain:
                try:
                    x = x[link]
                except (KeyError, TypeError):
                    # Fall back to treating the link as an integer index.
                    x = x[int(link)]
        except (KeyError, TypeError, ValueError):
            x = default
        return x

    @classmethod
    def get_deep(cls, x, path='', default=None):
        """Grab data from a dict using a 'key1.key2.key3' path param to do deep traversal.

        Args:
            x: A dict
            path: A 'deep path' to retrieve in the dict
            default: A value to return if the path can not be found
        Returns:
            The value of the key or default
        """
        chain = cls._link_path_to_chain(path)
        return cls._get_deep_by_chain(x, chain, default=default)
Any line starting with # is skipped 11 | # - blacklist_is_regex, should values in the file be treated as Python regex 12 | blacklists: 13 | - blacklist_name: "hashes" 14 | blacklist_keys: 15 | - "md5" 16 | - "sha1" 17 | - "sha2" 18 | blacklist_file_path: "./tests/output_filters/data/hashes_blacklist.txt" 19 | blacklist_is_regex: False 20 | - blacklist_name: "domains" 21 | blacklist_keys: 22 | - "osxcollector_domains" 23 | blacklist_file_path: "./tests/output_filters/data/domains_blacklist.txt" 24 | blacklist_is_domains: True 25 | blacklist_is_regex: True 26 | 27 | domain_whitelist: 28 | blacklist_name: "domain_whitelist" 29 | blacklist_keys: 30 | - "osxcollector_domains" 31 | blacklist_file_path: "./tests/output_filters/data/domains_whitelist.txt" 32 | blacklist_is_domains: True 33 | blacklist_is_regex: True 34 | 35 | shadowserver: 36 | LookupHashesFilter: 37 | cache_file_name: "./tests/output_filters/data/cache.shadowserver.LookupHashesFilter.json" 38 | 39 | virustotal: 40 | LookupHashesFilter: 41 | cache_file_name: "./tests/output_filters/data/cache.virustotal.LookupHashesFilter.json" 42 | LookupDomainsFilter: 43 | cache_file_name: "./tests/output_filters/data/cache.virustotal.LookupDomainsFilter.json" 44 | 45 | # No cache data is available or opendns yet 46 | opendns: 47 | # LookupDomainsFilter: 48 | # cache_file_name: "./tests/output_filters/data/cache.opendns.LookupDomainsFilter.json" 49 | RelatedDomainsFilter: 50 | cache_file_name: "./tests/output_filters/data/cache.opendns.RelatedDomainsFilter.json" 51 | -------------------------------------------------------------------------------- /tests/output_filters/data/opendns/lookup_domains/security.json: -------------------------------------------------------------------------------- 1 | { 2 | "bango.com": null, 3 | "dango.com": { 4 | "dga_score": 0, 5 | "geodiversity_normalized": [ 6 | [ 7 | "IS", 8 | 0.9996267573230843 9 | ], 10 | [ 11 | "US", 12 | 0.0003732426769157135 13 | ] 14 | ], 15 | "asn_score": 
class TestFindBlacklistedFilter(RunFilterTest):

    """Tests FindBlacklistedFilter against the hash and domain blacklists in the test config."""

    def test_simple_hashes(self):
        blobs_in = [
            {'md5': 'ffff5f60462c38b1d235cb3509876543'},
            {'sha1': 'ffff234d2a50a42a87389f1234561a21'},
            {'sha2': 'ffff51e77b442ee23188d87e4abcdef0'},
        ]
        expected = [
            {'hashes': ['ffff5f60462c38b1d235cb3509876543']},
            {'hashes': ['ffff234d2a50a42a87389f1234561a21']},
            {'hashes': ['ffff51e77b442ee23188d87e4abcdef0']},
        ]
        self._run_test(blobs_in, expected)

    def test_no_hashes(self):
        blobs_in = [
            # Not the right key
            {'apple': 'ffff5f60462c38b1d235cb3509876543'},
            # Value not on blacklist
            {'sha1': 'aaaa234d2a50a42a87389f1234561a21'},
        ]
        self._run_test(blobs_in, [None, None])

    def test_simple_domains(self):
        blobs_in = [
            {'osxcollector_domains': ['biz.example.com']},
            {'osxcollector_domains': ['www.example.co.uk']},
            {'osxcollector_domains': ['example.org']},
        ]
        expected = [
            {'domains': ['example.com']},
            {'domains': ['example.co.uk']},
            {'domains': ['example.org']},
        ]
        self._run_test(blobs_in, expected)

    def _run_test(self, input_blobs, expected_blacklists):
        """Run the filter and check the 'osxcollector_blacklist' key added to each blob."""
        output_blobs = self.run_test(FindBlacklistedFilter, input_blobs)
        self.assert_key_added_to_blob('osxcollector_blacklist', expected_blacklists, input_blobs, output_blobs)
class FindExtensionsFilter(OutputFilter):

    """Reads the Firefox JSON blobs and creates records about the extensions and plugins.

    Looks for lines where:
    ('osxcollector_section' == 'firefox' and 'osxcollector_subsection' == 'json_files')
    and parses the extension details out of the addons JSON.
    """

    def __init__(self, **kwargs):
        super(FindExtensionsFilter, self).__init__(**kwargs)
        self._new_lines = []

    def filter_line(self, blob):
        """Consume firefox addons/extensions JSON lines; pass everything else through unchanged."""
        is_addons_line = (
            blob.get('osxcollector_section') == 'firefox'
            and blob.get('osxcollector_subsection') == 'json_files'
            and blob.get('osxcollector_json_file') in ('addons.json', 'extensions.json')
        )
        if not is_addons_line:
            return blob

        for addon in DictUtils.get_deep(blob, 'contents.addons', []):
            record = {
                'osxcollector_section': 'firefox',
                'osxcollector_subsection': 'extensions',
                'osxcollector_incident_id': blob['osxcollector_incident_id'],
                'name': DictUtils.get_deep(addon, 'defaultLocale.name', addon.get('name')),
                'description': DictUtils.get_deep(addon, 'defaultLocale.description', addon.get('description')),
                'path': addon.get('id'),
            }
            if blob.get('osxcollector_username'):
                record['osxcollector_username'] = blob['osxcollector_username']

            self._new_lines.append(record)

        # The source line is consumed; its extensions are emitted at end_of_lines.
        return None

    def end_of_lines(self):
        """Emit one record per extension found while filtering."""
        return self._new_lines
class FindExtensionsFilter(OutputFilter):

    """Reads the Chrome preferences JSON blob and creates records about the extensions and plugins.

    Looks for lines where:
    ('osxcollector_section' == 'chrome' and 'osxcollector_subsection' == 'preferences')
    and parses the extension settings out of the preferences JSON.
    """

    def __init__(self, **kwargs):
        super(FindExtensionsFilter, self).__init__(**kwargs)
        self._new_lines = []

    def filter_line(self, blob):
        """Consume chrome preferences lines; pass everything else through unchanged."""
        if blob.get('osxcollector_section') != 'chrome' or blob.get('osxcollector_subsection') != 'preferences':
            return blob

        settings_by_id = DictUtils.get_deep(blob, 'contents.extensions.settings', {})
        for setting in settings_by_id.values():
            record = {
                'osxcollector_section': 'chrome',
                'osxcollector_subsection': 'extensions',
                'osxcollector_incident_id': blob['osxcollector_incident_id'],
                'state': setting.get('state'),
                'was_installed_by_default': setting.get('was_installed_by_default'),
                'name': DictUtils.get_deep(setting, 'manifest.name'),
                'description': DictUtils.get_deep(setting, 'manifest.description'),
                'path': setting.get('path'),
            }
            if blob.get('osxcollector_username'):
                record['osxcollector_username'] = blob['osxcollector_username']

            self._new_lines.append(record)

        # The source line is consumed; its extensions are emitted at end_of_lines.
        return None

    def end_of_lines(self):
        """Emit one record per extension found while filtering."""
        return self._new_lines
These can be of the form "a.b" to look at "b" in {"a": {"b": "foo"}} 12 | # - blacklist_file_path, the path to a file containing values considered blacklisted. Any line starting with # is skipped 13 | # - blacklist_is_regex, should values in the file be treated as Python regex 14 | # - blacklist_is_domains, should values in the file be treated as domains and analyzed with some smart regex to retrieve subdomain etc. 15 | blacklists: 16 | - blacklist_name: "hashes" 17 | blacklist_keys: 18 | - "md5" 19 | - "sha1" 20 | - "sha2" 21 | blacklist_file_path: "/tmp/hash_blacklist.txt" 22 | blacklist_is_regex: False 23 | - blacklist_name: "domains" 24 | blacklist_keys: 25 | - "osxcollector_domains" 26 | blacklist_file_path: "/tmp/domain_blacklist.txt" 27 | blacklist_is_regex: False 28 | blacklist_is_domains: True 29 | 30 | # domain_whitelist is a special blacklist entry. Anything on this list won't be looked up with 31 | # OpenDNS or VirusTotal 32 | domain_whitelist: 33 | blacklist_name: "Ignore Domains" 34 | blacklist_keys: 35 | - "osxcollector_domains" 36 | blacklist_file_path: "/tmp/domain_whitelist.txt" 37 | blacklist_is_domains: True 38 | blacklist_is_regex: True 39 | 40 | opendns: 41 | LookupDomainsFilter: 42 | cache_file_name: "/tmp/cache.opendns.LookupDomainsFilter.json" 43 | RelatedDomainsFilter: 44 | cache_file_name: "/tmp/cache.opendns.RelatedDomainsFilter.json" 45 | 46 | shadowserver: 47 | LookupHashesFilter: 48 | cache_file_name: "/tmp/cache.shadowserver.LookupHashesFilter.json" 49 | 50 | virustotal: 51 | LookupHashesFilter: 52 | cache_file_name: "/tmp/cache.virustotal.LookupHashesFilter.json" 53 | LookupDomainsFilter: 54 | cache_file_name: "/tmp/cache.virustotal.LookupDomainsFilter.json" 55 | LookupURLsFilter: 56 | cache_file_name: "/tmp/cache.virustotal.LookupURLsFilter.json" 57 | resources_per_req: 4 58 | 59 | alexa: 60 | LookupRankingsFilter: 61 | cache_file_name: "/tmp/cache.alexa.LookupRankingsFilter.json" 62 | 
class LookupRankingsFilter(ThreatFeedFilter):

    """A class to lookup traffic rankings using AWIS API."""

    def __init__(self, lookup_when=None, **kwargs):
        """Looks up 'osxcollector_domains' values and adds 'osxcollector_alexa_rank'.

        Args:
            lookup_when - Optional predicate of a blob deciding whether to perform the lookup.
        """
        super(LookupRankingsFilter, self).__init__(
            'osxcollector_domains',
            'osxcollector_alexa_rank',
            lookup_when=lookup_when,
            name_of_api_key=None,
            **kwargs
        )

    def _lookup_iocs(self, domains, resource_per_req=25):
        """Caches the Alexa ranking info for a set of domains.

        Args:
            domains - a list of domains.
            resource_per_req - number of resources to batch into a single API request.
        Returns:
            A dict with domain as key and threat info as value
        """
        traffic_info = {}

        cache_file_name = config_get_deep('alexa.LookupRankingsFilter.cache_file_name', None)
        ar = AlexaRankingApi(resource_per_req, cache_file_name=cache_file_name)

        # Iterate key/value pairs directly; the old `iocs = domains` alias
        # and `reports[domain]` re-lookup added nothing.
        reports = ar.get_alexa_rankings(domains)
        for domain, report in reports.items():
            if report and self._should_store_ioc_info(report):
                traffic_info[domain] = report

        return traffic_info

    def _should_store_ioc_info(self, report):
        """Only store if traffic ranking passes a certain threshold.

        Args:
            report - a dict from get_alexa_rankings
        Returns:
            booleans
        """
        # Always include Alexa ranking information since we do not yet
        # categorize by it for the output summaries, rather just use it as an
        # additional source of threat intel.
        return True
class TestCleanDomain:

    """Tests for clean_domain."""

    def _assert_cleaned(self, dirty_domain, expected):
        assert clean_domain(dirty_domain) == expected

    def test_trailing_and_leading_dots(self):
        self._assert_cleaned('.www.example.com.', 'www.example.com')

    def test_trailing_and_leading_slashes(self):
        self._assert_cleaned('//www.example.com//', 'www.example.com')

    def test_unicode_prefix(self):
        self._assert_cleaned('\xadwww.example.com', 'www.example.com')

    def test_unicode_prefix2(self):
        self._assert_cleaned(u'\xadwww.example.com', 'www.example.com')

    def test_unicode_mid(self):
        self._assert_cleaned('stinkum.\xadexample.com', 'stinkum.example.com')

    def test_unicode_mid2(self):
        self._assert_cleaned(u'stinkum.\xadexample.com', 'stinkum.example.com')

    def test_punicoded(self):
        # TODO: OSXCollector is confused by stuff that ought to be punycode... or something
        self._assert_cleaned('hotmaıll.com', 'hotmall.com')

    def test_unicode_punicoded(self):
        self._assert_cleaned(u'hotmaıll.com', 'hotmall.com')

    def test_single_word(self):
        with pytest.raises(BadDomainError):
            clean_domain('oneword')


class TestExpandDomain:

    """Tests for expand_domain."""

    def _assert_expansion(self, initial_domain, expected):
        actual = sorted(expand_domain(initial_domain))
        assert actual == sorted(expected)

    def test_simple_subdomain(self):
        self._assert_expansion('www.example.com', ['example.com', 'www.example.com'])

    def test_no_subdomain(self):
        self._assert_expansion('example.com', ['example.com'])

    def test_complex_subdomain(self):
        self._assert_expansion('www.foo.bar.whiz.example.com', ['example.com', 'www.foo.bar.whiz.example.com'])

    def test_unicode_subdomain(self):
        self._assert_expansion('www.jobbörse.com', ['www.jobbörse.com', 'jobbörse.com'])
class FindBlacklistedFilter(OutputFilter):

    """Adds 'osxcollector_blacklist' key to lines matching a blacklist.

    This filter compares each line to a set of blacklists and marks lines that match the blacklist.
    This is proving useful for filtering known hashes, known bad filenames, known bad domains, etc.

    Configuration Keys:
        blacklist_name - [REQUIRED] the name of the blacklist
        blacklist_keys - [REQUIRED] get the value of these keys and compare against the blacklist
        blacklist_is_regex - [REQUIRED] should the values in the blacklist file be treated as regex
        blacklist_file_path - [REQUIRED if no blacklist_data_feed] path to a file with the actual values to blacklist
        blacklist_data_feed - [REQUIRED if no blacklist_file_path] name of the data feed from which data is read
        blacklist_is_domains - [OPTIONAL] interpret values as domains and do some smart regex and subdomain stuff with them
    """

    def __init__(self, **kwargs):
        super(FindBlacklistedFilter, self).__init__(**kwargs)
        data_feeds = kwargs.get('data_feeds', {})
        self._blacklists = [
            create_blacklist(config_chunk, data_feeds)
            for config_chunk in config_get_deep('blacklists')
        ]

    def filter_line(self, blob):
        """Find blacklisted values in a line.

        Lines are never cached; every line in produces a line out. Only the
        first blacklist that matches is recorded (the loop breaks on a hit).

        Args:
            blob - A dict of data representing a line of output from OSXCollector
        Returns:
            The blob, annotated with 'osxcollector_blacklist' on a match.
        """
        for blacklist in self._blacklists:
            matching_term = blacklist.match_line(blob)
            if matching_term:
                # Chained setdefault replaces the previous three-step
                # setdefault / setdefault / append sequence.
                blob.setdefault('osxcollector_blacklist', {}).setdefault(blacklist.name, []).append(matching_term)
                break

        return blob
class TestLookupDomainsFilter(RunFilterTest):

    """Tests for the VirusTotal LookupDomainsFilter."""

    @staticmethod
    def _expected_report(domain, downloaded, referrer, communicating, urls):
        """Build the expected 'osxcollector_vtdomain' entry for one domain."""
        return {
            'domain': domain,
            'response_code': 1,
            'detections': {
                'undetected_referrer_samples': 0,
                'undetected_communicating_samples': 0,
                'detected_downloaded_samples': downloaded,
                'detected_referrer_samples': referrer,
                'detected_communicating_samples': communicating,
                'detected_urls': urls,
            },
            'categorization': {},
        }

    def test_no_domains(self):
        blobs_in = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]

        self.run_test(LookupDomainsFilter, input_blobs=blobs_in, expected_output_blobs=blobs_in)

    def test_benign_domains(self):
        blobs_in = [
            {'osxcollector_domains': ['good.example.com'], 'dingo': 'bingo', 'apple': [3, 14]},
            {'osxcollector_domains': ['good.example.co.uk'], 'bingo': 'bongo', 'orange': 'banana'},
        ]

        self.run_test(LookupDomainsFilter, input_blobs=blobs_in, expected_output_blobs=blobs_in)

    def test_suspicious_domains(self):
        blobs_in = [
            {'osxcollector_domains': ['evil.example.com'], 'dingo': 'bingo', 'apple': [3, 14]},
            {'osxcollector_domains': ['evil.example.co.uk'], 'bingo': 'bongo', 'orange': 'banana'},
        ]
        expected_vtdomains = [
            [self._expected_report('evil.example.com', 5, 5, 5, 5)],
            [self._expected_report('evil.example.co.uk', 4, 5, 5, 6)],
        ]
        blobs_out = self.run_test(LookupDomainsFilter, input_blobs=blobs_in)
        self.assert_key_added_to_blob('osxcollector_vtdomain', expected_vtdomains, blobs_in, blobs_out)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# LookupHashesFilter uses VirusTotal to lookup the values in 'sha2' and add 'osxcollector_vthash' key.
#
from __future__ import absolute_import
from __future__ import unicode_literals

from threat_intel.virustotal import VirusTotalApi

from osxcollector.output_filters.base_filters.output_filter import run_filter_main
from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter
from osxcollector.output_filters.util.config import config_get_deep


class LookupHashesFilter(ThreatFeedFilter):

    """A class to lookup hashes using VirusTotal API."""

    def __init__(self, lookup_when=None, **kwargs):
        super(LookupHashesFilter, self).__init__(
            'sha2',
            'osxcollector_vthash', lookup_when=lookup_when,
            name_of_api_key='virustotal', **kwargs
        )

    def _lookup_iocs(self, all_iocs, resource_per_req=25):
        """Caches the VirusTotal info for a set of hashes.

        Args:
            all_iocs - a list of hashes.
            resource_per_req - number of hashes to send per API request.
        Returns:
            A dict with hash as key and threat info as value
        """
        cache_file_name = config_get_deep('virustotal.LookupHashesFilter.cache_file_name', None)
        vt = VirusTotalApi(self._api_key, resource_per_req, cache_file_name=cache_file_name)
        reports = vt.get_file_reports(all_iocs)

        # Keep only non-empty reports that clear the detection threshold.
        return {
            hash_val: self._trim_hash_report(report)
            for hash_val, report in reports.items()
            if report and self._should_store_ioc_info(report)
        }

    def _should_store_ioc_info(self, report, min_hits=1):
        """Only store if the hash has > min_hits positive detections.

        Args:
            report - A dict response from get_file_reports
            min_hits - Minimum number of VT positives
        Returns:
            boolean
        """
        return 1 == report.get('response_code') and min_hits < report.get('positives', 0)

    def _trim_hash_report(self, report):
        """Copy just the required keys from the report into a new report.

        Args:
            report - A dict response from get_file_reports
        Returns:
            A smaller dict
        """
        copy_keys = [
            'scan_id',
            'sha1',
            'sha256',
            'md5',
            'scan_date',
            'permalink',
            'positives',
            'total',
            'response_code',
        ]
        # Dict comprehension replaces the older dict([(k, v) for ...]) construction.
        return {key: report.get(key) for key in copy_keys}


def main():
    run_filter_main(LookupHashesFilter)


if __name__ == '__main__':
    main()
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from osxcollector.output_filters.virustotal.lookup_hashes import LookupHashesFilter
from tests.output_filters.run_filter_test import RunFilterTest


class TestLookupHashesFilter(RunFilterTest):

    """Runs LookupHashesFilter and checks the 'osxcollector_vthash' annotations."""

    def test_no_hashes(self):
        """Lines without a 'sha2' key pass through unchanged."""
        lines = [
            {'fungo': 'dingo', 'bingo': [11, 37], 'banana': {'a': 11}},
            {'span': 'div', 'head': ['tail', 22], 'orange': {'lemmon': 'zits'}},
        ]
        self.run_test(LookupHashesFilter, input_blobs=lines, expected_output_blobs=lines)

    def test_benign_hashes(self):
        """Hashes with clean reports are not annotated."""
        lines = [
            {'sha2': 'b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7', 'dingo': 'bingo', 'apple': [3, 14]},
            {'sha2': '52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c', 'bingo': 'bongo', 'orange': 'banana'},
        ]
        self.run_test(LookupHashesFilter, input_blobs=lines, expected_output_blobs=lines)

    def test_suspicious_hashes(self):
        """Hashes with positive detections get the trimmed VT report attached."""
        lines = [
            {'sha2': 'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302', 'dingo': 'bingo', 'apple': [3, 14]},
            {'sha2': '6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360', 'bingo': 'bongo', 'orange': 'banana'},
        ]
        expected = [
            [{
                'scan_id': 'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302-1273894724',
                'sha1': 'da9b79f2fd33d002033b69a9a346af4671a9e16b',
                'sha256': 'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302',
                'md5': '0c71d8cedc8bbb2b619a76d1478c4348',
                'scan_date': '2015-01-15 16:42:01',
                'permalink': 'https://www.virustotal.com/file/'
                             'b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302/analysis/1273894724/',
                'total': 40,
                'positives': 40,
                'response_code': 1,
            }],
            [{
                'scan_id': '52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c-1273894724',
                'sha1': '92e3750a9f0eef6290dd83867eff88064e9c01bb',
                'sha256': '6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360',
                'md5': '06506cc06cf0167ea583de62c98eae2c',
                'scan_date': '2010-05-15 03:38:44',
                'permalink': 'https://www.virustotal.com/file/'
                             '6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360/analysis/1273894724/',
                'total': 40,
                'positives': 40,
                'response_code': 1,
            }],
        ]
        results = self.run_test(LookupHashesFilter, input_blobs=lines)
        self.assert_key_added_to_blob('osxcollector_vthash', expected, lines, results)
| import six 12 | 13 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 14 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 15 | 16 | 17 | class SortHistoryFilter(OutputFilter): 18 | 19 | """Joins Firefox browser history 'visits' and 'urls' tables, producing a time sorted browser history. 20 | 21 | In the output look for lines where: 22 | ('osxcollector_section' == 'chrome' and 'osxcollector_subsection' == 'history' and 'osxcollector_table_name' == 'visits') 23 | for some snazzy browser history stuff. 24 | """ 25 | 26 | def __init__(self, **kwargs): 27 | super(SortHistoryFilter, self).__init__(**kwargs) 28 | 29 | self._visits_table = dict() 30 | self._places_table = dict() 31 | 32 | def filter_line(self, blob): 33 | """Cache the 'visits' and 'urls' tables.""" 34 | if 'firefox' == blob.get('osxcollector_section') and 'history' == blob.get('osxcollector_subsection'): 35 | table = blob.get('osxcollector_table_name') 36 | 37 | if 'moz_historyvisits' == table: 38 | if self._validate_visit(blob): 39 | self._visits_table[blob['place_id']] = blob 40 | blob = None # Consume the line 41 | elif 'moz_places' == table: 42 | if self._validate_places(blob): 43 | self._places_table[blob['id']] = blob 44 | blob = None # Consume the line 45 | 46 | return blob 47 | 48 | def end_of_lines(self): 49 | """Join the 'visits' and 'urls' tables into a single browser history and timeline.""" 50 | history = list() 51 | 52 | for visit in six.itervalues(self._visits_table): 53 | place = self._places_table.get(visit.get('place_id')) 54 | if place: 55 | add_keys = [key for key in visit if key not in place] 56 | record = copy.deepcopy(place) 57 | for key in add_keys: 58 | record[key] = visit[key] 59 | record['osxcollector_browser_history'] = 'firefox' 60 | history.append(record) 61 | 62 | return sorted(history, key=lambda x: x['last_visit_date'], reverse=True) 63 | 64 | @classmethod 65 | def _validate_visit(cls, blob): 66 | """Does 
the visit dict have the required fields? 67 | 68 | Args: 69 | blob: a visit dict 70 | Returns: 71 | boolean 72 | """ 73 | required_fields = ['place_id'] 74 | return all([field in blob for field in required_fields]) 75 | 76 | @classmethod 77 | def _validate_places(cls, blob): 78 | """Does the place dict have the required fields? 79 | 80 | Args: 81 | blob: a place dict 82 | Returns: 83 | boolean 84 | """ 85 | required_fields = ['id'] 86 | return all([field in blob for field in required_fields]) 87 | 88 | 89 | def main(): 90 | run_filter_main(SortHistoryFilter) 91 | 92 | 93 | if __name__ == '__main__': 94 | main() 95 | -------------------------------------------------------------------------------- /tests/output_filters/shadowserver/lookup_hashes_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from osxcollector.output_filters.shadowserver.lookup_hashes import LookupHashesFilter 6 | from tests.output_filters.run_filter_test import RunFilterTest 7 | 8 | 9 | class TestLookupHashesFilter(RunFilterTest): 10 | 11 | def setup_method(self, method): 12 | self._known_sha1_input = [ 13 | { 14 | 'sha2': '1fafe48f626fdc030b0a0efc1008d51cd3078d1b3ec95f808d12afbfef458b23', 15 | 'sha1': '5d87de61cb368c93325dd910c202b8647f8e90ed', 16 | 'ctime': '2014-12-05 16:50:48', 17 | 'osxcollector_plist_path': '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Info.plist', 18 | 'mtime': '2014-09-19 00:42:35', 19 | 'osxcollector_incident_id': 'RecalibratedTurnip-2014_12_21-18_49_52', 20 | 'osxcollector_section': 'kext', 21 | 'osxcollector_bundle_id': 'com.apple.kpi.libkern', 22 | 'file_path': '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Libkern', 23 | 'md5': '6746005c822ceb6737b871698d3ed22f', 24 | }, 25 | ] 26 | self._unknown_sha1_input = [ 27 | { 28 | 'sha2': 
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from osxcollector.output_filters.shadowserver.lookup_hashes import LookupHashesFilter
from tests.output_filters.run_filter_test import RunFilterTest


class TestLookupHashesFilter(RunFilterTest):

    """Runs the ShadowServer LookupHashesFilter against kext records with known and unknown hashes."""

    def setup_method(self, method):
        """Fixtures: one record whose hash is in the bin-test data, one that is not."""
        self._known_sha1_input = [
            {
                'sha2': '1fafe48f626fdc030b0a0efc1008d51cd3078d1b3ec95f808d12afbfef458b23',
                'sha1': '5d87de61cb368c93325dd910c202b8647f8e90ed',
                'ctime': '2014-12-05 16:50:48',
                'osxcollector_plist_path': '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Info.plist',
                'mtime': '2014-09-19 00:42:35',
                'osxcollector_incident_id': 'RecalibratedTurnip-2014_12_21-18_49_52',
                'osxcollector_section': 'kext',
                'osxcollector_bundle_id': 'com.apple.kpi.libkern',
                'file_path': '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Libkern',
                'md5': '6746005c822ceb6737b871698d3ed22f',
            },
        ]
        self._unknown_sha1_input = [
            {
                'sha2': '5148211a7bc4a5d02913b0037805f20704f329e1739b5a6d2338fc84c1780b71',
                'sha1': '816a85d89ae34d2dc73b8c768eecb03935c568ba',
                'ctime': '2014-12-05 16:53:07',
                'osxcollector_plist_path': '/System/Library/Extensions/AMDRadeonX3000GLDriver.bundle/Contents/Info.plist',
                'mtime': '2014-09-28 22:34:42',
                'osxcollector_incident_id': 'RecalibratedTurnip-2014_12_21-18_49_52',
                'osxcollector_section': 'kext',
                'osxcollector_bundle_id': 'com.apple.AMDRadeonX3000GLDriver',
                'file_path': '/System/Library/Extensions/AMDRadeonX3000GLDriver.bundle/Contents/MacOS/AMDRadeonX3000GLDriver',
                'md5': '967698d9ad4171bed991df85e1c72e56',
            },
        ]

    def _filter_single_record(self, blobs):
        """Run the filter over a one-record input and return that record."""
        results = self.run_test(LookupHashesFilter, blobs)
        assert len(results) == 1
        return results[0]

    def test_no_match(self):
        record = self._filter_single_record(self._unknown_sha1_input)
        assert 'osxcollector_shadowserver' not in record

    def test_known_match(self):
        record = self._filter_single_record(self._known_sha1_input)
        assert 'osxcollector_shadowserver' in record

    def test_known_match_different_path_prefix(self):
        self._known_sha1_input[0]['file_path'] = '/new_path/Libkern'
        record = self._filter_single_record(self._known_sha1_input)
        assert 'osxcollector_shadowserver' in record

    def test_wrong_filename(self):
        """Change the filename and don't match"""
        self._known_sha1_input[0]['file_path'] = 'wrong_name'
        record = self._filter_single_record(self._known_sha1_input)
        assert 'osxcollector_shadowserver' not in record

    def test_partial_filename(self):
        """Change the filename and don't match"""
        self._known_sha1_input[0]['file_path'] = '/System/Library/Extensions/System.kext/PlugIns/Libkern.kext/Not_Quite_Libkern'
        record = self._filter_single_record(self._known_sha1_input)
        assert 'osxcollector_shadowserver' not in record
self._known_sha1_input) 68 | assert len(output_blobs) == 1 69 | assert 'osxcollector_shadowserver' not in output_blobs[0] 70 | -------------------------------------------------------------------------------- /osxcollector/output_filters/virustotal/lookup_urls.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # LookupURLsFilter uses VirusTotal to lookup the URLs in 'LSQuarantineDataURLString' and add 'osxcollector_vturl' key. 5 | # 6 | from __future__ import absolute_import 7 | from __future__ import unicode_literals 8 | 9 | import re 10 | 11 | from threat_intel import VirusTotalApi 12 | 13 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 14 | from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter 15 | from osxcollector.output_filters.util.config import config_get_deep 16 | 17 | 18 | class LookupURLsFilter(ThreatFeedFilter): 19 | 20 | """A class to find suspicious URLs using VirusTotal API.""" 21 | 22 | SCHEMES = re.compile('https?') 23 | 24 | def __init__(self, lookup_when=None, **kwargs): 25 | lookup_when_url_scheme_matches = self._generate_lookup_when(lookup_when) 26 | super(LookupURLsFilter, self).__init__( 27 | 'LSQuarantineDataURLString', 'osxcollector_vturl', 28 | lookup_when=lookup_when_url_scheme_matches, 29 | name_of_api_key='virustotal', **kwargs 30 | ) 31 | 32 | def _generate_lookup_when(self, only_lookup_when): 33 | """Generates functions that checks whether the blob contains a valid URL 34 | in LSQuarantineDataURLString field. 35 | """ 36 | def check_url_scheme(blob): 37 | return self.SCHEMES.match(blob['LSQuarantineDataURLString']) and (not only_lookup_when or only_lookup_when(blob)) 38 | return check_url_scheme 39 | 40 | def _lookup_iocs(self, all_iocs, resource_per_req=25): 41 | """Caches the VirusTotal info for a set of URLs. 42 | 43 | Args: 44 | all_iocs - a list of URLs. 
45 | Returns: 46 | A dict with URL as key and threat info as value 47 | """ 48 | threat_info = {} 49 | 50 | cache_file_name = config_get_deep('virustotal.LookupURLsFilter.cache_file_name', None) 51 | vt = VirusTotalApi(self._api_key, resource_per_req, cache_file_name=cache_file_name) 52 | reports = vt.get_url_reports(all_iocs) 53 | 54 | for url in reports: 55 | report = reports[url] 56 | if not report: 57 | continue 58 | if self._should_store_ioc_info(report): 59 | threat_info[url] = self._trim_url_report(report) 60 | 61 | return threat_info 62 | 63 | def _should_store_ioc_info(self, report, min_hits=1): 64 | """Only store if the hash has > min_hits positive detections. 65 | 66 | Args: 67 | report - A dict response from get_url_reports 68 | min_hits - Minimum number of VT positives 69 | Returns: 70 | boolean 71 | """ 72 | return 1 == report.get('response_code') and min_hits < report.get('positives', 0) 73 | 74 | def _trim_url_report(self, report): 75 | """Copy just the required keys from the report into a new report. 
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from osxcollector.output_filters.opendns.related_domains import RelatedDomainsFilter
from tests.output_filters.run_filter_test import RunFilterTest


class TestRelatedDomainsFilter(RunFilterTest):

    """Runs RelatedDomainsFilter seeded with fixture domains/IPs and checks 'osxcollector_related'."""

    def setup_method(self, method):
        self._initial_domains = ['zendesk.com', 'jpmorganaccess.com', 'opendns.zendesk.com', 'yelp.com']
        self._initial_ips = ['159.53.60.177']

    def _run_test(self, input_blobs, expected_relateddomains):
        """Run the filter with the fixture seeds and assert the added key's value."""
        def build_filter():
            return RelatedDomainsFilter(initial_domains=self._initial_domains, initial_ips=self._initial_ips)
        results = self.run_test(build_filter, input_blobs=input_blobs)
        self.assert_key_added_to_blob('osxcollector_related', expected_relateddomains, input_blobs, results)

    def test_no_domains(self):
        blobs = [
            {'tater': 'tots'},
        ]
        self._run_test(blobs, None)

    def test_direct_domain_match(self):
        # Direct meaning the domain in the input is an initial domain
        blobs = [
            {'osxcollector_domains': ['opendns.zendesk.com']},
        ]
        expected = [
            {
                'domains': {'opendns.zendesk.com': ['opendns.zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_related_domain_match(self):
        blobs = [
            {'osxcollector_domains': ['webmd.com']},
        ]
        expected = [
            {
                'domains': {'webmd.com': ['opendns.zendesk.com', 'zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_multiple_related_domain_match(self):
        blobs = [
            {'osxcollector_domains': ['webmd.com', 'hushmail.zendesk.com']},
        ]
        expected = [
            {
                'domains': {
                    'webmd.com': ['opendns.zendesk.com', 'zendesk.com'],
                    'hushmail.zendesk.com': ['opendns.zendesk.com'],
                },
            },
        ]
        self._run_test(blobs, expected)

    def test_direct_and_related_domain_match(self):
        blobs = [
            {'osxcollector_domains': ['zendesk.com']},
        ]
        expected = [
            {
                'domains': {'zendesk.com': ['opendns.zendesk.com', 'zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_direct_ip_match(self):
        blobs = [
            {'osxcollector_domains': ['jpmorganaccess.com']},
        ]
        expected = [
            {
                'domains': {'jpmorganaccess.com': ['159.53.60.177', 'jpmorganaccess.com', 'opendns.zendesk.com', 'zendesk.com']},
            },
        ]
        self._run_test(blobs, expected)

    def test_whitelist_domain(self):
        blobs = [
            {'osxcollector_domains': ['yelp.com']},
        ]
        self._run_test(blobs, [None])
python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # RelatedFilesFilter finds files related to specific terms or file names. 5 | # 6 | from __future__ import absolute_import 7 | from __future__ import unicode_literals 8 | 9 | import os.path 10 | from argparse import ArgumentParser 11 | 12 | import simplejson 13 | 14 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 15 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 16 | from osxcollector.output_filters.util.dict_utils import DictUtils 17 | 18 | 19 | class RelatedFilesFilter(OutputFilter): 20 | 21 | """RelatedFilesFilter finds files related to specific terms or file names. 22 | 23 | The file paths passed to the filter during creation are split into arrays of 24 | directory or file names. Anything matching a stop list of common directory names 25 | is discarded. 26 | """ 27 | 28 | def __init__(self, when=None, file_terms=None, **kwargs): 29 | super(RelatedFilesFilter, self).__init__() 30 | self._all_blobs = list() 31 | self._terms = set() 32 | self._usernames = set() 33 | 34 | self._when = when 35 | 36 | if file_terms: 37 | for val in file_terms: 38 | self._create_terms(val) 39 | 40 | def _create_terms(self, val): 41 | for term in os.path.normpath(val.lower()).split(os.path.sep): 42 | if len(term) > 1 and term not in self.STOP_WORDS: 43 | self._terms.add(term) 44 | 45 | def filter_line(self, blob): 46 | self._all_blobs.append(blob) 47 | 48 | if self._when and self._when(blob): 49 | for key in self.FILE_NAME_KEYS: 50 | val = DictUtils.get_deep(blob, key) 51 | if val: 52 | self._create_terms(val) 53 | if 'osxcollector_username' in blob: 54 | self._usernames.add(blob['osxcollector_username'].lower()) 55 | 56 | return None 57 | 58 | def end_of_lines(self): 59 | self._terms = self._terms - self._usernames 60 | 61 | for blob in self._all_blobs: 62 | line = simplejson.dumps(blob).lower() 63 | for term in self._terms: 64 | if term in line: 65 | 
blob.setdefault('osxcollector_related', {}) 66 | blob['osxcollector_related'].setdefault('files', []) 67 | blob['osxcollector_related']['files'].append(term) 68 | 69 | return self._all_blobs 70 | 71 | def get_argument_parser(self): 72 | parser = ArgumentParser() 73 | group = parser.add_argument_group('RelatedFilesFilter') 74 | group.add_argument( 75 | '-f', '--file-term', dest='file_terms', default=[], action='append', 76 | help='[OPTIONAL] Suspicious terms to use in pivoting through file names. May be specified more than once.', 77 | ) 78 | return parser 79 | 80 | @property 81 | def terms(self): 82 | return self._terms 83 | 84 | @property 85 | def usernames(self): 86 | return self._usernames 87 | 88 | # Keys to look in to find file paths 89 | FILE_NAME_KEYS = [ 90 | 'file_path', 91 | 'osxcollector_plist_path', 92 | ] 93 | 94 | # Words that can never be terms 95 | STOP_WORDS = [ 96 | 'applications', 97 | 'bin', 98 | 'contents', 99 | 'cores', 100 | 'coreservices', 101 | 'dev', 102 | 'downloads', 103 | 'extensions', 104 | 'frameworks', 105 | 'helpers', 106 | 'home', 107 | 'information', 108 | 'libexec', 109 | 'libraries', 110 | 'library', 111 | 'macos', 112 | 'malware', 113 | 'net', 114 | 'network', 115 | 'opt', 116 | 'plugins', 117 | 'private', 118 | 'privateframeworks', 119 | 'python', 120 | 'resources', 121 | 'sbin', 122 | 'support', 123 | 'system', 124 | 'tmp', 125 | 'user', 126 | 'users', 127 | 'usr', 128 | 'utilities', 129 | 'versions', 130 | 'var', 131 | ] 132 | 133 | 134 | def main(): 135 | run_filter_main(RelatedFilesFilter) 136 | 137 | 138 | if __name__ == '__main__': 139 | main() 140 | -------------------------------------------------------------------------------- /osxcollector/output_filters/find_domains.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # FindDomainsFilter looks for domains in all input lines and adds those domains into the 
'osxcollector_domains' key. 5 | # 6 | from __future__ import absolute_import 7 | from __future__ import unicode_literals 8 | 9 | import logging 10 | import re 11 | 12 | import six 13 | from six.moves.urllib.parse import unquote_plus 14 | from six.moves.urllib.parse import urlsplit 15 | 16 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 17 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 18 | from osxcollector.output_filters.exceptions import BadDomainError 19 | from osxcollector.output_filters.util.domains import clean_domain 20 | from osxcollector.output_filters.util.domains import expand_domain 21 | 22 | 23 | class FindDomainsFilter(OutputFilter): 24 | 25 | """Adds 'osxcollector_domains' key to output lines. 26 | 27 | This filters parses domains out of anywhere in an output line and adds them to a clean array in the line. 28 | This is helpful as a pre-processing step before sending the domains off to threat APIs or matching against 29 | threat feeds. 30 | """ 31 | 32 | def __init__(self, **kwargs): 33 | super(FindDomainsFilter, self).__init__(**kwargs) 34 | self._domains = set() 35 | 36 | def filter_line(self, blob): 37 | """Find domains in a line.""" 38 | self._domains = set() 39 | self._look_for_domains(blob) 40 | 41 | # self._domains accumulates domains during calls to _look_for_domains 42 | if len(self._domains): 43 | blob['osxcollector_domains'] = sorted(list(self._domains)) 44 | 45 | return blob 46 | 47 | def _look_for_domains(self, val, key=None): 48 | """Given a value and perhaps a key, look for domains. 49 | 50 | Args: 51 | val: The value, could be of any type 52 | key: A string key associated with the value. 53 | """ 54 | if isinstance(val, six.string_types): 55 | if key in self.HOST_KEYS: 56 | self._add_domain(val) 57 | return 58 | if -1 != self.SCHEMES.search(val): 59 | # Sometimes values are complex strings, like JSON or pickle encoded stuff. 
60 | # Try splitting the string on non-URL related punctuation 61 | for maybe_url in re.split(r'[ \'\(\)\"\[\]\{\}\;\n\t#@\^&\*=]+', val): 62 | domain = self._url_to_domain(maybe_url) 63 | self._add_domain(domain) 64 | elif isinstance(val, list): 65 | for elem in val: 66 | self._look_for_domains(elem) 67 | elif isinstance(val, dict): 68 | for key, elem in six.iteritems(val): 69 | self._look_for_domains(elem, key) 70 | self._look_for_domains(key) 71 | 72 | def _url_to_domain(self, maybe_url): 73 | """Converts an URL to a domain. 74 | 75 | The code deals with eccentricities of both unquote_plus and split_url. 76 | 77 | Args: 78 | maybe_url - a string that might be an URL. 79 | Returns: 80 | a string representing the domain or None 81 | """ 82 | if self.SCHEMES.match(maybe_url): 83 | url = unquote_plus(maybe_url) 84 | 85 | try: 86 | split_url = urlsplit(url) 87 | if split_url.hostname: 88 | return split_url.hostname 89 | # in case "url" is not a valid URL, just log a message 90 | except ValueError as e: 91 | logging.info('Cannot split the URL: {0}. 
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

import simplejson
from mock import patch
from six import StringIO

from osxcollector.output_filters.base_filters.output_filter import _run_filter


class RunFilterTest:

    """Base class giving filter tests a mocked stdin/stdout/config harness."""

    def run_test(self, create_filter, input_blobs=None, expected_output_blobs=None):
        """Mocks out stdin, stdout, and config then runs input lines through an OutputFilter.

        Args:
            create_filter: A callable that returns an OutputFilter.
            input_blobs: An array of dicts to pass to OutputFilter. These will be serialized into strings and passed as stdin.
            expected_output_blobs: An array of dicts the output of the OutputFilter must match.
        Returns:
            The list of dicts the filter wrote to stdout.
        """
        if not input_blobs:
            input_blobs = []
        input_lines = '\n'.join([simplejson.dumps(blob) for blob in input_blobs])

        with patch(
            'sys.stdin', StringIO(input_lines),
        ), patch(
            'sys.stdout', new_callable=StringIO,
        ) as mock_stdout, patch(
            'osxcollector.output_filters.util.config._config_file_path',
            return_value='./tests/output_filters/data/test_osxcollector_config.yaml',
        ):
            output_filter = create_filter()
            _run_filter(output_filter)
            output_lines = [line for line in mock_stdout.getvalue().split('\n') if len(line)]
            output_blobs = [simplejson.loads(line) for line in output_lines]

        if expected_output_blobs:
            assert len(output_blobs) == len(expected_output_blobs)

            # NOTE(review): index 0 is skipped here - presumably the first line
            # may carry filter-added metadata; confirm whether this is intended.
            for expected_blob, actual_blob in zip(expected_output_blobs[1:], output_blobs[1:]):
                assert_equal_sorted(expected_blob, actual_blob)

        return output_blobs

    def assert_key_added_to_blob(self, added_key, expected_values, input_blobs, output_blobs):
        """Verifies that a single key has been added to each input_blob with an expected value.

        Asserts that effectively:
        output_blobs = [input_blob.update(key=expected_value) for expected_value, input_blob in zip(expected_values, input_blobs)]

        Args:
            added_key: The name of the key that should have been added.
            expected_values: A list containing the expected value of the key for each input_blob
            input_blobs: A list of dicts that were the initial input.
            output_blobs: A list of dicts that are the output. NOTE: the added key is
                removed from these dicts as a side effect of the comparison.
        """

        if expected_values:
            actual_values = list(blob.get(added_key, None) for blob in output_blobs)
            for actual, expected in zip(actual_values, expected_values):
                assert_equal_sorted(actual, expected)

        # Minus the added key, the input should be unchanged
        for input_blob, output_blob in zip(input_blobs, output_blobs):
            if added_key in output_blob:
                del output_blob[added_key]
            assert_equal_sorted(input_blob, output_blob)

    def load_reports(self, filename):
        """Read a JSON fixture file and return the parsed object."""
        with open(filename, 'r') as fp:
            file_contents = fp.read()
        reports = simplejson.loads(file_contents)
        return reports


def assert_equal_sorted(a, b):
    """A version of T.assert_equal that ignores the ordering of lists or sets.

    Args:
        a: first item to compare
        b: next time to compare
    Raises:
        assert when items don't match
    """
    assert sort_for_comparison(a) == sort_for_comparison(b)


def sort_for_comparison(val):
    """Return a canonically ordered version of a list, set, or dict; other values unchanged.

    Args:
        val: A value of any type
    Returns:
        A more easily comparable version of the input
    """
    if isinstance(val, list):
        try:
            return sorted(val)
        except Exception:
            # Unorderable/mixed element types; compare as-is.
            return val
    elif isinstance(val, set):
        return sort_for_comparison(list(val))
    elif isinstance(val, dict):
        # BUG FIX: build a new dict rather than mutating the caller's dict
        # in place; the equality result is unchanged.
        return dict((key, sort_for_comparison(val[key])) for key in val)
    else:
        return val
58 | """ 59 | 60 | if expected_values: 61 | actual_values = list(blob.get(added_key, None) for blob in output_blobs) 62 | for actual, expected in zip(actual_values, expected_values): 63 | assert_equal_sorted(actual, expected) 64 | 65 | # Minus the added key, the input should be unchanged 66 | for input_blob, output_blob in zip(input_blobs, output_blobs): 67 | if added_key in output_blob: 68 | del output_blob[added_key] 69 | assert_equal_sorted(input_blob, output_blob) 70 | 71 | def load_reports(self, filename): 72 | with open(filename, 'r') as fp: 73 | file_contents = fp.read() 74 | reports = simplejson.loads(file_contents) 75 | return reports 76 | 77 | 78 | def assert_equal_sorted(a, b): 79 | """A version of T.assert_equal that ignores the ordering of lists or sets. 80 | 81 | Args: 82 | a: first item to compare 83 | b: next time to compare 84 | Raises: 85 | assert when items don't match 86 | """ 87 | assert sort_for_comparison(a) == sort_for_comparison(b) 88 | 89 | 90 | def sort_for_comparison(val): 91 | """Sort the input if it is a list or dict or set, return it unchanged otherwise. 92 | 93 | Args: 94 | val: A value of any type 95 | Returns: 96 | A more easily comparable version of the input 97 | """ 98 | if isinstance(val, list): 99 | try: 100 | return sorted(val) 101 | except Exception: 102 | return val 103 | elif isinstance(val, set): 104 | return sort_for_comparison(list(val)) 105 | elif isinstance(val, dict): 106 | for key in val: 107 | val[key] = sort_for_comparison(val[key]) 108 | return val 109 | else: 110 | return val 111 | -------------------------------------------------------------------------------- /osxcollector/output_filters/base_filters/chain.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # ChainFilter is a base class that passes each line through a chain of OutputFilters. 
class ChainFilter(OutputFilter):

    """ChainFilter is a base class that passes each line through a chain of OutputFilters.

    This is useful for constructing a single OutputFilter that does multiple things without
    having to run `python -m FilterOne | python -m FilterTwo | python -m FilterThree`.
    """

    def __init__(self, chain, **kwargs):
        """Wires up the `_next_link` property on every OutputFilter in the chain.

        Viewing the chain as a singly linked list makes it trivial to find the filter that
        runs after any given filter. The final link's `_next_link` is set to None.

        Args:
            chain: An enumerable of OutputFilters.
        """
        super(ChainFilter, self).__init__(**kwargs)

        for index, cur_link in enumerate(chain):
            follower_index = index + 1
            cur_link._next_link = chain[follower_index] if follower_index < len(chain) else None

        self._head_of_chain = chain[0]

    def filter_line(self, blob):
        """Each Line of OSXCollector output will be passed to filter_line.

        Hands the line to the first filter in the chain; whatever each filter emits
        flows on to its `_next_link`.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            A dict or None
        """
        return self._on_filter_line(blob, self._head_of_chain)

    def _on_filter_line(self, blob, link):
        """Run a blob through `link` and every filter after it.

        Stops as soon as a filter swallows the line or the chain is exhausted.

        Args:
            blob: A dict representing one line of output from OSXCollector.
            link: An OutputFilter
        Returns:
            A dict or None
        """
        while link and blob:
            blob = link.filter_line(blob)
            link = link._next_link
        return blob

    def end_of_lines(self):
        """Pass end_of_lines to the filter at the head of the chain.

        Returns:
            An enumerable of dicts
        """
        return self._on_end_of_lines(self._head_of_chain)

    def _on_end_of_lines(self, link):
        """Flush `link`, pushing everything it emits through the remainder of the chain.

        Args:
            link: An OutputFilter
        Returns:
            An enumerable of dicts
        """
        tail = link._next_link
        if not tail:
            return link.end_of_lines()

        flushed = []
        for blob in link.end_of_lines():
            pushed = self._on_filter_line(blob, tail)
            if pushed:
                flushed.append(pushed)

        trailing = self._on_end_of_lines(tail)
        if trailing:
            flushed.extend(trailing)

        return flushed

    def get_argument_parser(self):
        """Collects the ArgumentParsers from every OutputFilter in the chain.

        Returns:
            An `argparse.ArgumentParser`
        """
        collected = [
            parser for parser in (
                link.get_argument_parser() for link in self._iter_links()
            ) if parser
        ]

        own_parser = self._on_get_argument_parser()
        if own_parser:
            collected.append(own_parser)

        if not collected:
            return None
        return ArgumentParser(parents=collected, conflict_handler='resolve')

    def _iter_links(self):
        """Yield every OutputFilter in the chain, head first."""
        link = self._head_of_chain
        while link:
            yield link
            link = link._next_link

    def _on_get_argument_parser(self):
        """Returns an ArgumentParser with arguments for just this OutputFilter (not the contained chained OutputFilters).

        Returns:
            An `argparse.ArgumentParser`
        """
        return None
class LookupDomainsFilter(ThreatFeedFilter):

    """A class to lookup hashes using VirusTotal API."""

    def __init__(self, lookup_when=None, **kwargs):
        """Reads 'osxcollector_domains' from each line and writes results to 'osxcollector_vtdomain'.

        Args:
            lookup_when: An optional boolean predicate limiting which lines get lookups.
        """
        super(LookupDomainsFilter, self).__init__(
            'osxcollector_domains', 'osxcollector_vtdomain',
            lookup_when=lookup_when, name_of_api_key='virustotal', **kwargs
        )
        # Built with the blacklist machinery but used as a whitelist: matched domains are skipped.
        self._whitelist = create_blacklist(
            config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}),
        )

    def _lookup_iocs(self, all_iocs, resource_per_req=25):
        """Caches the VirusTotal info for a set of domains.

        Domains on a whitelist will be ignored.

        Args:
            all_iocs - a list of domains.
        Returns:
            A dict with domain as key and threat info as value
        """
        cache_file_name = config_get_deep('virustotal.LookupDomainsFilter.cache_file_name', None)
        vt = VirusTotalApi(self._api_key, resource_per_req, cache_file_name=cache_file_name)

        domains_to_lookup = [domain for domain in all_iocs if not self._whitelist.match_values(domain)]
        reports = vt.get_domain_reports(domains_to_lookup)

        threat_info = {}
        for domain, report in reports.items():
            if not report:
                continue
            trimmed_report = self._trim_domain_report(domain, report)
            if self._should_store_ioc_info(trimmed_report):
                threat_info[domain] = trimmed_report
        return threat_info

    def _should_store_ioc_info(self, trimmed_report):
        """Decide whether a report from VT is interesting enough to store in the output.

        A report is kept when any detection category reaches its threshold.

        Args:
            trimmed_report: A dict of data from VT
        Returns:
            boolean
        """
        thresholds = [
            ('detected_downloaded_samples', 3),
            ('detected_referrer_samples', 3),
            ('detected_communicating_samples', 3),
            ('detected_urls', 3),
        ]
        detections = trimmed_report.get('detections', {})
        return any(detections.get(key, 0) >= minimum for key, minimum in thresholds)

    def _trim_domain_report(self, domain, initial_report):
        """Reorganizes and compacts a VT domain report.

        Args:
            domain - string domain name
            initial_report - dict result of calling VirusTotalApi.get_domain_reports for the domain

        Returns:
            A reorganized and compacted dict.
        """
        sample_keys = [
            'undetected_referrer_samples',
            'undetected_communicating_samples',
            'detected_downloaded_samples',
            'detected_referrer_samples',
            'detected_communicating_samples',
            'detected_urls',
        ]
        categorization_keys = [
            'categories',
            'BitDefender category',
            'BitDefender domain info',
            'Websense ThreatSeeker category',
            'Webutation domain info',
            'WOT domain info',
            'TrendMicro category',
        ]
        just_copy_keys = [
            'response_code',
        ]

        trimmed_report = {
            # Collapse each sample list down to a simple count.
            'detections': {key: len(initial_report.get(key, [])) for key in sample_keys},
            'categorization': {key: initial_report[key] for key in categorization_keys if key in initial_report},
            'domain': domain,
        }
        for copy_key in just_copy_keys:
            if copy_key in initial_report:
                trimmed_report[copy_key] = initial_report[copy_key]

        return trimmed_report


def main():
    run_filter_main(LookupDomainsFilter)


if __name__ == '__main__':
    main()
class ThreatFeedFilter(OutputFilter):

    """A base class to find info on IOCs using some random API.

    Derived classes need only to implement _lookup_iocs()
    If necessary, they should implement _should_add_threat_info_to_blob.

    It is assumed that the API uses an api_key stored in the config.
    """

    # Blobs carrying more IOCs than this are passed through without lookup.
    # NOTE(review): presumably a guard against flooding the API with noisy blobs -- confirm.
    _MAX_IOCS_PER_BLOB = 10

    def __init__(self, ioc_key, output_key, lookup_when=None, name_of_api_key=None, **kwargs):
        """Configure the ThreatFeedFilter.

        Args:
            ioc_key: A string key to look for in each line of OSXCollector output.
                The value of this key is the potential IOC to lookup in a threat feed.
            output_key: A string key which is added to output lines and contains the result of threat feed lookups.
            lookup_when: A boolean function to call to decide whether to perform a lookup on a line.
                Use lookup_when to limit which IOCs are looked up.
            name_of_api_key: A string name of the key in the 'api_key' section of config.
        """
        super(ThreatFeedFilter, self).__init__(**kwargs)

        if name_of_api_key:
            self._api_key = config_get_deep('api_key.{0}'.format(name_of_api_key))

        self._lookup_when = lookup_when
        # Blobs held back until end_of_lines so lookups can be batched.
        self._blobs_with_iocs = []
        # Every distinct IOC seen across all held blobs.
        self.ioc_set = set()

        self._ioc_key = ioc_key
        self._output_key = output_key

    def _lookup_iocs(self, all_iocs):
        """Looks up threat info for IOCs.

        This is the only method a derived class needs to implement.

        Args:
            all_iocs: An enumerable of strings representing all IOCs to lookup.
        Returns:
            A dict of the form {ioc_value: threat_info}
        """
        raise NotImplementedError('Derived classes must implement _lookup_iocs')

    def _should_add_threat_info_to_blob(self, blob, threat_info):
        """Threat info is only added to a blob if this returns True.

        Override this method in derived classes to correlate threat_info and blob data.

        For example, the ShadowServer filter looks up SHA1 hashes. Since SHA1 hashes for different files collide, the ShadowServer
        filter overrides _should_add_threat_info_to_blob and verifies that the filename in the blob matches the filename in the threat
        info.

        Args:
            blob: A dict of data representing a line of output from OSXCollector
            threat_info: A dict of threat info.
        Returns:
            boolean
        """
        return True

    def filter_line(self, blob):
        """Accumulate IOCs to lookup with the ThreatFeed.

        Lines without the IOC key, rejected by lookup_when, or carrying too many IOCs
        pass through unchanged; all others are held until end_of_lines.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            A dict or None
        """
        if self._ioc_key not in blob or (self._lookup_when and not self._lookup_when(blob)):
            return blob

        ioc_list = blob[self._ioc_key]
        if isinstance(ioc_list, six.string_types):
            ioc_list = [ioc_list]

        if len(ioc_list) > self._MAX_IOCS_PER_BLOB:
            return blob

        for ioc in ioc_list:
            if ioc:
                self.ioc_set.add(ioc)

        self._blobs_with_iocs.append(blob)
        return None

    def end_of_lines(self):
        """Performs threat feed lookup on the IOCs and adds output to the stored Lines.

        Returns:
            An enumerable of dicts
        """
        if self.ioc_set:
            self._add_threat_info_to_blobs()
        return self._blobs_with_iocs

    def _add_threat_info_to_blobs(self):
        """Looks up all accumulated IOCs and attaches threat info to the held blobs.

        Results are appended under self._output_key, but only for (blob, info) pairs
        approved by _should_add_threat_info_to_blob.
        """
        # Sort for a deterministic lookup order before handing off to the feed.
        self.ioc_set = sorted(self.ioc_set)
        all_threat_info = self._lookup_iocs(self.ioc_set)
        for blob in self._blobs_with_iocs:
            ioc_list = blob[self._ioc_key]
            if isinstance(ioc_list, six.string_types):
                ioc_list = [ioc_list]

            for ioc in ioc_list:
                info = all_threat_info.get(ioc)
                if not info:
                    continue

                if self._should_add_threat_info_to_blob(blob, info):
                    blob.setdefault(self._output_key, []).append(info)
"b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7", "total": 54}], "response_code": 1, "detected_communicating_samples": [{"positives": 24, "date": "2014-05-30 02:31:16", "sha256": "31ce992017ae628e59fb0599330ba18483777f6f281c660b036649825296a3cb", "total": 52}, {"positives": 34, "date": "2014-04-29 23:16:50", "sha256": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302", "total": 51}, {"positives": 15, "date": "2013-07-09 06:23:40", "sha256": "6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360", "total": 46}, {"positives": 34, "date": "2014-04-29 23:16:46", "sha256": "7db46a7eb4baeeb342d37a6fc05910adeed339450701c600ad973a77aa28b121", "total": 51}, {"positives": 15, "date": "2013-07-09 06:23:00", "sha256": "f33c27745f2bd87344be790465ef984a972fd539dc83bd4f61d4242c607ef1ee", "total": 46}], "undetected_communicating_samples": [], "detected_urls": [{"total": 23, "positives": 6, "url": "www.example.com/bingo", "scan_date": "2015-01-23 14:27"}, {"total": 25, "positives": 23, "url": "www.example.com/bongo", "scan_date": "2015-01-23 14:30"}, {"total": 14, "positives": 12, "url": "www.example.com/dingo", "scan_date": "2015-01-23 14:20"}, {"total": 20, "positives": 18, "url": "www.example.com/dongo", "scan_date": "2015-01-23 14:22"}, {"total": 20, "positives": 19, "url": "www.example.com/orange", "scan_date": "2015-01-23 14:28"}], "undetected_referrer_samples": []}, "good.example.com": {"detected_downloaded_samples": [], "detected_referrer_samples": [], "response_code": 0, "detected_communicating_samples": [], "undetected_communicating_samples": [], "detected_urls": [], "undetected_referrer_samples": []}, "good.example.co.uk": {"detected_downloaded_samples": [], "detected_referrer_samples": [], "response_code": 0, "detected_communicating_samples": [], "undetected_communicating_samples": [], "detected_urls": [], "undetected_referrer_samples": []}, "evil.example.co.uk": {"detected_downloaded_samples": [{"positives": 0, "date": 
"2014-07-29 09:46:22", "sha256": "3044d232d1815c9e1584f406b67c3331c0eaebd304cd280d578e75368e5b0c3a", "total": 54}, {"positives": 0, "date": "2013-05-16 08:57:51", "sha256": "f33c27745f2bd87344be790465ef984a972fd539dc83bd4f61d4242c607ef1ee", "total": 46}, {"positives": 0, "date": "2013-05-03 01:16:53", "sha256": "841f2c2faadf6a28aeb4fd29d7cd3a4156af20a68bd9ad7c2f41db64db06015f", "total": 46}, {"positives": 0, "date": "2013-04-27 20:50:18", "sha256": "02900e181b1941c79c73dadddbd03a8f6f974ca884baf5860cd5a54ac4fb97e1", "total": 46}], "detected_referrer_samples": [{"positives": 16, "sha256": "0de277bca1df07e691c865c84a0dfd849ac0124fab8f9ccde9c28fb3abe24abc", "total": 54}, {"positives": 16, "sha256": "ca34d60f2c1dc20932f2fb8adce1be2a8b9389054d67343ea4c86b9cc9ffabb0", "total": 54}, {"positives": 16, "sha256": "aefddcb96b75fe89195dbbfdd2c373f72492d5f71903eeeaae4afa1f71865515", "total": 54}, {"positives": 15, "sha256": "bee4aecc415e23328b9139b9abdb34a22b36e28da13c75aa8699e07fd7b10307", "total": 53}, {"positives": 16, "sha256": "c6065d39610471d970242a303c3e8905a7200c8aa2fd4bc69a95a3b618df4cb9", "total": 54}], "response_code": 1, "detected_communicating_samples": [{"positives": 24, "date": "2014-05-30 02:31:16", "sha256": "31ce992017ae628e59fb0599330ba18483777f6f281c660b036649825296a3cb", "total": 52}, {"positives": 34, "date": "2014-04-29 23:16:50", "sha256": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302", "total": 51}, {"positives": 15, "date": "2013-07-09 06:23:30", "sha256": "6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360", "total": 46}, {"positives": 39, "date": "2014-04-29 23:16:50", "sha256": "b22eb5ff3793b551470f4758e4cff656b5168e10cfce24ac51ebd5a8a7fffe4a", "total": 51}, {"positives": 35, "date": "2013-07-09 06:23:30", "sha256": "26692ab17432ad292810c31ce4cee9e43c0166e23c2b05950751d52fc1decbd2", "total": 46}], "undetected_communicating_samples": [], "detected_urls": [{"total": 61, "positives": 3, "url": 
class OutputFilter(object):

    """Base class for transforming OSXCollector output; every filter must derive from it.

    Data flows through an OutputFilter like so:
    - Every line of OSXCollector output is handed to OutputFilter.filter_line
    - Once all lines have been fed through, OutputFilter.end_of_lines is called exactly once

    Filters typically work in one of two ways:
    - A line-at-a-time filter transforms and returns each blob directly from filter_line.
    - A whole-input filter stashes blobs during filter_line, then bulk operates on all of
      them when end_of_lines fires.

    OutputFilters use the words 'line' or 'blob' to refer to OSXCollector output.
    """

    def __init__(self, **kwargs):
        """Skeleton constructor.

        Args:
            kwargs: Variable arguments are used to pass filter specific args to OutputFilters.
        """
        pass

    def filter_line(self, blob):
        """Handle a single line of OSXCollector output.

        Return the blob (modified or unmodified) to pass it along, or return
        nothing to swallow the line entirely.

        Args:
            blob: A dict representing one line of output from OSXCollector.
        Returns:
            A dict or None
        """
        return blob

    def end_of_lines(self):
        """Hook invoked once after every line has been fed to filter_line.

        Gives the filter a chance to do processing that requires having seen
        the complete input.

        Returns:
            An enumerable of dicts
        """
        return []

    def get_argument_parser(self):
        """Describes commandline arguments for this OutputFilter.

        The names of the `dest` param for the argument in the ArgumentParser must match the name of positional or
        named arguments in `__init__`

        Returns:
            An `argparse.ArgumentParser`
        """
        return None
def _run_filter(output_filter, input_stream=None, output_stream=None, *args, **kwargs):
    """Feeds stdin to an instance of OutputFilter and spews to stdout.

    Args:
        output_filter: An instance of OutputFilter.
        input_stream: Where to read input from. Defaults to sys.stdin.
        output_stream: Where to write output to. Defaults to sys.stdout.
    """
    input_stream = input_stream or sys.stdin
    output_stream = output_stream or sys.stdout

    def _emit(blob):
        # Serialize one line of output, newline terminated.
        output_stream.write(simplejson.dumps(blob))
        output_stream.write('\n')

    for json_string in _unbuffered_input(input_stream):
        try:
            blob = simplejson.loads(json_string)
        except simplejson.JSONDecodeError as e:
            # Malformed input lines are reported and skipped, never fatal.
            write_exception(e)
            continue

        filtered = output_filter.filter_line(blob)
        if filtered:
            _emit(filtered)

    for blob in output_filter.end_of_lines():
        _emit(blob)

    output_stream.flush()
def create_blacklist(config_chunk, data_feeds=None):
    """Reads the config and builds a Blacklist.

    The blacklist config is sufficiently complex that much of this method deals with simply validating config

    Args:
        config_chunk: A dict of config for building the blacklist
        data_feeds: Dict of generator functions returning the blacklist data
    Returns:
        A Blacklist
    Raises:
        MissingConfigError - when required key does not exist.
    """
    # A mutable default argument would be shared across calls; normalize None instead.
    if data_feeds is None:
        data_feeds = {}

    required_keys = ['blacklist_name', 'blacklist_keys']
    if not all(key in config_chunk for key in required_keys):
        raise MissingConfigError('Blacklist config is missing a required key.\nRequired keys are: {0}'.format(repr(required_keys)))

    if not isinstance(config_chunk['blacklist_keys'], list):
        raise MissingConfigError('The value of \'blacklist_keys\' in Blacklist config must be a list')

    blacklist_name = config_chunk.get('blacklist_name')
    blacklist_keys = config_chunk.get('blacklist_keys')
    blacklist_file_path = config_chunk.get('blacklist_file_path')
    blacklist_data_feed = config_chunk.get('blacklist_data_feed')
    # The blacklist data comes from exactly one of two sources: a file on disk
    # or a named generator supplied through data_feeds.
    if blacklist_file_path:
        if not os.path.exists(blacklist_file_path):
            raise MissingConfigError('The blacklist file {} does not exist'.format(blacklist_file_path))
        blacklist_data_generator = _read_blacklist_file(blacklist_file_path)
    elif blacklist_data_feed:
        if blacklist_data_feed not in data_feeds:
            raise MissingConfigError('Data feed {} not found among provided generators'.format(blacklist_data_feed))
        blacklist_data_generator = data_feeds[blacklist_data_feed]()
    else:
        raise MissingConfigError('Blacklist config is missing a data input.\nEither select a file or a generator object')
    blacklist_is_regex = config_chunk.get('blacklist_is_regex', False)
    blacklist_is_domains = config_chunk.get('blacklist_is_domains', False)
    return Blacklist(
        blacklist_name, blacklist_keys, blacklist_data_generator,
        blacklist_is_regex, blacklist_is_domains,
    )
def _read_blacklist_file(filepath):
    """Yield non-empty, non-comment lines from a blacklist file."""
    with open(filepath, 'r') as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith('#'):
                yield line


class Blacklist(object):

    def __init__(self, name, blacklisted_keys, input_generator, is_regex=False, is_domains=False):
        """Build a blacklist from the data in the blacklist file.

        Built in smarts make it easy to build a blacklist of domains

        Args:
            name: A string display name for this blacklist.
            blacklisted_keys: A list of string keys to inspect in each blob.
            input_generator: An iterable of blacklisted values.
            is_regex: When True, values are treated as regular expressions.
            is_domains: When True, values are treated as domain names (implies regex matching).
        Raises:
            MissingConfigError - when required config key does not exist.
        """
        self._name = name
        self._blacklisted_keys = blacklisted_keys
        self._is_domains = is_domains
        self._is_regex = is_regex or self._is_domains
        # Maps the matching term (plain string or compiled regex) to the
        # original blacklisted value, used as the display name on a match.
        self._blacklisted_values = dict(
            self._convert_to_matching_term(val) for val in input_generator if val
        )
        # _convert_to_matching_term returns (None, None) for unusable values; drop that entry.
        self._blacklisted_values.pop(None, None)

    def _convert_to_matching_term(self, blacklisted_value):
        """Convert a blacklisted_value to a matching term.

        Args:
            blacklisted_value - string of value on a blacklist
        Returns:
            A (matching term, display name) tuple. The matching term is a compiled regex
            when this blacklist matches domains or regexes, the plain string otherwise.
            Returns (None, None) when the value cannot be used.
        """
        display_name = blacklisted_value

        if self._is_domains:
            try:
                domain = clean_domain(blacklisted_value)
            except BadDomainError:
                if not isinstance(blacklisted_value, six.text_type):
                    blacklisted_value = blacklisted_value.decode('utf8')
                logging.warning(
                    u'Blacklisted value "{0}" cannot be resolved as a domain name'
                    .format(blacklisted_value),
                )
                return None, None

            # Match the domain itself and any of its subdomains.
            blacklisted_value = re.compile(r'^(.+\.)*{0}$'.format(re.escape(domain)))

        elif self._is_regex:
            blacklisted_value = re.compile(blacklisted_value)

        return blacklisted_value, display_name

    def match_line(self, blob):
        """Determines whether a line matches the blacklist.

        Returns:
            String of the matched term if any value matches, None otherwise
        """
        for key in self._blacklisted_keys:
            values = DictUtils.get_deep(blob, key)
            if not values:
                continue

            matching_term = self.match_values(values)
            if matching_term:
                return matching_term

        return None

    def match_values(self, values):
        """Determines whether an array of values match the blacklist.

        Returns:
            String of the matched term if any value matches, None otherwise
        """
        if not isinstance(values, list):
            values = [values]

        for val in values:
            # BUGFIX: the old implementation returned unconditionally inside the loop,
            # so only the first value was ever checked. Now every value is tried until
            # one matches.
            if self._is_regex or self._is_domains:
                matched = next(
                    (
                        name for term, name in self._blacklisted_values.items() if term.search(val)
                    ), None,
                )
            else:
                matched = self._blacklisted_values.get(val, None)
            if matched:
                return matched
        return None

    @property
    def name(self):
        # Display name given to this blacklist at construction time.
        return self._name
        Returns:
            String of matched term if the value matches, None otherwise
        """
        if not isinstance(values, list):
            values = [values]

        # NOTE(review): both branches return on the first iteration, so only
        # values[0] is ever consulted — confirm whether later values should
        # also be checked.
        for val in values:
            if self._is_regex or self._is_domains:
                return next(
                    (
                        name for term, name in six.iteritems(self._blacklisted_values) if term.search(val)
                    ), None,
                )
            else:
                return self._blacklisted_values.get(val, None)
        return None

    @property
    def name(self):
        # Human-readable blacklist name from the config chunk.
        return self._name
-------------------------------------------------------------------------------- /tests/output_filters/find_domains_test.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from osxcollector.output_filters.find_domains import FindDomainsFilter
from tests.output_filters.run_filter_test import RunFilterTest


class TestFindDomainsFilter(RunFilterTest):

    """Tests many variants of blobs with a domain in them."""

    def _run_test(self, input_blob, expected_domains):
        # Runs FindDomainsFilter over a single blob and checks the
        # 'osxcollector_domains' key it adds (None means no key expected).
        output_blobs = self.run_test(FindDomainsFilter, [input_blob])
        self.assert_key_added_to_blob('osxcollector_domains', [expected_domains], [input_blob], output_blobs)

    def test_no_domain(self):
        input_blob = {'fungo': 'kidney'}
        self._run_test(input_blob, None)

    def test_tld(self):
        # Both the registered domain and the full hostname are reported.
        input_blob = {'fungo': 'http://www.example.com'}
        expected_domains = ['example.com', 'www.example.com']
        self._run_test(input_blob, expected_domains)

    def test_bare_domain(self):
        input_blob = {'fungo': 'http://example.com'}
        expected_domains = ['example.com']
        self._run_test(input_blob, expected_domains)

    def test_uk_domain(self):
        # Multi-label public suffix (co.uk) is kept with the registered domain.
        input_blob = {'fungo': 'http://www.example.co.uk'}
        expected_domains = ['example.co.uk', 'www.example.co.uk']
        self._run_test(input_blob, expected_domains)

    def test_info_domain(self):
        input_blob = {'fungo': 'http://www.example.info'}
        expected_domains = ['example.info', 'www.example.info']
        self._run_test(input_blob, expected_domains)

    def test_ftp_scheme(self):
        # Non-HTTP schemes still yield domains.
        input_blob = {'fungo': 'ftp://example.com'}
        expected_domains = ['example.com']
        self._run_test(input_blob, expected_domains)

    def test_domain_in_path(self):
        # A URL embedded in another URL's query string is extracted too.
        input_blob = {'fungo': 'http://www.example.com/bango?p=http://www.dingo.com'}
        expected_domains = [
            'dingo.com',
            'example.com',
            'www.dingo.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_quoted_domain(self):
        # URL-encoded scheme separator ('%3A' == ':') is handled.
        input_blob = {'fungo': 'http%3A//www.example.com'}
        expected_domains = [
            'example.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_quoted_in_path(self):
        input_blob = {'fungo': 'http://www.example.com/bango?p=http%3A//www.dingo.co.uk'}
        expected_domains = [
            'dingo.co.uk',
            'example.com',
            'www.dingo.co.uk',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_domain_in_key(self):
        # Domains are found in dict keys, not just values.
        input_blob = {'http://www.example.com': 'zungo'}
        expected_domains = [
            'example.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_list(self):
        input_blob = {'fungo': ['http://www.example.com', 'https://www.zzz.sample.org']}
        expected_domains = [
            'example.com',
            'sample.org',
            'www.example.com',
            'www.zzz.sample.org',
        ]
        self._run_test(input_blob, expected_domains)

    def test_dict(self):
        input_blob = {'fungo': {'http://www.example.com': 'https://www.zzz.sample.org'}}
        expected_domains = [
            'example.com',
            'sample.org',
            'www.example.com',
            'www.zzz.sample.org',
        ]

        self._run_test(input_blob, expected_domains)

    def test_list_of_dict(self):
        # Nested containers (list of dicts) are walked recursively.
        input_blob = {
            'fungo': [
                {'http://www.example.com': 'https://www.zzz.sample.org'},
                {'a': 'https://www.dingo.co.uk'},
            ],
        }
        expected_domains = [
            'dingo.co.uk',
            'example.com',
            'sample.org',
            'www.dingo.co.uk',
            'www.example.com',
            'www.zzz.sample.org',
        ]
        self._run_test(input_blob, expected_domains)

    def test_tokenizing(self):
        # URLs wrapped in punctuation, JSON, quotes, or trailing dots are
        # still tokenized out of the surrounding noise.
        input_blob = {
            'fungo': [
                '{"bar":\'http://www.example.com\'}',
                '(http://www.example2.com)',
                ';http://www.example3.com\n',
                'http://example4.com.',
                '#@^%$*http://www.xxx.yyy.zzz.example.com/fungo/digno',
            ],
        }
        expected_domains = [
            'example.com',
            'example2.com',
            'example3.com',
            'example4.com',
            'www.example.com',
            'www.example2.com',
            'www.example3.com',
            'www.xxx.yyy.zzz.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_special_keys_domain(self):
        # Values under special keys like 'host' are treated as domains even
        # without a URL scheme.
        input_blob = {'host': 'www.example.com'}
        expected_domains = [
            'example.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_no_dupes(self):
        # The same domain found in many places is reported only once.
        input_blob = {
            'host': 'www.example.com',
            'another_thing': 'http://www.example.com',
            'https://www.example.com': True,
            'dictation': {'threepete': ['ftp://example.com', 'http://example.com', 'https://www.example.com']},
        }
        expected_domains = [
            'example.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_special_keys_url(self):
        input_blob = {'host': 'https://www.example.com'}
        expected_domains = [
            'example.com',
            'www.example.com',
        ]
        self._run_test(input_blob, expected_domains)

    def test_not_valid_url(self):
        # Garbage that merely resembles a URL must not produce domains.
        input_blob = {'term': 'https://blah.bork.yarn.dorn-duh-%5DYYYY.WW,boo:bloom,fizz:(ault:(akh_ugh:!it,pook:NOOM)),rort:!(\'@tuht\',dort))'}
        expected_domains = None
        self._run_test(input_blob, expected_domains)
-------------------------------------------------------------------------------- /tests/output_filters/related_files_test.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from osxcollector.output_filters.related_files import RelatedFilesFilter
from tests.output_filters.run_filter_test import assert_equal_sorted
from tests.output_filters.run_filter_test import RunFilterTest


def when_anytime(blob):
    """A simple when that always returns True"""
    return True


class RelatedFilesFilterTest(RunFilterTest):

    """Creates a RelatedFilesFilter, calls run_test, and performs additional filter specific validation."""

    def teardown_method(self, method):
        # Drop the filter handle kept by _run_test so state never leaks
        # between tests.
        self._output_filter = None

    def _run_test(
        self, input_blobs=None, when=when_anytime, file_terms=None, expected_terms=None,
        expected_usernames=None, expected_is_related=None,
    ):
        """Creates a RelatedFilesFilter, calls run_test, and performs additional filter specific validation.

        Args:
            input_blobs: An enumerable of dicts
            when: A callable when to init the RelatedFilesFilter with
            file_terms: An enumerable of strings to init the RelatedFilesFilter with
            expected_terms: The expected final value of RelatedFilesFilter.terms
            expected_usernames: The expected final value of RelatedFilesFilter.usernames
            expected_is_related: An enumerable of the expected value of 'osxcollector_related' for each output_blob
        """

        def create_related_files_filter():
            # Factory keeps a handle on the constructed filter so the
            # assertions below can inspect its .terms/.usernames afterwards.
            self._output_filter = RelatedFilesFilter(when=when, file_terms=file_terms)
            return self._output_filter

        output_blobs = self.run_test(create_related_files_filter, input_blobs=input_blobs)
        if expected_terms:
            assert_equal_sorted(expected_terms, self._output_filter.terms)
        if expected_usernames:
            assert_equal_sorted(expected_usernames, self._output_filter.usernames)
        if expected_is_related:
            self.assert_key_added_to_blob('osxcollector_related', expected_is_related, input_blobs, output_blobs)
        return output_blobs


class TestCreateTerms(RelatedFilesFilterTest):

    """Focuses on testing that terms are properly created."""

    def test_single_term(self):
        file_terms = ['one_word']
        expected = ['one_word']
        self._run_test(file_terms=file_terms, expected_terms=expected)

    def test_multi_terms(self):
        file_terms = ['one_word', 'pants', 'face']
        expected = ['one_word', 'pants', 'face']
        self._run_test(file_terms=file_terms, expected_terms=expected)

    def test_split_terms(self):
        # A path is split on '/' into individual terms.
        file_terms = ['/ivanlei/source/osxcollector']
        expected = ['ivanlei', 'source', 'osxcollector']
        self._run_test(file_terms=file_terms, expected_terms=expected)

    def test_whitelist_terms(self):
        # Common path components ('Users', 'bin', 'python') are whitelisted away.
        file_terms = ['/Users/ivanlei/source/osxcollector', '/Users/ivanlei/virtual_envs/osxcollector/bin/python']
        expected = ['ivanlei', 'source', 'osxcollector', 'virtual_envs']
        self._run_test(file_terms=file_terms, expected_terms=expected)

    def test_whitelist_username_terms(self):
        # Usernames discovered in the input blobs are excluded from terms.
        file_terms = ['/Users/ivanlei/source/osxcollector', '/Users/ivanlei/virtual_envs/osxcollector/bin/python']
        expected = ['source', 'osxcollector', 'virtual_envs']
        blob = {'osxcollector_username': 'ivanlei'}
        expected_usernames = ['ivanlei']

        self._run_test(input_blobs=[blob], file_terms=file_terms, expected_terms=expected, expected_usernames=expected_usernames)


class TestFindUserNames(RelatedFilesFilterTest):

    """Focuses on ensuring that usernames are found so they can be ignored as terms."""

    def test_find_username(self):
        blob = {'osxcollector_username': 'bob'}
        expected_usernames = ['bob']
        self._run_test(input_blobs=[blob], expected_usernames=expected_usernames)

    def test_find_multiple_username(self):
        # Duplicate usernames collapse; blobs without the key are ignored.
        blobs = [
            {'osxcollector_username': 'bob'},
            {'osxcollector_username': 'jim'},
            {'osxcollector_username': 'bob'},
            {'banana': 'pants'},
        ]
        expected_usernames = ['bob', 'jim']
        self._run_test(input_blobs=blobs, expected_usernames=expected_usernames)


class TestRelatedFilesFilter(RelatedFilesFilterTest):

    """Tests the overall functionality of the filter."""

    def test_single_term(self):
        input_blobs = [
            {'banana': '/var/bin/magic_value'},
        ]
        expected_is_related = [
            {'files': ['magic_value']},
        ]
        file_terms = ['magic_value']
        self._run_test(input_blobs=input_blobs, file_terms=file_terms, expected_is_related=expected_is_related)

    def test_multi_term(self):
        input_blobs = [
            {'avocado': '/var/bin/magic/hat'},
            {'mango': '/var/bin/value/hat'},
            {'shandy': '/var/bin/magic/value/hat'},
        ]
        expected_is_related = [
            {'files': ['magic']},
            {'files': ['value']},
            {'files': ['magic', 'value']},
        ]
        file_terms = ['magic', 'value']
        self._run_test(input_blobs=input_blobs, file_terms=file_terms, expected_is_related=expected_is_related)

    def test_split_term(self):
        # A single 'magic/value' term behaves like the two terms it splits into.
        input_blobs = [
            {'avocado': '/var/bin/magic/hat'},
            {'mango': '/var/bin/value/hat'},
            {'shandy': '/var/bin/magic/value/hat'},
        ]
        expected_is_related = [
            {'files': ['magic']},
            {'files': ['value']},
            {'files': ['magic', 'value']},
        ]
        file_terms = ['magic/value']
        self._run_test(input_blobs=input_blobs, file_terms=file_terms, expected_is_related=expected_is_related)

    def test_discover_term(self):
        # With no file_terms given, terms are discovered from 'file_path' blobs.
        input_blobs = [
            {'file_path': '/var/bin/magic/value'},
            {'carrot': '/var/bin/magic/hat'},
            {'apple': '/var/bin/value/hat'},
            {'lemmon': '/lime/rickey'},
        ]
        expected_is_related = [
            {'files': ['magic', 'value']},
            {'files': ['magic']},
            {'files': ['value']},
            None,
        ]
        self._run_test(input_blobs=input_blobs, expected_is_related=expected_is_related)

    def test_skip_username(self):
        # A discovered term that equals a username ('magic') is skipped.
        input_blobs = [
            {'file_path': '/var/bin/magic/value', 'osxcollector_username': 'magic'},
            {'carrot': '/var/bin/magic/hat'},
            {'apple': '/var/bin/value/hat'},
            {'lemmon': '/lime/rickey'},
        ]
        expected_is_related = [
            {'files': ['value']},
            None,
            {'files': ['value']},
            None,
        ]
        self._run_test(input_blobs=input_blobs, expected_is_related=expected_is_related)

    def test_when(self):
        # Only blobs matching 'when' contribute discovered terms.
        def when_binbing(blob):
            return 'bingbing' in blob

        input_blobs = [
            {'file_path': '/var/bin/magic', 'bingbing': True, 'osxcollector_username': 'hat'},
            {'file_path': '/var/bin/value'},
            {'carrot': '/var/bin/magic/hat'},
            {'apple': '/var/bin/value/hat'},
            {'lemmon': '/lime/rickey'},
        ]
        expected_is_related = [
            {'files': ['magic']},
            None,
            {'files': ['magic']},
            None,
            None,
        ]
        self._run_test(input_blobs=input_blobs, when=when_binbing, expected_is_related=expected_is_related)
-------------------------------------------------------------------------------- /tests/output_filters/util/blacklist_test.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from copy import deepcopy

import pytest
from mock import call
from mock import patch

from osxcollector.output_filters.exceptions import MissingConfigError
from osxcollector.output_filters.util.blacklist import create_blacklist


class TestCreateBlacklist:

    @pytest.fixture(scope='function', autouse=True)
    def file_contents(self):
        # Patches the blacklist file reader; yields the patch object so tests
        # can override .return_value with their own file contents.
        file_contents = [
            # Fruits
            'apple', 'banana',

            # Cars
            'corolla', 'datsun',
        ]
        with patch(
            'osxcollector.output_filters.util.blacklist._read_blacklist_file',
            return_value=file_contents,
        ) as file_contents:
            yield file_contents

    @pytest.fixture(scope='function')
    def blacklist_data(self):
        # Minimal valid config chunk: only the required keys.
        yield {
            'blacklist_name': 'only_required',
            'blacklist_keys': ['fruit_name'],
            'blacklist_file_path': '/who/cares/I/mock/this.txt',
        }

    @pytest.fixture(scope='module', autouse=True)
    def mock_exists(self):
        # The blacklist file path is never actually on disk.
        with patch('os.path.exists', return_value=True):
            yield

    def test_only_required_keys(self, blacklist_data):
        blacklist = create_blacklist(blacklist_data)
        assert blacklist.name == blacklist_data['blacklist_name']
        assert blacklist._blacklisted_keys == blacklist_data['blacklist_keys']
        assert not blacklist._is_regex
        assert not blacklist._is_domains

    def test_missing_required_keys(self, blacklist_data):
        # Removing any single required key should raise MissingConfigError.
        for key in blacklist_data:
            _blacklist_data = deepcopy(blacklist_data)
            del _blacklist_data[key]
            with pytest.raises(MissingConfigError):
                create_blacklist(_blacklist_data)

    def test_missing_data_input(self, blacklist_data):
        # With neither a file path nor a data feed there is no blacklist source.
        blacklist_data.pop('blacklist_file_path')
        with pytest.raises(MissingConfigError):
            create_blacklist(blacklist_data)

    def test_required_with_two_keys(self, blacklist_data):
        blacklist_data['blacklist_keys'] = ['fruit_name', 'car_name']
        blacklist = create_blacklist(blacklist_data)
        assert blacklist._blacklisted_keys == blacklist_data['blacklist_keys']

    def test_keys_not_list(self, blacklist_data):
        # 'blacklist_keys' must be a list, not a bare string.
        blacklist_data['blacklist_keys'] = 'fruit_name'
        with pytest.raises(MissingConfigError):
            create_blacklist(blacklist_data)

    def test_is_regex(self, blacklist_data):
        blacklist_data['blacklist_is_regex'] = True
        blacklist = create_blacklist(blacklist_data)
        assert blacklist._is_regex

    def test_is_domains(self, blacklist_data, file_contents):
        file_contents.return_value = ['apple.com', 'banana.org']
        # Setting 'blacklist_is_domains' overrides 'blacklist_is_regex'
        blacklist_data['blacklist_is_domains'] = True
        blacklist_data['blacklist_is_regex'] = False
        blacklist = create_blacklist(blacklist_data)
        assert blacklist._is_regex
        assert blacklist._is_domains

    # TODO: Refactor OSXCollector Output Filters to work with unicode-based domains
    def test_bad_domains_unicode(self, blacklist_data):
        # Each unresolvable unicode domain logs one warning.
        unicode_domain_1 = 'yelp.公司'
        unicode_domain_2 = 'www.Yülp.tld'
        unicode_domain_3 = 'иelф.р'
        unicode_domains = [unicode_domain_1, unicode_domain_2, unicode_domain_3]
        blacklist_data['blacklist_is_domains'] = True
        with patch(
            'osxcollector.output_filters.util.blacklist._read_blacklist_file',
            return_value=unicode_domains,
        ):
            with patch('logging.warning', autospec=True) as patched_logging_warning:
                create_blacklist(blacklist_data)
        assert patched_logging_warning.call_count == 3

        calls = [
            call(
                u'Blacklisted value "{0}" cannot be resolved as a domain name'
                .format(unicode_domain),
            ) for unicode_domain in unicode_domains
        ]
        assert calls == patched_logging_warning.call_args_list

    def test_bad_domains(self, blacklist_data):
        # Non-domain values in a domains blacklist log warnings and never match.
        blacklist_data['blacklist_is_domains'] = True
        with patch('logging.warning', autospec=True) as patched_logging_warning:
            blacklist = create_blacklist(blacklist_data)
        assert patched_logging_warning.call_count == 4
        calls = [
            call('Blacklisted value "apple" cannot be resolved as a domain name'),
            call('Blacklisted value "banana" cannot be resolved as a domain name'),
            call('Blacklisted value "corolla" cannot be resolved as a domain name'),
            call('Blacklisted value "datsun" cannot be resolved as a domain name'),
        ]
        assert calls == patched_logging_warning.call_args_list

        blob = {'fruit_name': 'apple.com'}
        assert not blacklist.match_line(blob)

    def test_match_fruit(self, blacklist_data):
        # Only values under a configured blacklist key can match.
        good_blobs = [
            {'fruit_name': 'apple'},
            {'fruit_name': 'banana'},
        ]
        bad_blobs = [
            {'car_name': 'corolla'},
            {'car_name': 'datsun'},
        ]

        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    def test_match_fruit_and_cars(self, blacklist_data):
        good_blobs = [
            {'fruit_name': 'apple'},
            {'fruit_name': 'banana'},
            {'car_name': 'corolla'},
            {'car_name': 'datsun'},
        ]

        blacklist_data['blacklist_keys'] = ['fruit_name', 'car_name']
        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)

    def test_match_fruit_regex(self, blacklist_data, file_contents):
        good_blobs = [
            {'fruit_name': 'apple'},
        ]

        bad_blobs = [
            {'fruit_name': 'banana'},
            {'car_name': 'corolla'},
            {'car_name': 'datsun'},
        ]

        blacklist_data['blacklist_is_regex'] = True
        file_contents.return_value = ['app.*', 'ban.+org']
        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    def test_match_domains(self, blacklist_data, file_contents):
        # A domain blacklist matches the domain and its subdomains only —
        # not lookalikes, other TLDs, or other keys.
        good_blobs = [
            {'fruit_name': 'apple.com'},
            {'fruit_name': 'www.apple.com'},
            {'fruit_name': 'www.another-thing.apple.com'},
        ]

        bad_blobs = [
            {'fruit_name': 'cran-apple.com'},
            {'fruit_name': 'apple.org'},
            {'fruit_name': 'apple.com.jp'},
            {'car_name': 'apple.com'},
        ]
        blacklist_data['blacklist_is_domains'] = True
        file_contents.return_value = ['apple.com']
        blacklist = create_blacklist(blacklist_data)
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    def test_match_domains_data_feed(self, blacklist_data):
        # Same matching behavior when domains come from a data feed generator
        # instead of a file.
        good_blobs = [
            {'fruit_name': 'apple.com'},
            {'fruit_name': 'www.apple.com'},
            {'fruit_name': 'www.another-thing.apple.com'},
        ]

        bad_blobs = [
            {'fruit_name': 'cran-apple.com'},
            {'fruit_name': 'apple.org'},
            {'fruit_name': 'apple.com.jp'},
            {'car_name': 'apple.com'},
        ]
        blacklist_data['blacklist_is_domains'] = True
        blacklist_data['blacklist_data_feed'] = 'domain_list'
        blacklist_data.pop('blacklist_file_path')

        def mock_generator():
            for domain in ['apple.com']:
                yield domain

        blacklist = create_blacklist(
            blacklist_data, {'domain_list':
            mock_generator},
        )
        for blob in good_blobs:
            assert blacklist.match_line(blob)
        for blob in bad_blobs:
            assert not blacklist.match_line(blob)

    # TODO: Refactor OSXCollector Output Filters to work with unicode-based domains
    def test_log_unicode_domain(self):
        config_chunk = {
            'blacklist_name': 'Unicode domain',
            'blacklist_keys': ['visited_domain'],
            'blacklist_file_path': 'not_really_a_blacklist.txt',
            'blacklist_is_domains': True,
        }
        file_contents = ['Bücher.tld', 'yelp.公司', 'www.Yülp.tld', 'иelф.р']
        with patch(
            'osxcollector.output_filters.util.blacklist._read_blacklist_file',
            return_value=file_contents,
        ), patch('logging.warning', autospec=True) as patched_logging_warning:
            blacklist = create_blacklist(config_chunk)
        assert patched_logging_warning.call_count == 4
        calls = [
            call(
                u'Blacklisted value "{0}" cannot be resolved as a domain name'
                .format(domain),
            ) for domain in file_contents
        ]
        assert calls == patched_logging_warning.call_args_list

        # NOTE(review): key is 'visted_domain' (missing an 'i') while the
        # config uses 'visited_domain'. The no-match assertion passes either
        # way, but confirm which key was intended.
        blob = {'visted_domain': 'Bücher.tld'}
        assert not blacklist.match_line(blob)
-------------------------------------------------------------------------------- /osxcollector/output_filters/summary_filters/text.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

import sys
from numbers import Number

import six

from osxcollector.output_filters.summary_filters.summary import SummaryFilter


class TextSummaryFilter(SummaryFilter):
    """Prints the analysis summary (AKA "Very Readable Output") in plain text format."""

    # ANSI escape sequences used to colorize terminal output.
    END_COLOR = '\033[0m'
    SECTION_COLOR = '\033[1m'
    BOT_COLOR = '\033[93m\033[1m'
    KEY_COLOR = '\033[94m'
    VAL_COLOR = '\033[32m'

    def __init__(self, monochrome=False, text_output_file=None, **kwargs):
        # monochrome: when True, suppress all ANSI color codes.
        super(TextSummaryFilter, self).__init__(summary_output_file=text_output_file, **kwargs)
        self._monochrome = monochrome

    def filter_line(self, blob):
        """Each Line of OSXCollector output will be passed to filter_line.

        Buckets each interesting line (VT hash/domain hits, OpenDNS hits,
        blacklist hits, related files, untrusted signature chains, browser
        extensions) for the summary written in end_of_lines.

        Args:
            blob: A dict

        Returns:
            A dict or None
        """
        if 'osxcollector_vthash' in blob:
            self._vthash.append(blob)

        if 'osxcollector_vtdomain' in blob:
            self._vtdomain.append(blob)

        if 'osxcollector_opendns' in blob:
            self._opendns.append(blob)

        if 'osxcollector_blacklist' in blob:
            self._blacklist.append(blob)

        if 'osxcollector_related' in blob:
            self._related.append(blob)

        if self._show_signature_chain:
            # Flag startup/kext binaries whose cert chain doesn't end at Apple.
            if 'signature_chain' in blob and blob['osxcollector_section'] in ['startup', 'kext']:
                signature_chain = blob['signature_chain']
                if not len(signature_chain) or 'Apple Root CA' != signature_chain[-1]:
                    self._signature_chain.append(blob)

        if self._show_browser_ext:
            if blob['osxcollector_section'] in ['firefox', 'chrome'] and blob.get('osxcollector_subsection') == 'extensions':
                self._extensions.append(blob)

        return blob

    def _write(self, msg, color=END_COLOR):
        # Writes msg to the output stream, wrapped in ANSI color codes unless
        # monochrome mode is on.
        # NOTE(review): on Python 3, .encode() returns bytes and writing bytes
        # to a text stream raises TypeError, not the UnicodeDecodeError caught
        # here — confirm the PY3 path.
        if not self._monochrome:
            self._output_stream.write(color)
        try:
            self._output_stream.write(msg.encode('utf-8', errors='ignore'))
        except UnicodeDecodeError as err:
            self._output_stream.write(msg)
            sys.stderr.write('Unicode decode error: {0}'.format(err))
        if not self._monochrome:
            self._output_stream.write(self.END_COLOR)

    def end_of_lines(self):
        """Called after all lines have been fed to filter_output_line.

        Writes one summary section per non-empty bucket gathered by
        filter_line, then suggests blacklist additions.

        Returns:
            An array of dicts (empty array if no lines remain)
        """
        self._write('== Very Readable Output Bot ==\n', self.BOT_COLOR)
        self._write('Let\'s see what\'s up with this machine.\n\n', self.BOT_COLOR)

        if len(self._vthash):
            self._write('Dang! You\'ve got known malware on this machine. Hope it\'s commodity stuff\n', self.BOT_COLOR)
            self._summarize_blobs(self._vthash)
            self._write('Sheesh! This is why we can\'t have nice things!\n\n', self.BOT_COLOR)

        if len(self._vtdomain):
            self._write('I see you\'ve been visiting some \'questionable\' sites. If you trust VirusTotal that is.\n', self.BOT_COLOR)
            self._summarize_blobs(self._vtdomain)
            self._write('I hope it was worth it!\n\n', self.BOT_COLOR)

        if len(self._opendns):
            self._write('Well, here\'s some domains OpenDNS wouldn\'t recommend.\n', self.BOT_COLOR)
            self._summarize_blobs(self._opendns)
            self._write('You know you shouldn\'t just click every link you see? #truth\n\n', self.BOT_COLOR)

        if len(self._blacklist):
            self._write('We put stuff on a blacklist for a reason. Mostly so you don\'t do this.\n', self.BOT_COLOR)
            self._summarize_blobs(self._blacklist)
            self._write('SMH\n\n', self.BOT_COLOR)

        if len(self._related):
            self._write('This whole things started with just a few clues. Now look what I found.\n', self.BOT_COLOR)
            self._summarize_blobs(self._related)
            self._write('Nothing hides from Very Readable Output Bot\n\n', self.BOT_COLOR)

        if len(self._signature_chain):
            self._write('If these binaries were signed by \'Apple Root CA\' I\'d trust them more.\n', self.BOT_COLOR)
            self._summarize_blobs(self._signature_chain)
            self._write('Let\'s just try and stick with some safe software\n\n', self.BOT_COLOR)

        if len(self._extensions):
            self._write('Let\'s see what\'s hiding in the browser, shall we.\n', self.BOT_COLOR)
            self._summarize_blobs(self._extensions)
            self._write('You know these things have privileges galore.\n\n', self.BOT_COLOR)

        if len(self._add_to_blacklist):
            # De-dupe the (key, value) suggestions gathered in _summarize_blobs.
            self._add_to_blacklist = list(set(self._add_to_blacklist))
            self._write('If I were you, I\'d probably update my blacklists to include:\n', self.BOT_COLOR)
            for key, val in self._add_to_blacklist:
                self._summarize_val(key, val)
            self._write('That might just help things, Skippy!\n\n', self.BOT_COLOR)

        self._write('== Very Readable Output Bot ==\n', self.BOT_COLOR)
        self._write('#kaythanksbye', self.BOT_COLOR)

        return []

    def _summarize_blobs(self, blobs):
        # Prints each blob plus per-source details, and collects hash/domain
        # values worth adding to a blacklist.
        for blob in blobs:
            self._summarize_line(blob)

            add_to_blacklist = False

            if 'osxcollector_vthash' in blob:
                self._summarize_vthash(blob)
                add_to_blacklist = True

            if 'osxcollector_vtdomain' in blob:
                self._summarize_vtdomain(blob)

            if 'osxcollector_opendns' in blob:
                self._summarize_opendns(blob)

            if 'osxcollector_blacklist' in blob:
                for key in blob['osxcollector_blacklist']:
                    self._summarize_val('blacklist-{0}'.format(key), blob['osxcollector_blacklist'][key])

            if 'osxcollector_related' in blob:
                for key in blob['osxcollector_related']:
                    self._summarize_val('related-{0}'.format(key), blob['osxcollector_related'][key])

            if 'md5' in blob and '' == blob['md5']:
                add_to_blacklist = True

            if add_to_blacklist:
                # Suggest any hashes/domains on this line that aren't already
                # on the corresponding blacklist.
                blacklists = blob.get('osxcollector_blacklist', {})
                values_on_blacklist = blacklists.get('hashes', [])
                for key in ['md5', 'sha1', 'sha2']:
                    val = blob.get(key, '')
                    if len(val) and val not in values_on_blacklist:
                        self._add_to_blacklist.append((key, val))

                values_on_blacklist = blacklists.get('domains', [])
                for domain in blob.get('osxcollector_domains', []):
                    if domain not in values_on_blacklist:
                        self._add_to_blacklist.append(('domain', domain))

    def _summarize_line(self, blob):
        # One section header plus every non-empty, non-osxcollector key.
        section = blob.get('osxcollector_section')
        subsection = blob.get('osxcollector_subsection', '')

        self._write('- {0} {1}\n'.format(section, subsection), self.SECTION_COLOR)
        for key in sorted(blob.keys()):
            if not key.startswith('osxcollector') and blob.get(key):
                val = blob.get(key)
                self._summarize_val(key, val)

    def _summarize_vthash(self, blob):
        # NOTE: the loop variable deliberately shadows 'blob' — each element
        # of 'osxcollector_vthash' is itself a result dict.
        for blob in blob['osxcollector_vthash']:
            for key in ['positives', 'total', 'scan_date', 'permalink']:
                val = blob.get(key)
                self._summarize_val(key, val, 'vthash')

    def _summarize_vtdomain(self, blob):
        for blob in blob['osxcollector_vtdomain']:
            for key in ['domain', 'detections']:
                val = blob.get(key)
                self._summarize_val(key, val, 'vtdomain')

    def _summarize_opendns(self, blob):
        for blob in blob['osxcollector_opendns']:
            for key in ['domain', 'categorization', 'security', 'link']:
                val = blob.get(key)
                self._summarize_val(key, val, 'opendns')

    def _summarize_val(self, key, val, prefix=None):
        # Writes a single 'key: value' line, optionally prefixed (e.g. 'vthash-').
        self._print_key(key, prefix)
        self._print_val(val)
        self._write('\n')

    def _print_key(self, key, prefix):
        if
not prefix:
            prefix = ''
        else:
            prefix += '-'

        self._write(' {0}{1}'.format(prefix, key), self.KEY_COLOR)
        self._write(': ')

    def _print_val(self, val):
        # Recursively pretty-prints a value in a JSON-like colorized form;
        # strings are truncated to 480 chars.
        if isinstance(val, list):
            self._write('[')
            for index, elem in enumerate(val):
                self._print_val(elem)
                if index != len(val) - 1:
                    self._write(', ')
            self._write(']')
        elif isinstance(val, dict):
            self._write('{')
            for index, key in enumerate(val):
                self._write('"')
                self._write(key, self.VAL_COLOR)
                self._write('": ')
                self._print_val(val[key])
                if index != len(val) - 1:
                    self._write(', ')
            self._write('}')
        elif isinstance(val, six.string_types):
            val = val[:480]
            self._write('"')
            self._write(val, self.VAL_COLOR)
            self._write('"')
        elif isinstance(val, Number):
            self._write('{0}'.format(val), self.VAL_COLOR)
-------------------------------------------------------------------------------- /osxcollector/output_filters/opendns/lookup_domains.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# LookupDomainsFilter uses OpenDNS to lookup the values in 'osxcollector_domains' and adds the 'osxcollector_opendns' key.
#
from __future__ import absolute_import
from __future__ import unicode_literals

import logging
from collections import namedtuple

import six
from threat_intel.opendns import InvestigateApi

from osxcollector.output_filters.base_filters.output_filter import run_filter_main
from osxcollector.output_filters.base_filters.threat_feed import ThreatFeedFilter
from osxcollector.output_filters.util.blacklist import create_blacklist
from osxcollector.output_filters.util.config import config_get_deep


class LookupDomainsFilter(ThreatFeedFilter):

    """Uses OpenDNS to lookup the values in 'osxcollector_domains' and adds the 'osxcollector_opendns' key."""

    # Domain categories to consider suspicious
    SUSPICIOUS_CATEGORIES = [
        'Adware',
        'Botnet',
        'Typo Squatting',
        'Drive-by Downloads/Exploits',
        'Mobile Threats',
        'High Risk Sites and Locations',
        'Malware',
        'Phishing',
    ]

    # Each check flags a domain when its score is at or below 'threshold'.
    SecurityCheck = namedtuple('SecurityCheck', ['key', 'min', 'max', 'threshold'])
    SECURITY_CHECKS = [
        # Domain Generation Algorithm. This score is generated based on the likeliness of the domain name being
        # generated by an algorithm rather than a human. This algorithm is designed to identify domains which have
        # been created using an automated randomization strategy, which is a common evasion technique in malware kits
        # or botnets. This score ranges from -100 (suspicious) to 0 (benign)
        #
        SecurityCheck('dga_score', -100, 0, -70),

        # Suspicious rank for a domain that reviews based on the lookup behavior of client IP for the domain.
        # Securerank is designed to identify hostnames requested by known infected clients but never requested
        # by clean clients, assuming these domains are more likely to be bad.
        # Scores returned range from -100 (suspicious) to 100 (benign).
        #
        SecurityCheck('securerank2', -100, 100, -10),

        # ASN reputation score, ranges from -100 to 0 with -100 being very suspicious
        SecurityCheck('asn_score', -100, 0, -3),

        # Prefix ranks domains given their IP prefixes (An IP prefix is the first three octets in an IP address)
        # and the reputation score of these prefixes.
        # Ranges from -100 to 0, -100 being very suspicious
        SecurityCheck('prefix_score', -100, 0, -12),

        # RIP ranks domains given their IP addresses and the reputation score of these IP addresses.
        # Ranges from -100 to 0, -100 being very suspicious
        SecurityCheck('rip_score', -100, 0, -25),
    ]

    SECURITY_BAD_KEYS = [
        # The name of any known attacks associated with this domain.
        # Returns blank if no known threat associated with domain.
        'attack',

        # The type of the known attack, such as botnet or APT.
        # Returns blank if no known threat associated with domain.
        'threat_type',
    ]

    def __init__(self, lookup_when=None, **kwargs):
        super(LookupDomainsFilter, self).__init__(
            'osxcollector_domains', 'osxcollector_opendns',
            lookup_when=lookup_when, name_of_api_key='opendns', **kwargs
        )
        # The whitelist reuses the blacklist machinery; whitelisted domains
        # are skipped in _lookup_iocs.
        self._whitelist = create_blacklist(
            config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}),
        )

    def _lookup_iocs(self, all_iocs):
        """Caches the OpenDNS info for a set of domains.

        Domains on a whitelist will be ignored.
        First, lookup the categorization details for each domain.
        Next, if the categorization seems suspicious or unknown, lookup detailed security info.
        Finally, if the categorization or security info is suspicious, save the threat info.

        Args:
            all_iocs: an enumerable of string domain names.
        Returns:
            A dict {domain: opendns_info}
        """
        threat_info = {}

        cache_file_name = config_get_deep('opendns.LookupDomainsFilter.cache_file_name', None)
        investigate = InvestigateApi(self._api_key, cache_file_name=cache_file_name)

        # Skip whitelisted domains entirely.
        iocs = [x for x in all_iocs if not self._whitelist.match_values(x)]

        categorization = investigate.categorization(iocs)

        # Mark the categorization as suspicious
        for domain, categorization_info in six.iteritems(categorization):
            if categorization_info:
                categorization_info['suspicious'] = \
                    self._is_category_info_suspicious(categorization_info)
            else:
                logging.warning(
                    'No categorization for domain {0}'.format(domain),
                )
                categorization[domain] = {'suspicious': False}

        # Decide which values to lookup security info for
        iocs = [domain for domain in categorization if self._should_get_security_info(categorization[domain])]

        security = investigate.security(iocs)

        for domain, security_info in six.iteritems(security):
            if security_info:
                security_info['suspicious'] = \
                    self._is_security_info_suspicious(security_info)
            else:
                logging.warning(
                    'No security information for domain {0}'.format(domain),
                )
                security[domain] = {'suspicious': False}

        for domain in security:
            if self._should_store_ioc_info(categorization[domain], security[domain]):
                threat_info[domain] = {
                    'domain': domain,
                    'categorization': categorization[domain],
                    'security': self._trim_security_result(security[domain]),
                    # On PY2 the domain is byte-encoded for the Investigate URL.
                    'link': 'https://investigate.opendns.com/domain-view/name/{0}/view'.format(
                        domain.encode('utf-8', errors='ignore') if six.PY2 else domain,
                    ),
                }

        return threat_info

    def _is_category_info_suspicious(self, category_info):
        """Figure out whether the categorization info is suspicious.

        Suspicious when status is -1, any security category exists, or any
        content category is in SUSPICIOUS_CATEGORIES.

        Args:
            category_info: A dict of info returned by the OpenDNS categorization call
        Returns:
            boolean
        """
        status = category_info['status']
        content_categories = category_info['content_categories']
        security_categories = category_info['security_categories']

        return -1 == status or len(security_categories) or any([cat in self.SUSPICIOUS_CATEGORIES for cat in content_categories])

    def _should_get_security_info(self, categorization_info):
        """Figure out whether the categorization info on the domain is interesting enough to gather more data.

        If the domain isn't categorized, or is categorized as suspicious, get security info.

        Args:
            categorization_info: A dict of info returned by the OpenDNS categorization call
        Returns:
            boolean
        """
        status = categorization_info.get('status', 0)
        content_categories = categorization_info.get('content_categories', [])
        security_categories = categorization_info.get('security_categories', [])

        # Uncategorized means status 0 with no categories at all.
        return categorization_info['suspicious'] or \
            (0 == status and 0 == len(content_categories) and 0 == len(security_categories))

    def _is_security_info_suspicious(self, security_info):
        """Analyzes info from OpenDNS and makes a boolean determination of suspicious or not.

        Either looks for low values for a specific set of properties, looks for known participation in
        a threat campaign, or looks for unknown domains.
216 | Returns: 217 | A dict 218 | """ 219 | # dga_score sometimes has the wrong sign, fix that please 220 | dga_score = security_info.get('dga_score', 0) 221 | if dga_score > 0: 222 | security_info['dga_score'] = -1 * dga_score 223 | 224 | # There's a lot of info in the security_info, trim it 225 | result = {} 226 | for security_check in self.SECURITY_CHECKS: 227 | if security_check.key in security_info: 228 | result[security_check.key] = security_info[security_check.key] 229 | for key in self.SECURITY_BAD_KEYS: 230 | if key in security_info: 231 | result[key] = security_info[key] 232 | 233 | result['found'] = security_info.get('found', False) 234 | 235 | return result 236 | 237 | 238 | def main(): 239 | run_filter_main(LookupDomainsFilter) 240 | 241 | 242 | if __name__ == '__main__': 243 | main() 244 | -------------------------------------------------------------------------------- /osxcollector/output_filters/opendns/related_domains.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # RelatedDomains uses OpenDNS to find domains related to input domains or IPs. 
4 | # Adds 'osxcollector_related' key to the output: 5 | # { 6 | # 'osxcollector_related': { 7 | # 'domains': { 8 | # 'domain_in_line.com': ['related_domain.com'], 9 | # 'another.com': ['1.2.3.4'] 10 | # } 11 | # } 12 | # } 13 | # 14 | from __future__ import absolute_import 15 | from __future__ import unicode_literals 16 | 17 | from argparse import ArgumentParser 18 | 19 | import six 20 | from threat_intel.opendns import InvestigateApi 21 | 22 | from osxcollector.output_filters.base_filters.output_filter import OutputFilter 23 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 24 | from osxcollector.output_filters.util.blacklist import create_blacklist 25 | from osxcollector.output_filters.util.config import config_get_deep 26 | from osxcollector.output_filters.util.domains import expand_domain 27 | 28 | 29 | DEFAULT_RELATED_DOMAINS_GENERATIONS = 2 30 | 31 | 32 | class RelatedDomainsFilter(OutputFilter): 33 | 34 | """Uses OpenDNS to find domains related to input domains or IPs. 35 | 36 | A whitelist of domains to ignore is read during initialization. 37 | Adds 'osxcollector_related' key to the output: 38 | ```python 39 | { 40 | 'osxcollector_related': { 41 | 'domains': { 42 | 'domain_in_line.com': ['related_domain.com'], 43 | 'another.com': ['1.2.3.4'] 44 | } 45 | } 46 | } 47 | ``` 48 | """ 49 | 50 | def __init__( 51 | self, 52 | initial_domains=None, 53 | initial_ips=None, 54 | generations=DEFAULT_RELATED_DOMAINS_GENERATIONS, 55 | related_when=None, 56 | **kwargs 57 | ): 58 | """Initializes the RelatedDomainsFilter. 59 | 60 | Args: 61 | initial_domains: an enumerable of string domain names 62 | initial_ips: an enumerable of string IPs in the form '' 63 | generations: How many generations of related domains to retrieve. Passing 1 64 | means just find the domains related to the initial input. Passing 2 means also find the 65 | domains related to the domains related to the initial input. 
66 | related_when: A boolean function to call to decide whether to add the domains from a line to 67 | the list of related domains. 68 | """ 69 | super(RelatedDomainsFilter, self).__init__(**kwargs) 70 | self._whitelist = create_blacklist( 71 | config_get_deep('domain_whitelist'), kwargs.get('data_feeds', {}), 72 | ) 73 | 74 | cache_file_name = config_get_deep('opendns.RelatedDomainsFilter.cache_file_name', None) 75 | self._investigate = InvestigateApi(config_get_deep('api_key.opendns'), cache_file_name=cache_file_name) 76 | 77 | self._domains_to_lookup = set(initial_domains) if initial_domains else set() 78 | self._ips_to_lookup = set(initial_ips) if initial_ips else set() 79 | 80 | self._related_when = related_when 81 | self._generation_count = generations 82 | 83 | self._all_blobs = list() 84 | 85 | def filter_line(self, blob): 86 | """Accumulate a set of all domains. 87 | 88 | Args: 89 | blob: A dict representing one line of output from OSXCollector. 90 | Returns: 91 | A dict or None 92 | """ 93 | self._all_blobs.append(blob) 94 | 95 | if 'osxcollector_domains' in blob and self._related_when and self._related_when(blob): 96 | for domain in blob.get('osxcollector_domains'): 97 | self._domains_to_lookup.add(domain) 98 | 99 | return None 100 | 101 | def end_of_lines(self): 102 | """Called after all lines have been fed to filter_output_line. 103 | 104 | The OutputFilter performs any processing that requires the complete input to have already been fed. 
105 | 106 | Returns: 107 | An enumerable of dicts 108 | """ 109 | domains_to_related = self._perform_lookup_for_all_domains(self._domains_to_lookup, self._ips_to_lookup) 110 | 111 | if domains_to_related: 112 | for blob in self._all_blobs: 113 | for domain in blob.get('osxcollector_domains', []): 114 | add_related_domains = False 115 | if domain in domains_to_related: 116 | blob.setdefault('osxcollector_related', {}) 117 | blob['osxcollector_related'].setdefault('domains', {}) 118 | blob['osxcollector_related']['domains'].setdefault(domain, []) 119 | blob['osxcollector_related']['domains'][domain] += domains_to_related[domain] 120 | add_related_domains = True 121 | 122 | # Unique the related domains 123 | if add_related_domains: 124 | blob['osxcollector_related']['domains'][domain] = list(set(blob['osxcollector_related']['domains'][domain])) 125 | 126 | return self._all_blobs 127 | 128 | def get_argument_parser(self): 129 | parser = ArgumentParser() 130 | group = parser.add_argument_group('opendns.RelatedDomainsFilter') 131 | group.add_argument( 132 | '-d', '--domain', dest='initial_domains', default=[], action='append', 133 | help='[OPTIONAL] Suspicious domains to use in pivoting. May be specified more than once.', 134 | ) 135 | group.add_argument( 136 | '-i', '--ip', dest='initial_ips', default=[], action='append', 137 | help='[OPTIONAL] Suspicious IP to use in pivoting. May be specified more than once.', 138 | ) 139 | group.add_argument( 140 | '--related-domains-generations', dest='generations', default=DEFAULT_RELATED_DOMAINS_GENERATIONS, 141 | help='[OPTIONAL] How many generations of related domains to lookup with OpenDNS', 142 | ) 143 | return parser 144 | 145 | def _filter_domains_by_whitelist(self, domains): 146 | """Remove all domains that are on the whitelist. 
147 | 148 | Args: 149 | domains: An enumerable of domains 150 | Returns: 151 | An enumerable of domains 152 | """ 153 | return [x for x in list(domains) if not self._whitelist.match_values(x)] 154 | 155 | def _perform_lookup_for_all_domains(self, domains_to_lookup, ips_to_lookup): 156 | """Lookup all the domains related to the input domains or IPs. 157 | 158 | Args: 159 | domains_to_lookup: Enumerable of domains 160 | ips_to_lookup: Enumerable of IPs 161 | Returns: 162 | A dict mapping {'related_domain': ['initial_domainA', 'initial_domainB']} 163 | """ 164 | self._domains_to_lookup = self._filter_domains_by_whitelist(self._domains_to_lookup) 165 | 166 | domains_to_related = {} 167 | 168 | what_to_lookup = [(domain, True) for domain in domains_to_lookup] + [(ip, False) for ip in ips_to_lookup] 169 | 170 | for domain_or_ip, is_domain in what_to_lookup: 171 | related_domains = self._perform_lookup_for_single_domain(domain_or_ip, is_domain, self._generation_count) 172 | related_domains = self._filter_domains_by_whitelist(related_domains) 173 | for related_domain in related_domains: 174 | domains_to_related.setdefault(related_domain, set()) 175 | domains_to_related[related_domain].add(domain_or_ip) 176 | 177 | return domains_to_related 178 | 179 | def _perform_lookup_for_single_domain(self, domain_or_ip, is_domain, generation_count): 180 | """Given a domain or IP, lookup the Nth related domains. 
181 | 182 | Args: 183 | domain_or_ip: A string domain name or IP 184 | is_domain: A boolean of whether the previous arg is a domain or IP 185 | generation_count: A count of generations to lookup 186 | Returns: 187 | set of related domains 188 | """ 189 | domains_found = set([domain_or_ip]) if is_domain else set() 190 | generation_results = set([domain_or_ip]) 191 | 192 | # For IPs, do one IP specific lookup then switch to domain lookups 193 | if not is_domain: 194 | generation_results = self._find_related_domains(None, generation_results) 195 | domains_found |= generation_results 196 | generation_count -= 1 197 | 198 | while generation_count > 0: 199 | if len(generation_results): 200 | generation_results = self._find_related_domains(generation_results, None) 201 | domains_found |= generation_results 202 | 203 | generation_count -= 1 204 | 205 | return domains_found 206 | 207 | def _find_related_domains(self, domains, ips): 208 | """Calls OpenDNS to find related domains and normalizes the responses. 209 | 210 | Args: 211 | domains: An enumerable of domains 212 | ips: An enumerable of IPs 213 | Returns: 214 | An enumerable of domains 215 | """ 216 | related_domains = set() 217 | 218 | if domains: 219 | domains = self._filter_domains_by_whitelist(domains) 220 | cooccurrence_info = self._investigate.cooccurrences(domains) 221 | cooccurrence_domains = self._cooccurrences_to_domains(cooccurrence_info) 222 | related_domains.update(cooccurrence_domains) 223 | 224 | if ips: 225 | rr_history_info = self._investigate.rr_history(ips) 226 | related_domains.update(self._rr_history_to_domains(rr_history_info)) 227 | 228 | return related_domains 229 | 230 | def _cooccurrences_to_domains(self, cooccurrence_info): 231 | """Parse the results of a call to the OpenDNS cooccurrences endpoint. 
232 | 233 | Args: 234 | cooccurrence_info: Result of a call to cooccurrences 235 | Returns: 236 | An enumerable of domains 237 | """ 238 | domains = set() 239 | 240 | for domain, cooccurence in six.iteritems(cooccurrence_info): 241 | for occur_domain in cooccurence.get('pfs2', []): 242 | for elem in expand_domain(occur_domain[0]): 243 | domains.add(elem) 244 | 245 | return domains 246 | 247 | def _rr_history_to_domains(self, rr_history_info): 248 | """Parse the results of a call to the OpenDNS rr_history endpoint. 249 | 250 | Args: 251 | rr_history_info: Result of a call to rr_history 252 | Returns: 253 | An enumerable of domains 254 | """ 255 | domains = set() 256 | 257 | for ip, rr_history in six.iteritems(rr_history_info): 258 | for rr_domain in rr_history.get('rrs', []): 259 | for elem in expand_domain(rr_domain['rr']): 260 | domains.add(elem) 261 | 262 | return domains 263 | 264 | 265 | def main(): 266 | run_filter_main(RelatedDomainsFilter) 267 | 268 | 269 | if __name__ == '__main__': 270 | main() 271 | -------------------------------------------------------------------------------- /osxcollector/output_filters/analyze.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # The AnalyzeFilter is a handy little tool that ties together many filters to attempt to 5 | # enhance the output of OSXCollector with data from threat APIs, compare against blacklists, 6 | # search for lines related to suspicious domains, ips, or files, and generally figure shit out. 7 | # 8 | # The more detailed description of what goes on: 9 | # 1. Parse out browser extension information. 10 | # 2. Find all the domains in every line. Add them to the output lines. 11 | # 3. Find any file hashes or domains that are on blacklists. Mark those lines. 12 | # 4. Take any filepaths from the command line and mark all lines related to those. 13 | # 5. 
Take any domain or IP from the command line and use OpenDNS Investigate API to find all the domains 14 | # related to those domains and all the domains related to those related domains - basically the 1st and 2nd 15 | # generation related domains. Mark any lines where these domains appear. 16 | # 6. Lookup all sha1 hashes in ShadowServer's bin-test whitelist. 17 | # Files that match both hash and filename are ignored by further filters. 18 | # 7. Lookup file hashes in VirusTotal and mark any lines with suspicious files hashes. 19 | # 8. Lookup all the domains in the file with OpenDNS Investigate. Categorize and score the domains. 20 | # Mark all the lines that contain domains that were scored as "suspicious". 21 | # 9. Lookup suspicious domains, those domains on a blacklist, or those related to the initial input in VirusTotal. 22 | # 10. Cleanup the browser history and sort it in descending time order. 23 | # 11. Save all the enhanced output to a new file. 24 | # 12. Look at all the interesting lines in the file and try to summarize them in some very human readable output. 25 | # 13. Party! 
26 | # 27 | from __future__ import absolute_import 28 | from __future__ import unicode_literals 29 | 30 | from argparse import ArgumentParser 31 | 32 | from osxcollector.output_filters.alexa.lookup_rankings import LookupRankingsFilter as ArLookupRankingsFilter 33 | from osxcollector.output_filters.base_filters.chain import ChainFilter 34 | from osxcollector.output_filters.base_filters.output_filter import run_filter_main 35 | from osxcollector.output_filters.chrome.find_extensions import FindExtensionsFilter as ChromeExtensionsFilter 36 | from osxcollector.output_filters.chrome.sort_history import SortHistoryFilter as ChromeHistoryFilter 37 | from osxcollector.output_filters.find_blacklisted import FindBlacklistedFilter 38 | from osxcollector.output_filters.find_domains import FindDomainsFilter 39 | from osxcollector.output_filters.firefox.find_extensions import FindExtensionsFilter as FirefoxExtensionsFilter 40 | from osxcollector.output_filters.firefox.sort_history import SortHistoryFilter as FirefoxHistoryFilter 41 | from osxcollector.output_filters.opendns.lookup_domains import LookupDomainsFilter as OpenDnsLookupDomainsFilter 42 | from osxcollector.output_filters.opendns.related_domains import RelatedDomainsFilter as OpenDnsRelatedDomainsFilter 43 | from osxcollector.output_filters.related_files import RelatedFilesFilter 44 | from osxcollector.output_filters.shadowserver.lookup_hashes import LookupHashesFilter as ShadowServerLookupHashesFilter 45 | from osxcollector.output_filters.summary_filters.html import HtmlSummaryFilter 46 | from osxcollector.output_filters.summary_filters.text import TextSummaryFilter 47 | from osxcollector.output_filters.virustotal.lookup_domains import LookupDomainsFilter as VtLookupDomainsFilter 48 | from osxcollector.output_filters.virustotal.lookup_hashes import LookupHashesFilter as VtLookupHashesFilter 49 | 50 | 51 | class AnalyzeFilter(ChainFilter): 52 | 53 | """AnalyzeFilter chains all the other filters to produce maximum 
effect. 54 | 55 | A lot of the smarts of AnalyzeFilter are around what filters to run in which order and how results of one filter should 56 | effect the operations of the next filter. 57 | """ 58 | 59 | def __init__( 60 | self, no_opendns=False, no_virustotal=False, no_shadowserver=False, 61 | no_alexa=False, readout=False, **kwargs 62 | ): 63 | 64 | filter_chain = [] 65 | 66 | if not readout: 67 | filter_chain.append(ChromeExtensionsFilter(**kwargs)) 68 | filter_chain.append(FirefoxExtensionsFilter(**kwargs)) 69 | 70 | filter_chain.append(FindDomainsFilter(**kwargs)) 71 | 72 | # Do Alexa ranking lookups first since they are dependent only on FindDomainsFilter 73 | if not no_alexa: 74 | filter_chain.append(ArLookupRankingsFilter(**kwargs)) 75 | 76 | # Do hash related lookups first. This is done first since hash lookup is not influenced 77 | # by anything but other hash lookups. 78 | if not no_shadowserver: 79 | filter_chain.append(ShadowServerLookupHashesFilter(**kwargs)) 80 | if not no_virustotal: 81 | filter_chain.append( 82 | VtLookupHashesFilter(lookup_when=AnalyzeFilter.lookup_when_not_in_shadowserver, **kwargs), 83 | ) 84 | 85 | # Find blacklisted stuff next. Finding blacklisted domains requires running FindDomainsFilter first. 86 | filter_chain.append(FindBlacklistedFilter(**kwargs)) 87 | 88 | # RelatedFilesFilter and OpenDnsRelatedDomainsFilter use command line args in addition to previous filter 89 | # results to find lines of interest. 
90 | filter_chain.append(RelatedFilesFilter(when=AnalyzeFilter.find_related_when, **kwargs)) 91 | if not no_opendns: 92 | filter_chain.append( 93 | OpenDnsRelatedDomainsFilter(related_when=AnalyzeFilter.find_related_when, **kwargs), 94 | ) 95 | 96 | # Lookup threat info on suspicious and related stuff 97 | if not no_opendns: 98 | filter_chain.append( 99 | OpenDnsLookupDomainsFilter(lookup_when=AnalyzeFilter.lookup_when_not_in_shadowserver, **kwargs), 100 | ) 101 | if not no_virustotal: 102 | filter_chain.append( 103 | VtLookupDomainsFilter(lookup_when=AnalyzeFilter.lookup_domains_in_vt_when, **kwargs), 104 | ) 105 | 106 | # Sort browser history for maximum pretty 107 | filter_chain.append(FirefoxHistoryFilter(**kwargs)) 108 | filter_chain.append(ChromeHistoryFilter(**kwargs)) 109 | 110 | filter_chain.append(TextSummaryFilter(**kwargs)) 111 | filter_chain.append(HtmlSummaryFilter(**kwargs)) 112 | 113 | super(AnalyzeFilter, self).__init__(filter_chain, **kwargs) 114 | 115 | def _on_get_argument_parser(self): 116 | """Returns an ArgumentParser with arguments for just this OutputFilter (not the contained chained OutputFilters). 
117 | 118 | Returns: 119 | An `argparse.ArgumentParser` 120 | """ 121 | parser = ArgumentParser() 122 | group = parser.add_argument_group('AnalyzeFilter') 123 | group.add_argument( 124 | '--readout', dest='readout', action='store_true', default=False, 125 | help='[OPTIONAL] Skip the analysis and just output really readable analysis', 126 | ) 127 | group.add_argument( 128 | '--no-opendns', dest='no_opendns', action='store_true', default=False, 129 | help='[OPTIONAL] Don\'t run OpenDNS filters', 130 | ) 131 | group.add_argument( 132 | '--no-virustotal', dest='no_virustotal', action='store_true', default=False, 133 | help='[OPTIONAL] Don\'t run VirusTotal filters', 134 | ) 135 | group.add_argument( 136 | '--no-shadowserver', dest='no_shadowserver', action='store_true', default=False, 137 | help='[OPTIONAL] Don\'t run ShadowServer filters', 138 | ) 139 | group.add_argument( 140 | '--no-alexa', dest='no_alexa', action='store_true', default=False, 141 | help='[OPTIONAL] Don\'t run AlexaRanking filters', 142 | ) 143 | group.add_argument( 144 | '-M', '--monochrome', dest='monochrome', action='store_true', default=False, 145 | help='[OPTIONAL] Output monochrome analysis', 146 | ) 147 | group.add_argument( 148 | '--show-signature-chain', dest='show_signature_chain', action='store_true', default=False, 149 | help='[OPTIONAL] Output unsigned startup items and kexts.', 150 | ) 151 | group.add_argument( 152 | '--show-browser-ext', dest='show_browser_ext', action='store_true', default=False, 153 | help='[OPTIONAL] Output the list of installed browser extensions.', 154 | ) 155 | group.add_argument( 156 | '-t', '--text', dest='text_output_file', default=None, 157 | help='[OPTIONAL] Path to the output file where summary in plain text format will be written to.', 158 | ) 159 | group.add_argument( 160 | '-w', '--html', dest='html_output_file', default=None, 161 | help='[OPTIONAL] Path to the output file where summary in HTML format will be written to.', 162 | ) 163 | 
group.add_argument( 164 | '-c', '--group-by-iocs', dest='group_by_iocs', action='store_true', default=False, 165 | help='[OPTIONAL] Summarize the output grouped by IOCs instead of by threat indicators.', 166 | ) 167 | group.add_argument( 168 | '-k', '--group-key', dest='group_key', default=None, 169 | help='[OPTIONAL] If sorting by IOCs, select which key to group by (sha1/sha2/domain)', 170 | ) 171 | return parser 172 | 173 | @staticmethod 174 | def include_in_summary(blob): 175 | _KEYS_FOR_SUMMARY = [ 176 | 'osxcollector_vthash', 177 | 'osxcollector_vtdomain', 178 | 'osxcollector_opendns', 179 | 'osxcollector_blacklist', 180 | 'osxcollector_related', 181 | ] 182 | 183 | return any([key in blob for key in _KEYS_FOR_SUMMARY]) 184 | 185 | @staticmethod 186 | def lookup_when_not_in_shadowserver(blob): 187 | """ShadowServer whitelists blobs that can be ignored.""" 188 | return 'osxcollector_shadowserver' not in blob 189 | 190 | @staticmethod 191 | def lookup_domains_in_vt_when(blob): 192 | """VT domain lookup is a final step and what to lookup is dependent upon what has been found so far.""" 193 | return AnalyzeFilter.lookup_when_not_in_shadowserver(blob) and AnalyzeFilter.include_in_summary(blob) 194 | 195 | @staticmethod 196 | def find_related_when(blob): 197 | """When to find related terms or domains. 198 | 199 | Stuff in ShadowServer is not interesting. 200 | Blacklisted file paths are worth investigating. 201 | Files where the md5 could not be calculated are also interesting. Root should be able to read files. 202 | Files with a bad hash in VT are obviously malware, go find related bad stuff. 
203 | 204 | Args: 205 | blob - a line of output from OSXCollector 206 | Returns: 207 | boolean 208 | """ 209 | if 'osxcollector_shadowserver' in blob: 210 | return False 211 | if '' == blob.get('md5', None): 212 | return True 213 | return any([key in blob for key in ['osxcollector_vthash', 'osxcollector_related']]) 214 | 215 | 216 | def main(): 217 | run_filter_main(AnalyzeFilter) 218 | 219 | 220 | if __name__ == '__main__': 221 | main() 222 | -------------------------------------------------------------------------------- /tests/output_filters/data/cache.virustotal.LookupHashesFilter.json: -------------------------------------------------------------------------------- 1 | {"virustotal-file-reports": {"b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7": {"scan_id": "b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7-1273894724", "sha256": "b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": false, "result": null, "version": "7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": false, "result": null, "version": "10.00", "update": "20100514"}, "nProtect": {"detected": false, "result": null, "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": false, "result": null, "version": "5.0.27.0", "update": "20100514"}, "NOD32": {"detected": false, "result": null, "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": false, "result": null, "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": false, "result": null, "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": false, "result": null, "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": false, "result": null, "version": "6.04.12", "update": 
"20100514"}, "Avast": {"detected": false, "result": null, "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": false, "result": null, "version": "4842", "update": "20100515"}, "DrWeb": {"detected": false, "result": null, "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": false, "result": null, "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": false, "result": null, "version": "7.0.17.0", "update": "20100513"}, "Sophos": {"detected": false, "result": null, "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": false, "result": null, "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": false, "result": null, "version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": false, "result": null, "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": false, "result": null, "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": false, "result": null, "version": "20101.1.0.89", "update": "20100515"}}, "response_code": 0, "total": 40, "resource": "bd34339415ce6a7d692c90779993dd6f", "scan_date": "2015-01-23 16:23:00", "md5": "bd34339415ce6a7d692c90779993dd6f", "permalink": "https://www.virustotal.com/file/b8d99a20b148b6906977922ce2f964748c70cc36d5c5806a5c41ac9cb50f16d7/analysis/1273894724/", "sha1": "2a27c19560f7ad8017d79c1eb8eb2c91fffb9291", "positives": 0, "verbose_msg": "Scan finished, scan information embedded in this object"}, "6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360": {"scan_id": "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c-1273894724", "sha256": 
"6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": true, "result": "Trojan.Generic.3611249", "version": "7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": true, "result": "Trojan.VB.acgy", "version": "10.00", "update": "20100514"}, "nProtect": {"detected": true, "result": "Trojan.Generic.3611249", "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": true, "result": "Trojan.VB.JFDE", "version": "5.0.27.0", "update": "20100514"}, "NOD32": {"detected": true, "result": "a variant of Win32/Qhost.NTY", "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": true, "result": "Win32/ASuspect.HDBBD", "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": true, "result": "Generic.dx!rkx", "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": true, "result": "TR/VB.acgy.1", "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": true, "result": "W32/Smalltroj.YFHZ", "version": "6.04.12", "update": "20100514"}, "Avast": {"detected": true, "result": "Win32:Malware-gen", "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": true, "result": "Heur.Suspicious", "version": "4842", "update": "20100515"}, "DrWeb": {"detected": true, "result": "Trojan.Hosts.37", "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": true, "result": "Trojan/VB.gen", "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": true, "result": "Win32.TRVB.Acgy", "version": "7.0.17.0", "update": "20100513"}, "Sophos": 
{"detected": true, "result": "Troj/VBHost-A", "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": true, "result": "Trojan.Win32.VB.acgy", "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": true, "result": "Generic.dx!rkx", "version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": true, "result": "Trojan/VB.yqh", "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": true, "result": "Trojan.Generic.3611249", "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": true, "result": "Trojan.KillAV", "version": "20101.1.0.89", "update": "20100515"}}, "response_code": 1, "total": 40, "resource": "06506cc06cf0167ea583de62c98eae2c", "scan_date": "2010-05-15 03:38:44", "md5": "06506cc06cf0167ea583de62c98eae2c", "permalink": "https://www.virustotal.com/file/6e87855371171d912dd866e8d7747bf965c80053f83259827a55826ca38a9360/analysis/1273894724/", "sha1": "92e3750a9f0eef6290dd83867eff88064e9c01bb", "positives": 40, "verbose_msg": "Scan finished, scan information embedded in this object"}, "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c": {"scan_id": "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c-1273894724", "sha256": "52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": false, "result": null, "version": "7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": false, "result": null, "version": "10.00", "update": "20100514"}, "nProtect": {"detected": false, "result": null, "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": false, "result": null, "version": "5.0.27.0", 
"update": "20100514"}, "NOD32": {"detected": false, "result": "a variant of Win32/Qhost.NTY", "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": false, "result": null, "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": false, "result": null, "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": false, "result": null, "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": false, "result": null, "version": "6.04.12", "update": "20100514"}, "Avast": {"detected": false, "result": null, "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": false, "result": null, "version": "4842", "update": "20100515"}, "DrWeb": {"detected": false, "result": null, "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": false, "result": null, "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": false, "result": null, "version": "7.0.17.0", "update": "20100513"}, "Sophos": {"detected": false, "result": null, "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": false, "result": null, "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": false, "result": null, "version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": false, "result": null, "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": false, "result": null, "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": false, "result": null, "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": false, "result": null, "version": "20101.1.0.89", "update": "20100515"}}, "response_code": 0, "total": 40, "resource": "99017f6eebbac24f351415dd410d522d", "scan_date": "2010-05-15 03:38:44", "md5": 
"99017f6eebbac24f351415dd410d522d", "permalink": "https://www.virustotal.com/file/52d3df0ed60c46f336c131bf2ca454f73bafdc4b04dfa2aea80746f5ba9e6d1c/analysis/1273894724/", "sha1": "4d1740485713a2ab3a4f5822a01f645fe8387f92", "positives": 0, "verbose_msg": "Scan finished, scan information embedded in this object"}, "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302": {"scan_id": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302-1273894724", "sha256": "b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302", "scans": {"ClamAV": {"detected": false, "result": null, "version": "0.96.0.3-git", "update": "20100514"}, "BitDefender": {"detected": true, "result": "Trojan.Generic.3611249", "version": "7.2", "update": "20100515"}, "Authentium": {"detected": false, "result": null, "version": "5.2.0.5", "update": "20100514"}, "CAT-QuickHeal": {"detected": true, "result": "Trojan.VB.acgy", "version": "10.00", "update": "20100514"}, "nProtect": {"detected": true, "result": "Trojan.Generic.3611249", "version": "2010-05-14.01", "update": "20100514"}, "VirusBuster": {"detected": true, "result": "Trojan.VB.JFDE", "version": "5.0.27.0", "update": "20100514"}, "NOD32": {"detected": true, "result": "a variant of Win32/Qhost.NTY", "version": "5115", "update": "20100514"}, "eTrust-Vet": {"detected": true, "result": "Win32/ASuspect.HDBBD", "version": "35.2.7490", "update": "20100515"}, "McAfee-GW-Edition": {"detected": true, "result": "Generic.dx!rkx", "version": "2010.1", "update": "20100515"}, "AntiVir": {"detected": true, "result": "TR/VB.acgy.1", "version": "8.2.1.242", "update": "20100514"}, "Norman": {"detected": true, "result": "W32/Smalltroj.YFHZ", "version": "6.04.12", "update": "20100514"}, "Avast": {"detected": true, "result": "Win32:Malware-gen", "version": "4.8.1351.0", "update": "20100514"}, "Comodo": {"detected": true, "result": "Heur.Suspicious", "version": "4842", "update": "20100515"}, "DrWeb": {"detected": true, "result": 
"Trojan.Hosts.37", "version": "5.0.2.03300", "update": "20100515"}, "TheHacker": {"detected": true, "result": "Trojan/VB.gen", "version": "6.5.2.0.280", "update": "20100514"}, "F-Prot": {"detected": false, "result": null, "version": "4.5.1.85", "update": "20100514"}, "TrendMicro": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100514"}, "eSafe": {"detected": true, "result": "Win32.TRVB.Acgy", "version": "7.0.17.0", "update": "20100513"}, "Sophos": {"detected": true, "result": "Troj/VBHost-A", "version": "4.53.0", "update": "20100515"}, "Kaspersky": {"detected": true, "result": "Trojan.Win32.VB.acgy", "version": "7.0.0.125", "update": "20100515"}, "McAfee": {"detected": true, "result": "Generic.dx!rkx", "version": "5.400.0.1158", "update": "20100515"}, "Jiangmin": {"detected": true, "result": "Trojan/VB.yqh", "version": "13.0.900", "update": "20100514"}, "TrendMicro-HouseCall": {"detected": true, "result": "TROJ_VB.JVJ", "version": "9.120.0.1004", "update": "20100515"}, "F-Secure": {"detected": true, "result": "Trojan.Generic.3611249", "version": "9.0.15370.0", "update": "20100514"}, "Symantec": {"detected": true, "result": "Trojan.KillAV", "version": "20101.1.0.89", "update": "20100515"}}, "response_code": 1, "total": 40, "resource": "0c71d8cedc8bbb2b619a76d1478c4348", "scan_date": "2015-01-15 16:42:01", "md5": "0c71d8cedc8bbb2b619a76d1478c4348", "permalink": "https://www.virustotal.com/file/b779bafdf61b74784f2d3601ed663d7476da9ad4182601b8ca54fd4fbe1aa302/analysis/1273894724/", "sha1": "da9b79f2fd33d002033b69a9a346af4671a9e16b", "positives": 40, "verbose_msg": "Scan finished, scan information embedded in this object"}}} -------------------------------------------------------------------------------- /osxcollector/output_filters/chrome/sort_history.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # SortHistoryFilter creates a clean 
# sorted Chrome browser history and tags lines with {'osxcollector_browser_history': 'chrome'}
#
from __future__ import absolute_import
from __future__ import unicode_literals

import six

from osxcollector.output_filters.base_filters.output_filter import OutputFilter
from osxcollector.output_filters.base_filters.output_filter import run_filter_main


class SortHistoryFilter(OutputFilter):

    """Joins Chrome browser history 'visits' and 'urls' tables, producing a time sorted browser history.

    In the output look for lines where:
    ('osxcollector_section' == 'chrome' and 'osxcollector_subsection' == 'history' and 'osxcollector_table_name' == 'visits')
    for some snazzy browser history stuff.
    """

    def __init__(self, **kwargs):
        super(SortHistoryFilter, self).__init__(**kwargs)

        # Rows cached by their 'id' column until end_of_lines() joins the two tables.
        self._visits_table = dict()
        self._urls_table = dict()

    def filter_line(self, blob):
        """Cache the 'visits' and 'urls' tables.

        Args:
            blob: A dict representing one line of OSXCollector output.
        Returns:
            The blob unchanged, or None when the line was consumed into a cache.
        """
        if 'chrome' == blob.get('osxcollector_section') and 'history' == blob.get('osxcollector_subsection'):
            table = blob.get('osxcollector_table_name')

            if 'visits' == table:
                if self._validate_visit(blob):
                    self._visits_table[blob['id']] = blob
                    blob = None  # Consume the line
            elif 'urls' == table:
                if self._validate_urls(blob):
                    self._urls_table[blob['id']] = blob
                    blob = None  # Consume the line

        return blob

    def end_of_lines(self):
        """Join the 'visits' and 'urls' tables into a single browser history and timeline.

        Returns:
            A list of record dicts sorted by 'last_visit_time', newest first.
        """
        history = list()

        for visit in six.itervalues(self._visits_table):
            # A visit's 'url' column is a foreign key into the 'urls' table's 'id' column.
            url = self._urls_table.get(visit.get('url'))
            if url:
                record = {
                    'url': url['url'].encode('utf-8') if six.PY2 else url['url'],
                    # BUGFIX: the Python 3 branch previously read url['url'] here,
                    # replacing every page title with the page URL.
                    'title': url['title'].encode('utf-8') if six.PY2 else url['title'],
                    'last_visit_time': url['last_visit_time'],
                    'visit_time': visit['visit_time'],
                    'core_transition': self.PAGE_TRANSITION.get_core_transition(visit['transition']),
                    'page_transition': self.PAGE_TRANSITION.get_qualifier_transitions(visit['transition']),
                    'osxcollector_browser_history': 'chrome',
                }

                # Add all the OSXCollector specific keys to the record
                for key in visit:
                    if key.startswith('osxcollector_'):
                        record[key] = visit[key]
                for key in url:
                    if key.startswith('osxcollector_') and key not in record:
                        record[key] = url[key]

                history.append(record)

        return sorted(history, key=lambda x: x['last_visit_time'], reverse=True)

    @classmethod
    def _validate_visit(cls, blob):
        """Does the visit dict have the required fields?

        Args:
            blob: a visit dict
        Returns:
            boolean
        """
        required_fields = ['id', 'url', 'visit_time', 'transition']
        return all(field in blob for field in required_fields)

    @classmethod
    def _validate_urls(cls, blob):
        """Does the url dict have the required fields?

        Args:
            blob: a url dict
        Returns:
            boolean
        """
        required_fields = ['id', 'url', 'title', 'last_visit_time']
        return all(field in blob for field in required_fields)

    class PAGE_TRANSITION:

        """Constants that detail page transitions in the Chrome 'visits' table.

        These constants comes from:
        _<https://chromium.googlesource.com/chromium/src/+/HEAD/ui/base/page_transition_types.h>
        """
        # User got to this page by clicking a link on another page.
        CORE_LINK = 0

        # User got this page by typing the URL in the URL bar. This should not be
        # used for cases where the user selected a choice that didn't look at all
        # like a URL; see GENERATED below.
        #
        # We also use this for other "explicit" navigation actions.
        CORE_TYPED = 1

        # User got to this page through a suggestion in the UI, for example,
        # through the destinations page.
        CORE_AUTO_BOOKMARK = 2

        # This is a subframe navigation. This is any content that is automatically
        # loaded in a non-toplevel frame. For example, if a page consists of
        # several frames containing ads, those ad URLs will have this transition
        # type. The user may not even realize the content in these pages is a
        # separate frame, so may not care about the URL (see MANUAL below).
        CORE_AUTO_SUBFRAME = 3

        # For subframe navigations that are explicitly requested by the user and
        # generate new navigation entries in the back/forward list. These are
        # probably more important than frames that were automatically loaded in
        # the background because the user probably cares about the fact that this
        # link was loaded.
        CORE_MANUAL_SUBFRAME = 4

        # User got to this page by typing in the URL bar and selecting an entry
        # that did not look like a URL. For example, a match might have the URL
        # of a Google search result page, but appear like "Search Google for ...".
        # These are not quite the same as TYPED navigations because the user
        # didn't type or see the destination URL.
        # See also KEYWORD.
        CORE_GENERATED = 5

        # The page was specified in the command line or is the start page.
        CORE_START_PAGE = 6

        # The user filled out values in a form and submitted it. NOTE that in
        # some situations submitting a form does not result in this transition
        # type. This can happen if the form uses script to submit the contents.
        CORE_FORM_SUBMIT = 7

        # The user "reloaded" the page, either by hitting the reload button or by
        # hitting enter in the address bar. NOTE: This is distinct from the
        # concept of whether a particular load uses "reload semantics" (i.e.
        # bypasses cached data). For this reason, lots of code needs to pass
        # around the concept of whether a load should be treated as a "reload"
        # separately from their tracking of this transition type, which is mainly
        # used for proper scoring for consumers who care about how frequently a
        # user typed/visited a particular URL.
        #
        # SessionRestore and undo tab close use this transition type too.
        CORE_RELOAD = 8

        # The url was generated from a replaceable keyword other than the default
        # search provider. If the user types a keyword (which also applies to
        # tab-to-search) in the omnibox this qualifier is applied to the transition
        # type of the generated url. TemplateURLModel then may generate an
        # additional visit with a transition type of KEYWORD_GENERATED against the
        # url 'http:#' + keyword. For example, if you do a tab-to-search against
        # wikipedia the generated url has a transition qualifer of KEYWORD, and
        # TemplateURLModel generates a visit for 'wikipedia.org' with a transition
        # type of KEYWORD_GENERATED.
        CORE_KEYWORD = 9

        # Corresponds to a visit generated for a keyword. See description of
        # KEYWORD for more details.
        CORE_KEYWORD_GENERATED = 10

        # The core transition lives in the low byte of the 'transition' value.
        CORE_MASK = 0xFF

        @classmethod
        def get_core_transition(cls, value):
            """Translates a numeric page transition into a human readable description.

            Args:
                value: A numeric value represented as a Number or String

            Returns:
                A string
            """
            try:
                value = int(value) & cls.CORE_MASK
            except (TypeError, ValueError):
                # TypeError added so a None/non-numeric transition degrades to the
                # existing 'ERROR' sentinel instead of raising mid-pipeline.
                return 'ERROR'

            if cls.CORE_LINK == value:
                return 'link'
            elif cls.CORE_TYPED == value:
                return 'typed'
            elif cls.CORE_AUTO_BOOKMARK == value:
                return 'auto_bookmark'
            elif cls.CORE_AUTO_SUBFRAME == value:
                return 'auto_subframe'
            elif cls.CORE_MANUAL_SUBFRAME == value:
                return 'manual_subframe'
            elif cls.CORE_GENERATED == value:
                return 'generated'
            elif cls.CORE_START_PAGE == value:
                return 'start_page'
            elif cls.CORE_FORM_SUBMIT == value:
                return 'form_submit'
            elif cls.CORE_RELOAD == value:
                return 'reload'
            elif cls.CORE_KEYWORD == value:
                return 'keyword'
            elif cls.CORE_KEYWORD_GENERATED == value:
                # NOTE(review): maps to the same label as CORE_GENERATED —
                # presumably intentional; confirm before changing.
                return 'generated'
            return 'UNKNOWN'

        # A managed user attempted to visit a URL but was blocked.
        QUALIFIER_BLOCKED = 0x00800000

        # User used the Forward or Back button to navigate among browsing history.
        QUALIFIER_FORWARD_BACK = 0x01000000

        # User used the address bar to trigger this navigation.
        QUALIFIER_FROM_ADDRESS_BAR = 0x02000000

        # User is navigating to the home page.
        QUALIFIER_HOME_PAGE = 0x04000000

        # The beginning of a navigation chain.
        QUALIFIER_CHAIN_START = 0x10000000

        # The last transition in a redirect chain.
        QUALIFIER_CHAIN_END = 0x20000000

        # Redirects caused by JavaScript or a meta refresh tag on the page.
        QUALIFIER_CLIENT_REDIRECT = 0x40000000

        # Redirects sent from the server by HTTP headers. It might be nice to
        # break this out into 2 types in the future, permanent or temporary, if we
        # can get that information from WebKit.
        QUALIFIER_SERVER_REDIRECT = 0x80000000

        # Qualifier flags occupy the bits above the core-transition byte.
        QUALIFIER_MASK = 0xFFFFFF00

        @classmethod
        def get_qualifier_transitions(cls, value):
            """Translates the qualifier bits of a page transition into labels.

            Args:
                value: A numeric value represented as a Number or String

            Returns:
                A list of strings, one per qualifier flag set; empty on bad input.
            """
            qualifiers = []

            try:
                value = int(value) & cls.QUALIFIER_MASK
            except (TypeError, ValueError):
                # Mirror get_core_transition: degrade gracefully on bad input.
                return qualifiers

            if cls.QUALIFIER_BLOCKED & value:
                qualifiers.append('blocked')

            if cls.QUALIFIER_FORWARD_BACK & value:
                qualifiers.append('forward_back')

            if cls.QUALIFIER_FROM_ADDRESS_BAR & value:
                qualifiers.append('from_address_bar')

            if cls.QUALIFIER_HOME_PAGE & value:
                qualifiers.append('home_page')

            if cls.QUALIFIER_CHAIN_START & value:
                qualifiers.append('chain_start')

            if cls.QUALIFIER_CHAIN_END & value:
                qualifiers.append('chain_end')

            if cls.QUALIFIER_CLIENT_REDIRECT & value:
                qualifiers.append('client_redirect')

            if cls.QUALIFIER_SERVER_REDIRECT & value:
                qualifiers.append('server_redirect')

            return qualifiers


def main():
    run_filter_main(SortHistoryFilter)


if __name__ == '__main__':
    main()