├── .activate.sh ├── .coveragerc ├── .deactivate.sh ├── .github └── workflows │ ├── build.yaml │ └── publish.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .secrets.baseline ├── LICENSE.md ├── MANIFEST.in ├── Makefile ├── README.md ├── amira ├── __init__.py ├── amira.py ├── data_processor.py ├── results_uploader.py ├── s3.py └── sqs.py ├── amira_github_banner.png ├── doc └── component_diagram.png ├── requirements-bootstrap.txt ├── requirements-dev.txt ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── amira_test.py ├── data │ ├── mock_input.tar.gz │ ├── s3_event_notifications.json │ └── s3_test_event_notification.json ├── data_processor_test.py ├── s3_test.py └── sqs_test.py └── tox.ini /.activate.sh: -------------------------------------------------------------------------------- 1 | virtualenv_run/bin/activate -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = 4 | . 5 | omit = 6 | .tox/* 7 | virtualenv_run/* 8 | setup.py 9 | 10 | [report] 11 | show_missing = True 12 | skip_covered = False 13 | 14 | exclude_lines = 15 | # Have to re-enable the standard pragma 16 | \#\s*pragma: no cover 17 | 18 | # Don't complain if tests don't hit defensive assertion code: 19 | ^\s*raise AssertionError\b 20 | ^\s*raise NotImplementedError\b 21 | ^\s*return NotImplemented\b 22 | ^\s*raise$ 23 | 24 | # Don't complain if non-runnable code isn't run: 25 | ^if __name__ == ['"]__main__['"]:$ 26 | 27 | [html] 28 | directory = coverage-html 29 | 30 | # vim:ft=dosini 31 | -------------------------------------------------------------------------------- /.deactivate.sh: -------------------------------------------------------------------------------- 1 | deactivate 2 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | pre-commit: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v2 10 | with: 11 | submodules: true 12 | - uses: actions/setup-python@v2 13 | with: 14 | python-version: '3.7' 15 | architecture: x64 16 | 17 | - name: Install dependencies 18 | run: python -m pip install tox 19 | 20 | - name: Run pre-commit tox job 21 | run: tox -e pre-commit 22 | 23 | test: 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | os: [ubuntu-latest] 28 | python-version: ['3.5', '3.6', '3.7', '3.8'] 29 | 30 | name: Python ${{ matrix.python-version }} on ${{ matrix.os }} 31 | runs-on: ${{ matrix.os }} 32 | steps: 33 | - uses: actions/checkout@v2 34 | with: 35 | submodules: true 36 | - uses: actions/setup-python@v2 37 | with: 38 | python-version: ${{ matrix.python-version }} 39 | architecture: x64 40 | 41 | - name: Install dependencies 42 | run: python -m pip install tox 43 | 44 | - name: Run ${{ matrix.python }} tox job 45 | run: tox -e py 46 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish on PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout Repo 14 | uses: actions/checkout@v2 15 | 16 | - name: Setup Python 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: 
3.6 20 | 21 | - name: Install Python dependencies 22 | run: pip install wheel 23 | 24 | - name: Create a Wheel file and source distribution 25 | run: python setup.py sdist bdist_wheel 26 | 27 | - name: Publish distribution package to PyPI 28 | uses: pypa/gh-action-pypi-publish@v1.2.2 29 | with: 30 | user: __token__ 31 | password: ${{ secrets.PYPI_TOKEN }} 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | .idea 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | .venv/ 85 | venv/ 86 | ENV/ 87 | virtualenv_run/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | 95 | # macOS custom folder attributes 96 | .DS_Store 97 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: '^tests/output_filters/data/.*$' 2 | repos: 3 | - repo: git://github.com/pre-commit/pre-commit-hooks 4 | rev: v2.3.0 5 | hooks: 6 | - id: check-json 7 | - id: check-yaml 8 | - id: end-of-file-fixer 9 | - id: trailing-whitespace 10 | - id: name-tests-test 11 | - id: requirements-txt-fixer 12 | - id: double-quote-string-fixer 13 | - id: flake8 14 | - id: fix-encoding-pragma 15 | - repo: git://github.com/pre-commit/mirrors-autopep8 16 | rev: v1.4.4 17 | hooks: 18 | - id: autopep8 19 | - repo: git://github.com/asottile/reorder_python_imports 20 | rev: v1.7.0 21 | hooks: 22 | - id: reorder-python-imports 23 | args: [ 24 | '--add-import', 'from __future__ import absolute_import', 25 | '--add-import', 'from __future__ import unicode_literals', 26 | ] 27 | exclude: setup.py 28 | - repo: git://github.com/asottile/add-trailing-comma 29 | rev: v1.4.1 30 | hooks: 31 | - id: add-trailing-comma 32 | - repo: https://github.com/Yelp/detect-secrets 33 | rev: v0.12.7 34 | hooks: 35 | - id: detect-secrets 36 | args: ['--baseline', '.secrets.baseline'] 37 | exclude: .*tests/.*|\.pre-commit-config\.yaml 38 | language_version: python2.7 39 | -------------------------------------------------------------------------------- 
/.secrets.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": { 3 | "files": ".*tests/.*|\\.pre-commit-config\\.yaml", 4 | "lines": null 5 | }, 6 | "generated_at": "2019-10-02T10:15:23Z", 7 | "plugins_used": [ 8 | { 9 | "base64_limit": 4.5, 10 | "name": "Base64HighEntropyString" 11 | }, 12 | { 13 | "hex_limit": 3, 14 | "name": "HexHighEntropyString" 15 | }, 16 | { 17 | "name": "PrivateKeyDetector" 18 | } 19 | ], 20 | "results": { 21 | ".travis.yml": [ 22 | { 23 | "hashed_secret": "20bccf6b10adb3faf8ef4552f5ec21b2767501ac", 24 | "is_secret": false, 25 | "is_verified": false, 26 | "line_number": 33, 27 | "type": "Base64 High Entropy String" 28 | } 29 | ] 30 | }, 31 | "version": "0.12.7", 32 | "word_list": { 33 | "file": null, 34 | "hash": null 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 Yelp Inc. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 9 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.md 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DELETE_ON_ERROR: 2 | 3 | all: install-hooks test 4 | 5 | test: 6 | tox 7 | 8 | venv: 9 | tox -evenv 10 | 11 | install-hooks: venv 12 | pre-commit install -f --install-hooks 13 | 14 | clean: 15 | rm -rf build/ dist/ .tox/ virtualenv_run/ *.egg-info/ 16 | rm -f .coverage 17 | find . -name '*.pyc' -delete 18 | find . 
-name '__pycache__' -delete 19 | 20 | .PHONY: all test venv install-hooks clean 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![amira](https://raw.githubusercontent.com/Yelp/amira/master/amira_github_banner.png) 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/amira.svg)](https://pypi.python.org/pypi/amira) 4 | [![Build Status](https://github.com/Yelp/amira/actions/workflows/build.yaml/badge.svg)](https://github.com/Yelp/amira/actions/workflows/build.yaml) 5 | 6 | # AMIRA: Automated Malware Incident Response & Analysis 7 | 8 | AMIRA is a service for automatically running the analysis on the 9 | [OSXCollector](https://github.com/Yelp/osxcollector) output files. 10 | The automated analysis is performed via 11 | [OSXCollector Output Filters](https://github.com/Yelp/osxcollector_output_filters), 12 | in particular *The One Filter to Rule Them All*: the 13 | [Analyze Filter](https://github.com/Yelp/osxcollector_output_filters#analyzefilter---the-one-filter-to-rule-them-all). 14 | AMIRA takes care of retrieving the output files from an S3 bucket, 15 | running the Analyze Filter and then uploading the results 16 | of the analysis back to S3 (although one could envision as well 17 | attaching them to the related JIRA ticket). 18 | 19 | ## Prerequisites 20 | 21 | ### tox 22 | 23 | The following steps assume you have [tox](http://tox.readthedocs.org/) 24 | installed on your machine. 25 | 26 | If this is not the case, please run: 27 | ```bash 28 | $ sudo pip install tox 29 | ``` 30 | 31 | ### OSXCollector Output Filters configuration file 32 | 33 | AMIRA uses OSXCollector Output Filters to do the actual analysis, 34 | so you will need to have a valid `osxcollector.yaml` 35 | configuration file in the working directory. 36 | The example configuration file can be found in the 37 | [OSXCollector Output Filters](https://github.com/Yelp/osxcollector_output_filters/blob/master/osxcollector.yaml.example). 38 | 39 | The configuration file mentions the location of the file hash and the domain 40 | blacklists. 41 | Make sure that the blacklist locations mentioned in the configuration file are 42 | also available when running AMIRA. 43 | 44 | ### AWS credentials 45 | 46 | AMIRA uses boto3 to interface with AWS. 47 | You can supply credentials using either of the possible 48 | [configuration options](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). 49 | 50 | The credentials should allow reading and deleting SQS messages 51 | from the SQS queue specified in the AMIRA config as well as 52 | the read access to the objects in the S3 bucket where the OSXCollector 53 | output files are stored. 54 | To be able to upload the analysis results back to the S3 bucket 55 | specified in the AMIRA configuration file, the credentials should 56 | also allow write access to this bucket. 57 | 58 | ## AMIRA Architecture 59 | 60 | The service uses the 61 | [S3 bucket event notifications](http://docs.aws.amazon.com/AmazonS3/latest/dev/NotificationHowTo.html) 62 | to trigger the analysis. 63 | You will need to configure an S3 bucket for the OSXCollector output files, 64 | so that when a file is added there the notification will be sent to an SQS queue 65 | (`AmiraS3EventNotifications` in the picture below). 66 | AMIRA periodically checks the queue for any new messages 67 | and upon receiving one it will fetch the OSXCollector output file from the S3 68 | bucket. 
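The bucket-to-queue notification wiring described in this section is not part of this repository. As a rough, illustrative sketch (the bucket name, account ID and region are placeholders, and the SQS queue must separately have a policy allowing S3 to send messages to it), the hookup could be configured with boto3 along these lines:

```python
# Illustrative only: point ObjectCreated notifications from the OSXCollector
# output bucket at the AmiraS3EventNotifications queue. The bucket name,
# account ID and region below are placeholders, not values from this repo.
import boto3

s3 = boto3.client('s3')
s3.put_bucket_notification_configuration(
    Bucket='osxcollector-output-bucket',
    NotificationConfiguration={
        'QueueConfigurations': [
            {
                'QueueArn': 'arn:aws:sqs:us-west-1:123456789012:AmiraS3EventNotifications',
                'Events': ['s3:ObjectCreated:*'],
            },
        ],
    },
)
```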
69 | It will then run the Analyze Filter on the retrieved file. 70 | 71 | The Analyze Filter runs all the filters contained in the OSXCollector Output 72 | Filters package sequentially. Some of them communicate with the external 73 | resources, like domain and hashes blacklists (or whitelists) and threat intel 74 | APIs, e.g. [VirusTotal](https://github.com/Yelp/threat_intel#virustotal-api), 75 | [OpenDNS Investigate](https://github.com/Yelp/threat_intel#opendns-investigate-api) 76 | or [ShadowServer](https://github.com/Yelp/threat_intel#shadowserver-api). 77 | The original OSXCollector output is extended with all of this information and 78 | the very last filter run by the Analyze Filter summarizes all of the findings 79 | into a human-readable form. After the filter finishes running, the results of 80 | the analysis will be uploaded to the Analysis Results S3 bucket. 81 | 82 | The overview of the whole process and the system components involved in it are 83 | depicted below: 84 | 85 | ![component diagram](https://github.com/Yelp/amira/raw/master/doc/component_diagram.png "Component Diagram") 86 | 87 | ## Using AMIRA 88 | 89 | The main entry point to AMIRA is in the `amira/amira.py` module. 90 | You will first need to create an instance of AMIRA class by providing the AWS 91 | region name, where the SQS queue with the event notifications for the 92 | OSXCollector output bucket is, and the SQS queue name: 93 | 94 | ```python 95 | from amira.amira import AMIRA 96 | 97 | amira = AMIRA('us-west-1', 'AmiraS3EventNotifications') 98 | ``` 99 | 100 | Then you can register the analysis results uploader, e.g. the S3 results 101 | uploader: 102 | 103 | ```python 104 | from amira.s3 import S3ResultsUploader 105 | 106 | s3_results_uploader = S3ResultsUploader('amira-results-bucket') 107 | amira.register_results_uploader(s3_results_uploader) 108 | ``` 109 | 110 | Finally, run AMIRA: 111 | ```python 112 | amira.run() 113 | ``` 114 | 115 | Go get some coffee, sit back, relax and wait till the analysis results pop up 116 | in the S3 bucket! 117 | -------------------------------------------------------------------------------- /amira/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | __version__ = '2.0.3' 6 | -------------------------------------------------------------------------------- /amira/amira.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | 7 | from amira.data_processor import OSXCollectorDataProcessor 8 | from amira.s3 import S3Handler 9 | from amira.sqs import SqsHandler 10 | 11 | 12 | class AMIRA(object): 13 | """Runs the automated analysis based on the new elements in an S3 14 | bucket: 15 | 1. Receives the messages from the SQS queue about the new 16 | objects in the S3 bucket. 17 | 2. Retrieves the objects (OSXCollector output files) from the 18 | bucket. 19 | 3. Runs the Analayze Filter on the retrieved OSXCollector 20 | output. 21 | 4. Uploads the analysis results. 22 | 23 | JIRA integration is optional. If any of the JIRA parameters 24 | (`jira_server`, `jira_user`, `jira_password` or `jira_project`) 25 | is not supplied or `None`, attaching the analysis results to a JIRA 26 | issue will be skipped. 
27 | 28 | :param region_name: The AWS region name where the SQS queue 29 | containing the S3 event notifications is 30 | configured. 31 | :type region_name: string 32 | :param queue_name: The name of the SQS queue containing the S3 33 | event notifications. 34 | :type queue_name: string 35 | """ 36 | 37 | def __init__(self, region_name, queue_name): 38 | self._sqs_handler = SqsHandler(region_name, queue_name) 39 | self._s3_handler = S3Handler() 40 | self._results_uploader = [] 41 | self._data_feeds = {} 42 | self._data_processor = OSXCollectorDataProcessor() 43 | 44 | def register_results_uploader(self, results_uploader): 45 | """Registers results uploader. 46 | 47 | Results uploader will upload the analysis results and the 48 | summary to a specific destination after the analysis is 49 | finished. 50 | """ 51 | self._results_uploader.append(results_uploader) 52 | 53 | def register_data_feed(self, feed_name, generator): 54 | """Register data input which to be used by the OsXCollector filters 55 | 56 | :param feed_name: Name of the data feed 57 | :param generator: Generator function providing the data 58 | """ 59 | self._data_feeds[feed_name] = generator 60 | 61 | def register_data_processor(self, processor): 62 | """Registers DataProcessor object to process and analyze input data from S3. 63 | If no processor is registered Amira will fall back using the default 64 | OSXCollector result processor. 65 | 66 | :param processor: DataProcessor object instance 67 | """ 68 | self._data_processor = processor 69 | 70 | def run(self): 71 | """Fetches the OSXCollector output from an S3 bucket based on 72 | the S3 ObjectCreated event notifications and runs the Analyze 73 | Filter on the output file. 74 | Once the analysis is finished the output and the "very readable 75 | output" files are uploaded to the target S3 bucket. 76 | """ 77 | created_objects = self._sqs_handler.get_created_objects() 78 | 79 | for created_object in created_objects: 80 | if created_object.key_name.endswith('.tar.gz'): 81 | self._process_created_object(created_object) 82 | else: 83 | logging.warning( 84 | 'S3 object {0} name should end with ".tar.gz"' 85 | .format(created_object.key_name), 86 | ) 87 | 88 | def _process_created_object(self, created_object): 89 | """Fetches the object from an S3 bucket and runs the analysis. 90 | Then it sends the results to the target S3 bucket and attaches 91 | them to the JIRA ticket. 92 | """ 93 | # fetch forensic data from the S3 bucket 94 | forensic_output = self._s3_handler.get_contents_as_string( 95 | created_object.bucket_name, created_object.key_name, 96 | ) 97 | 98 | try: 99 | processed_input = self._data_processor.process_input(forensic_output) 100 | if processed_input: 101 | self._data_processor.perform_analysis(processed_input, self._data_feeds) 102 | except Exception as exc: 103 | # Log the exception and do not try any recovery. 104 | # The message that caused the exception will be deleted from the 105 | # SQS queue to prevent the same exception from happening in the 106 | # future. 
107 | logging.warning( 108 | 'Unexpected error while running the Analyze Filter for the ' 109 | 'object {}: {}'.format(created_object.key_name, exc), 110 | ) 111 | try: 112 | self._data_processor.upload_results( 113 | created_object.key_name[:-7], self._results_uploader, 114 | ) 115 | except Exception: 116 | logging.exception( 117 | 'Unexpected error while uploading results for the ' 118 | 'object: {0}'.format(created_object.key_name), 119 | ) 120 | -------------------------------------------------------------------------------- /amira/data_processor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | import os 7 | import tarfile 8 | 9 | try: 10 | from cStringIO import StringIO as ByteBuffer 11 | from cStringIO import StringIO as StringBuffer 12 | except ImportError: 13 | from io import BytesIO as ByteBuffer 14 | from io import StringIO as StringBuffer 15 | 16 | from osxcollector.output_filters.analyze import AnalyzeFilter 17 | from osxcollector.output_filters.base_filters import output_filter 18 | 19 | from amira.results_uploader import FileMetaInfo 20 | 21 | 22 | class DataProcessor(object): 23 | 24 | def __init__(self): 25 | # List to store processing outputs 26 | self._results = [] 27 | 28 | def process_input(self, tardata): 29 | """Process input TAR file 30 | 31 | :param tardata: TAR byte stream 32 | :return: processed data file stream 33 | """ 34 | raise NotImplementedError() 35 | 36 | def perform_analysis(self, input_stream, data_feeds=None): 37 | """Perform analysis of forensic input. 38 | Analysis results should be handled as internal object state 39 | 40 | :param input_stream: forensic data 41 | :param data_feeds: additional data feeds which may be required in the analysis 42 | """ 43 | raise NotImplementedError() 44 | 45 | def upload_results(self, file_basename, result_uploaders): 46 | """Upload forensic results. 47 | These must be stored as FileMetaInfo objects in the `_results` list attribute 48 | 49 | :param file_basename: Basename used to generate output filenames (prepended) 50 | :param result_uploaders: List of Uploader objects to invoke 51 | """ 52 | results = [ 53 | FileMetaInfo(file_basename + res.name, res.content, res.content_type) for res in self._results 54 | if isinstance(res, FileMetaInfo) and DataProcessor.get_buffer_size(res.content) > 0 55 | ] 56 | if results: 57 | for res_uploader in result_uploaders: 58 | for res in results: 59 | res.content.seek(0) 60 | res_uploader.upload_results(results) 61 | else: 62 | logging.warning('No results to upload for {}'.format(file_basename)) 63 | 64 | @staticmethod 65 | def get_buffer_size(data_buffer): 66 | """Get byte size of file-like object 67 | 68 | :param data_buffer: file-like object 69 | :return: total size in bytes 70 | """ 71 | data_buffer.seek(0, os.SEEK_END) 72 | size = data_buffer.tell() 73 | data_buffer.seek(0) 74 | return size 75 | 76 | 77 | class OSXCollectorDataProcessor(DataProcessor): 78 | 79 | def process_input(self, tardata): 80 | """Extracts JSON file containing the OSXCollector output from 81 | tar.gz archive. It will look in the archive contents for the 82 | file with the extension ".json". If no file with this extension 83 | is found in the archive or more than one JSON file is found, it 84 | will raise `OSXCollectorOutputExtractionError`. 
85 | 86 | :param tardata: Input TAR archive data 87 | """ 88 | self._results = [FileMetaInfo('.tar.gz', ByteBuffer(tardata), 'application/gzip')] 89 | # create a file-like object based on the S3 object contents as string 90 | fileobj = ByteBuffer(tardata) 91 | tar = None 92 | try: 93 | tar = tarfile.open(mode='r:gz', fileobj=fileobj) 94 | except tarfile.ReadError as ter: 95 | logging.error('Failed to read the archive: {}'.format(ter)) 96 | return 97 | 98 | json_tarinfo = [t for t in tar if t.name.endswith('.json')] 99 | 100 | if len(json_tarinfo) != 1: 101 | raise OSXCollectorOutputExtractionError( 102 | 'Expected 1 JSON file inside the OSXCollector output archive, ' 103 | 'but found {0} instead.'.format(len(json_tarinfo)), 104 | ) 105 | 106 | tarinfo = json_tarinfo[0] 107 | logging.info('Extracted OSXCollector output JSON file {0}'.format(tarinfo.name)) 108 | return tar.extractfile(tarinfo) 109 | 110 | def perform_analysis(self, input_stream, data_feeds=None): 111 | """Runs Analyze Filter on the OSXCollector output retrieved 112 | from an S3 bucket. 113 | 114 | :param input_stream: Input data stream on which filters should be ran 115 | :param data_feeds: black/whitelist data feeds 116 | """ 117 | analysis_output = StringBuffer() 118 | text_analysis_summary = ByteBuffer() 119 | html_analysis_summary = ByteBuffer() 120 | 121 | analyze_filter = AnalyzeFilter( 122 | monochrome=True, 123 | text_output_file=text_analysis_summary, 124 | html_output_file=html_analysis_summary, 125 | data_feeds=data_feeds or {}, 126 | ) 127 | 128 | output_filter._run_filter( 129 | analyze_filter, 130 | input_stream=input_stream, 131 | output_stream=analysis_output, 132 | ) 133 | 134 | # rewind the output files 135 | analysis_output.seek(0) 136 | text_analysis_summary.seek(0) 137 | html_analysis_summary.seek(0) 138 | 139 | self._results += [ 140 | FileMetaInfo('_analysis.json', analysis_output, 'application/json'), 141 | FileMetaInfo('_summary.txt', text_analysis_summary, 'text/plain'), 142 | FileMetaInfo('_summary.html', html_analysis_summary, 'text/html; charset=UTF-8'), 143 | ] 144 | 145 | 146 | class OSXCollectorOutputExtractionError(Exception): 147 | """Raised when an unexpected number of JSON files is found in the 148 | OSXCollector output archive. 149 | """ 150 | pass 151 | -------------------------------------------------------------------------------- /amira/results_uploader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from collections import namedtuple 6 | 7 | 8 | FileMetaInfo = namedtuple('FileMetaInfo', ['name', 'content', 'content_type']) 9 | 10 | 11 | class ResultsUploader(object): 12 | 13 | """Parent class for the AMIRA results uploaders. Results uploaders 14 | should expose a single method, ``upload_results()``, that should 15 | take a list of ``FileMetaInfo`` tuples. 16 | """ 17 | 18 | def upload_results(self, results): 19 | """Uploads the analysis results to a desired destination. 20 | 21 | :param results: The list containing the meta info (name, 22 | content and content-type) of the files which 23 | needs to be uploaded. 
24 | :type results: list of ``FileMetaInfo`` tuples 25 | """ 26 | raise NotImplementedError( 27 | 'Derived classes must implement "upload_results()".', 28 | ) 29 | -------------------------------------------------------------------------------- /amira/s3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | 7 | try: 8 | from cStringIO import StringIO as ByteBuffer 9 | from cStringIO import StringIO as StringBuffer 10 | IN_PY3 = False 11 | except ImportError: 12 | from io import BytesIO as ByteBuffer 13 | from io import StringIO as StringBuffer 14 | IN_PY3 = True 15 | 16 | import boto3 17 | 18 | from amira.results_uploader import ResultsUploader 19 | 20 | 21 | class S3Handler(object): 22 | """Handles the operations with S3, like retrieving the key 23 | (object) contents from a bucket and creating a new key 24 | (object) with the contents of a given file. 25 | AWS uses the ambiguous term "key" to describe the objects 26 | inside the S3 bucket. They are unrelated to AWS keys used to access 27 | the resources. 28 | """ 29 | 30 | def __init__(self): 31 | self._s3_connection = boto3.client('s3') 32 | 33 | def get_contents_as_string(self, bucket_name, key_name): 34 | """Retrieves the S3 key (object) contents. 35 | 36 | :param bucket_name: The S3 bucket name. 37 | :type bucket_name: string 38 | :param key_name: The S3 key (object) name. 39 | :type key_name: string 40 | :returns: The key (object) contents as a bytes (str in py2). 41 | :rtype: bytes 42 | """ 43 | response = self._s3_connection.get_object(Bucket=bucket_name, Key=key_name) 44 | return response['Body'].read() 45 | 46 | 47 | class S3ResultsUploader(ResultsUploader): 48 | """Uploads the analysis results to an S3 bucket. 49 | 50 | :param bucket_name: The name of the S3 bucket where the analysis 51 | results will be uploaded. 52 | :type bucket_name: string 53 | """ 54 | 55 | def __init__(self, bucket_name): 56 | self._bucket_name = bucket_name 57 | self._s3_connection = boto3.client('s3') 58 | 59 | def upload_results(self, results): 60 | """Uploads the analysis results to an S3 bucket. 61 | 62 | :param results: The list containing the meta info (name, 63 | content and content-type) of the files which 64 | needs to be uploaded. 
65 | :type results: list of ``FileMetaInfo`` tuples 66 | """ 67 | for file_meta_info in results: 68 | logging.info( 69 | 'Uploading the analysis results in the file "{0}" to the S3 ' 70 | 'bucket "{1}"'.format(file_meta_info.name, self._bucket_name), 71 | ) 72 | body = ( 73 | ByteBuffer(file_meta_info.content.getvalue().encode()) 74 | if IN_PY3 and isinstance(file_meta_info.content, StringBuffer) 75 | else file_meta_info.content 76 | ) 77 | self._s3_connection.put_object( 78 | Bucket=self._bucket_name, 79 | Key=file_meta_info.name, 80 | ContentType=file_meta_info.content_type, 81 | Body=body, 82 | ) 83 | -------------------------------------------------------------------------------- /amira/sqs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | from collections import namedtuple 7 | 8 | import boto3 9 | import simplejson 10 | 11 | 12 | # 10 is the maximum number of messages to read at once: 13 | # http://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_ReceiveMessage.html 14 | MAX_NUMBER_MESSAGES = 10 15 | 16 | 17 | CreatedObject = namedtuple('ObjectCreated', ['bucket_name', 'key_name']) 18 | 19 | 20 | class SqsHandler(object): 21 | """Retrieves the S3 event notifications about the objects created 22 | in the bucket for which the notifications were configured. 23 | 24 | :param region_name: The AWS region name where the SQS queue 25 | containing the S3 event notifications is 26 | configured. 27 | :type region_name: string 28 | :param queue_name: The name of the SQS queue containing the S3 29 | event notifications. 30 | :type queue_name: string 31 | """ 32 | 33 | def __init__(self, region_name, queue_name): 34 | """ Connects to the SQS queue in a given AWS region. 35 | 36 | :param region_name: The AWS region name. 37 | :type region_name: string 38 | :param queue_name: The SQS queue name. 39 | :type queue_name: string 40 | """ 41 | sqs_connection = boto3.resource('sqs', region_name=region_name) 42 | self.sqs_queue = sqs_connection.get_queue_by_name(QueueName=queue_name) 43 | logging.info( 44 | 'Successfully connected to {} SQS queue'.format(queue_name), 45 | ) 46 | 47 | def get_created_objects(self): 48 | """Retrieves the S3 event notifications about the objects 49 | created in the OSXCollector output bucket yields the (bucket 50 | name, key name) pairs describing these objects. 51 | """ 52 | messages = self.sqs_queue.receive_messages(MaxNumberOfMessages=MAX_NUMBER_MESSAGES) 53 | logging.info( 54 | 'Received {0} message(s) from the SQS queue'.format(len(messages)), 55 | ) 56 | if messages: 57 | for message in messages: 58 | objects_created = self._retrieve_created_objects_from_message(message) 59 | for object_created in objects_created: 60 | yield object_created 61 | message.delete() 62 | 63 | def _retrieve_created_objects_from_message(self, message): 64 | """Retrieves the bucket name and the key name, describing the 65 | created object, from the `Records` array in the SQS message. 66 | 67 | Yields each (bucket name, key name) pair as an `CreatedObject` 68 | named tuple. 69 | 70 | :param message: The SQS message. It should be in the JSON 71 | format. 72 | :type message: string 73 | """ 74 | body = simplejson.loads(message.body) 75 | if 'Records' not in body: 76 | logging.warning( 77 | '"Records" field not found in the SQS message. 
' 78 | 'Message body: {0}'.format(body), 79 | ) 80 | return [] 81 | return self._extract_created_objects_from_records(body['Records']) 82 | 83 | def _extract_created_objects_from_records(self, records): 84 | logging.info( 85 | 'Found {0} record(s) in the SQS message'.format(len(records)), 86 | ) 87 | for record in records: 88 | bucket_name = record['s3']['bucket']['name'] 89 | key_name = record['s3']['object']['key'] 90 | yield CreatedObject(bucket_name=bucket_name, key_name=key_name) 91 | -------------------------------------------------------------------------------- /amira_github_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/amira/0390d7969fdd64cda36996baf896d50d47648c80/amira_github_banner.png -------------------------------------------------------------------------------- /doc/component_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/amira/0390d7969fdd64cda36996baf896d50d47648c80/doc/component_diagram.png -------------------------------------------------------------------------------- /requirements-bootstrap.txt: -------------------------------------------------------------------------------- 1 | pip==20.3 2 | venv-update==3.2.4 3 | virtualenv==16.7.7 4 | wheel==0.32.3 5 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | coverage==4.5.4 3 | flake8==3.7.8 4 | mock==3.0.5 5 | pre-commit>=1.0.0 6 | pytest==4.6.5 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.14.62 2 | osxcollector_output_filters==1.1.1 3 | simplejson==3.16.0 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Even for a larger incident response team handling all of the repetitive tasks 5 | related to malware infections is a tedious task. Our malware analysts have 6 | spent a lot of time chasing digital forensics from potentially infected macOS 7 | systems, leveraging open source tools, like OSXCollector. Early on, we have 8 | automated some part of the analysis process, augmenting the initial set of 9 | digital forensics collected from the machines with the information gathered 10 | from the threat intelligence APIs. They helped us with additional information 11 | on potentially suspicious domains, URLs and file hashes. But our approach to 12 | the analysis still required a certain degree of configuration and manual 13 | maintenance that was consuming lots of attention from malware responders. 14 | 15 | Enter automation: turning all of your repetitive tasks in a scripted way that 16 | will help you deal faster with the incident discovery, forensic collection and 17 | analysis, with fewer possibilities to make a mistake. We went ahead and turned 18 | OSXCollector toolkit into AMIRA: Automated Malware Incident Response and 19 | Analysis service. AMIRA turns the forensic information gathered by OSXCollector 20 | into actionable response plan, suggesting the infection source as well as 21 | suspicious files and domains requiring a closer look. 
Furthermore, we 22 | integrated AMIRA with our incident response platform, making sure that as 23 | little interaction as necessary is required from the analyst to follow the 24 | investigation. Thanks to that, the incident response team members can focus on 25 | what they excel at: finding unusual patterns and the novel ways that malware 26 | was trying to sneak into the corporate infrastructure. 27 | """ 28 | from __future__ import absolute_import 29 | 30 | from setuptools import find_packages 31 | from setuptools import setup 32 | 33 | from amira import __version__ 34 | 35 | 36 | with open('README.md', 'r') as fh: 37 | long_description = fh.read() 38 | 39 | setup( 40 | name='amira', 41 | version=__version__, 42 | description='Automated Malware Incident Response and Analysis', 43 | long_description=long_description, 44 | long_description_content_type='text/markdown', 45 | author='Yelp Security', 46 | author_email='opensource@yelp.com', 47 | license='The MIT License (MIT)', 48 | url='https://github.com/Yelp/amira', 49 | setup_requires='setuptools', 50 | packages=find_packages(exclude=['tests']), 51 | provides=['amira'], 52 | install_requires=[ 53 | 'boto3', 54 | 'osxcollector_output_filters>=1.1.1', 55 | 'simplejson', 56 | ], 57 | ) 58 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | -------------------------------------------------------------------------------- /tests/amira_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import types 6 | 7 | from mock import ANY 8 | from mock import call 9 | from mock import MagicMock 10 | from mock import patch 11 | try: 12 | from cStringIO import StringIO as ByteBuffer 13 | except ImportError: 14 | from io import BytesIO as ByteBuffer 15 | 16 | from amira.amira import AMIRA 17 | from amira.data_processor import DataProcessor 18 | from amira.results_uploader import FileMetaInfo 19 | from amira.s3 import S3Handler 20 | from amira.sqs import CreatedObject 21 | from amira.sqs import SqsHandler 22 | 23 | 24 | class TestAmira(object): 25 | 26 | """Tests ``amira.amira.AMIRA`` class.""" 27 | 28 | def _patch_and_run_amira( 29 | self, region_name, queue_name, contents, created_objects, data_processor, 30 | ): 31 | """Patches all the external dependencies and runs AMIRA.""" 32 | self._results_uploader_mock = MagicMock() 33 | 34 | with patch.object( 35 | S3Handler, '__init__', autospec=True, return_value=None, 36 | ), patch.object( 37 | S3Handler, 'get_contents_as_string', autospec=True, side_effect=contents, 38 | ) as self._patched_get_contents_as_string, patch.object( 39 | SqsHandler, '__init__', autospec=True, return_value=None, 40 | ), patch.object( 41 | DataProcessor, 'get_buffer_size', return_value=1, 42 | ), patch.object( 43 | SqsHandler, 'get_created_objects', autospec=True, side_effect=created_objects, 44 | ) as self._patched_get_created_objects: 45 | amira_instance = AMIRA(region_name, queue_name) 46 | amira_instance.register_results_uploader(self._results_uploader_mock) 47 | amira_instance.register_data_processor(data_processor) 48 | amira_instance.run() 49 | 50 | def test_run(self): 51 | contents = [ 52 | b'New Petitions Against Tax', 53 | 
b'Building Code Under Fire', 54 | ] 55 | created_objects = [[ 56 | CreatedObject( 57 | bucket_name='amira-test', key_name='AMIRA-301.tar.gz', 58 | ), 59 | CreatedObject( 60 | bucket_name='amira-test', key_name='AMIRA-302.tar.gz', 61 | ), 62 | ]] 63 | 64 | mock_processor = DataProcessor() 65 | 66 | def mock_process_input(o, _): 67 | o._results = [FileMetaInfo('.tar.gz', ByteBuffer(b'1'), 'application/gzip')] 68 | return MagicMock() 69 | mock_processor.process_input = types.MethodType(mock_process_input, mock_processor) 70 | mock_processor.perform_analysis = MagicMock() 71 | region_name, queue_name = 'us-west-2', 'etaoin-shrdlu' 72 | self._patch_and_run_amira( 73 | region_name, queue_name, contents, created_objects, mock_processor, 74 | ) 75 | 76 | assert self._patched_get_created_objects.call_count == 1 77 | assert self._patched_get_contents_as_string.call_args_list == [ 78 | call(ANY, 'amira-test', 'AMIRA-301.tar.gz'), 79 | call(ANY, 'amira-test', 'AMIRA-302.tar.gz'), 80 | ] 81 | assert mock_processor.perform_analysis.call_count == 2 82 | 83 | # assert that the results uploader was called 84 | # with the expected arguments 85 | assert self._results_uploader_mock.upload_results.call_args_list == [ 86 | call([FileMetaInfo('AMIRA-301.tar.gz', ANY, 'application/gzip')]), 87 | call([FileMetaInfo('AMIRA-302.tar.gz', ANY, 'application/gzip')]), 88 | ] 89 | 90 | def test_run_wrong_key_name_suffix(self): 91 | created_objects = [[ 92 | CreatedObject(bucket_name='amira-test', key_name='MALWARE-301.txt'), 93 | ]] 94 | 95 | mock_processor = MagicMock() 96 | region_name, queue_name = 'us-west-2', 'cmfwyp-vbgkqj' 97 | self._patch_and_run_amira( 98 | region_name, queue_name, None, created_objects, mock_processor, 99 | ) 100 | 101 | assert 1 == self._patched_get_created_objects.call_count 102 | assert not self._patched_get_contents_as_string.called 103 | assert not self._results_uploader_mock.upload_results.called 104 | assert not mock_processor.perform_analysis.called 105 | assert not mock_processor.process_input.called 106 | 107 | def test_run_analyze_filter_exception(self): 108 | """Tests the exception handling while running the Analyze Filter.""" 109 | contents = [b'The European languages are members of the same family.'] 110 | created_objects = [[ 111 | CreatedObject( 112 | bucket_name='amira-test', key_name='MALWARE-303.tar.gz', 113 | ), 114 | ]] 115 | data_processor_mock = MagicMock() 116 | data_processor_mock.perform_analysis.side_effect = Exception 117 | region_name, queue_name = 'us-west-2', 'li-europan-lingues' 118 | self._patch_and_run_amira( 119 | region_name, queue_name, contents, created_objects, data_processor_mock, 120 | ) 121 | assert data_processor_mock.perform_analysis.called 122 | assert data_processor_mock.upload_results.called 123 | -------------------------------------------------------------------------------- /tests/data/mock_input.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/amira/0390d7969fdd64cda36996baf896d50d47648c80/tests/data/mock_input.tar.gz -------------------------------------------------------------------------------- /tests/data/s3_event_notifications.json: -------------------------------------------------------------------------------- 1 | [ 2 | 
{"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1561-2016_01_11-10_54_07.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 3 | {"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1562-2016_01_11-10_54_47.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 4 | {"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1563-2016_01_11-10_54_58.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 5 | {"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1564-2016_01_11-10_55_12.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 6 | 
{"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1565-2016_01_11-10_55_32.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 7 | {"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1566-2016_01_11-10_55_49.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 8 | {"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1567-2016_01_11-10_56_09.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]} 9 | ] 10 | -------------------------------------------------------------------------------- /tests/data/s3_test_event_notification.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"Service":"Amazon S5","Event":"s3:TestEvent","Time":"2016-06-09T18:43:01.130Z","Bucket":"godzilla","RequestId":"A311687676D7EF82","HostId":"egIGJdAQHPZ/ZsSjJoq6Dj0HYouGsorKLMZF3luXAD71o24DuwHrgSebkF8GpPIQ"} 3 | ] 4 | -------------------------------------------------------------------------------- /tests/data_processor_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | import tarfile 7 | 8 | import pytest 9 | from mock import ANY 10 | from mock import call 11 | from mock import MagicMock 12 | from mock import patch 13 | 14 | try: 15 | from cStringIO import StringIO as ByteBuffer 16 | except ImportError: 17 | from io import BytesIO as ByteBuffer 18 | 19 | from amira.results_uploader import FileMetaInfo 20 | from amira.data_processor import DataProcessor 21 | from amira.data_processor import OSXCollectorDataProcessor 22 | from amira.data_processor import OSXCollectorOutputExtractionError 23 | 24 | 25 | class TestDataProcessor(object): 26 | 27 | def 
test_get_buffer_size(self): 28 | assert DataProcessor.get_buffer_size(ByteBuffer(b'123' * 111)) == 333 29 | 30 | def test_upload_results(self): 31 | data = ByteBuffer(b'123') 32 | processor = DataProcessor() 33 | processor._results = [FileMetaInfo('_suff.txt', data, 'text/plain')] 34 | uploaders = [MagicMock(), MagicMock()] 35 | processor.upload_results('filename', uploaders) 36 | for u in uploaders: 37 | u.upload_results.assert_called_once_with( 38 | [FileMetaInfo('filename_suff.txt', data, 'text/plain')], 39 | ) 40 | 41 | 42 | class TestOSXCollectorDataProcessor(object): 43 | 44 | @pytest.fixture 45 | def tar_gz_mock(self): 46 | """Mocks tar.gz file content.""" 47 | tarfile.open = MagicMock() 48 | tarinfo_mock = MagicMock() 49 | tarinfo_mock.name = 'lorem_ipsum.json' 50 | tar_mock = tarfile.open.return_value 51 | tar_mock.__iter__.return_value = [tarinfo_mock] 52 | return tarinfo_mock 53 | 54 | def test_process_input(self): 55 | processor = OSXCollectorDataProcessor() 56 | with open('tests/data/mock_input.tar.gz', 'rb') as f: 57 | input_data = f.read() 58 | output = processor.process_input(input_data) 59 | assert output.read() == b'{"a":2}\n' 60 | assert len(processor._results) == 1 61 | 62 | def test_process_input_malformed_gz(self, tar_gz_mock): 63 | logging.error = MagicMock() 64 | processor = OSXCollectorDataProcessor() 65 | tarfile.open.side_effect = tarfile.ReadError('mock.tar.gz is not a gz file') 66 | processor.process_input(b'things') 67 | logging.error.assert_has_calls([call(u'Failed to read the archive: mock.tar.gz is not a gz file')]) 68 | 69 | def test_process_input_no_json(self, tar_gz_mock): 70 | processor = OSXCollectorDataProcessor() 71 | tar_gz_mock.name = 'lorem_ipsum.txt' 72 | 73 | with pytest.raises(OSXCollectorOutputExtractionError) as exc_info: 74 | processor.process_input(b'things') 75 | 76 | assert 'Expected 1 JSON file inside the OSXCollector output archive, ' \ 77 | 'but found 0 instead.' 
in str(exc_info.value) 78 | 79 | def test_perform_analysis(self): 80 | with patch('amira.data_processor.AnalyzeFilter') as mock_filter, \ 81 | patch('amira.data_processor.output_filter') as mock_run_filter: 82 | processor = OSXCollectorDataProcessor() 83 | processor.perform_analysis(b'123', {'a': 'b'}) 84 | mock_filter.assert_called_once_with( 85 | monochrome=True, 86 | html_output_file=ANY, 87 | text_output_file=ANY, 88 | data_feeds={'a': 'b'}, 89 | ) 90 | mock_run_filter._run_filter.assert_called_once_with( 91 | mock_filter.return_value, 92 | input_stream=b'123', 93 | output_stream=ANY, 94 | ) 95 | assert len(processor._results) == 3 96 | -------------------------------------------------------------------------------- /tests/s3_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | try: 6 | from cStringIO import StringIO as ByteBuffer 7 | except ImportError: 8 | from io import BytesIO as ByteBuffer 9 | 10 | import pytest 11 | from mock import MagicMock 12 | from mock import patch 13 | from mock import call 14 | 15 | from amira.results_uploader import FileMetaInfo 16 | from amira.s3 import S3Handler 17 | from amira.s3 import S3ResultsUploader 18 | 19 | 20 | class TestS3Handler(object): 21 | 22 | """Tests ``amira.s3.S3Handler`` class.""" 23 | 24 | @pytest.fixture 25 | def s3_handler(self): 26 | with patch('amira.s3.boto3') as mock_boto3: 27 | handler = S3Handler() 28 | mock_boto3.client.assert_called_once_with('s3') 29 | yield handler 30 | 31 | def test_get_contents_as_string(self, s3_handler): 32 | mock_contents = 'test key contents' 33 | s3_connection_mock = s3_handler._s3_connection 34 | s3_connection_mock.get_object.return_value = { 35 | 'Body': ByteBuffer(mock_contents.encode()), 36 | } 37 | contents = s3_handler.get_contents_as_string( 38 | 'amira-test', 'MALWARE-1564-2016_01_11-10_55_12.tar.gz', 39 | ) 40 | assert mock_contents == contents.decode() 41 | s3_connection_mock.get_object.assert_called_once_with( 42 | Bucket='amira-test', Key='MALWARE-1564-2016_01_11-10_55_12.tar.gz', 43 | ) 44 | 45 | 46 | class TestS3ResultsUploader(): 47 | 48 | """Tests ``amira.s3.S3ResultsUploader`` class.""" 49 | 50 | @pytest.fixture 51 | def s3_results_uploader(self): 52 | with patch('amira.s3.boto3') as mock_boto3: 53 | uploader = S3ResultsUploader('lorem-ipsum') 54 | mock_boto3.client.assert_called_once_with('s3') 55 | yield uploader 56 | 57 | def test_upload_results(self, s3_results_uploader): 58 | s3_connection_mock = s3_results_uploader._s3_connection 59 | fileobj_mock1 = MagicMock() 60 | fileobj_mock2 = MagicMock() 61 | results = [ 62 | FileMetaInfo('etaoin', fileobj_mock1, 'text/html; charset=UTF-8'), 63 | FileMetaInfo('shrdlu', fileobj_mock2, 'application/json'), 64 | ] 65 | s3_results_uploader.upload_results(results) 66 | s3_connection_mock.put_object.assert_has_calls([ 67 | call( 68 | Bucket='lorem-ipsum', 69 | Key='etaoin', 70 | ContentType='text/html; charset=UTF-8', 71 | Body=fileobj_mock1, 72 | ), 73 | call( 74 | Bucket='lorem-ipsum', 75 | Key='shrdlu', 76 | ContentType='application/json', 77 | Body=fileobj_mock2, 78 | ), 79 | ]) 80 | -------------------------------------------------------------------------------- /tests/sqs_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 
| import pytest 6 | import simplejson 7 | from mock import MagicMock 8 | from mock import patch 9 | 10 | from amira.sqs import SqsHandler 11 | 12 | 13 | TEST_DATA_DIR_PATH = 'tests/data' 14 | 15 | 16 | @pytest.fixture 17 | def sqs_handler(): 18 | with patch('amira.sqs.boto3') as mock_boto3: 19 | handler = SqsHandler('us-west-1', 'godzilla') 20 | mock_boto3.resource.assert_called_once_with('sqs', region_name='us-west-1') 21 | mock_boto3.resource.return_value.get_queue_by_name.assert_called_once_with( 22 | QueueName='godzilla', 23 | ) 24 | yield handler 25 | 26 | 27 | def read_s3_event_notifications_file(s3_event_notifications_file_path): 28 | with open(s3_event_notifications_file_path) as fp: 29 | s3_event_notifications = simplejson.load(fp) 30 | json_s3_event_notifications = [ 31 | simplejson.dumps(s3_event_notification) 32 | for s3_event_notification in s3_event_notifications 33 | ] 34 | return json_s3_event_notifications 35 | 36 | 37 | def create_s3_event_notification_message_mocks(s3_event_notifications_file_name): 38 | """Creates SQS queue message mocks that will return the JSON content of 39 | `s3_event_notifications_file_path` JSON file as the body of the message. 40 | """ 41 | s3_event_notifications_file_path = '{0}/{1}'.format( 42 | TEST_DATA_DIR_PATH, s3_event_notifications_file_name, 43 | ) 44 | json_s3_event_notifications = read_s3_event_notifications_file( 45 | s3_event_notifications_file_path, 46 | ) 47 | return [ 48 | MagicMock(body=json_s3_event_notification) 49 | for json_s3_event_notification in json_s3_event_notifications 50 | ] 51 | 52 | 53 | def mock_s3_event_notifications( 54 | mock_sqs_queue, s3_event_notifications_file_name, 55 | ): 56 | """`SqsHandler.get_created_objects()` is a generator, so we need to 57 | mock multiple values returned by `get_messages()` method. 58 | In this case only one as the test cases do not operate on more than 59 | one message. 60 | """ 61 | s3_event_notification_message_mocks = create_s3_event_notification_message_mocks( 62 | s3_event_notifications_file_name, 63 | ) 64 | mock_sqs_queue.receive_messages.side_effect = [s3_event_notification_message_mocks] 65 | return s3_event_notification_message_mocks 66 | 67 | 68 | class TestSqsHandler(object): 69 | 70 | def test_get_created_objects(self, sqs_handler): 71 | s3_event_notification_message_mocks = mock_s3_event_notifications( 72 | sqs_handler.sqs_queue, 's3_event_notifications.json', 73 | ) 74 | created_objects = sqs_handler.get_created_objects() 75 | actual_key_names = [ 76 | created_object.key_name 77 | for created_object in created_objects 78 | ] 79 | assert actual_key_names == [ 80 | 'AMIRA-1561-2016_01_11-10_54_07.tar.gz', 81 | 'AMIRA-1562-2016_01_11-10_54_47.tar.gz', 82 | 'AMIRA-1563-2016_01_11-10_54_58.tar.gz', 83 | 'AMIRA-1564-2016_01_11-10_55_12.tar.gz', 84 | 'AMIRA-1565-2016_01_11-10_55_32.tar.gz', 85 | 'AMIRA-1566-2016_01_11-10_55_49.tar.gz', 86 | 'AMIRA-1567-2016_01_11-10_56_09.tar.gz', 87 | ] 88 | for message_mock in s3_event_notification_message_mocks: 89 | message_mock.delete.assert_called_once_with() 90 | 91 | def test_get_created_objects_no_created_objects(self, sqs_handler): 92 | sqs_handler.sqs_queue.receive_messages.side_effect = [[]] 93 | created_objects = sqs_handler.get_created_objects() 94 | assert not list(created_objects) 95 | 96 | def test_get_created_objects_no_records(self, sqs_handler): 97 | """Tests the behavior of `get_created_objects()` method in case 98 | the message received from SQS does not contain the "Records" 99 | field in the message body. 
100 | """ 101 | mock_s3_event_notifications( 102 | sqs_handler.sqs_queue, 's3_test_event_notification.json', 103 | ) 104 | assert not list(sqs_handler.get_created_objects()) 105 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | project = amira 3 | envlist = py27,py36 4 | 5 | [testenv] 6 | deps = 7 | --only-binary=cryptography 8 | -rrequirements-dev.txt 9 | commands = 10 | flake8 . 11 | {envpython} --version 12 | coverage --version 13 | coverage run -m pytest --strict -rxs {posargs:tests} 14 | coverage report -m 15 | 16 | [testenv:pre-commit] 17 | deps = pre-commit>=1.0.0 18 | commands = pre-commit run --all-files 19 | 20 | [testenv:venv] 21 | envdir = virtualenv_run 22 | basepython = python3.6 23 | commands = 24 | 25 | [pytest] 26 | norecursedirs = .* virtualenv_run build 27 | 28 | [flake8] 29 | exclude = .git,__pycache__,.tox,virtualenv_run 30 | max_line_length = 140 31 | --------------------------------------------------------------------------------
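The README above stops at registering an S3 results uploader, but `amira/amira.py` also exposes `register_data_feed()` and `register_data_processor()`. The sketch below shows how the pieces in this snapshot could be wired together end to end; the region, queue name, bucket name, feed name and feed contents are illustrative placeholders, and whether the Analyze Filter actually consumes a feed registered under this particular name depends on the `osxcollector_output_filters` configuration.

```python
# Hypothetical wiring of the hooks exposed by amira/amira.py. Names such as
# 'AmiraS3EventNotifications', 'amira-results-bucket' and the feed contents
# are placeholders, not values shipped with the repository.
from amira.amira import AMIRA
from amira.data_processor import OSXCollectorDataProcessor
from amira.s3 import S3ResultsUploader


def domain_blacklist_feed():
    # Generator providing feed data; AMIRA passes the registered callables to
    # AnalyzeFilter via its data_feeds argument under the chosen feed name.
    for domain in ('example-malware-domain.com',):
        yield domain


amira = AMIRA('us-west-1', 'AmiraS3EventNotifications')
amira.register_results_uploader(S3ResultsUploader('amira-results-bucket'))
amira.register_data_feed('domain_blacklist', domain_blacklist_feed)
# Optional: OSXCollectorDataProcessor is already the default processor, so
# this call is redundant; a custom DataProcessor subclass implementing
# process_input(), perform_analysis() and upload_results() could be passed
# here in the same way.
amira.register_data_processor(OSXCollectorDataProcessor())
amira.run()
```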