├── .activate.sh ├── .coveragerc ├── .deactivate.sh ├── .github └── workflows │ ├── build.yaml │ └── publish.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .secrets.baseline ├── LICENSE.md ├── MANIFEST.in ├── Makefile ├── README.md ├── amira ├── __init__.py ├── amira.py ├── data_processor.py ├── results_uploader.py ├── s3.py └── sqs.py ├── amira_github_banner.png ├── doc └── component_diagram.png ├── requirements-bootstrap.txt ├── requirements-dev.txt ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── amira_test.py ├── data │ ├── mock_input.tar.gz │ ├── s3_event_notifications.json │ └── s3_test_event_notification.json ├── data_processor_test.py ├── s3_test.py └── sqs_test.py └── tox.ini /.activate.sh: -------------------------------------------------------------------------------- 1 | virtualenv_run/bin/activate -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = 4 | . 5 | omit = 6 | .tox/* 7 | virtualenv_run/* 8 | setup.py 9 | 10 | [report] 11 | show_missing = True 12 | skip_covered = False 13 | 14 | exclude_lines = 15 | # Have to re-enable the standard pragma 16 | \#\s*pragma: no cover 17 | 18 | # Don't complain if tests don't hit defensive assertion code: 19 | ^\s*raise AssertionError\b 20 | ^\s*raise NotImplementedError\b 21 | ^\s*return NotImplemented\b 22 | ^\s*raise$ 23 | 24 | # Don't complain if non-runnable code isn't run: 25 | ^if __name__ == ['"]__main__['"]:$ 26 | 27 | [html] 28 | directory = coverage-html 29 | 30 | # vim:ft=dosini 31 | -------------------------------------------------------------------------------- /.deactivate.sh: -------------------------------------------------------------------------------- 1 | deactivate 2 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | pre-commit: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v2 10 | with: 11 | submodules: true 12 | - uses: actions/setup-python@v2 13 | with: 14 | python-version: '3.7' 15 | architecture: x64 16 | 17 | - name: Install dependencies 18 | run: python -m pip install tox 19 | 20 | - name: Run pre-commit tox job 21 | run: tox -e pre-commit 22 | 23 | test: 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | os: [ubuntu-latest] 28 | python-version: ['3.5', '3.6', '3.7', '3.8'] 29 | 30 | name: Python ${{ matrix.python-version }} on ${{ matrix.os }} 31 | runs-on: ${{ matrix.os }} 32 | steps: 33 | - uses: actions/checkout@v2 34 | with: 35 | submodules: true 36 | - uses: actions/setup-python@v2 37 | with: 38 | python-version: ${{ matrix.python-version }} 39 | architecture: x64 40 | 41 | - name: Install dependencies 42 | run: python -m pip install tox 43 | 44 | - name: Run ${{ matrix.python }} tox job 45 | run: tox -e py 46 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish on PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout Repo 14 | uses: actions/checkout@v2 15 | 16 | - name: Setup Python 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: 
3.6 20 | 21 | - name: Install Python dependencies 22 | run: pip install wheel 23 | 24 | - name: Create a Wheel file and source distribution 25 | run: python setup.py sdist bdist_wheel 26 | 27 | - name: Publish distribution package to PyPI 28 | uses: pypa/gh-action-pypi-publish@v1.2.2 29 | with: 30 | user: __token__ 31 | password: ${{ secrets.PYPI_TOKEN }} 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | .idea 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | .venv/ 85 | venv/ 86 | ENV/ 87 | virtualenv_run/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | 95 | # macOS custom folder attributes 96 | .DS_Store 97 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: '^tests/output_filters/data/.*$' 2 | repos: 3 | - repo: git://github.com/pre-commit/pre-commit-hooks 4 | rev: v2.3.0 5 | hooks: 6 | - id: check-json 7 | - id: check-yaml 8 | - id: end-of-file-fixer 9 | - id: trailing-whitespace 10 | - id: name-tests-test 11 | - id: requirements-txt-fixer 12 | - id: double-quote-string-fixer 13 | - id: flake8 14 | - id: fix-encoding-pragma 15 | - repo: git://github.com/pre-commit/mirrors-autopep8 16 | rev: v1.4.4 17 | hooks: 18 | - id: autopep8 19 | - repo: git://github.com/asottile/reorder_python_imports 20 | rev: v1.7.0 21 | hooks: 22 | - id: reorder-python-imports 23 | args: [ 24 | '--add-import', 'from __future__ import absolute_import', 25 | '--add-import', 'from __future__ import unicode_literals', 26 | ] 27 | exclude: setup.py 28 | - repo: git://github.com/asottile/add-trailing-comma 29 | rev: v1.4.1 30 | hooks: 31 | - id: add-trailing-comma 32 | - repo: https://github.com/Yelp/detect-secrets 33 | rev: v0.12.7 34 | hooks: 35 | - id: detect-secrets 36 | args: ['--baseline', '.secrets.baseline'] 37 | exclude: .*tests/.*|\.pre-commit-config\.yaml 38 | language_version: python2.7 39 | -------------------------------------------------------------------------------- 
/.secrets.baseline: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": { 3 | "files": ".*tests/.*|\\.pre-commit-config\\.yaml", 4 | "lines": null 5 | }, 6 | "generated_at": "2019-10-02T10:15:23Z", 7 | "plugins_used": [ 8 | { 9 | "base64_limit": 4.5, 10 | "name": "Base64HighEntropyString" 11 | }, 12 | { 13 | "hex_limit": 3, 14 | "name": "HexHighEntropyString" 15 | }, 16 | { 17 | "name": "PrivateKeyDetector" 18 | } 19 | ], 20 | "results": { 21 | ".travis.yml": [ 22 | { 23 | "hashed_secret": "20bccf6b10adb3faf8ef4552f5ec21b2767501ac", 24 | "is_secret": false, 25 | "is_verified": false, 26 | "line_number": 33, 27 | "type": "Base64 High Entropy String" 28 | } 29 | ] 30 | }, 31 | "version": "0.12.7", 32 | "word_list": { 33 | "file": null, 34 | "hash": null 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 Yelp Inc. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 9 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.md 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DELETE_ON_ERROR: 2 | 3 | all: install-hooks test 4 | 5 | test: 6 | tox 7 | 8 | venv: 9 | tox -evenv 10 | 11 | install-hooks: venv 12 | pre-commit install -f --install-hooks 13 | 14 | clean: 15 | rm -rf build/ dist/ .tox/ virtualenv_run/ *.egg-info/ 16 | rm -f .coverage 17 | find . -name '*.pyc' -delete 18 | find . 
-name '__pycache__' -delete 19 | 20 | .PHONY: all test venv install-hooks clean 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![amira](https://raw.githubusercontent.com/Yelp/amira/master/amira_github_banner.png) 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/amira.svg)](https://pypi.python.org/pypi/amira) 4 | [![Build Status](https://github.com/Yelp/amira/actions/workflows/build.yaml/badge.svg)](https://github.com/Yelp/amira/actions/workflows/build.yaml) 5 | 6 | # AMIRA: Automated Malware Incident Response & Analysis 7 | 8 | AMIRA is a service for automatically running the analysis on the 9 | [OSXCollector](https://github.com/Yelp/osxcollector) output files. 10 | The automated analysis is performed via 11 | [OSXCollector Output Filters](https://github.com/Yelp/osxcollector_output_filters), 12 | in particular *The One Filter to Rule Them All*: the 13 | [Analyze Filter](https://github.com/Yelp/osxcollector_output_filters#analyzefilter---the-one-filter-to-rule-them-all). 14 | AMIRA takes care of retrieving the output files from an S3 bucket, 15 | running the Analyze Filter and then uploading the results 16 | of the analysis back to S3 (although one could envision as well 17 | attaching them to the related JIRA ticket). 18 | 19 | ## Prerequisites 20 | 21 | ### tox 22 | 23 | The following steps assume you have [tox](http://tox.readthedocs.org/) 24 | installed on your machine. 25 | 26 | If this is not the case, please run: 27 | ```bash 28 | $ sudo pip install tox 29 | ``` 30 | 31 | ### OSXCollector Output Filters configuration file 32 | 33 | AMIRA uses OSXCollector Output Filters to do the actual analysis, 34 | so you will need to have a valid `osxcollector.yaml` 35 | configuration file in the working directory. 36 | The example configuration file can be found in the 37 | [OSXCollector Output Filters](https://github.com/Yelp/osxcollector_output_filters/blob/master/osxcollector.yaml.example). 38 | 39 | The configuration file mentions the location of the file hash and the domain 40 | blacklists. 41 | Make sure that the blacklist locations mentioned in the configuration file are 42 | also available when running AMIRA. 43 | 44 | ### AWS credentials 45 | 46 | AMIRA uses boto3 to interface with AWS. 47 | You can supply credentials using either of the possible 48 | [configuration options](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). 49 | 50 | The credentials should allow reading and deleting SQS messages 51 | from the SQS queue specified in the AMIRA config as well as 52 | the read access to the objects in the S3 bucket where the OSXCollector 53 | output files are stored. 54 | To be able to upload the analysis results back to the S3 bucket 55 | specified in the AMIRA configuration file, the credentials should 56 | also allow write access to this bucket. 57 | 58 | ## AMIRA Architecture 59 | 60 | The service uses the 61 | [S3 bucket event notifications](http://docs.aws.amazon.com/AmazonS3/latest/dev/NotificationHowTo.html) 62 | to trigger the analysis. 63 | You will need to configure an S3 bucket for the OSXCollector output files, 64 | so that when a file is added there the notification will be sent to an SQS queue 65 | (`AmiraS3EventNotifications` in the picture below). 66 | AMIRA periodically checks the queue for any new messages 67 | and upon receiving one it will fetch the OSXCollector output file from the S3 68 | bucket. 
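The bucket-to-queue notification wiring described in this section is not part of this repository. As a rough, illustrative sketch (the bucket name, account ID and region are placeholders, and the SQS queue must separately have a policy allowing S3 to send messages to it), the hookup could be configured with boto3 along these lines:

```python
# Illustrative only: point ObjectCreated notifications from the OSXCollector
# output bucket at the AmiraS3EventNotifications queue. The bucket name,
# account ID and region below are placeholders, not values from this repo.
import boto3

s3 = boto3.client('s3')
s3.put_bucket_notification_configuration(
    Bucket='osxcollector-output-bucket',
    NotificationConfiguration={
        'QueueConfigurations': [
            {
                'QueueArn': 'arn:aws:sqs:us-west-1:123456789012:AmiraS3EventNotifications',
                'Events': ['s3:ObjectCreated:*'],
            },
        ],
    },
)
```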
69 | It will then run the Analyze Filter on the retrieved file. 70 | 71 | The Analyze Filter runs all the filters contained in the OSXCollector Output 72 | Filters package sequentially. Some of them communicate with the external 73 | resources, like domain and hashes blacklists (or whitelists) and threat intel 74 | APIs, e.g. [VirusTotal](https://github.com/Yelp/threat_intel#virustotal-api), 75 | [OpenDNS Investigate](https://github.com/Yelp/threat_intel#opendns-investigate-api) 76 | or [ShadowServer](https://github.com/Yelp/threat_intel#shadowserver-api). 77 | The original OSXCollector output is extended with all of this information and 78 | the very last filter run by the Analyze Filter summarizes all of the findings 79 | into a human-readable form. After the filter finishes running, the results of 80 | the analysis will be uploaded to the Analysis Results S3 bucket. 81 | 82 | The overview of the whole process and the system components involved in it are 83 | depicted below: 84 | 85 | ![component diagram](https://github.com/Yelp/amira/raw/master/doc/component_diagram.png "Component Diagram") 86 | 87 | ## Using AMIRA 88 | 89 | The main entry point to AMIRA is in the `amira/amira.py` module. 90 | You will first need to create an instance of AMIRA class by providing the AWS 91 | region name, where the SQS queue with the event notifications for the 92 | OSXCollector output bucket is, and the SQS queue name: 93 | 94 | ```python 95 | from amira.amira import AMIRA 96 | 97 | amira = AMIRA('us-west-1', 'AmiraS3EventNotifications') 98 | ``` 99 | 100 | Then you can register the analysis results uploader, e.g. the S3 results 101 | uploader: 102 | 103 | ```python 104 | from amira.s3 import S3ResultsUploader 105 | 106 | s3_results_uploader = S3ResultsUploader('amira-results-bucket') 107 | amira.register_results_uploader(s3_results_uploader) 108 | ``` 109 | 110 | Finally, run AMIRA: 111 | ```python 112 | amira.run() 113 | ``` 114 | 115 | Go get some coffee, sit back, relax and wait till the analysis results pop up 116 | in the S3 bucket! 117 | -------------------------------------------------------------------------------- /amira/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | __version__ = '2.0.3' 6 | -------------------------------------------------------------------------------- /amira/amira.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | 7 | from amira.data_processor import OSXCollectorDataProcessor 8 | from amira.s3 import S3Handler 9 | from amira.sqs import SqsHandler 10 | 11 | 12 | class AMIRA(object): 13 | """Runs the automated analysis based on the new elements in an S3 14 | bucket: 15 | 1. Receives the messages from the SQS queue about the new 16 | objects in the S3 bucket. 17 | 2. Retrieves the objects (OSXCollector output files) from the 18 | bucket. 19 | 3. Runs the Analayze Filter on the retrieved OSXCollector 20 | output. 21 | 4. Uploads the analysis results. 22 | 23 | JIRA integration is optional. If any of the JIRA parameters 24 | (`jira_server`, `jira_user`, `jira_password` or `jira_project`) 25 | is not supplied or `None`, attaching the analysis results to a JIRA 26 | issue will be skipped. 
27 | 28 | :param region_name: The AWS region name where the SQS queue 29 | containing the S3 event notifications is 30 | configured. 31 | :type region_name: string 32 | :param queue_name: The name of the SQS queue containing the S3 33 | event notifications. 34 | :type queue_name: string 35 | """ 36 | 37 | def __init__(self, region_name, queue_name): 38 | self._sqs_handler = SqsHandler(region_name, queue_name) 39 | self._s3_handler = S3Handler() 40 | self._results_uploader = [] 41 | self._data_feeds = {} 42 | self._data_processor = OSXCollectorDataProcessor() 43 | 44 | def register_results_uploader(self, results_uploader): 45 | """Registers results uploader. 46 | 47 | Results uploader will upload the analysis results and the 48 | summary to a specific destination after the analysis is 49 | finished. 50 | """ 51 | self._results_uploader.append(results_uploader) 52 | 53 | def register_data_feed(self, feed_name, generator): 54 | """Register data input which to be used by the OsXCollector filters 55 | 56 | :param feed_name: Name of the data feed 57 | :param generator: Generator function providing the data 58 | """ 59 | self._data_feeds[feed_name] = generator 60 | 61 | def register_data_processor(self, processor): 62 | """Registers DataProcessor object to process and analyze input data from S3. 63 | If no processor is registered Amira will fall back using the default 64 | OSXCollector result processor. 65 | 66 | :param processor: DataProcessor object instance 67 | """ 68 | self._data_processor = processor 69 | 70 | def run(self): 71 | """Fetches the OSXCollector output from an S3 bucket based on 72 | the S3 ObjectCreated event notifications and runs the Analyze 73 | Filter on the output file. 74 | Once the analysis is finished the output and the "very readable 75 | output" files are uploaded to the target S3 bucket. 76 | """ 77 | created_objects = self._sqs_handler.get_created_objects() 78 | 79 | for created_object in created_objects: 80 | if created_object.key_name.endswith('.tar.gz'): 81 | self._process_created_object(created_object) 82 | else: 83 | logging.warning( 84 | 'S3 object {0} name should end with ".tar.gz"' 85 | .format(created_object.key_name), 86 | ) 87 | 88 | def _process_created_object(self, created_object): 89 | """Fetches the object from an S3 bucket and runs the analysis. 90 | Then it sends the results to the target S3 bucket and attaches 91 | them to the JIRA ticket. 92 | """ 93 | # fetch forensic data from the S3 bucket 94 | forensic_output = self._s3_handler.get_contents_as_string( 95 | created_object.bucket_name, created_object.key_name, 96 | ) 97 | 98 | try: 99 | processed_input = self._data_processor.process_input(forensic_output) 100 | if processed_input: 101 | self._data_processor.perform_analysis(processed_input, self._data_feeds) 102 | except Exception as exc: 103 | # Log the exception and do not try any recovery. 104 | # The message that caused the exception will be deleted from the 105 | # SQS queue to prevent the same exception from happening in the 106 | # future. 
107 | logging.warning( 108 | 'Unexpected error while running the Analyze Filter for the ' 109 | 'object {}: {}'.format(created_object.key_name, exc), 110 | ) 111 | try: 112 | self._data_processor.upload_results( 113 | created_object.key_name[:-7], self._results_uploader, 114 | ) 115 | except Exception: 116 | logging.exception( 117 | 'Unexpected error while uploading results for the ' 118 | 'object: {0}'.format(created_object.key_name), 119 | ) 120 | -------------------------------------------------------------------------------- /amira/data_processor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | import os 7 | import tarfile 8 | 9 | try: 10 | from cStringIO import StringIO as ByteBuffer 11 | from cStringIO import StringIO as StringBuffer 12 | except ImportError: 13 | from io import BytesIO as ByteBuffer 14 | from io import StringIO as StringBuffer 15 | 16 | from osxcollector.output_filters.analyze import AnalyzeFilter 17 | from osxcollector.output_filters.base_filters import output_filter 18 | 19 | from amira.results_uploader import FileMetaInfo 20 | 21 | 22 | class DataProcessor(object): 23 | 24 | def __init__(self): 25 | # List to store processing outputs 26 | self._results = [] 27 | 28 | def process_input(self, tardata): 29 | """Process input TAR file 30 | 31 | :param tardata: TAR byte stream 32 | :return: processed data file stream 33 | """ 34 | raise NotImplementedError() 35 | 36 | def perform_analysis(self, input_stream, data_feeds=None): 37 | """Perform analysis of forensic input. 38 | Analysis results should be handled as internal object state 39 | 40 | :param input_stream: forensic data 41 | :param data_feeds: additional data feeds which may be required in the analysis 42 | """ 43 | raise NotImplementedError() 44 | 45 | def upload_results(self, file_basename, result_uploaders): 46 | """Upload forensic results. 47 | These must be stored as FileMetaInfo objects in the `_results` list attribute 48 | 49 | :param file_basename: Basename used to generate output filenames (prepended) 50 | :param result_uploaders: List of Uploader objects to invoke 51 | """ 52 | results = [ 53 | FileMetaInfo(file_basename + res.name, res.content, res.content_type) for res in self._results 54 | if isinstance(res, FileMetaInfo) and DataProcessor.get_buffer_size(res.content) > 0 55 | ] 56 | if results: 57 | for res_uploader in result_uploaders: 58 | for res in results: 59 | res.content.seek(0) 60 | res_uploader.upload_results(results) 61 | else: 62 | logging.warning('No results to upload for {}'.format(file_basename)) 63 | 64 | @staticmethod 65 | def get_buffer_size(data_buffer): 66 | """Get byte size of file-like object 67 | 68 | :param data_buffer: file-like object 69 | :return: total size in bytes 70 | """ 71 | data_buffer.seek(0, os.SEEK_END) 72 | size = data_buffer.tell() 73 | data_buffer.seek(0) 74 | return size 75 | 76 | 77 | class OSXCollectorDataProcessor(DataProcessor): 78 | 79 | def process_input(self, tardata): 80 | """Extracts JSON file containing the OSXCollector output from 81 | tar.gz archive. It will look in the archive contents for the 82 | file with the extension ".json". If no file with this extension 83 | is found in the archive or more than one JSON file is found, it 84 | will raise `OSXCollectorOutputExtractionError`. 
85 | 86 | :param tardata: Input TAR archive data 87 | """ 88 | self._results = [FileMetaInfo('.tar.gz', ByteBuffer(tardata), 'application/gzip')] 89 | # create a file-like object based on the S3 object contents as string 90 | fileobj = ByteBuffer(tardata) 91 | tar = None 92 | try: 93 | tar = tarfile.open(mode='r:gz', fileobj=fileobj) 94 | except tarfile.ReadError as ter: 95 | logging.error('Failed to read the archive: {}'.format(ter)) 96 | return 97 | 98 | json_tarinfo = [t for t in tar if t.name.endswith('.json')] 99 | 100 | if len(json_tarinfo) != 1: 101 | raise OSXCollectorOutputExtractionError( 102 | 'Expected 1 JSON file inside the OSXCollector output archive, ' 103 | 'but found {0} instead.'.format(len(json_tarinfo)), 104 | ) 105 | 106 | tarinfo = json_tarinfo[0] 107 | logging.info('Extracted OSXCollector output JSON file {0}'.format(tarinfo.name)) 108 | return tar.extractfile(tarinfo) 109 | 110 | def perform_analysis(self, input_stream, data_feeds=None): 111 | """Runs Analyze Filter on the OSXCollector output retrieved 112 | from an S3 bucket. 113 | 114 | :param input_stream: Input data stream on which filters should be ran 115 | :param data_feeds: black/whitelist data feeds 116 | """ 117 | analysis_output = StringBuffer() 118 | text_analysis_summary = ByteBuffer() 119 | html_analysis_summary = ByteBuffer() 120 | 121 | analyze_filter = AnalyzeFilter( 122 | monochrome=True, 123 | text_output_file=text_analysis_summary, 124 | html_output_file=html_analysis_summary, 125 | data_feeds=data_feeds or {}, 126 | ) 127 | 128 | output_filter._run_filter( 129 | analyze_filter, 130 | input_stream=input_stream, 131 | output_stream=analysis_output, 132 | ) 133 | 134 | # rewind the output files 135 | analysis_output.seek(0) 136 | text_analysis_summary.seek(0) 137 | html_analysis_summary.seek(0) 138 | 139 | self._results += [ 140 | FileMetaInfo('_analysis.json', analysis_output, 'application/json'), 141 | FileMetaInfo('_summary.txt', text_analysis_summary, 'text/plain'), 142 | FileMetaInfo('_summary.html', html_analysis_summary, 'text/html; charset=UTF-8'), 143 | ] 144 | 145 | 146 | class OSXCollectorOutputExtractionError(Exception): 147 | """Raised when an unexpected number of JSON files is found in the 148 | OSXCollector output archive. 149 | """ 150 | pass 151 | -------------------------------------------------------------------------------- /amira/results_uploader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | from collections import namedtuple 6 | 7 | 8 | FileMetaInfo = namedtuple('FileMetaInfo', ['name', 'content', 'content_type']) 9 | 10 | 11 | class ResultsUploader(object): 12 | 13 | """Parent class for the AMIRA results uploaders. Results uploaders 14 | should expose a single method, ``upload_results()``, that should 15 | take a list of ``FileMetaInfo`` tuples. 16 | """ 17 | 18 | def upload_results(self, results): 19 | """Uploads the analysis results to a desired destination. 20 | 21 | :param results: The list containing the meta info (name, 22 | content and content-type) of the files which 23 | needs to be uploaded. 
24 | :type results: list of ``FileMetaInfo`` tuples 25 | """ 26 | raise NotImplementedError( 27 | 'Derived classes must implement "upload_results()".', 28 | ) 29 | -------------------------------------------------------------------------------- /amira/s3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | 7 | try: 8 | from cStringIO import StringIO as ByteBuffer 9 | from cStringIO import StringIO as StringBuffer 10 | IN_PY3 = False 11 | except ImportError: 12 | from io import BytesIO as ByteBuffer 13 | from io import StringIO as StringBuffer 14 | IN_PY3 = True 15 | 16 | import boto3 17 | 18 | from amira.results_uploader import ResultsUploader 19 | 20 | 21 | class S3Handler(object): 22 | """Handles the operations with S3, like retrieving the key 23 | (object) contents from a bucket and creating a new key 24 | (object) with the contents of a given file. 25 | AWS uses the ambiguous term "key" to describe the objects 26 | inside the S3 bucket. They are unrelated to AWS keys used to access 27 | the resources. 28 | """ 29 | 30 | def __init__(self): 31 | self._s3_connection = boto3.client('s3') 32 | 33 | def get_contents_as_string(self, bucket_name, key_name): 34 | """Retrieves the S3 key (object) contents. 35 | 36 | :param bucket_name: The S3 bucket name. 37 | :type bucket_name: string 38 | :param key_name: The S3 key (object) name. 39 | :type key_name: string 40 | :returns: The key (object) contents as a bytes (str in py2). 41 | :rtype: bytes 42 | """ 43 | response = self._s3_connection.get_object(Bucket=bucket_name, Key=key_name) 44 | return response['Body'].read() 45 | 46 | 47 | class S3ResultsUploader(ResultsUploader): 48 | """Uploads the analysis results to an S3 bucket. 49 | 50 | :param bucket_name: The name of the S3 bucket where the analysis 51 | results will be uploaded. 52 | :type bucket_name: string 53 | """ 54 | 55 | def __init__(self, bucket_name): 56 | self._bucket_name = bucket_name 57 | self._s3_connection = boto3.client('s3') 58 | 59 | def upload_results(self, results): 60 | """Uploads the analysis results to an S3 bucket. 61 | 62 | :param results: The list containing the meta info (name, 63 | content and content-type) of the files which 64 | needs to be uploaded. 
65 | :type results: list of ``FileMetaInfo`` tuples 66 | """ 67 | for file_meta_info in results: 68 | logging.info( 69 | 'Uploading the analysis results in the file "{0}" to the S3 ' 70 | 'bucket "{1}"'.format(file_meta_info.name, self._bucket_name), 71 | ) 72 | body = ( 73 | ByteBuffer(file_meta_info.content.getvalue().encode()) 74 | if IN_PY3 and isinstance(file_meta_info.content, StringBuffer) 75 | else file_meta_info.content 76 | ) 77 | self._s3_connection.put_object( 78 | Bucket=self._bucket_name, 79 | Key=file_meta_info.name, 80 | ContentType=file_meta_info.content_type, 81 | Body=body, 82 | ) 83 | -------------------------------------------------------------------------------- /amira/sqs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | from collections import namedtuple 7 | 8 | import boto3 9 | import simplejson 10 | 11 | 12 | # 10 is the maximum number of messages to read at once: 13 | # http://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_ReceiveMessage.html 14 | MAX_NUMBER_MESSAGES = 10 15 | 16 | 17 | CreatedObject = namedtuple('ObjectCreated', ['bucket_name', 'key_name']) 18 | 19 | 20 | class SqsHandler(object): 21 | """Retrieves the S3 event notifications about the objects created 22 | in the bucket for which the notifications were configured. 23 | 24 | :param region_name: The AWS region name where the SQS queue 25 | containing the S3 event notifications is 26 | configured. 27 | :type region_name: string 28 | :param queue_name: The name of the SQS queue containing the S3 29 | event notifications. 30 | :type queue_name: string 31 | """ 32 | 33 | def __init__(self, region_name, queue_name): 34 | """ Connects to the SQS queue in a given AWS region. 35 | 36 | :param region_name: The AWS region name. 37 | :type region_name: string 38 | :param queue_name: The SQS queue name. 39 | :type queue_name: string 40 | """ 41 | sqs_connection = boto3.resource('sqs', region_name=region_name) 42 | self.sqs_queue = sqs_connection.get_queue_by_name(QueueName=queue_name) 43 | logging.info( 44 | 'Successfully connected to {} SQS queue'.format(queue_name), 45 | ) 46 | 47 | def get_created_objects(self): 48 | """Retrieves the S3 event notifications about the objects 49 | created in the OSXCollector output bucket yields the (bucket 50 | name, key name) pairs describing these objects. 51 | """ 52 | messages = self.sqs_queue.receive_messages(MaxNumberOfMessages=MAX_NUMBER_MESSAGES) 53 | logging.info( 54 | 'Received {0} message(s) from the SQS queue'.format(len(messages)), 55 | ) 56 | if messages: 57 | for message in messages: 58 | objects_created = self._retrieve_created_objects_from_message(message) 59 | for object_created in objects_created: 60 | yield object_created 61 | message.delete() 62 | 63 | def _retrieve_created_objects_from_message(self, message): 64 | """Retrieves the bucket name and the key name, describing the 65 | created object, from the `Records` array in the SQS message. 66 | 67 | Yields each (bucket name, key name) pair as an `CreatedObject` 68 | named tuple. 69 | 70 | :param message: The SQS message. It should be in the JSON 71 | format. 72 | :type message: string 73 | """ 74 | body = simplejson.loads(message.body) 75 | if 'Records' not in body: 76 | logging.warning( 77 | '"Records" field not found in the SQS message. 
' 78 | 'Message body: {0}'.format(body), 79 | ) 80 | return [] 81 | return self._extract_created_objects_from_records(body['Records']) 82 | 83 | def _extract_created_objects_from_records(self, records): 84 | logging.info( 85 | 'Found {0} record(s) in the SQS message'.format(len(records)), 86 | ) 87 | for record in records: 88 | bucket_name = record['s3']['bucket']['name'] 89 | key_name = record['s3']['object']['key'] 90 | yield CreatedObject(bucket_name=bucket_name, key_name=key_name) 91 | -------------------------------------------------------------------------------- /amira_github_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/amira/0390d7969fdd64cda36996baf896d50d47648c80/amira_github_banner.png -------------------------------------------------------------------------------- /doc/component_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/amira/0390d7969fdd64cda36996baf896d50d47648c80/doc/component_diagram.png -------------------------------------------------------------------------------- /requirements-bootstrap.txt: -------------------------------------------------------------------------------- 1 | pip==20.3 2 | venv-update==3.2.4 3 | virtualenv==16.7.7 4 | wheel==0.32.3 5 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | coverage==4.5.4 3 | flake8==3.7.8 4 | mock==3.0.5 5 | pre-commit>=1.0.0 6 | pytest==4.6.5 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.14.62 2 | osxcollector_output_filters==1.1.1 3 | simplejson==3.16.0 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Even for a larger incident response team handling all of the repetitive tasks 5 | related to malware infections is a tedious task. Our malware analysts have 6 | spent a lot of time chasing digital forensics from potentially infected macOS 7 | systems, leveraging open source tools, like OSXCollector. Early on, we have 8 | automated some part of the analysis process, augmenting the initial set of 9 | digital forensics collected from the machines with the information gathered 10 | from the threat intelligence APIs. They helped us with additional information 11 | on potentially suspicious domains, URLs and file hashes. But our approach to 12 | the analysis still required a certain degree of configuration and manual 13 | maintenance that was consuming lots of attention from malware responders. 14 | 15 | Enter automation: turning all of your repetitive tasks in a scripted way that 16 | will help you deal faster with the incident discovery, forensic collection and 17 | analysis, with fewer possibilities to make a mistake. We went ahead and turned 18 | OSXCollector toolkit into AMIRA: Automated Malware Incident Response and 19 | Analysis service. AMIRA turns the forensic information gathered by OSXCollector 20 | into actionable response plan, suggesting the infection source as well as 21 | suspicious files and domains requiring a closer look. 
Furthermore, we 22 | integrated AMIRA with our incident response platform, making sure that as 23 | little interaction as necessary is required from the analyst to follow the 24 | investigation. Thanks to that, the incident response team members can focus on 25 | what they excel at: finding unusual patterns and the novel ways that malware 26 | was trying to sneak into the corporate infrastructure. 27 | """ 28 | from __future__ import absolute_import 29 | 30 | from setuptools import find_packages 31 | from setuptools import setup 32 | 33 | from amira import __version__ 34 | 35 | 36 | with open('README.md', 'r') as fh: 37 | long_description = fh.read() 38 | 39 | setup( 40 | name='amira', 41 | version=__version__, 42 | description='Automated Malware Incident Response and Analysis', 43 | long_description=long_description, 44 | long_description_content_type='text/markdown', 45 | author='Yelp Security', 46 | author_email='opensource@yelp.com', 47 | license='The MIT License (MIT)', 48 | url='https://github.com/Yelp/amira', 49 | setup_requires='setuptools', 50 | packages=find_packages(exclude=['tests']), 51 | provides=['amira'], 52 | install_requires=[ 53 | 'boto3', 54 | 'osxcollector_output_filters>=1.1.1', 55 | 'simplejson', 56 | ], 57 | ) 58 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | -------------------------------------------------------------------------------- /tests/amira_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import types 6 | 7 | from mock import ANY 8 | from mock import call 9 | from mock import MagicMock 10 | from mock import patch 11 | try: 12 | from cStringIO import StringIO as ByteBuffer 13 | except ImportError: 14 | from io import BytesIO as ByteBuffer 15 | 16 | from amira.amira import AMIRA 17 | from amira.data_processor import DataProcessor 18 | from amira.results_uploader import FileMetaInfo 19 | from amira.s3 import S3Handler 20 | from amira.sqs import CreatedObject 21 | from amira.sqs import SqsHandler 22 | 23 | 24 | class TestAmira(object): 25 | 26 | """Tests ``amira.amira.AMIRA`` class.""" 27 | 28 | def _patch_and_run_amira( 29 | self, region_name, queue_name, contents, created_objects, data_processor, 30 | ): 31 | """Patches all the external dependencies and runs AMIRA.""" 32 | self._results_uploader_mock = MagicMock() 33 | 34 | with patch.object( 35 | S3Handler, '__init__', autospec=True, return_value=None, 36 | ), patch.object( 37 | S3Handler, 'get_contents_as_string', autospec=True, side_effect=contents, 38 | ) as self._patched_get_contents_as_string, patch.object( 39 | SqsHandler, '__init__', autospec=True, return_value=None, 40 | ), patch.object( 41 | DataProcessor, 'get_buffer_size', return_value=1, 42 | ), patch.object( 43 | SqsHandler, 'get_created_objects', autospec=True, side_effect=created_objects, 44 | ) as self._patched_get_created_objects: 45 | amira_instance = AMIRA(region_name, queue_name) 46 | amira_instance.register_results_uploader(self._results_uploader_mock) 47 | amira_instance.register_data_processor(data_processor) 48 | amira_instance.run() 49 | 50 | def test_run(self): 51 | contents = [ 52 | b'New Petitions Against Tax', 53 | 
b'Building Code Under Fire', 54 | ] 55 | created_objects = [[ 56 | CreatedObject( 57 | bucket_name='amira-test', key_name='AMIRA-301.tar.gz', 58 | ), 59 | CreatedObject( 60 | bucket_name='amira-test', key_name='AMIRA-302.tar.gz', 61 | ), 62 | ]] 63 | 64 | mock_processor = DataProcessor() 65 | 66 | def mock_process_input(o, _): 67 | o._results = [FileMetaInfo('.tar.gz', ByteBuffer(b'1'), 'application/gzip')] 68 | return MagicMock() 69 | mock_processor.process_input = types.MethodType(mock_process_input, mock_processor) 70 | mock_processor.perform_analysis = MagicMock() 71 | region_name, queue_name = 'us-west-2', 'etaoin-shrdlu' 72 | self._patch_and_run_amira( 73 | region_name, queue_name, contents, created_objects, mock_processor, 74 | ) 75 | 76 | assert self._patched_get_created_objects.call_count == 1 77 | assert self._patched_get_contents_as_string.call_args_list == [ 78 | call(ANY, 'amira-test', 'AMIRA-301.tar.gz'), 79 | call(ANY, 'amira-test', 'AMIRA-302.tar.gz'), 80 | ] 81 | assert mock_processor.perform_analysis.call_count == 2 82 | 83 | # assert that the results uploader was called 84 | # with the expected arguments 85 | assert self._results_uploader_mock.upload_results.call_args_list == [ 86 | call([FileMetaInfo('AMIRA-301.tar.gz', ANY, 'application/gzip')]), 87 | call([FileMetaInfo('AMIRA-302.tar.gz', ANY, 'application/gzip')]), 88 | ] 89 | 90 | def test_run_wrong_key_name_suffix(self): 91 | created_objects = [[ 92 | CreatedObject(bucket_name='amira-test', key_name='MALWARE-301.txt'), 93 | ]] 94 | 95 | mock_processor = MagicMock() 96 | region_name, queue_name = 'us-west-2', 'cmfwyp-vbgkqj' 97 | self._patch_and_run_amira( 98 | region_name, queue_name, None, created_objects, mock_processor, 99 | ) 100 | 101 | assert 1 == self._patched_get_created_objects.call_count 102 | assert not self._patched_get_contents_as_string.called 103 | assert not self._results_uploader_mock.upload_results.called 104 | assert not mock_processor.perform_analysis.called 105 | assert not mock_processor.process_input.called 106 | 107 | def test_run_analyze_filter_exception(self): 108 | """Tests the exception handling while running the Analyze Filter.""" 109 | contents = [b'The European languages are members of the same family.'] 110 | created_objects = [[ 111 | CreatedObject( 112 | bucket_name='amira-test', key_name='MALWARE-303.tar.gz', 113 | ), 114 | ]] 115 | data_processor_mock = MagicMock() 116 | data_processor_mock.perform_analysis.side_effect = Exception 117 | region_name, queue_name = 'us-west-2', 'li-europan-lingues' 118 | self._patch_and_run_amira( 119 | region_name, queue_name, contents, created_objects, data_processor_mock, 120 | ) 121 | assert data_processor_mock.perform_analysis.called 122 | assert data_processor_mock.upload_results.called 123 | -------------------------------------------------------------------------------- /tests/data/mock_input.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/amira/0390d7969fdd64cda36996baf896d50d47648c80/tests/data/mock_input.tar.gz -------------------------------------------------------------------------------- /tests/data/s3_event_notifications.json: -------------------------------------------------------------------------------- 1 | [ 2 | 
{"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1561-2016_01_11-10_54_07.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 3 | {"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1562-2016_01_11-10_54_47.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 4 | {"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1563-2016_01_11-10_54_58.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 5 | {"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1564-2016_01_11-10_55_12.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 6 | 
{"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1565-2016_01_11-10_55_32.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 7 | {"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1566-2016_01_11-10_55_49.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]}, 8 | {"Records":[{"eventVersion":"2.0","eventSource":"aws:s3","awsRegion":"us-west-2","eventTime":"2016-01-09T00:20:41.349Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:ETAOINSHRDLU"},"requestParameters":{"sourceIPAddress":"127.0.0.1"},"responseElements":{"x-amz-request-id":"38C37D157D36ACCA","x-amz-id-2":"EgkRe0J6ltML/ycItJ77lMRfaM0i7tb9RoPcCnpOdh0A2VU4uI2p1sJfyikvUPXd"},"s3":{"s3SchemaVersion":"1.0","configurationId":"NotificationObjectCreated","bucket":{"name":"godzilla","ownerIdentity":{"principalId":"LOREMIPSUM"},"arn":"arn:aws:s3:::godzilla"},"object":{"key":"AMIRA-1567-2016_01_11-10_56_09.tar.gz","size":1977499,"eTag":"1294cf3868f6381654b02aaaaa4a8f6c","sequencer":"0056905258E9928B11"}}}]} 9 | ] 10 | -------------------------------------------------------------------------------- /tests/data/s3_test_event_notification.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"Service":"Amazon S5","Event":"s3:TestEvent","Time":"2016-06-09T18:43:01.130Z","Bucket":"godzilla","RequestId":"A311687676D7EF82","HostId":"egIGJdAQHPZ/ZsSjJoq6Dj0HYouGsorKLMZF3luXAD71o24DuwHrgSebkF8GpPIQ"} 3 | ] 4 | -------------------------------------------------------------------------------- /tests/data_processor_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import logging 6 | import tarfile 7 | 8 | import pytest 9 | from mock import ANY 10 | from mock import call 11 | from mock import MagicMock 12 | from mock import patch 13 | 14 | try: 15 | from cStringIO import StringIO as ByteBuffer 16 | except ImportError: 17 | from io import BytesIO as ByteBuffer 18 | 19 | from amira.results_uploader import FileMetaInfo 20 | from amira.data_processor import DataProcessor 21 | from amira.data_processor import OSXCollectorDataProcessor 22 | from amira.data_processor import OSXCollectorOutputExtractionError 23 | 24 | 25 | class TestDataProcessor(object): 26 | 27 | def 
test_get_buffer_size(self): 28 | assert DataProcessor.get_buffer_size(ByteBuffer(b'123' * 111)) == 333 29 | 30 | def test_upload_results(self): 31 | data = ByteBuffer(b'123') 32 | processor = DataProcessor() 33 | processor._results = [FileMetaInfo('_suff.txt', data, 'text/plain')] 34 | uploaders = [MagicMock(), MagicMock()] 35 | processor.upload_results('filename', uploaders) 36 | for u in uploaders: 37 | u.upload_results.assert_called_once_with( 38 | [FileMetaInfo('filename_suff.txt', data, 'text/plain')], 39 | ) 40 | 41 | 42 | class TestOSXCollectorDataProcessor(object): 43 | 44 | @pytest.fixture 45 | def tar_gz_mock(self): 46 | """Mocks tar.gz file content.""" 47 | tarfile.open = MagicMock() 48 | tarinfo_mock = MagicMock() 49 | tarinfo_mock.name = 'lorem_ipsum.json' 50 | tar_mock = tarfile.open.return_value 51 | tar_mock.__iter__.return_value = [tarinfo_mock] 52 | return tarinfo_mock 53 | 54 | def test_process_input(self): 55 | processor = OSXCollectorDataProcessor() 56 | with open('tests/data/mock_input.tar.gz', 'rb') as f: 57 | input_data = f.read() 58 | output = processor.process_input(input_data) 59 | assert output.read() == b'{"a":2}\n' 60 | assert len(processor._results) == 1 61 | 62 | def test_process_input_malformed_gz(self, tar_gz_mock): 63 | logging.error = MagicMock() 64 | processor = OSXCollectorDataProcessor() 65 | tarfile.open.side_effect = tarfile.ReadError('mock.tar.gz is not a gz file') 66 | processor.process_input(b'things') 67 | logging.error.assert_has_calls([call(u'Failed to read the archive: mock.tar.gz is not a gz file')]) 68 | 69 | def test_process_input_no_json(self, tar_gz_mock): 70 | processor = OSXCollectorDataProcessor() 71 | tar_gz_mock.name = 'lorem_ipsum.txt' 72 | 73 | with pytest.raises(OSXCollectorOutputExtractionError) as exc_info: 74 | processor.process_input(b'things') 75 | 76 | assert 'Expected 1 JSON file inside the OSXCollector output archive, ' \ 77 | 'but found 0 instead.' 
in str(exc_info.value) 78 | 79 | def test_perform_analysis(self): 80 | with patch('amira.data_processor.AnalyzeFilter') as mock_filter, \ 81 | patch('amira.data_processor.output_filter') as mock_run_filter: 82 | processor = OSXCollectorDataProcessor() 83 | processor.perform_analysis(b'123', {'a': 'b'}) 84 | mock_filter.assert_called_once_with( 85 | monochrome=True, 86 | html_output_file=ANY, 87 | text_output_file=ANY, 88 | data_feeds={'a': 'b'}, 89 | ) 90 | mock_run_filter._run_filter.assert_called_once_with( 91 | mock_filter.return_value, 92 | input_stream=b'123', 93 | output_stream=ANY, 94 | ) 95 | assert len(processor._results) == 3 96 | -------------------------------------------------------------------------------- /tests/s3_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | try: 6 | from cStringIO import StringIO as ByteBuffer 7 | except ImportError: 8 | from io import BytesIO as ByteBuffer 9 | 10 | import pytest 11 | from mock import MagicMock 12 | from mock import patch 13 | from mock import call 14 | 15 | from amira.results_uploader import FileMetaInfo 16 | from amira.s3 import S3Handler 17 | from amira.s3 import S3ResultsUploader 18 | 19 | 20 | class TestS3Handler(object): 21 | 22 | """Tests ``amira.s3.S3Handler`` class.""" 23 | 24 | @pytest.fixture 25 | def s3_handler(self): 26 | with patch('amira.s3.boto3') as mock_boto3: 27 | handler = S3Handler() 28 | mock_boto3.client.assert_called_once_with('s3') 29 | yield handler 30 | 31 | def test_get_contents_as_string(self, s3_handler): 32 | mock_contents = 'test key contents' 33 | s3_connection_mock = s3_handler._s3_connection 34 | s3_connection_mock.get_object.return_value = { 35 | 'Body': ByteBuffer(mock_contents.encode()), 36 | } 37 | contents = s3_handler.get_contents_as_string( 38 | 'amira-test', 'MALWARE-1564-2016_01_11-10_55_12.tar.gz', 39 | ) 40 | assert mock_contents == contents.decode() 41 | s3_connection_mock.get_object.assert_called_once_with( 42 | Bucket='amira-test', Key='MALWARE-1564-2016_01_11-10_55_12.tar.gz', 43 | ) 44 | 45 | 46 | class TestS3ResultsUploader(): 47 | 48 | """Tests ``amira.s3.S3ResultsUploader`` class.""" 49 | 50 | @pytest.fixture 51 | def s3_results_uploader(self): 52 | with patch('amira.s3.boto3') as mock_boto3: 53 | uploader = S3ResultsUploader('lorem-ipsum') 54 | mock_boto3.client.assert_called_once_with('s3') 55 | yield uploader 56 | 57 | def test_upload_results(self, s3_results_uploader): 58 | s3_connection_mock = s3_results_uploader._s3_connection 59 | fileobj_mock1 = MagicMock() 60 | fileobj_mock2 = MagicMock() 61 | results = [ 62 | FileMetaInfo('etaoin', fileobj_mock1, 'text/html; charset=UTF-8'), 63 | FileMetaInfo('shrdlu', fileobj_mock2, 'application/json'), 64 | ] 65 | s3_results_uploader.upload_results(results) 66 | s3_connection_mock.put_object.assert_has_calls([ 67 | call( 68 | Bucket='lorem-ipsum', 69 | Key='etaoin', 70 | ContentType='text/html; charset=UTF-8', 71 | Body=fileobj_mock1, 72 | ), 73 | call( 74 | Bucket='lorem-ipsum', 75 | Key='shrdlu', 76 | ContentType='application/json', 77 | Body=fileobj_mock2, 78 | ), 79 | ]) 80 | -------------------------------------------------------------------------------- /tests/sqs_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 
| import pytest 6 | import simplejson 7 | from mock import MagicMock 8 | from mock import patch 9 | 10 | from amira.sqs import SqsHandler 11 | 12 | 13 | TEST_DATA_DIR_PATH = 'tests/data' 14 | 15 | 16 | @pytest.fixture 17 | def sqs_handler(): 18 | with patch('amira.sqs.boto3') as mock_boto3: 19 | handler = SqsHandler('us-west-1', 'godzilla') 20 | mock_boto3.resource.assert_called_once_with('sqs', region_name='us-west-1') 21 | mock_boto3.resource.return_value.get_queue_by_name.assert_called_once_with( 22 | QueueName='godzilla', 23 | ) 24 | yield handler 25 | 26 | 27 | def read_s3_event_notifications_file(s3_event_notifications_file_path): 28 | with open(s3_event_notifications_file_path) as fp: 29 | s3_event_notifications = simplejson.load(fp) 30 | json_s3_event_notifications = [ 31 | simplejson.dumps(s3_event_notification) 32 | for s3_event_notification in s3_event_notifications 33 | ] 34 | return json_s3_event_notifications 35 | 36 | 37 | def create_s3_event_notification_message_mocks(s3_event_notifications_file_name): 38 | """Creates SQS queue message mocks that will return the JSON content of 39 | `s3_event_notifications_file_path` JSON file as the body of the message. 40 | """ 41 | s3_event_notifications_file_path = '{0}/{1}'.format( 42 | TEST_DATA_DIR_PATH, s3_event_notifications_file_name, 43 | ) 44 | json_s3_event_notifications = read_s3_event_notifications_file( 45 | s3_event_notifications_file_path, 46 | ) 47 | return [ 48 | MagicMock(body=json_s3_event_notification) 49 | for json_s3_event_notification in json_s3_event_notifications 50 | ] 51 | 52 | 53 | def mock_s3_event_notifications( 54 | mock_sqs_queue, s3_event_notifications_file_name, 55 | ): 56 | """`SqsHandler.get_created_objects()` is a generator, so we need to 57 | mock multiple values returned by `get_messages()` method. 58 | In this case only one as the test cases do not operate on more than 59 | one message. 60 | """ 61 | s3_event_notification_message_mocks = create_s3_event_notification_message_mocks( 62 | s3_event_notifications_file_name, 63 | ) 64 | mock_sqs_queue.receive_messages.side_effect = [s3_event_notification_message_mocks] 65 | return s3_event_notification_message_mocks 66 | 67 | 68 | class TestSqsHandler(object): 69 | 70 | def test_get_created_objects(self, sqs_handler): 71 | s3_event_notification_message_mocks = mock_s3_event_notifications( 72 | sqs_handler.sqs_queue, 's3_event_notifications.json', 73 | ) 74 | created_objects = sqs_handler.get_created_objects() 75 | actual_key_names = [ 76 | created_object.key_name 77 | for created_object in created_objects 78 | ] 79 | assert actual_key_names == [ 80 | 'AMIRA-1561-2016_01_11-10_54_07.tar.gz', 81 | 'AMIRA-1562-2016_01_11-10_54_47.tar.gz', 82 | 'AMIRA-1563-2016_01_11-10_54_58.tar.gz', 83 | 'AMIRA-1564-2016_01_11-10_55_12.tar.gz', 84 | 'AMIRA-1565-2016_01_11-10_55_32.tar.gz', 85 | 'AMIRA-1566-2016_01_11-10_55_49.tar.gz', 86 | 'AMIRA-1567-2016_01_11-10_56_09.tar.gz', 87 | ] 88 | for message_mock in s3_event_notification_message_mocks: 89 | message_mock.delete.assert_called_once_with() 90 | 91 | def test_get_created_objects_no_created_objects(self, sqs_handler): 92 | sqs_handler.sqs_queue.receive_messages.side_effect = [[]] 93 | created_objects = sqs_handler.get_created_objects() 94 | assert not list(created_objects) 95 | 96 | def test_get_created_objects_no_records(self, sqs_handler): 97 | """Tests the behavior of `get_created_objects()` method in case 98 | the message received from SQS does not contain the "Records" 99 | field in the message body. 
100 | """ 101 | mock_s3_event_notifications( 102 | sqs_handler.sqs_queue, 's3_test_event_notification.json', 103 | ) 104 | assert not list(sqs_handler.get_created_objects()) 105 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | project = amira 3 | envlist = py27,py36 4 | 5 | [testenv] 6 | deps = 7 | --only-binary=cryptography 8 | -rrequirements-dev.txt 9 | commands = 10 | flake8 . 11 | {envpython} --version 12 | coverage --version 13 | coverage run -m pytest --strict -rxs {posargs:tests} 14 | coverage report -m 15 | 16 | [testenv:pre-commit] 17 | deps = pre-commit>=1.0.0 18 | commands = pre-commit run --all-files 19 | 20 | [testenv:venv] 21 | envdir = virtualenv_run 22 | basepython = python3.6 23 | commands = 24 | 25 | [pytest] 26 | norecursedirs = .* virtualenv_run build 27 | 28 | [flake8] 29 | exclude = .git,__pycache__,.tox,virtualenv_run 30 | max_line_length = 140 31 | --------------------------------------------------------------------------------
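The README above stops at registering an S3 results uploader, but `amira/amira.py` also exposes `register_data_feed()` and `register_data_processor()`. The sketch below shows how the pieces in this snapshot could be wired together end to end; the region, queue name, bucket name, feed name and feed contents are illustrative placeholders, and whether the Analyze Filter actually consumes a feed registered under this particular name depends on the `osxcollector_output_filters` configuration.

```python
# Hypothetical wiring of the hooks exposed by amira/amira.py. Names such as
# 'AmiraS3EventNotifications', 'amira-results-bucket' and the feed contents
# are placeholders, not values shipped with the repository.
from amira.amira import AMIRA
from amira.data_processor import OSXCollectorDataProcessor
from amira.s3 import S3ResultsUploader


def domain_blacklist_feed():
    # Generator providing feed data; AMIRA passes the registered callables to
    # AnalyzeFilter via its data_feeds argument under the chosen feed name.
    for domain in ('example-malware-domain.com',):
        yield domain


amira = AMIRA('us-west-1', 'AmiraS3EventNotifications')
amira.register_results_uploader(S3ResultsUploader('amira-results-bucket'))
amira.register_data_feed('domain_blacklist', domain_blacklist_feed)
# Optional: OSXCollectorDataProcessor is already the default processor, so
# this call is redundant; a custom DataProcessor subclass implementing
# process_input(), perform_analysis() and upload_results() could be passed
# here in the same way.
amira.register_data_processor(OSXCollectorDataProcessor())
amira.run()
```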