├── mwcp ├── stix │ ├── __init__.py │ ├── objects.py │ └── report_writer.py ├── tests │ ├── __init__.py │ ├── test_string_report │ │ ├── strings.txt │ │ └── strings.json │ ├── test_disassembly │ │ ├── strings.exe │ │ ├── Sample.py │ │ └── strings.c │ ├── test_poshdeob.py │ ├── test_runner │ │ ├── yara_repo │ │ │ ├── rule_a.yara │ │ │ ├── rule_b.yara │ │ │ └── sibling_dispatch.yara │ │ ├── Sample.py │ │ └── SiblingDispatch.py │ ├── test_cli │ │ ├── csv_legacy.csv │ │ ├── parse.txt │ │ ├── csv_cli.csv │ │ ├── fb843efb2ffec987db12e72ca75c9ea2.json │ │ └── parse.json │ ├── test_server │ │ └── DecodedStringTestParser.py │ ├── test_report_writer │ │ ├── report_foreign.txt │ │ ├── report_foreign.md │ │ ├── report_foreign.html │ │ ├── report_wordwrap.txt │ │ └── report_wordwrap.html │ ├── test_custombase64.py │ ├── test_legacy_reporter │ │ └── report.txt │ ├── test_string_report.py │ ├── test_testing.py │ ├── test_stix.py │ ├── test_disassembly.py │ ├── test_pecon.py │ ├── test_report │ │ └── split_report.py │ ├── test_report_writer.py │ ├── test_runner.py │ ├── test_construct.py │ ├── test_report.py │ ├── test_issues.py │ └── test_legacy_reporter.py ├── tools │ ├── __init__.py │ ├── server │ │ ├── templates │ │ │ ├── results.html │ │ │ ├── parsers.html │ │ │ ├── base.html │ │ │ └── upload.html │ │ └── __init__.py │ └── update_schema.py ├── resources │ ├── __init__.py │ └── RATDecoders │ │ ├── __init__.py │ │ └── PLACE_PARSERS_HERE ├── utils │ ├── __init__.py │ ├── construct │ │ ├── __init__.py │ │ ├── network.py │ │ ├── datetime_.py │ │ ├── MIPS.py │ │ ├── dotnet.py │ │ ├── windows_enums.py │ │ ├── windows_constants.py │ │ ├── construct_template.html │ │ └── ARM.py │ ├── stringutils.py │ ├── multi_proc.py │ ├── elffileutils.py │ ├── custombase64.py │ └── logutil.py ├── parsers │ ├── __init__.py │ ├── TA.py │ ├── GenericDropper.py │ ├── PDF.py │ ├── PowerShell.py │ ├── Decoy.py │ ├── foo.py │ ├── Archive.py │ ├── ISO.py │ ├── tests │ │ └── foo │ │ │ └── 
f144899b86766688991c5d0d10902f4a.json │ ├── VisualBasic.py │ ├── Python.py │ └── RSA.py ├── exceptions.py ├── __init__.py ├── config │ ├── log_config.yml │ ├── config.yml │ ├── __init__.py │ └── fields.txt ├── parser_config.yml ├── core.py └── parser.py ├── docs └── PythonStyleGuide.md ├── MANIFEST.in ├── setup.cfg ├── noxfile.py ├── .gitignore ├── LICENSE.txt ├── .github └── workflows │ └── workflow.yml └── setup.py /mwcp/stix/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/resources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/resources/RATDecoders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/resources/RATDecoders/PLACE_PARSERS_HERE: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/tests/test_string_report/strings.txt: -------------------------------------------------------------------------------- 1 | hello 2 | world -------------------------------------------------------------------------------- /mwcp/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """This package is used to store common helper 
utilities for developing parsers.""" 2 | -------------------------------------------------------------------------------- /mwcp/tools/server/templates/results.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | {{ highlight|safe }} 5 | {% endblock %} -------------------------------------------------------------------------------- /mwcp/tests/test_disassembly/strings.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dod-cyber-crime-center/DC3-MWCP/HEAD/mwcp/tests/test_disassembly/strings.exe -------------------------------------------------------------------------------- /docs/PythonStyleGuide.md: -------------------------------------------------------------------------------- 1 | # Python Style Guide 2 | 3 | Use [black](https://github.com/psf/black) with line lengths of 120. 4 | 5 | ```bash 6 | $ pip install black 7 | $ black -l 120 8 | ``` 9 | -------------------------------------------------------------------------------- /mwcp/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Path to parser configuration file. 4 | # (Used when parsers are installed through entry points.) 
5 | config = os.path.join(os.path.dirname(__file__), "..", "parser_config.yml") 6 | -------------------------------------------------------------------------------- /mwcp/tests/test_poshdeob.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests powershell deofuscator 3 | """ 4 | 5 | import doctest 6 | 7 | from mwcp.utils import poshdeob 8 | 9 | 10 | def test_doctests(): 11 | """Tests that the doctests work.""" 12 | results = doctest.testmod(poshdeob) 13 | assert not results.failed 14 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner/yara_repo/rule_a.yara: -------------------------------------------------------------------------------- 1 | 2 | rule Rule_Mapped { 3 | meta: 4 | mwcp = "dc3:foo" 5 | strings: 6 | $str = "mapped" 7 | condition: 8 | all of them 9 | } 10 | 11 | rule Rule_Unmapped { 12 | strings: 13 | $str = "unmapped" 14 | condition: 15 | all of them 16 | } 17 | -------------------------------------------------------------------------------- /mwcp/tests/test_cli/csv_legacy.csv: -------------------------------------------------------------------------------- 1 | scan_date,inputfilename,outputfile.name,outputfile.description,outputfile.md5,a,address,other.field1,other.field2 2 | [TIMESTAMP],file1.exe,"out_name 3 | out_name2","out_desc 4 | out_desc2","out_md5 5 | out_md52",,"https://google.com 6 | ftp://amazon.com",value1,"value2 7 | value3" 8 | [TIMESTAMP],file2.exe,,,,"b 9 | c",,, 10 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md LICENSE.txt 2 | include mwcp/resources/RATDecoders/PLACE_PARSERS_HERE 3 | include mwcp/parser_config.yml 4 | recursive-include mwcp/resources *.json *.txt 5 | recursive-include mwcp/config * 6 | recursive-include mwcp/parsers/tests * 7 | graft mwcp/tests 8 | graft mwcp/tools/server 
9 | include mwcp/utils/construct/construct_template.html 10 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner/yara_repo/rule_b.yara: -------------------------------------------------------------------------------- 1 | 2 | rule FileA { 3 | meta: 4 | mwcp = "Sample.FileA" 5 | strings: 6 | $str = "file a" 7 | condition: 8 | all of them 9 | } 10 | 11 | 12 | rule FileB { 13 | meta: 14 | mwcp = "Sample.FileB" 15 | strings: 16 | $str = "file b" 17 | condition: 18 | all of them 19 | } 20 | 21 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | version = attr:mwcp.__version__ 3 | description = A framework for malware configuration parsers. 4 | long_description_content_type = text/markdown 5 | long_description = file:README.md 6 | 7 | [tool:pytest] 8 | testpaths = mwcp/tests 9 | required_plugins = pytest-datadir pytest-xdist 10 | filterwarnings = 11 | ignore::DeprecationWarning 12 | addopts = 13 | -p no:faulthandler 14 | -------------------------------------------------------------------------------- /mwcp/tools/update_schema.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a script for updating the formal schema file - schema.json 3 | """ 4 | import json 5 | import pathlib 6 | 7 | import mwcp 8 | 9 | 10 | def main(): 11 | schema_json = pathlib.Path(mwcp.__file__).parent / "config" / "schema.json" 12 | 13 | with schema_json.open("w") as fo: 14 | json.dump(mwcp.schema(), fo, indent=4) 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /mwcp/utils/construct/__init__.py: -------------------------------------------------------------------------------- 1 | """This is a wrapper interface to the construct library which adds extra helper 
functions.""" 2 | 3 | # from __future__ import absolute_import 4 | 5 | # Import interface 6 | from .core import * 7 | 8 | from .construct_html import html_hex 9 | from .helpers import * 10 | from .dotnet import * 11 | from .datetime_ import * 12 | from .windows_structures import * 13 | from .network import * 14 | from .windows_enums import * 15 | from . import ARM 16 | from . import MIPS 17 | 18 | -------------------------------------------------------------------------------- /mwcp/tests/test_server/DecodedStringTestParser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sample parser that reports decoded strings. 3 | """ 4 | 5 | from mwcp import metadata, Parser 6 | 7 | 8 | class Implant(Parser): 9 | DESCRIPTION = "Sample Implant" 10 | 11 | @classmethod 12 | def identify(cls, file_object): 13 | return True 14 | 15 | def run(self): 16 | self.report.add(metadata.DecodedString("string A")) 17 | self.report.add(metadata.DecodedString("string B", encryption_key=metadata.EncryptionKey(b"\xde\xad\xbe\xef", "xor"))) 18 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner/Sample.py: -------------------------------------------------------------------------------- 1 | 2 | from mwcp import Parser, FileObject 3 | 4 | 5 | class FileA(Parser): 6 | DESCRIPTION = "File A" 7 | 8 | @classmethod 9 | def identify(cls, file_object): 10 | return b"matches file a" in file_object.data 11 | 12 | def run(self): 13 | self.dispatcher.add(FileObject(b"matches file b")) 14 | 15 | 16 | class FileB(Parser): 17 | DESCRIPTION = "File B" 18 | 19 | @classmethod 20 | def identify(cls, file_object): 21 | return b"matches file b" in file_object.data 22 | -------------------------------------------------------------------------------- /mwcp/tools/server/templates/parsers.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block 
content %} 4 | 5 | 6 | 7 | {% for column in headers %} 8 | 9 | {% endfor %} 10 | 11 | 12 | 13 | {% for parser in parsers %} 14 | 15 | {% for column in parser %} 16 | 17 | {% endfor %} 18 | 19 | {% endfor %} 20 | 21 |
{{ column }}
{{ column }}
22 | {% endblock %} -------------------------------------------------------------------------------- /mwcp/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class MWCPError(Exception): 4 | """ 5 | Base class for custom exceptions thrown by MWCP. 6 | """ 7 | 8 | 9 | class ConfigError(MWCPError): 10 | """ 11 | This exception is thrown if there is an issue with the configuration file. 12 | """ 13 | 14 | 15 | class UnableToParse(MWCPError): 16 | """ 17 | This exception can be thrown if a parser that has been correctly identified has failed to parse 18 | the file and you would like other parsers to be tried. 19 | """ 20 | 21 | 22 | class ValidationError(MWCPError): 23 | """ 24 | This exception can be thrown if validation fails when adding metadata. 25 | """ 26 | 27 | 28 | class ParserNotFoundError(MWCPError): 29 | """ 30 | This exception gets thrown if a parser can't be found. 31 | """ 32 | -------------------------------------------------------------------------------- /mwcp/tools/server/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import flask as f 4 | 5 | from . import server 6 | 7 | 8 | def create_app(extra_config=None): 9 | """ 10 | Create a Flask app instance for the MWCP API. 
11 | 12 | :param dict extra_config: Extra configuration options to add to the app 13 | :return: Flask app for MWCP API 14 | """ 15 | app = f.Flask(__name__) 16 | 17 | app.config.setdefault("MENU_LINKS", []).extend( 18 | [ 19 | {"name": "Upload", "endpoint": "mwcp.upload"}, 20 | {"name": "Parsers", "endpoint": "mwcp.parsers_list"}, 21 | ] 22 | ) 23 | 24 | if extra_config: 25 | app.config.from_mapping(extra_config) 26 | 27 | server.init_app(app) 28 | app.register_blueprint(server.bp) 29 | 30 | return app 31 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner/yara_repo/sibling_dispatch.yara: -------------------------------------------------------------------------------- 1 | /* 2 | Rules for test_yara_runner_sibling_dispatch 3 | */ 4 | 5 | rule Parent { 6 | meta: 7 | mwcp = "SiblingDispatch.Parent" 8 | strings: 9 | $str = "parent" 10 | condition: 11 | all of them 12 | } 13 | 14 | 15 | rule Sibling1 { 16 | meta: 17 | mwcp = "SiblingDispatch.Sibling1" 18 | strings: 19 | $str = "sibling 1" 20 | condition: 21 | all of them 22 | } 23 | 24 | 25 | rule Sibling2 { 26 | meta: 27 | mwcp = "SiblingDispatch.Sibling2" 28 | strings: 29 | $str = "sibling 2" 30 | condition: 31 | all of them 32 | } 33 | 34 | 35 | rule Grandchild { 36 | meta: 37 | mwcp = "SiblingDispatch.Grandchild" 38 | strings: 39 | $str = "grandchild" 40 | condition: 41 | all of them 42 | } 43 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs tests and other routines. 3 | 4 | Usage: 5 | 1. Install "nox" 6 | 2. 
Run "nox" or "nox -s test" 7 | """ 8 | 9 | import nox 10 | 11 | 12 | @nox.session(python="3.10") 13 | def test(session): 14 | """Run pytests""" 15 | session.install("-e", ".[testing]") 16 | session.run("pytest") 17 | 18 | 19 | @nox.session(python="3.10") 20 | def build(session): 21 | """Build source and wheel distribution""" 22 | session.run("python", "setup.py", "sdist") 23 | session.run("python", "setup.py", "bdist_wheel") 24 | 25 | 26 | @nox.session(python=False) 27 | def release_patch(session): 28 | """Generate release patch""" 29 | session.run("mkdir", "-p", "dist", external=True) 30 | with open("./dist/updates.patch", "w") as out: 31 | session.run( 32 | "git", "format-patch", "--stdout", "master", 33 | external=True, 34 | stdout=out 35 | ) 36 | -------------------------------------------------------------------------------- /mwcp/__init__.py: -------------------------------------------------------------------------------- 1 | """Exposes interface for MWCP.""" 2 | 3 | import logging 4 | 5 | # Add null handler to root logger to avoid "no handler" error when this is used as a library 6 | logging.getLogger().addHandler(logging.NullHandler()) 7 | 8 | 9 | from mwcp.config import _config as config 10 | from mwcp.parser import Parser 11 | from mwcp.file_object import FileObject 12 | from mwcp.registry import ( 13 | register_entry_points, register_parser_directory, register_parser_package, 14 | iter_parsers, get_parser_descriptions, set_default_source, 15 | clear as clear_registry, 16 | clear_default_source, 17 | ParserNotFoundError 18 | ) 19 | from mwcp.runner import Runner 20 | from mwcp.report import Report 21 | from mwcp.dispatcher import Dispatcher, UnidentifiedFile 22 | from mwcp.utils.logutil import setup_logging 23 | from mwcp.core import run, schema 24 | from mwcp.exceptions import * 25 | 26 | 27 | __version__ = "3.14.0" 28 | -------------------------------------------------------------------------------- /mwcp/parsers/TA.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Wrapper for Techanarchy RATDecoders using techanarchy_bridge 3 | """ 4 | 5 | import os 6 | from pathlib import Path 7 | 8 | from mwcp import Parser 9 | from mwcp.resources import RATDecoders 10 | 11 | RAT_DECODERS = [decoder.stem for decoder in Path(RATDecoders.__file__).parent.glob("[!_]*.py")] 12 | 13 | 14 | def run(self): 15 | from mwcp.resources import techanarchy_bridge 16 | 17 | name = self.__class__.__name__ 18 | scriptpath = os.path.join(os.path.dirname(RATDecoders.__file__), name + ".py") 19 | techanarchy_bridge.run_decoder(self, scriptpath) 20 | 21 | 22 | # Dynamically declare Parser classes. 23 | for name in RAT_DECODERS: 24 | if name == "TEMPLATE": 25 | continue 26 | klass = type(name, (Parser,), {"DESCRIPTION": name, "run": run, "AUTHOR": "TechAnarchy"}) 27 | klass.__module__ = __name__ # Module originally gets incorrectly set to "abc" 28 | globals()[name] = klass 29 | -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer/report_foreign.txt: -------------------------------------------------------------------------------- 1 | ----- File: input_file.bin ----- 2 | Field Value 3 | ------------ ---------------------------------------------------------------- 4 | Parser FooParser 5 | File Path C:/input_file.bin 6 | Description SuperMalware Implant 7 | Architecture 8 | MD5 1e50210a0202497fb79bc38b6ade6c34 9 | SHA1 baf34551fecb48acc3da868eb85e1b6dac9de356 10 | SHA256 1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee 11 | Compile Time 12 | 13 | ---- Miscellaneous ---- 14 | Key Value 15 | ------ ----------------------------------- 16 | JAPAN ユーザー別サイト 17 | CHINA 简体中文 18 | KOREA 크로스 플랫폼으로 19 | ISRAEL מדורים מבוקשים 20 | EGYPT أفضل البحوث 21 | RUSSIA Десятую Международную 22 | MATH ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i) 23 | FRANCE français langue étrangère 24 | SPAIN mañana olé 25 | 26 | ----- File Tree 
----- 27 | 28 | 29 | -------------------------------------------------------------------------------- /mwcp/tests/test_custombase64.py: -------------------------------------------------------------------------------- 1 | """Tests mwcp.utils.custombase64""" 2 | 3 | from mwcp.utils import custombase64 4 | 5 | 6 | def test_base64(): 7 | custom_alphabet = b'EFGHQRSTUVWefghijklmnopIJKLMNOPABCDqrstuvwxyXYZabcdz0123456789+/=' 8 | assert custombase64.b64encode(b'hello world', custom_alphabet) == b'LSoXMS8BO29dMSj=' 9 | assert custombase64.b64decode(b'LSoXMS8BO29dMSj=', custom_alphabet) == b'hello world' 10 | 11 | 12 | def test_base32(): 13 | custom_alphabet = b'FGHIJQ345RSTUVWXYKLMABCDENOPZ267=' 14 | assert custombase64.b32encode(b'hello world', custom_alphabet) == b'VGLCEPIXJGPC6ZMUUY======' 15 | assert custombase64.b32decode(b'VGLCEPIXJGPC6ZMUUY======', custom_alphabet) == b'hello world' 16 | 17 | 18 | def test_base16(): 19 | custom_alphabet = b'78BDE0123F459A6C' 20 | assert custombase64.b16encode(b'hello world', custom_alphabet) == b'131019191CB7221C2B191E' 21 | assert custombase64.b16decode(b'131019191CB7221C2B191E', custom_alphabet) == b'hello world' 22 | -------------------------------------------------------------------------------- /mwcp/tests/test_legacy_reporter/report.txt: -------------------------------------------------------------------------------- 1 | ---- Credential ---- 2 | Username Password 3 | ---------- ---------- 4 | admin pass 5 | 6 | ---- Network ---- 7 | Tags Address Port Network Protocol Username Password 8 | ------ ----------- ------ ------------------ ---------- ---------- 9 | proxy 192.168.1.1 80 tcp admin pass 10 | 11 | ---- Socket ---- 12 | Tags Address Port Network Protocol 13 | ------ ----------- ------ ------------------ 14 | proxy 192.168.1.1 80 tcp 15 | 16 | ---- Miscellaneous ---- 17 | Key Value 18 | ----- -------------- 19 | foo bar 20 | biz b'baz\x00\x01' 21 | 22 | ---- Residual Files ---- 23 | Filename Description Derivation MD5 Arch 
Compile Time 24 | ---------- ------------------- ------------ -------------------------------- ------ -------------- 25 | file_1.exe example output file 8d777f385d3dfec8815d20f7496026dc 26 | 27 | -------------------------------------------------------------------------------- /mwcp/tests/test_string_report.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests components of string report extension. 3 | """ 4 | 5 | from mwcp import metadata 6 | 7 | 8 | def test_strings(report): 9 | with report: 10 | report.add(metadata.DecodedString("hello")) 11 | report.add(metadata.DecodedString("world", encryption_key=metadata.EncryptionKey(b"\xde\xad\xbe\xef"))) 12 | assert report.strings() == ["hello", "world"] 13 | 14 | 15 | def test_string_report_generation(report, datadir): 16 | report._external_strings_report = True 17 | with report: 18 | report.add(metadata.DecodedString("hello")) 19 | report.add(metadata.DecodedString("world", encryption_key=metadata.EncryptionKey(b"\xde\xad\xbe\xef"))) 20 | string_reports = report.get(metadata.File)[:2] 21 | assert string_reports[0].name.endswith(f"_strings.json") 22 | assert string_reports[1].name.endswith(f"_strings.txt") 23 | assert string_reports[0].data.decode("utf8") == (datadir / "strings.json").read_text() 24 | assert string_reports[1].data.decode("utf8") == (datadir / "strings.txt").read_text() 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .DS_Store 3 | .project 4 | .vscode/ 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # 
PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | 60 | # Sphinx documentation 61 | docs/_build/ 62 | 63 | # PyBuilder 64 | target/ 65 | 66 | # PyCharm 67 | /.idea 68 | 69 | /scratch 70 | TODO.txt -------------------------------------------------------------------------------- /mwcp/tools/server/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | {{ g.title + " |" if g.title else "" }} {{ config.get('SERVICE_NAME', 'DC3-MWCP Service') }} 12 | 13 | 14 |
15 | 23 |

{{ g.title|default(config.get('SERVICE_NAME', 'DC3-MWCP Service')) }}

24 | 25 | {% block content %}{% endblock %} 26 | 27 |
28 | {% block footer %} 29 |

30 | {{ config.get('SERVICE_NAME', 'DC3-MWCP Service') }}. 31 |

32 | {% endblock %} 33 |
34 |
35 | 36 | 37 | -------------------------------------------------------------------------------- /mwcp/utils/stringutils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility used for string conversions. 3 | """ 4 | 5 | import string 6 | import sys 7 | import unicodedata 8 | 9 | 10 | def convert_to_unicode(input_value): 11 | if isinstance(input_value, str): 12 | return input_value 13 | elif isinstance(input_value, bytes): 14 | return str(input_value, encoding="latin1", errors="replace") 15 | else: 16 | return convert_to_unicode(str(input_value)) 17 | 18 | 19 | VALID_FILENAME_CHARS = "-_.() {}{}".format(string.ascii_letters, string.digits).encode("ascii") 20 | 21 | 22 | def sanitize_filename(filename: str) -> str: 23 | """ 24 | Convert given filename to sanitized version that is safe to be used to write to the file system. 25 | """ 26 | filename = convert_to_unicode(filename) 27 | filename = unicodedata.normalize("NFKD", filename) # convert accented characters 28 | filename = convert_to_unicode(bytes(c for c in filename.encode("ascii", "ignore") if c in VALID_FILENAME_CHARS)) 29 | 30 | # If in Windows, remove any `.lnk` extension to prevent issues with the file explorer. 31 | if sys.platform == "win32" and filename.lower().endswith(".lnk"): 32 | filename = filename[:-len(".lnk")] + "_lnk" 33 | 34 | return filename 35 | -------------------------------------------------------------------------------- /mwcp/config/log_config.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | disable_existing_loggers: False # fixes issue with module level loggers 3 | 4 | filters: 5 | # This filter is necessary to use the "%(level_char)s" format variable. 
6 | level_char: 7 | (): mwcp.utils.logutil.LevelCharFilter 8 | 9 | formatters: 10 | simple: 11 | format: "[%(level_char)s] (%(processName)s:%(name)s): %(message)s" 12 | error: 13 | format: " [%(name)s:%(funcName)s():%(lineno)d]: %(message)s" 14 | 15 | handlers: 16 | console: 17 | class: logging.StreamHandler 18 | formatter: simple 19 | filters: [level_char] 20 | stream: ext://sys.stderr 21 | 22 | error_file: 23 | # Custom handler necessary to fix issues that can occur in Windows. 24 | (): mwcp.utils.logutil.MPRotatingFileHandler 25 | level: WARNING 26 | formatter: error 27 | filename: "errors.log" 28 | maxBytes: 10485760 # 10MB 29 | backupCount: 3 30 | encoding: utf8 31 | 32 | null_handler: 33 | class: logging.NullHandler 34 | 35 | mwcp_server: 36 | (): mwcp.utils.logutil.ListHandler 37 | level: INFO 38 | filters: [level_char] 39 | formatter: simple 40 | entries: 1000 41 | 42 | root: 43 | level: INFO 44 | handlers: [console, error_file, mwcp_server] 45 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | This project constitutes a work of the United States Government and is not subject to domestic copyright protection under 17 USC § 105. 2 | 3 | However, because the project utilizes code licensed from contributors and other third parties, it therefore is licensed under the MIT License. http://opensource.org/licenses/mit-license.php. 
Under that license, permission is granted free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the conditions that any appropriate copyright notices and this permission notice are included in all copies or substantial portions of the Software. 4 | 5 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
6 | -------------------------------------------------------------------------------- /mwcp/tests/test_string_report/strings.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "string_report", 3 | "tags": [], 4 | "file": { 5 | "type": "file", 6 | "tags": [], 7 | "name": "input_file.bin", 8 | "description": null, 9 | "md5": "1e50210a0202497fb79bc38b6ade6c34", 10 | "sha1": "baf34551fecb48acc3da868eb85e1b6dac9de356", 11 | "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee", 12 | "architecture": null, 13 | "compile_time": null, 14 | "file_path": "C:/input_file.bin", 15 | "data": null, 16 | "derivation": null 17 | }, 18 | "strings": [ 19 | { 20 | "type": "decoded_string", 21 | "tags": [], 22 | "value": "hello", 23 | "encryption_key": null 24 | }, 25 | { 26 | "type": "decoded_string", 27 | "tags": [], 28 | "value": "world", 29 | "encryption_key": { 30 | "type": "encryption_key", 31 | "tags": [], 32 | "key": "3q2+7w==", 33 | "algorithm": null, 34 | "mode": null, 35 | "iv": null, 36 | "secret": null, 37 | "key_derivation": null 38 | } 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /mwcp/parser_config.yml: -------------------------------------------------------------------------------- 1 | Archive: 2 | description: Archive file types 3 | author: DC3 4 | parsers: 5 | - .Zip 6 | - .Gzip 7 | 8 | Decoy: 9 | description: Generic file types described as a decoy file 10 | author: DC3 11 | parsers: 12 | - .DOC 13 | - .PDF 14 | - .RTF 15 | - .JPG 16 | - .DOCX 17 | - .XLSX 18 | - .PPTX 19 | 20 | foo: 21 | description: example parser that works on any file 22 | author: DC3 23 | parsers: 24 | - .Foo 25 | 26 | GenericDropper: 27 | description: Generic Dropper 28 | author: DC3 29 | parsers: 30 | - .Overlay 31 | - .RSRC 32 | 33 | ISO: .ImageFile 34 | 35 | PDF: .Document 36 | 37 | PowerShell: .Script 38 | 39 | Python: 40 | description: Python artifacts 41 | 
author: DC3 42 | parsers: 43 | - .PyInstaller 44 | 45 | Quarantined: 46 | description: Anti-Virus Quarantined File 47 | author: DC3 48 | parsers: 49 | - .McAfee 50 | - .Defender 51 | - .SymantecQB 52 | - .SymantecSubSDK 53 | - .AhnLab 54 | - .Avast_AVG 55 | 56 | RSA: 57 | description: RSA artifacts 58 | author: DC3 59 | parsers: 60 | - .DigitalCertificate 61 | - .PrivateKey 62 | 63 | VisualBasic: 64 | description: VisualBasic Script 65 | author: DC3 66 | parsers: 67 | - .EncodedASP # must come first 68 | - .VBE 69 | - .VBScript 70 | -------------------------------------------------------------------------------- /mwcp/utils/construct/network.py: -------------------------------------------------------------------------------- 1 | """ 2 | Network constructs 3 | """ 4 | 5 | from .core import * 6 | 7 | 8 | class _MACAddressAdapter(Adapter): 9 | r""" 10 | Adapter used to format a MAC address from a list of 6 bytes 11 | 12 | e.g. 13 | >>> _MACAddressAdapter(Byte[6]).parse(b'\x00\x0c\x29\xd3\x91\xbc') 14 | '00-0c-29-d3-91-bc' 15 | """ 16 | def _encode(self, obj, context, path): 17 | return list(map(chr, obj.split("-"))) 18 | 19 | def _decode(self, obj, context, path): 20 | return '{:02x}-{:02x}-{:02x}-{:02x}-{:02x}-{:02x}'.format(*obj) 21 | 22 | 23 | # A MacAddress parsed from single bytes. 24 | MacAddress = _MACAddressAdapter(Byte[6]) 25 | 26 | 27 | class _IP4AddressAdapter(Adapter): 28 | r""" 29 | Adapter used to format a IP address from a list of four ints. 30 | 31 | e.g. 32 | >>> _IP4AddressAdapter(Byte[4]).parse(b'\x01\x02\x03\x04') 33 | '1.2.3.4' 34 | >>> _IP4AddressAdapter(Int16ul[4]).parse(b'\x01\x00\x02\x00\x03\x00\x04\x00') 35 | '1.2.3.4' 36 | """ 37 | 38 | def _encode(self, obj, context, path): 39 | return list(map(int, obj.split('.'))) 40 | 41 | def _decode(self, obj, context, path): 42 | return '{0}.{1}.{2}.{3}'.format(*obj) 43 | 44 | 45 | # An IP4Address parsed from single bytes. 
46 | IP4Address = _IP4AddressAdapter(Byte[4]) 47 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner/SiblingDispatch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parsers for test_yara_runner_sibling_dispatch 3 | """ 4 | 5 | from mwcp import Parser, FileObject 6 | 7 | 8 | class Parent(Parser): 9 | DESCRIPTION = "Parent" 10 | 11 | @classmethod 12 | def identify(cls, file_object): 13 | return b"parent" in file_object.data 14 | 15 | def run(self): 16 | self.dispatcher.add(FileObject(b"sibling 1")) 17 | self.dispatcher.add(FileObject(b"sibling 2")) 18 | 19 | 20 | class Sibling1(Parser): 21 | DESCRIPTION = "Sibling 1" 22 | 23 | @classmethod 24 | def identify(cls, file_object): 25 | return b"sibling 1" in file_object.data 26 | 27 | 28 | class Sibling2(Parser): 29 | DESCRIPTION = "Sibling 2" 30 | 31 | @classmethod 32 | def identify(cls, file_object): 33 | return b"sibling 2" in file_object.data 34 | 35 | def run(self): 36 | # Testing corner case where we dispatch a file that is a parent of an already processed sibling. 37 | sibling = self.file_object.siblings[0] 38 | assert sibling.description == "Sibling 1" # sanity check 39 | self.dispatcher.add(FileObject(b"grandchild"), parent=sibling) 40 | 41 | 42 | class Grandchild(Parser): 43 | DESCRIPTION = "Grandchild" 44 | 45 | @classmethod 46 | def identify(cls, file_object): 47 | return b"grandchild" in file_object.data 48 | -------------------------------------------------------------------------------- /mwcp/config/config.yml: -------------------------------------------------------------------------------- 1 | 2 | # Path to logging configuration file. 3 | LOG_CONFIG_PATH: ./log_config.yml 4 | 5 | # Overwrites the directory containing JSON test case files for all parsers. 6 | # If not set, the "tests" directory located within the root directory of the respective parser source 7 | # will be used. 
8 | #TESTCASE_DIR: ~/mwcp_tests 9 | 10 | # Directory containing malware samples used for testing. 11 | #MALWARE_REPO: ~/malware_repo 12 | 13 | # Optional extra parser directory to use along with registered parser extension. 14 | # This allows you so use a directory of parsers that aren't officially part of any python package. 15 | #PARSER_DIR: ~/mwcp_parsers 16 | 17 | # Path to the parser_config.yml file to use for the provided extra parser directory above. 18 | # If this is not set, the "config" attribute in the __init__.py of the parser's directory is used instead. 19 | # This must be set if the parser directory doesn't have an __init__.py file! 20 | #PARSER_CONFIG_PATH: ~/mwcp_parsers/parser_config.yml 21 | 22 | # Name (or path) of a default parser source to use if not explicitly defined. 23 | # If this is not set, all sources will be considered. 24 | # (This is useful for enforcing parsers of only a specific source to be used) 25 | #PARSER_SOURCE: acme 26 | 27 | # Directory containing yara signatures. 28 | #YARA_REPO: ~/yara_repo 29 | 30 | # Keep temporary directory created by FileObject.temp_path() 31 | # KEEP_TMP: false 32 | -------------------------------------------------------------------------------- /mwcp/tools/server/templates/upload.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 |
5 |
6 | 7 | 8 | 9 | 14 | 15 |
16 | 17 | 18 |
19 | 20 | 21 |
22 | 23 | 24 |
25 | 26 | 27 |
28 | 29 | 30 |
31 |
32 | {% endblock %} -------------------------------------------------------------------------------- /mwcp/tests/test_cli/parse.txt: -------------------------------------------------------------------------------- 1 | ----- File: test.txt ----- 2 | Field Value 3 | ------------ ---------------------------------------------------------------- 4 | Parser foo 5 | File Path test.txt 6 | Description Foo 7 | Architecture 8 | MD5 fb843efb2ffec987db12e72ca75c9ea2 9 | SHA1 5e90c4c2be31a7a0be133b3dbb4846b0434bc2ab 10 | SHA256 fe5af8c641835c24f3bbc237a659814b96ed64d2898fae4cb3d2c0ac5161f5e9 11 | Compile Time 12 | 13 | ---- Network ---- 14 | Url Protocol Address 15 | ---------------- ---------- --------- 16 | http://127.0.0.1 http 127.0.0.1 17 | 18 | ---- Socket ---- 19 | Address 20 | --------- 21 | 127.0.0.1 22 | 23 | ---- URL ---- 24 | Url Protocol 25 | ---------------- ---------- 26 | http://127.0.0.1 http 27 | 28 | ---- Residual Files ---- 29 | Filename Description Derivation MD5 Arch Compile Time 30 | ----------------- ------------------- -------------------------- -------------------------------- ------ -------------- 31 | fooconfigtest.txt example output file extracted and decompressed 5eb63bbbe01eeed093cb22bb8f5acdc3 32 | 33 | ---- Logs ---- 34 | [+] File test.txt identified as Foo. 
35 | [+] size of inputfile is 23 bytes 36 | [+] test.txt dispatched residual file: fooconfigtest.txt 37 | [+] File fooconfigtest.txt described as example output file 38 | [+] operating on inputfile test.txt 39 | 40 | ----- File Tree ----- 41 | 42 | └── 43 | 44 | 45 | -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer/report_foreign.md: -------------------------------------------------------------------------------- 1 | # File: input_file.bin 2 | | Field | Value | 3 | |:-------------|:-----------------------------------------------------------------| 4 | | Parser | FooParser | 5 | | File Path | C:/input_file.bin | 6 | | Description | SuperMalware Implant | 7 | | Architecture | | 8 | | MD5 | 1e50210a0202497fb79bc38b6ade6c34 | 9 | | SHA1 | baf34551fecb48acc3da868eb85e1b6dac9de356 | 10 | | SHA256 | 1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee | 11 | | Compile Time | | 12 | 13 | ## Miscellaneous 14 | | Key | Value | 15 | |:-------|:------------------------------------| 16 | | JAPAN | ユーザー別サイト | 17 | | CHINA | 简体中文 | 18 | | KOREA | 크로스 플랫폼으로 | 19 | | ISRAEL | מדורים מבוקשים | 20 | | EGYPT | أفضل البحوث | 21 | | RUSSIA | Десятую Международную | 22 | | MATH | ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i) | 23 | | FRANCE | français langue étrangère | 24 | | SPAIN | mañana olé | 25 | 26 | # File Tree 27 | ``` 28 | 29 | ``` 30 | 31 | -------------------------------------------------------------------------------- /mwcp/parsers/GenericDropper.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains parsers for common Dropper types containing embedded file(s) in plaintext 3 | """ 4 | 5 | from mwcp import FileObject, Parser 6 | from mwcp.utils import pefileutils 7 | 8 | 9 | class Overlay(Parser): 10 | DESCRIPTION = "Dropper (Overlay)" 11 | 12 | @classmethod 13 | def identify(cls, file_object): 14 | """ 15 | Validate input file is a PE and there is a 
pefile.PE object starting at the overlay. 16 | """ 17 | if not file_object.pe: 18 | return False 19 | overlay = file_object.pe.get_overlay() 20 | return overlay and pefileutils.obtain_pe(overlay) 21 | 22 | def run(self): 23 | """ 24 | Extract PE file from overlay and add to dispatcher 25 | """ 26 | overlay = self.file_object.pe.get_overlay() 27 | self.dispatcher.add(FileObject(overlay)) 28 | 29 | 30 | class RSRC(Parser): 31 | DESCRIPTION = "Dropper (RSRC)" 32 | 33 | @classmethod 34 | def identify(cls, file_object): 35 | """ 36 | Validate a PE file is in the resources in plaintext 37 | """ 38 | return ( 39 | file_object.pe 40 | and any(pefileutils.obtain_pe(rsrc.data) for rsrc in file_object.resources) 41 | ) 42 | 43 | def run(self): 44 | """ 45 | Extract embedded PE files from resources 46 | 47 | :return: 48 | """ 49 | for rsrc in self.file_object.resources: 50 | file = FileObject(rsrc.data, def_stub=rsrc.fname_stub) 51 | if file.pe: 52 | self.logger.info(f"PE file identified in resource {rsrc.rsrc_entry}") 53 | self.dispatcher.add(file) 54 | -------------------------------------------------------------------------------- /mwcp/utils/multi_proc.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper methods for setting up multiprocessing workers with logging capabilities 3 | """ 4 | 5 | import logging 6 | import multiprocessing as mp 7 | import multiprocessing.pool 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | from mwcp import registry 12 | from mwcp.utils import logutil 13 | 14 | 15 | def initializer(parser_sources, default_source): 16 | """Initializer function that runs at the beginning of each process creation.""" 17 | registry._sources = parser_sources # Propagate registered parser information. 18 | registry._default_source = default_source 19 | 20 | 21 | class TProcess(mp.Process): 22 | """ 23 | Slighted modified subclass of :class:`multiprocessing.Process`. 
24 | 25 | Use this in place of ``Process`` to enable logging in the spawned process. 26 | """ 27 | 28 | def __init__(self, group=None, target=None, name=None, args=(), kwargs=None): 29 | kwargs = kwargs or {} 30 | # NOTE: Forcing group to be None since BaseProcess asserts it to be None. 31 | super(TProcess, self).__init__(group=None, target=target, name=name, args=args, kwargs=kwargs) 32 | self.queue = logutil.mp_queue 33 | 34 | def run(self): 35 | logutil.setup_logging(queue=self.queue) 36 | logger.debug("Setup logger in {}".format(mp.current_process().name)) 37 | super(TProcess, self).run() 38 | 39 | 40 | class TPool(mp.pool.Pool): 41 | """ 42 | Version of :class:`multiprocessing.pool.Pool` that uses :class:`TProcess`. 43 | """ 44 | 45 | Process = TProcess 46 | 47 | def __init__(self, processes=None, maxtasksperchild=None): 48 | """Overwrite to add initializer.""" 49 | super(TPool, self).__init__( 50 | processes=processes, 51 | maxtasksperchild=maxtasksperchild, 52 | initializer=initializer, 53 | initargs=(registry._sources, registry._default_source), 54 | ) 55 | -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer/report_foreign.html: -------------------------------------------------------------------------------- 1 |

File: input_file.bin

2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 |
Field Value
Parser FooParser
File Path C:/input_file.bin
Description SuperMalware Implant
Architecture
MD5 1e50210a0202497fb79bc38b6ade6c34
SHA1 baf34551fecb48acc3da868eb85e1b6dac9de356
SHA256 1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee
Compile Time
17 | 18 |

Miscellaneous

19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
Key Value
JAPAN ユーザー別サイト
CHINA 简体中文
KOREA 크로스 플랫폼으로
ISRAELמדורים מבוקשים
EGYPT أفضل البحوث
RUSSIAДесятую Международную
MATH ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i)
FRANCEfrançais langue étrangère
SPAIN mañana olé
35 | 36 |

File Tree

37 |
38 | <input_file.bin (1e50210a0202497fb79bc38b6ade6c34) : SuperMalware Implant>
39 | 
40 | 41 | -------------------------------------------------------------------------------- /mwcp/tests/test_testing.py: -------------------------------------------------------------------------------- 1 | 2 | import mwcp 3 | from mwcp import testing 4 | 5 | 6 | def test_get_malware_repo_path(tmp_path): 7 | """Tests generating malware repo path.""" 8 | malware_repo = tmp_path / "malware_repo" 9 | malware_repo.mkdir() 10 | mwcp.config["MALWARE_REPO"] = str(malware_repo) 11 | 12 | test_file = tmp_path / "test.txt" 13 | test_file.write_bytes(b"This is some test data!") 14 | testing.add_to_malware_repo(test_file) 15 | 16 | expected_path = malware_repo / "fb84" / "fb843efb2ffec987db12e72ca75c9ea2" 17 | 18 | # Test with hashing a file. 19 | sample_path = testing.get_path_in_malware_repo(test_file) 20 | assert sample_path == expected_path 21 | 22 | # Test with md5 23 | sample_path = testing.get_path_in_malware_repo(md5="fb843efb2ffec987db12e72ca75c9ea2") 24 | assert sample_path == expected_path 25 | 26 | # Test with partial md5 27 | sample_path = testing.get_path_in_malware_repo(md5="fb843e") 28 | assert sample_path == expected_path 29 | 30 | 31 | def test_add_to_malware_repo(tmp_path): 32 | """Tests adding a file to the malware repo.""" 33 | malware_repo = tmp_path / "malware_repo" 34 | malware_repo.mkdir() 35 | test_file = tmp_path / "test.txt" 36 | test_file.write_bytes(b"This is some test data!") 37 | 38 | mwcp.config["MALWARE_REPO"] = str(malware_repo) 39 | sample_path = testing.add_to_malware_repo(test_file) 40 | expected_sample_path = malware_repo / "fb84" / "fb843efb2ffec987db12e72ca75c9ea2" 41 | assert sample_path == expected_sample_path 42 | assert expected_sample_path.exists() 43 | assert expected_sample_path.read_bytes() == test_file.read_bytes() 44 | 45 | 46 | def test_iter_md5s(): 47 | """Tests obtaining md5s for a parser based on test cases""" 48 | mwcp.register_entry_points() 49 | mwcp.config["TESTCASE_DIR"] = None # need to clear any previously set 
testcase_dir from a previous unit test. 50 | assert list(testing.iter_md5s("foo")) == ["f144899b86766688991c5d0d10902f4a"] 51 | assert list(testing.iter_md5s("bogus")) == [] 52 | -------------------------------------------------------------------------------- /mwcp/parsers/PDF.py: -------------------------------------------------------------------------------- 1 | """ 2 | PDF 3 | """ 4 | import re 5 | 6 | from mwcp import Parser, metadata 7 | 8 | 9 | class Document(Parser): 10 | """ 11 | Parses PDF file with some basic metadata extraction. 12 | """ 13 | DESCRIPTION = "PDF Document" 14 | AUTHOR = "DC3" 15 | 16 | IGNORE_DOMAINS = [ 17 | b"www.w3.org", 18 | b"ns.adobe.com", 19 | b"purl.org", 20 | ] 21 | 22 | # 2-6 character protocol -> :// -> Up to 253 alphanumeric, "-", "_", or "." characters, (which should include all 23 | # valid domains or IP addresses) -> Nothing, or a port or "/" -> (For the port or "/") any non-whitespace characters. 24 | URL_RE = re.compile( 25 | b"[a-zA-Z]{2,6}" # scheme 26 | b"://" 27 | b"([\w._\-]+(:[\w._\-]+)?@)?" # user info 28 | b"[\w._\-]{4,253}" # host 29 | b"(:[\d]{1,5})?" # port 30 | b"(/[\w._\-~=%]*)*" # path 31 | b"(\?[\w._\-~=&,%]+)?" # query 32 | b"(#[\w._\-~]+)?" # fragment 33 | ) 34 | EMAIL_RE = re.compile(b"[\w.+-]+@([A-Za-z0-9](|[\w-]{0,61}[A-Za-z0-9])\.)+[A-Za-z]{2,6}") 35 | 36 | @classmethod 37 | def identify(cls, file_object): 38 | return file_object.data.startswith(b"%PDF") and ( 39 | cls.URL_RE.search(file_object.data) 40 | or cls.EMAIL_RE.search(file_object.data) 41 | ) 42 | 43 | def extract_urls(self): 44 | """ 45 | Statically extract URLs embedded in the PDF. 46 | """ 47 | for match in self.URL_RE.finditer(self.file_object.data): 48 | url = match.group() 49 | if not any(domain in url for domain in self.IGNORE_DOMAINS): 50 | self.report.add(metadata.URL(url)) 51 | 52 | def extract_emails(self): 53 | """ 54 | Statically extract URLs embedded in the PDF. 
55 | """ 56 | for match in self.EMAIL_RE.finditer(self.file_object.data): 57 | self.report.add(metadata.EmailAddress(match.group())) 58 | 59 | def run(self): 60 | self.extract_urls() 61 | self.extract_emails() 62 | -------------------------------------------------------------------------------- /mwcp/parsers/PowerShell.py: -------------------------------------------------------------------------------- 1 | 2 | import re 3 | from typing import List 4 | 5 | from mwcp import Parser, metadata 6 | 7 | 8 | class Script(Parser): 9 | """ 10 | Generic parser for pulling suspect URLs from a Powershell script 11 | """ 12 | DESCRIPTION = "PowerShell Script" 13 | AUTHOR = "DC3" 14 | 15 | INVALID_DOMAINS = [ 16 | "ipify.org", 17 | "whatismyipaddress.com" 18 | ] 19 | 20 | URL_REGEX = re.compile( 21 | ( 22 | # HTTP/HTTPS. 23 | b"(https?://)" 24 | b"(([" 25 | # IP address. 26 | b"(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 27 | b"(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 28 | b"(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 29 | b"(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])]|" 30 | # Or domain name. 31 | b"[a-zA-Z0-9.-]+)" 32 | # Optional port. 33 | b"(:\\d+)?" 34 | # URI. 35 | b"(/[()a-zA-Z0-9_:%=/.-]*)?" 36 | ) 37 | ) 38 | 39 | @classmethod 40 | def identify(cls, file_object): 41 | return file_object.name.endswith(".ps1") 42 | 43 | def extract_urls(self, data: bytes) -> List[str]: 44 | """ 45 | Extract URLs using regular expression. 46 | 47 | :param data: Data to search for URLs in 48 | :return: List of extracted URLs (with duplicates removed) 49 | :rtype: list[str] 50 | """ 51 | urls = set() 52 | for match in self.URL_REGEX.finditer(data): 53 | url = match.group().decode() 54 | if not any(invalid in url for invalid in self.INVALID_DOMAINS): 55 | urls.add(url) 56 | return list(urls) 57 | 58 | def run(self): 59 | """ 60 | Presently only search for extract-able URLs. 61 | """ 62 | # General report of URLS. 
63 | urls = self.extract_urls(self.file_object.data) 64 | for url in urls: 65 | self.report.add(metadata.URL(url)) 66 | -------------------------------------------------------------------------------- /mwcp/tests/test_stix.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests STIX Reports. 3 | """ 4 | import logging 5 | import json 6 | 7 | import pytest 8 | 9 | import mwcp 10 | 11 | 12 | class CheatUUID: 13 | """ 14 | Used to provide a mock that overrides the uuid.uuid4 function with something that is deterministic 15 | """ 16 | def __init__(self): 17 | self.counter = 0 18 | 19 | def uuid4(self): 20 | self.counter += 1 21 | return "00000000-0000-4006-9000-{:012d}".format(self.counter) 22 | 23 | 24 | @pytest.fixture 25 | def filled_report(report, metadata_items): 26 | """ 27 | Provides a report filled with metadata examples seen above. 28 | """ 29 | logger = logging.getLogger("test_report") 30 | 31 | with report: 32 | report.input_file.description = "SuperMalware Implant" 33 | 34 | for item in metadata_items: 35 | report.add(item) 36 | 37 | logger.info("Test info log") 38 | logger.error("Test error log") 39 | logger.debug("Test debug log") 40 | 41 | report.add_tag("test", "tagging") 42 | 43 | return report 44 | 45 | 46 | def test_report_stix(datadir, filled_report, mocker): 47 | # Instead of creating UUIDv4s we will auto increment them to allow easier compares 48 | uuid_generator = CheatUUID() 49 | mocker.patch( 50 | 'uuid.uuid4', 51 | uuid_generator.uuid4 52 | ) 53 | 54 | # Writer must be initialized with a fixed time so we can easily compare results 55 | # TODO: Look into using freezegun library. 
56 | actual = filled_report.as_stix(fixed_timestamp="2022-01-01T07:32:00.000Z") 57 | print(actual) 58 | actual = json.loads(actual) 59 | with open(datadir / "report.json", "rt") as input_file: 60 | expected = json.load(input_file) 61 | 62 | # sometimes the ordering of sco_refs will change so this cleans them up 63 | for obj in expected["objects"]: 64 | # always keep the current version of MWCP for the expected result 65 | if obj["type"] == "malware-analysis": 66 | obj["version"] = mwcp.__version__ 67 | 68 | assert actual == expected 69 | -------------------------------------------------------------------------------- /mwcp/tests/test_cli/csv_cli.csv: -------------------------------------------------------------------------------- 1 | MD5,MetaIndex,Category,Field,Value 2 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,parser,foo 3 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,filename,test.txt 4 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,description,Foo 5 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,architecture, 6 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,compile_time, 7 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,derivation, 8 | fb843efb2ffec987db12e72ca75c9ea2,1,URL,url,http://127.0.0.1 9 | fb843efb2ffec987db12e72ca75c9ea2,1,URL,path, 10 | fb843efb2ffec987db12e72ca75c9ea2,1,URL,query, 11 | fb843efb2ffec987db12e72ca75c9ea2,1,URL,protocol,http 12 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,url.url,http://127.0.0.1 13 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,path, 14 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,query, 15 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,protocol,http 16 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,address,127.0.0.1 17 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,port, 18 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,network_protocol, 19 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,listen, 20 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,credential, 21 | fb843efb2ffec987db12e72ca75c9ea2,3,Socket,address,127.0.0.1 22 
| fb843efb2ffec987db12e72ca75c9ea2,3,Socket,port, 23 | fb843efb2ffec987db12e72ca75c9ea2,3,Socket,network_protocol, 24 | fb843efb2ffec987db12e72ca75c9ea2,3,Socket,listen, 25 | fb843efb2ffec987db12e72ca75c9ea2,4,File,name,fooconfigtest.txt 26 | fb843efb2ffec987db12e72ca75c9ea2,4,File,description,example output file 27 | fb843efb2ffec987db12e72ca75c9ea2,4,File,md5,5eb63bbbe01eeed093cb22bb8f5acdc3 28 | fb843efb2ffec987db12e72ca75c9ea2,4,File,sha1,2aae6c35c94fcfb415dbe95f408b9ce91ee846ed 29 | fb843efb2ffec987db12e72ca75c9ea2,4,File,sha256,b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9 30 | fb843efb2ffec987db12e72ca75c9ea2,4,File,architecture, 31 | fb843efb2ffec987db12e72ca75c9ea2,4,File,compile_time, 32 | fb843efb2ffec987db12e72ca75c9ea2,4,File,file_path, 33 | fb843efb2ffec987db12e72ca75c9ea2,4,File,data, 34 | fb843efb2ffec987db12e72ca75c9ea2,4,File,derivation,extracted and decompressed 35 | 36 | -------------------------------------------------------------------------------- /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | name: Build Pipeline 2 | 3 | on: [ push ] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | # This workflow can be matrixed against multiple Python versions if desired. eg. 
[3.7, 3.8, 3.9, "3.10"] 11 | python-version: [ "3.11" ] 12 | 13 | steps: 14 | # Get the code from the repository to be linted, packaged, and pushed 15 | - name: Get Repo 16 | uses: actions/checkout@v3 17 | 18 | # Setup the Python environment 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | 24 | # Install the packages for linting and building the package 25 | - name: Prepare Build Environment 26 | run: | 27 | pip install -q flake8 twine wheel nox 28 | 29 | # Lint the Python code to check for syntax errors or code smell 30 | - name: Lint with Flake8 31 | run: | 32 | # stop the build if there are Python syntax errors or undefined names 33 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 34 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 35 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 36 | 37 | # Build the distributable package as well as the release patch 38 | - name: Build Objects 39 | if: startsWith(github.ref, 'refs/tags') 40 | run: nox -s build 41 | 42 | # Ensure the objects were packaged correctly and there wasn't an issue with 43 | # the compilation or packaging process. 44 | - name: Check Objects 45 | if: startsWith(github.ref, 'refs/tags') 46 | run: twine check dist/* 47 | 48 | # If this commit is the result of a Git tag, push the wheel and tar packages 49 | # to the PyPi registry 50 | - name: Publish to PyPI 51 | if: startsWith(github.ref, 'refs/tags') 52 | run: twine upload --repository-url https://upload.pypi.org/legacy/ -u __token__ -p ${{ secrets.PYPI_API_TOKEN }} --skip-existing --verbose dist/* -------------------------------------------------------------------------------- /mwcp/parsers/Decoy.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains parsers for common Decoy documents. 
3 | """ 4 | 5 | from mwcp import Parser 6 | 7 | 8 | class Base(Parser): 9 | # Must be implemented in child class 10 | HEADER = None 11 | EXT = None 12 | 13 | @classmethod 14 | def identify(cls, file_object): 15 | """ 16 | Validate the file starts with the file header 17 | """ 18 | if not cls.HEADER: 19 | raise NotImplementedError("[*] HEADER was not set.") 20 | return file_object.data.startswith(cls.HEADER) 21 | 22 | def run(self): 23 | # Update file extension if unknown or generic .bin 24 | if self.EXT and self.file_object.ext in ("", ".bin"): 25 | self.file_object.ext = self.EXT 26 | 27 | 28 | class DOC(Base): 29 | DESCRIPTION = "Decoy Document (.doc)" 30 | 31 | HEADER = b"\xd0\xcf\x11\xe0" 32 | EXT = ".doc" 33 | 34 | 35 | class PDF(Base): 36 | DESCRIPTION = "Decoy Document (.pdf)" 37 | 38 | HEADER = b"%PDF-" 39 | EXT = ".pdf" 40 | 41 | 42 | class RTF(Base): 43 | DESCRIPTION = "Decoy Document (.rtf)" 44 | 45 | HEADER = b"{\\rt" 46 | EXT = ".rtf" 47 | 48 | 49 | class JPG(Base): 50 | DESCRIPTION = "Decoy (.jpg)" 51 | 52 | HEADER = b"\xff\xd8\xff\xe0" 53 | EXT = ".jpg" 54 | 55 | 56 | class XMLDocument(Base): 57 | DESCRIPTION = "Decoy XML Document" 58 | 59 | HEADER = b"PK\x03\x04" 60 | # Must be implemented by child class 61 | RELS_PATH = None 62 | 63 | @classmethod 64 | def identify(cls, file_object): 65 | if not super().identify(file_object): 66 | return False 67 | if cls.RELS_PATH: 68 | return cls.RELS_PATH in file_object.data 69 | else: 70 | return True 71 | 72 | 73 | class DOCX(XMLDocument): 74 | DESCRIPTION = "Decoy Document (.docx)" 75 | 76 | EXT = ".docx" 77 | RELS_PATH = b"word/_rels" 78 | 79 | 80 | class XLSX(XMLDocument): 81 | DESCRIPTION = "Decoy Document (.xlsx)" 82 | 83 | EXT = ".xlsx" 84 | RELS_PATH = b"xl/_rels" 85 | 86 | 87 | class PPTX(XMLDocument): 88 | DESCRIPTION = "Decoy Document (.pptx)" 89 | 90 | EXT = ".pptx" 91 | RELS_PATH = b"ppt/_rels" 92 | -------------------------------------------------------------------------------- 
/mwcp/utils/construct/datetime_.py: -------------------------------------------------------------------------------- 1 | """ 2 | Date/Time constructs 3 | """ 4 | 5 | from __future__ import absolute_import 6 | 7 | import datetime 8 | 9 | from .core import * 10 | 11 | 12 | # TODO: Implement _encode. 13 | class _DateTimeDateDataAdapter(Adapter): 14 | r""" 15 | Adapter for a C# DateTime.dateData object to DateTime format. Obtain the DateTime.Ticks and the DateTime.Kind 16 | property to format datetime. 17 | 18 | 19 | >>> _DateTimeDateDataAdapter(Int64sl).parse(b'\x80\xb4N3\xd1\xd4\xd1H') 20 | '2014-11-23 01:09:01 UTC' 21 | """ 22 | def _decode(self, obj, context, path): 23 | ticks = obj & 0x3fffffffffffffff 24 | kind = (obj >> 62) & 0x03 25 | converted_ticks = datetime.datetime(1, 1, 1) + datetime.timedelta(microseconds=ticks / 10) 26 | if kind == 0: 27 | return converted_ticks.strftime("%Y-%m-%d %H:%M:%S") 28 | elif kind == 1: 29 | return converted_ticks.strftime("%Y-%m-%d %H:%M:%S UTC") 30 | elif kind == 2: 31 | return converted_ticks.strftime("%Y-%m-%d %H:%M:%S Local") 32 | 33 | 34 | DateTimeDateData = _DateTimeDateDataAdapter(Int64sl) 35 | 36 | 37 | # TODO: Implement _encode 38 | class EpochTimeAdapter(Adapter): 39 | r""" 40 | Adapter to convert time_t, EpochTime, to an isoformat 41 | 42 | >>> EpochTimeAdapter(construct.Int32ul, tz=datetime.timezone.utc).parse(b'\xff\x93\x37\x57') 43 | '2016-05-14T21:09:19+00:00' 44 | >>> EpochTimeAdapter(construct.Int32ul).parse(b'\xff\x93\x37\x57') 45 | '2016-05-14T17:09:19' 46 | """ 47 | def __init__(self, subcon, tz=None): 48 | """ 49 | :param tz: Optional timezone object, default is localtime 50 | :param subcon: subcon to parse EpochTime. 
51 | """ 52 | super().__init__(subcon) 53 | self._tz = tz 54 | 55 | def _decode(self, obj, context, path): 56 | try: 57 | return datetime.datetime.fromtimestamp(obj, tz=self._tz).isoformat() 58 | except OSError as e: 59 | raise construct.ConstructError(e) 60 | 61 | 62 | # Add common helpers 63 | EpochTime = EpochTimeAdapter(Int32ul) 64 | EpochTimeUTC = EpochTimeAdapter(construct.Int32ul, tz=datetime.timezone.utc) 65 | -------------------------------------------------------------------------------- /mwcp/parsers/foo.py: -------------------------------------------------------------------------------- 1 | """This is an example parser used to show the different methods of adding data to the reporter.""" 2 | import logging 3 | import os 4 | 5 | from mwcp import Parser, FileObject, metadata 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class Foo(Parser): 11 | DESCRIPTION = "Foo" 12 | 13 | @classmethod 14 | def identify(cls, file_object): 15 | # identifies if the parser can parse the given file. 16 | # checking filename to avoid infinite loop. 17 | return file_object.name != "fooconfigtest.txt" 18 | 19 | def run(self): 20 | # retrieve input file 21 | input_file = self.file_object 22 | 23 | # Pull external information from user or other parsers through knowledge_base 24 | secret = self.knowledge_base.get("secret", None) 25 | if secret: 26 | self.report.add(metadata.Other("secret_using_external_knowledge", secret + "!")) 27 | # Pass in our own information for other parsers. 
28 | self.knowledge_base["encryption_key"] = b"\xde\xad\xbe\xef" 29 | 30 | # standardized metadata 31 | self.report.add(metadata.URL("http://127.0.0.1")) 32 | 33 | # demonstrate access to sample 34 | logger.info(f"size of inputfile is {len(input_file.data)} bytes") 35 | 36 | # other, non-standardized metadata 37 | # also demonstrate use of pefile object 38 | if input_file.pe: 39 | self.report.add(metadata.Other( 40 | "section0", input_file.pe.sections[0].Name.rstrip(b"\x00") 41 | )) 42 | 43 | # Dispatch residual files to also be processed. 44 | self.dispatcher.add(FileObject( 45 | b"hello world", 46 | file_name="fooconfigtest.txt", 47 | description="example output file", 48 | derivation="extracted and decompressed", 49 | )) 50 | # Alternatively we can manually report a residual file without being processed. 51 | if False: 52 | self.report.add(metadata.File( 53 | "fooconfigtest.txt", description="example output file", data=b"hello world" 54 | )) 55 | 56 | # demonstrate use of filename() 57 | logger.info(f"operating on inputfile {input_file.name}") 58 | -------------------------------------------------------------------------------- /mwcp/parsers/Archive.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parsers for archive type files. 3 | """ 4 | 5 | import io 6 | import ntpath 7 | import pathlib 8 | import tarfile 9 | import zipfile 10 | 11 | from mwcp import Parser, FileObject 12 | 13 | 14 | class Zip(Parser): 15 | DESCRIPTION = "Zip Archive File" 16 | 17 | ZIP_HEADER = b"PK" 18 | 19 | @classmethod 20 | def identify(cls, file_object): 21 | """ 22 | Identify as a Zip archive file. 23 | """ 24 | return file_object.data.startswith(cls.ZIP_HEADER) 25 | 26 | def parse_extracted(self, file_name, file_data): 27 | self.dispatcher.add(FileObject(file_data, file_name=file_name)) 28 | 29 | def run(self): 30 | """ 31 | Use the zipfile Python library to extract the contents of a Zip archive. 
32 | """ 33 | self.logger.info("Attempting to extract files from Zip archive.") 34 | try: 35 | z = zipfile.ZipFile(io.BytesIO(self.file_object.data)) 36 | for obj in z.infolist(): 37 | file_data = z.read(obj) 38 | file_name = ntpath.basename(obj.filename) 39 | # see if there is data, before passing to the parse_extracted function 40 | if not len(file_data): 41 | continue 42 | self.parse_extracted(file_name, file_data) 43 | except IOError: 44 | self.logger.exception("Failed to extract Zip archive.") 45 | except zipfile.BadZipfile: 46 | self.logger.exception("Invalid zip file") 47 | 48 | 49 | class Gzip(Parser): 50 | DESCRIPTION = "Gzip Archive file" 51 | 52 | HEADER = b"\x1F\x8B" 53 | 54 | @classmethod 55 | def identify(cls, file_object): 56 | return file_object.data.startswith(cls.HEADER) 57 | 58 | def parse_extracted(self, file_name, file_data): 59 | self.dispatcher.add(FileObject(file_data, file_name=file_name)) 60 | 61 | def run(self): 62 | with self.file_object.open() as fo: 63 | with tarfile.open(fileobj=fo, mode="r:gz") as tar: 64 | for member in tar.getmembers(): 65 | if member.isfile(): 66 | data = tar.extractfile(member).read() 67 | name = pathlib.Path(member.name).name 68 | self.parse_extracted(name, data) 69 | -------------------------------------------------------------------------------- /mwcp/utils/construct/MIPS.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper constructs for parsing the MIPS instruction set. 3 | This module will be imported along with 'from mwcp.utils import construct' 4 | and accessible from the submodule "MIPS". (e.g. 
construct.MIPS.lw) 5 | 6 | reference: github.com/MIPT-ILab/mipt-mips/wiki/MIPS-Instruction-Set 7 | """ 8 | 9 | from .core import * 10 | from .core import this 11 | 12 | 13 | _REGISTERS = { 14 | '$zero': 0, 15 | '$at': 1, 16 | '$v0': 2, '$v1': 3, 17 | '$a0': 4, '$a1': 5, '$a2': 6, '$a3': 7, 18 | '$t0': 8, '$t1': 9, '$t2': 10, '$t3': 11, '$t4': 12, '$t5': 13, '$t6': 14, '$t7': 15, 19 | '$s0': 16, '$s1': 17, '$s2': 18, '$s3': 19, '$s4': 20, '$s5': 21, '$s6': 22, '$s7': 23, 20 | '$t8': 24, '$t9': 25, 21 | '$k0': 26, '$k1': 27, 22 | '$gp': 28, '$sp': 29, '$fp': 30, '$ra': 31, 23 | } 24 | _Register = Enum(BitsInteger(5), **_REGISTERS) 25 | 26 | # I-type instruction 27 | _I_inst = Struct( 28 | *BitStruct( 29 | 'opcode' / Enum( 30 | BitsInteger(6), 31 | # NOTE: Some opcode values are reserved for other instruction formats 32 | # and we should let construct fail if it sees one. 33 | j=0x02, jal=0x03, beq=0x04, bne=0x05, blez=0x06, bgtz=0x07, 34 | addi=0x08, addiu=0x09, slti=0x0A, sltiu=0x0B, andi=0x0C, ori=0x0D, xori=0x0E, lui=0x0F, 35 | beql=0x14, bnel=0x15, blezl=0x16, bgtzl=0x17, 36 | daddi=0x18, daddiu=0x19, ldl=0x1A, ldr=0x1B, jalx=0x1D, 37 | lb=0x20, lh=0x21, lwl=0x22, lw=0x23, lbu=0x24, lhu=0x25, lwr=0x26, lwu=0x27, 38 | sb=0x28, sh=0x29, swl=0x2A, sw=0x2B, sdl=0x2C, sdr=0x2D, swr=0x2E, cache=0x2F, 39 | ll=0x30, lwc1=0x31, lwc2=0x32, pref=0x33, lld=0x34, ldc1=0x35, ldc2=0x36, ld=0x37, 40 | sc=0x38, swc1=0x39, swc2=0x3A, scd=0x3C, sdc1=0x3D, sdc2=0x3E, sd=0x3F, 41 | ), 42 | 'src_register' / _Register, 43 | 'target_register' / _Register, 44 | # 'imm_constant' / construct.BitsInteger(16) 45 | ), 46 | # Need to move immediate outside of BitStruct to create signed number. 
class STIXResult:
    """
    Provides a means to return STIX 2.1 content

    :var linked_stix: An array of STIX objects that should be linked to a parent malware analysis object
    :var unlinked_stix: An array of STIX objects that should not be linked to a parent malware analysis object.
        This can include relationship objects, objects connected by relationship objects,
        and objects with embedded references like Notes
    :var note_content: The content of the note which will be attached to the STIX file object being analyzed by the
        malware analysis
    :var note_labels: The labels of the note which will be attached to the STIX file object being analyzed by the
        malware analysis
    :var fixed_timestamp: Optional timestamp string passed through to generated tag Notes.
    """

    def __init__(self, note_content: str = "", fixed_timestamp: str | None = None):
        self.linked_stix = []
        self.unlinked_stix = []
        self.note_content = note_content
        self.note_labels = []
        self.fixed_timestamp = fixed_timestamp

    def add_linked(self, stix_content):
        """Add STIX content that should be linked to the parent malware analysis object."""
        self.linked_stix.append(stix_content)

    def add_unlinked(self, stix_content):
        """Add STIX content that should not be linked to the parent malware analysis object."""
        self.unlinked_stix.append(stix_content)

    def create_tag_note(self, metadata, stix_content):
        """
        Create a Note carrying the metadata's tags and record it as unlinked content.
        Nothing is added if the metadata produces no note.
        """
        note = metadata.as_stix_tags(stix_content, self.fixed_timestamp)
        if note:
            self.unlinked_stix.append(note)

    def _absorb_note_content(self, other: STIXResult):
        """
        Append the other result's note content, newline-separated.
        Shared by merge() and merge_ref() so the joining rule stays in one place.
        """
        if self.note_content == "":
            self.note_content = other.note_content
        elif other.note_content != "":
            self.note_content += "\n" + other.note_content

    def merge(self, other: STIXResult):
        """
        Merge another result into this one, preserving the linked/unlinked split.

        NOTE(review): note_labels are not merged here (or in merge_ref) — confirm
        whether that is intentional.
        """
        self.linked_stix.extend(other.linked_stix)
        self.unlinked_stix.extend(other.unlinked_stix)
        self._absorb_note_content(other)

    def merge_ref(self, other: STIXResult):
        """
        A merge for when the target is a reference for the current object.
        All of the other result's content (including linked) becomes unlinked.
        """
        self.unlinked_stix.extend(other.linked_stix)
        self.unlinked_stix.extend(other.unlinked_stix)
        self._absorb_note_content(other)
4 | """ 5 | 6 | import dragodis 7 | import rugosa 8 | 9 | from mwcp import metadata, Parser 10 | 11 | 12 | class Implant(Parser): 13 | DESCRIPTION = "Sample Implant" 14 | 15 | @classmethod 16 | def identify(cls, file_object): 17 | return file_object.md5 == "e1b6be6c0c2db8b3d4dca56062ca6301" 18 | 19 | @staticmethod 20 | def xor_decrypt(key, enc_data): 21 | return bytes((x ^ key) for x in enc_data) 22 | 23 | def find_strings(self, dis: dragodis.Disassembler): 24 | """ 25 | Extracts and reports DecodedString objects for the parameters following xor encryption function: 26 | 27 | void encrypt(char *s, char key) 28 | { 29 | while (*s) 30 | *s++ ^= key; 31 | } 32 | """ 33 | emulator = rugosa.Emulator(dis) 34 | pattern = rugosa.re.compile(br"\x8b\x45\x08\x0f\xbe\x08") 35 | for encrypt_func in pattern.find_functions(dis): 36 | self.logger.info("Found XOR encrypt function at: 0x%x", encrypt_func.start) 37 | for call_ea in encrypt_func.calls_to: 38 | self.logger.debug("Tracing 0x%08x", call_ea) 39 | # Extract arguments for call to xor function. 40 | context = emulator.context_at(call_ea) 41 | enc_str_ptr, key = context.get_function_arg_values() 42 | 43 | enc_string_data = rugosa.get_terminated_bytes(dis, enc_str_ptr) 44 | dec_string_data = self.xor_decrypt(key, enc_string_data) 45 | string = rugosa.DecodedString( 46 | dec_data=dec_string_data, 47 | enc_data=enc_string_data, 48 | # data is encrypted in-place, so include string pointer as decoded source. 49 | dec_source=enc_str_ptr, 50 | ) 51 | # Annotate underlying disassembler with decrypted data. 52 | string.patch(dis, rename=False) 53 | 54 | # Report decoded string. 
55 | self.report.add(metadata.DecodedString( 56 | str(string), encryption_key=metadata.EncryptionKey(bytes([key]), "xor") 57 | )) 58 | 59 | def run(self): 60 | with self.file_object.disassembly(report=self.report) as dis: 61 | self.find_strings(dis) 62 | -------------------------------------------------------------------------------- /mwcp/tests/test_disassembly.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests components that use Dragodis disassembly. 3 | """ 4 | 5 | import os 6 | 7 | import pytest 8 | 9 | import mwcp 10 | from mwcp import metadata 11 | from mwcp.tests.test_parsers import _test_parser 12 | 13 | dragodis = pytest.importorskip("dragodis", reason="Dragodis not installed") 14 | 15 | 16 | @pytest.mark.parametrize("backend", ["ida", "ghidra"]) 17 | def test_disassembly(datadir, backend): 18 | """Tests basic disassembly""" 19 | strings_exe = datadir / "strings.exe" 20 | 21 | input_file = mwcp.FileObject.from_path(strings_exe) 22 | try: 23 | with input_file.disassembly(backend) as dis: 24 | insn = dis.get_instruction(0x401000) 25 | assert insn.mnemonic == "push" 26 | except dragodis.NotInstalledError as e: 27 | pytest.skip(e) 28 | 29 | 30 | @pytest.mark.parametrize("backend", ["ida", "ghidra"]) 31 | def test_file_object_disassembly(datadir, backend): 32 | """Tests disassembler project file gets reported when using FileObject.disassembly()""" 33 | strings_exe = datadir / "strings.exe" 34 | 35 | input_file = mwcp.FileObject.from_path(strings_exe) 36 | report = mwcp.Report(input_file, "FooParser") 37 | with report: 38 | try: 39 | with input_file.disassembly(backend, report=report) as dis: 40 | line = dis.get_line(0x401000) 41 | line.set_comment("test comment") 42 | except dragodis.NotInstalledError as e: 43 | pytest.skip(e) 44 | # After we leave disassembly context, we should see the project file in the report. 
@pytest.mark.parametrize("backend", ["ida", "ghidra"])
def test_Sample(pytestconfig, datadir, backend):
    """Tests running the Sample parser."""
    # Register the test parser directory so the Sample parser can be found.
    mwcp.register_parser_directory(str(datadir), source_name="test")
    # Force dragodis to use the backend under test.
    os.environ["DRAGODIS_DISASSEMBLER"] = backend
    input_file_path = datadir / "strings.exe"
    results_path = datadir / "strings.json"

    try:
        _test_parser(pytestconfig, input_file_path, results_path)
    except dragodis.NotInstalledError as e:
        # Backend isn't installed locally; skip rather than fail.
        # pytest.skip() expects a string reason, not an exception object.
        pytest.skip(str(e))
4 | """ 5 | 6 | from setuptools import setup, find_namespace_packages 7 | 8 | setup( 9 | name="mwcp", 10 | author="DC3", 11 | author_email="dc3.tsd@us.af.mil", 12 | keywords="malware", 13 | url="https://github.com/dod-cyber-crime-center/DC3-MWCP/", 14 | packages=find_namespace_packages(), 15 | include_package_data=True, 16 | license='MIT', 17 | classifiers=[ 18 | 'Development Status :: 5 - Production/Stable', 19 | 'Intended Audience :: Developers', 20 | 'License :: OSI Approved :: MIT License', 21 | 'Programming Language :: Python :: 3', 22 | 'Programming Language :: Python :: 3.9', 23 | 'Programming Language :: Python :: 3.10', 24 | 'Programming Language :: Python :: 3.11', 25 | 'Programming Language :: Python :: 3.12', 26 | ], 27 | python_requires=">=3.9", 28 | entry_points={ 29 | 'console_scripts': [ 30 | 'mwcp = mwcp.cli:main', 31 | 'poshdeob = mwcp.utils.poshdeob:main', 32 | 'mwcp_update_legacy_tests = mwcp.tools.update_legacy_tests:main', 33 | ], 34 | 'mwcp.parsers': [ 35 | 'dc3 = mwcp.parsers', 36 | ] 37 | }, 38 | install_requires=[ 39 | 'anytree', 40 | 'appdirs', 41 | 'attrs>=20.3.0', 42 | 'bitarray', 43 | 'cattrs', 44 | 'click>=8.0.1', 45 | 'construct >=2.9.45, <2.11', 46 | 'defusedxml', 47 | 'future', 48 | 'isodate', 49 | 'jinja2', # For construct.html_hex() 50 | 'jsonschema_extractor>=1.0', 51 | 'lief>=0.16.0;python_version>="3.9"', 52 | 'packaging', 53 | 'pandas', 54 | 'pefile>=2019.4.18', 55 | 'pyasn1', 56 | 'pyasn1_modules', 57 | 'pyelftools', 58 | 'pyparsing', 59 | 'pytest>=6.0.0', 60 | 'pytest-datadir', 61 | 'pytest-xdist', 62 | 'pytest-mock', 63 | 'pytest-cov', 64 | 'pyyaml', 65 | 'requests', 66 | 'ruamel.yaml', 67 | 'six', 68 | 'tabulate[widechars]<1.0.0', 69 | 'stix2', 70 | 'yara-python', 71 | # For the server and API 72 | 'flask', 73 | 'pygments', 74 | 75 | # Dependencies for builtin parsers. 
76 | 'pycdlib', 77 | 'pycryptodome', 78 | 'olefile', 79 | ], 80 | extras_require={ 81 | 'dragodis': ['dragodis>=0.2.0'], 82 | 'kordesii': ['kordesii>=2.0.0'], 83 | 'testing': [ 84 | 'jsonschema', 85 | 'dragodis', 86 | 'rugosa', 87 | ], 88 | } 89 | ) 90 | -------------------------------------------------------------------------------- /mwcp/tests/test_disassembly/strings.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | char string01[] = "Idmmn!Vnsme "; 5 | char string02[] = "Vgqv\"qvpkle\"ukvj\"ig{\"2z20"; 6 | char string03[] = "Wkf#rvj`h#aqltm#el{#ivnsp#lufq#wkf#obyz#gld-"; 7 | char string04[] = "Keo$mw$wpvkjc$ej`$ehwk$cmraw$wle`a*"; 8 | char string05[] = "Dfla%gpwkv%mji`v%lk%rjji%fijqm+"; 9 | char string06[] = "Egru&ghb&biau&cgen&ngrc&rnc&irnct("; 10 | char string13[] = "\\cv}3g{v3pargv3qfg3w|}4g3qavrx3g{v3t\x7fr``="; 11 | char string17[] = "C\x7frer7c\x7fr7q{xxs7zve|7~d7cry7~yt\x7frd9"; 12 | char string1a[] = "+()./,-\"#*"; 13 | char string23[] = "`QFBWFsQL@FPPb"; 14 | char string27[] = "tSUdFS"; 15 | char string40[] = "\x01\x13\x10n\x0e\x05\x14"; 16 | char string46[] = "-\",5 , v,tr4v,trv4t,v\x7f,ttt"; 17 | char string73[] = "@AKJDGBA@KJGDBJKAGDC"; 18 | char string75[] = "!\x1d\x10U\x05\x14\x06\x01U\x02\x1c\x19\x19U\x19\x1a\x1a\x1eU\x17\x07\x1c\x12\x1d\x01\x10\x07U\x01\x1a\x18\x1a\x07\x07\x1a\x02["; 19 | char string77[] = "4\x16\x05\x04W\x16\x19\x13W\x15\x02\x04\x04\x12\x04W\x04\x03\x16\x1b\x1b\x12\x13W\x1e\x19W\x04\x16\x19\x13W\x13\x05\x1e\x11\x03\x04Y"; 20 | char string7a[] = ".\x12\x1fZ\x10\x1b\x19\x11\x1f\x0eZ\x12\x0f\x14\x1dZ\x15\x14Z\x0e\x12\x1fZ\x18\x1b\x19\x11Z\x15\x1cZ\x0e\x12\x1fZ\r\x13\x1e\x1fZ\x19\x12\x1b\x13\x08T"; 21 | char string7f[] = "LMFOGHKNLMGFOHKFGNLKHNMLOKGNKGHFGLHKGLMHKGOFNMLHKGFNLMJNMLIJFGNMLOJIMLNGFJHNM";; 22 | 23 | 24 | 25 | void encrypt(char *s, char key) 26 | { 27 | while (*s) 28 | *s++ ^= key; 29 | } 30 | 31 | void decrypt() 32 | { 33 | encrypt(&string01[0], 0x01); 
class ImageFile(Parser):
    DESCRIPTION = "ISO Image File"
    AUTHOR = "DC3"

    MAGIC = b"CD001"
    # Offsets where a volume descriptor's "CD001" identifier may appear
    # (varies with the image's sector size).
    OFFSETS = [0x8001, 0x8801, 0x9001]

    # Path keyword arguments understood by PyCdlib's walk() and
    # get_file_from_iso_fp(), one per supported directory structure extension.
    _PATH_KEYS = ("iso_path", "udf_path", "joliet_path", "rr_path")

    @classmethod
    def identify(cls, file_object):
        """
        Identify as an ISO image by looking for the "CD001" magic at any of
        the known volume descriptor offsets.
        """
        for offset in cls.OFFSETS:
            if file_object.data[offset:offset+len(cls.MAGIC)] == cls.MAGIC:
                return True
        return False

    def walk_handler(self, iso, **kwargs):
        """
        Uses the iso.walk function to walk the ISO image depending on the path
        provided in kwargs, dispatching every file found.

        :param iso: an open PyCdlib instance
        :param kwargs: Keyword arguments to be passed to the iso.walk function.
            Note that this must include either iso_path, udf_path, joliet_path, or
            rr_path; otherwise this function does nothing.
        :return:
        """
        # Resolve which path keyword we were given once, rather than
        # re-testing kwargs for every extracted file.
        path_key = next((key for key in self._PATH_KEYS if key in kwargs), None)
        if path_key is None:
            return

        for dirname, dirlist, filelist in iso.walk(**kwargs):
            for filename in filelist:
                # Avoid a double slash when the file lives in the root directory.
                if dirname == "/":
                    path = dirname + filename
                else:
                    path = dirname + "/" + filename

                filedata = BytesIO()
                iso.get_file_from_iso_fp(filedata, **{path_key: path})

                self.dispatcher.add(FileObject(file_data=filedata.getvalue(), file_name=filename))

    def run(self):
        """
        Walk the ISO image to extract embedded files.

        :return:
        """
        iso = pycdlib.PyCdlib()
        iso.open_fp(BytesIO(self.file_object.data))

        try:
            # The plain ISO9660 namespace is always present; the extensions
            # below are walked only when the image declares them.
            self.walk_handler(iso, iso_path="/")

            if iso.has_udf():
                self.logger.info("UDF extension identified")
                self.walk_handler(iso, udf_path="/")

            if iso.has_joliet():
                self.logger.info("Joliet extension identified")
                self.walk_handler(iso, joliet_path="/")

            if iso.has_rock_ridge():
                self.logger.info("Rock Ridge extension identified")
                self.walk_handler(iso, rr_path="/")
        finally:
            iso.close()
"network", 41 | "tags": [], 42 | "url": { 43 | "type": "url", 44 | "tags": [], 45 | "url": "http://127.0.0.1", 46 | "path": null, 47 | "query": null, 48 | "protocol": "http" 49 | }, 50 | "socket": { 51 | "type": "socket", 52 | "tags": [], 53 | "address": "127.0.0.1", 54 | "port": null, 55 | "network_protocol": null, 56 | "listen": null 57 | }, 58 | "credential": null 59 | }, 60 | { 61 | "type": "socket", 62 | "tags": [], 63 | "address": "127.0.0.1", 64 | "port": null, 65 | "network_protocol": null, 66 | "listen": null 67 | }, 68 | { 69 | "type": "file", 70 | "tags": [], 71 | "name": "fooconfigtest.txt", 72 | "description": "example output file", 73 | "md5": "5eb63bbbe01eeed093cb22bb8f5acdc3", 74 | "sha1": "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", 75 | "sha256": "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9", 76 | "architecture": null, 77 | "compile_time": null, 78 | "file_path": null, 79 | "data": null, 80 | "derivation": "extracted and decompressed" 81 | } 82 | ] 83 | } -------------------------------------------------------------------------------- /mwcp/tests/test_pecon.py: -------------------------------------------------------------------------------- 1 | """ 2 | These are pytest test cases for pecon. 
3 | """ 4 | 5 | from mwcp.utils import pecon 6 | 7 | 8 | default_pe = ( 9 | b'MZ\x90\x00\x03\x00\x00\x00\x04\x00\x00\x00\xff\xff\x00\x00\xb8\x00\x00\x00' 10 | b'\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 11 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 12 | b'\x00\x00\xe0\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 13 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 14 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 15 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 16 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 17 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 18 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 19 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 20 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00PE\x00\x00L\x01\x00' 21 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0\x00\x0f\x01\x0b\x01' 22 | b'\x01G\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 23 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x02\x00\x00\x01' 24 | b'\x00\x00\x00\x00\x00\x00\x00\x05\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00' 25 | b'\x00\x02\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x10\x00\x00\x10\x00' 26 | b'\x00\x00\x00\x10\x00\x00\x10\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00' 27 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 28 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 29 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 30 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 31 | 
def test_reconstruction():
    """A freshly constructed PE exposes sane defaults and builds to the known blob."""
    blank = pecon.PE()

    # Spot-check a few default header fields.
    optional_header = blank.OptionalHeader
    assert blank.DosHeader.e_magic == b'MZ'
    assert blank.SectionTable == []
    assert optional_header.FileAlignment == 512
    assert optional_header.DataDirectory.imports.VirtualAddress == 0

    # Serializing the default object reproduces the reference bytes.
    assert blank.build() == default_pe


def test_parsing():
    """Round-tripping an existing PE is lossless, and header edits stick on rebuild."""
    parsed = pecon.PE(default_pe)
    assert parsed.build() == default_pe

    # Mutate a header field and confirm only those bytes change when rebuilt.
    parsed.DosHeader.e_magic = b'ZM'
    rebuilt = parsed.build()
    assert rebuilt == b'ZM' + default_pe[2:]
Error log in input_file.bin", 13 | ], 14 | "mwcp_version": mwcp.__version__, 15 | "input_file": { 16 | "architecture": None, 17 | "compile_time": None, 18 | "data": None, 19 | "derivation": None, 20 | "description": None, 21 | "file_path": "C:/input_file.bin", 22 | "md5": "1e50210a0202497fb79bc38b6ade6c34", 23 | "name": "input_file.bin", 24 | "sha1": "baf34551fecb48acc3da868eb85e1b6dac9de356", 25 | "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee", 26 | "tags": [], 27 | "type": "file" 28 | }, 29 | "metadata": [ 30 | { 31 | "tags": [], 32 | "type": "mutex", 33 | "value": "root_mutex" 34 | }, 35 | { 36 | "architecture": None, 37 | "compile_time": None, 38 | "data": None, 39 | "derivation": None, 40 | "description": None, 41 | "file_path": None, 42 | "md5": "4844437d5747acd52a54981b48f60c8e", 43 | "name": "sub_file.exe", 44 | "sha1": "7bd8e7cb8e1e8b7b2e94b472422512935c9d4519", 45 | "sha256": "c2b8761db47791e06799e99a698ed4d63cdbdb9f5f16224c90b625b02581350c", 46 | "tags": [], 47 | "type": "file" 48 | } 49 | ], 50 | "parser": None, 51 | "recursive": False, 52 | "external_knowledge": {}, 53 | "tags": ["tagging", "test"], 54 | "type": "report" 55 | }, 56 | { 57 | "errors": [ 58 | "[!] Error log in sub_file.exe", 59 | ], 60 | "logs": [ 61 | "[+] Info log in sub_file.exe", 62 | "[!] 
Error log in sub_file.exe", 63 | ], 64 | "mwcp_version": mwcp.__version__, 65 | "input_file": { 66 | "architecture": None, 67 | "compile_time": None, 68 | "data": None, 69 | "derivation": None, 70 | "description": None, 71 | "file_path": None, 72 | "md5": "4844437d5747acd52a54981b48f60c8e", 73 | "name": "sub_file.exe", 74 | "sha1": "7bd8e7cb8e1e8b7b2e94b472422512935c9d4519", 75 | "sha256": "c2b8761db47791e06799e99a698ed4d63cdbdb9f5f16224c90b625b02581350c", 76 | "tags": [], 77 | "type": "file" 78 | }, 79 | "metadata": [ 80 | { 81 | "tags": [], 82 | "type": "mutex", 83 | "value": "sub_mutex" 84 | } 85 | ], 86 | "parser": None, 87 | "recursive": False, 88 | "external_knowledge": {}, 89 | "tags": ["tagging", "test"], 90 | "type": "report" 91 | } 92 | ] 93 | -------------------------------------------------------------------------------- /mwcp/tests/test_cli/parse.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "report", 4 | "tags": [], 5 | "mwcp_version": "MWCP_VERSION", 6 | "input_file": { 7 | "type": "file", 8 | "tags": [], 9 | "name": "test.txt", 10 | "description": "Foo", 11 | "md5": "fb843efb2ffec987db12e72ca75c9ea2", 12 | "sha1": "5e90c4c2be31a7a0be133b3dbb4846b0434bc2ab", 13 | "sha256": "fe5af8c641835c24f3bbc237a659814b96ed64d2898fae4cb3d2c0ac5161f5e9", 14 | "architecture": null, 15 | "compile_time": null, 16 | "file_path": "test.txt", 17 | "data": null, 18 | "derivation": null 19 | }, 20 | "parser": "foo", 21 | "recursive": true, 22 | "external_knowledge": {}, 23 | "errors": [], 24 | "logs": [ 25 | "[+] File test.txt identified as Foo.", 26 | "[+] size of inputfile is 23 bytes", 27 | "[+] test.txt dispatched residual file: fooconfigtest.txt", 28 | "[+] File fooconfigtest.txt described as example output file", 29 | "[+] operating on inputfile test.txt" 30 | ], 31 | "metadata": [ 32 | { 33 | "type": "url", 34 | "tags": [], 35 | "url": "http://127.0.0.1", 36 | "path": null, 37 | "query": null, 38 | 
"protocol": "http" 39 | }, 40 | { 41 | "type": "network", 42 | "tags": [], 43 | "url": { 44 | "type": "url", 45 | "tags": [], 46 | "url": "http://127.0.0.1", 47 | "path": null, 48 | "query": null, 49 | "protocol": "http" 50 | }, 51 | "socket": { 52 | "type": "socket", 53 | "tags": [], 54 | "address": "127.0.0.1", 55 | "port": null, 56 | "network_protocol": null, 57 | "listen": null 58 | }, 59 | "credential": null 60 | }, 61 | { 62 | "type": "socket", 63 | "tags": [], 64 | "address": "127.0.0.1", 65 | "port": null, 66 | "network_protocol": null, 67 | "listen": null 68 | }, 69 | { 70 | "type": "file", 71 | "tags": [], 72 | "name": "fooconfigtest.txt", 73 | "description": "example output file", 74 | "md5": "5eb63bbbe01eeed093cb22bb8f5acdc3", 75 | "sha1": "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", 76 | "sha256": "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9", 77 | "architecture": null, 78 | "compile_time": null, 79 | "file_path": null, 80 | "data": null, 81 | "derivation": "extracted and decompressed" 82 | } 83 | ] 84 | } 85 | ] 86 | -------------------------------------------------------------------------------- /mwcp/parsers/tests/foo/f144899b86766688991c5d0d10902f4a.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "report", 3 | "tags": [], 4 | "mwcp_version": "3.12.0", 5 | "input_file": { 6 | "type": "file", 7 | "tags": [], 8 | "name": "f144899b86766688991c5d0d10902f4a.bin", 9 | "description": "Foo", 10 | "md5": "f144899b86766688991c5d0d10902f4a", 11 | "sha1": "bdca4e5c28a89d3b5281cc189f5910eaad16395a", 12 | "sha256": "8a599bf73a83c1d32a1b426ed736488ae34991fda6ac4cd27cc0597e21cd8420", 13 | "architecture": null, 14 | "compile_time": null, 15 | "file_path": null, 16 | "data": null, 17 | "derivation": null 18 | }, 19 | "parser": "dc3:foo", 20 | "recursive": false, 21 | "external_knowledge": { 22 | "secret": "hello" 23 | }, 24 | "errors": [], 25 | "logs": [ 26 | "[+] File 
f144899b86766688991c5d0d10902f4a.bin identified as Foo.", 27 | "[+] size of inputfile is 15765 bytes", 28 | "[+] f144899b86766688991c5d0d10902f4a.bin dispatched residual file: fooconfigtest.txt", 29 | "[+] File fooconfigtest.txt described as example output file", 30 | "[+] operating on inputfile f144899b86766688991c5d0d10902f4a.bin" 31 | ], 32 | "metadata": [ 33 | { 34 | "type": "other", 35 | "tags": [], 36 | "key": "secret_using_external_knowledge", 37 | "value": "hello!", 38 | "value_format": "string" 39 | }, 40 | { 41 | "type": "url", 42 | "tags": [], 43 | "url": "http://127.0.0.1", 44 | "path": null, 45 | "query": null, 46 | "protocol": "http" 47 | }, 48 | { 49 | "credential": null, 50 | "socket": { 51 | "type": "socket", 52 | "tags": [], 53 | "address": "127.0.0.1", 54 | "port": null, 55 | "network_protocol": null, 56 | "listen": null 57 | }, 58 | "tags": [], 59 | "type": "network", 60 | "url": { 61 | "type": "url", 62 | "tags": [], 63 | "url": "http://127.0.0.1", 64 | "path": null, 65 | "query": null, 66 | "protocol": "http" 67 | } 68 | }, 69 | { 70 | "type": "socket", 71 | "tags": [], 72 | "address": "127.0.0.1", 73 | "port": null, 74 | "network_protocol": null, 75 | "listen": null 76 | }, 77 | { 78 | "type": "file", 79 | "tags": [], 80 | "name": "fooconfigtest.txt", 81 | "description": "example output file", 82 | "md5": "5eb63bbbe01eeed093cb22bb8f5acdc3", 83 | "sha1": "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", 84 | "sha256": "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9", 85 | "architecture": null, 86 | "compile_time": null, 87 | "file_path": null, 88 | "data": null, 89 | "derivation": "extracted and decompressed" 90 | } 91 | ] 92 | } -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | 4 | from mwcp import metadata 5 | 6 | 7 | 
@pytest.mark.parametrize("text_format,report_name", [ 8 | ("markdown", "report.md"), 9 | ("simple", "report.txt"), 10 | ("html", "report.html"), 11 | ]) 12 | def test_basic(datadir, report, metadata_items, text_format, report_name): 13 | """ 14 | Tests each metadata element to ensure they are presented 15 | nicely in a report. 16 | """ 17 | with report: 18 | report.input_file.description = "SuperMalware Implant" 19 | for item in metadata_items: 20 | report.add(item) 21 | report.add_tag("test", "tagging") 22 | 23 | actual = report.as_text(text_format) 24 | print(actual) 25 | expected = (datadir / report_name).read_text() 26 | assert actual == expected 27 | 28 | 29 | @pytest.mark.parametrize("text_format,report_name", [ 30 | ("markdown", "report_wordwrap.md"), 31 | ("simple", "report_wordwrap.txt"), 32 | ("html", "report_wordwrap.html"), 33 | ]) 34 | def test_wordwrap(datadir, report, text_format, report_name): 35 | with report: 36 | report.input_file.description = "SuperMalware Implant" 37 | large_num = int("123"*50) # Large number that will require word wrapping. 38 | report.add(metadata.RSAPublicKey(1234, large_num)) 39 | report.add(metadata.RSAPrivateKey( 40 | 1234, large_num, 1234, large_num, large_num, large_num, large_num, large_num)) 41 | report.add(metadata.UserAgent("This is a really large user agent that will need to be word wrapped." 
* 16)) 42 | 43 | actual = report.as_text(text_format) 44 | print(actual) 45 | expected = (datadir / report_name).read_text() 46 | assert actual == expected 47 | 48 | 49 | @pytest.mark.parametrize("text_format,report_name", [ 50 | ("markdown", "report_foreign.md"), 51 | ("simple", "report_foreign.txt"), 52 | ("html", "report_foreign.html"), 53 | ]) 54 | def test_foreign_characters(datadir, report, text_format, report_name): 55 | with report: 56 | report.input_file.description = "SuperMalware Implant" 57 | report.add(metadata.Other("JAPAN", "\u30E6\u30FC\u30B6\u30FC\u5225\u30B5\u30A4\u30C8")) 58 | report.add(metadata.Other("CHINA", "\u7B80\u4F53\u4E2D\u6587")) 59 | report.add(metadata.Other("KOREA", "\uD06C\uB85C\uC2A4 \uD50C\uB7AB\uD3FC\uC73C\uB85C")) 60 | report.add(metadata.Other("ISRAEL", "\u05DE\u05D3\u05D5\u05E8\u05D9\u05DD \u05DE\u05D1\u05D5\u05E7\u05E9\u05D9\u05DD")) 61 | report.add(metadata.Other("EGYPT", "\u0623\u0641\u0636\u0644 \u0627\u0644\u0628\u062D\u0648\u062B")) 62 | report.add(metadata.Other( 63 | "RUSSIA", 64 | "\u0414\u0435\u0441\u044F\u0442\u0443\u044E \u041C\u0435\u0436\u0434\u0443\u043D\u0430" 65 | "\u0440\u043E\u0434\u043D\u0443\u044E" 66 | )) 67 | report.add(metadata.Other("MATH", "\u222E E\u22C5da = Q, n \u2192 \u221E, \u2211 f(i) = \u220F g(i)")) 68 | report.add(metadata.Other("FRANCE", "fran\u00E7ais langue \u00E9trang\u00E8re")) 69 | report.add(metadata.Other("SPAIN", "ma\u00F1ana ol\u00E9")) 70 | 71 | actual = report.as_text(text_format) 72 | print(actual) 73 | expected = (datadir / report_name).read_text("utf-8") 74 | assert actual == expected 75 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests mwcp.Runner components. 
3 | """ 4 | import textwrap 5 | 6 | import mwcp 7 | 8 | 9 | def test_running_parser_class(): 10 | from mwcp import Parser 11 | 12 | class TestParser(Parser): 13 | DESCRIPTION = "Test" 14 | 15 | report = mwcp.run(TestParser, data=b"test") 16 | assert report 17 | assert report.parser == "TestParser" 18 | 19 | 20 | def test_yara_runner(datadir): 21 | mwcp.register_entry_points() 22 | 23 | # File should map to foo parser. 24 | report = mwcp.run(data=b"mapped file", yara_repo=datadir / "yara_repo", recursive=False) 25 | assert report 26 | # Report "parser" will be "-" because it was initially unknown, but the parser mapped 27 | # to the input file should be foo. 28 | assert report.parser == "-" 29 | assert report.input_file.description == "Foo" 30 | assert report.input_file.parser.name == "foo.Foo" 31 | 32 | 33 | def test_yara_runner_recursive(datadir): 34 | mwcp.register_parser_directory(str(datadir), source_name="test") 35 | 36 | # Initial file should map to FileA and residual to FileB. 37 | # Recursion detection should take effect. 38 | report = mwcp.run(data=b"matches file a", yara_repo=datadir / "yara_repo", recursive=True) 39 | assert report 40 | assert report.parser == "-" 41 | assert report.input_file.description == "File A" 42 | residual_file = report.input_file.children[0] 43 | assert residual_file.description == "File B" 44 | 45 | # Recursion will not take effect. 46 | report = mwcp.run(data=b"matches file a", yara_repo=datadir / "yara_repo", recursive=False) 47 | assert report 48 | assert report.parser == "-" 49 | assert report.input_file.description == "File A" 50 | residual_file = report.input_file.children[0] 51 | assert residual_file.description == "Unidentified file" 52 | 53 | 54 | def test_yara_runner_sibling_dispatch(datadir): 55 | """ 56 | Tests Github issue #40 where a file doesn't get processed because 57 | it was dispatched with a parent of an already processed sibling. 
58 | """ 59 | mwcp.register_parser_directory(str(datadir), source_name="test") 60 | 61 | # Test running SingleDispatch parser and see if we successfully get the Grandchild to be parsed. 62 | report = mwcp.run(data=b"matches parent", yara_repo=datadir / "yara_repo", recursive=True) 63 | assert report 64 | assert report.parser == "-" 65 | input_file = report.input_file 66 | assert input_file.description == "Parent" 67 | children = input_file.children 68 | assert len(children) == 2 69 | assert children[0].description == "Sibling 1" 70 | assert children[1].description == "Sibling 2" 71 | assert len(children[0].children) == 1 72 | # This was originally unidentified due to not being processed. 73 | assert children[0].children[0].description == "Grandchild" 74 | assert report.file_tree() == textwrap.dedent("""\ 75 | <40b44905ee15a698e22f086c758a3981.bin (40b44905ee15a698e22f086c758a3981) : Parent> 76 | ├── 77 | │ └── <3ca5088d02dfb0fc668a0e2898ec3d93.bin (3ca5088d02dfb0fc668a0e2898ec3d93) : Grandchild> 78 | └── """) 79 | -------------------------------------------------------------------------------- /mwcp/tests/test_construct.py: -------------------------------------------------------------------------------- 1 | """Tests our construct helpers.""" 2 | 3 | import doctest 4 | import os 5 | import sys 6 | 7 | import pytest 8 | 9 | from mwcp.utils import construct 10 | 11 | 12 | # @pytest.mark.xfail( 13 | # raises=ValueError, 14 | # reason="Doctest is producing a 'wrapper loop when unwrapping obj_' error" 15 | # ) 16 | @pytest.mark.parametrize("module", [ 17 | construct.helpers, 18 | construct.datetime_, 19 | construct.network, 20 | construct.windows_enums, 21 | construct.windows_structures, 22 | ]) 23 | def test_helpers(module): 24 | """Tests that the doctests for the helpers work.""" 25 | results = doctest.testmod(module) 26 | assert not results.failed 27 | 28 | 29 | def test_html(): 30 | """Tests the html construct.""" 31 | # Test doctests 32 | results = 
doctest.testmod(construct.construct_html) 33 | assert not results.failed 34 | 35 | # Test with an example 36 | EMBED_SPEC = construct.Struct( 37 | 'a' / construct.IP4Address, 38 | 'b' / construct.IP4Address, 39 | 'c' / construct.IP4Address, 40 | 'd' / construct.IP4Address 41 | ) 42 | 43 | address_struct = construct.Struct( 44 | 'first' / construct.Struct('a' / construct.Byte, 'b' / construct.Byte), 45 | 'second' / construct.Struct('inner2' / construct.Bytes(2)) 46 | # 'internal' / IP4Address 47 | ) 48 | 49 | PACKET = construct.Struct( 50 | construct.Padding(0x9), 51 | 'Hardcoded Value 1' / construct.HexString(construct.Int32ul), 52 | 'Hardcoded Value 2' / construct.HexString(construct.Int32ul), 53 | 'Hardcoded Value 3' / construct.HexString(construct.Int32ul), 54 | construct.Padding(0x17), 55 | 'Compromised Host IP' / construct.IP4Address, # Use IP adapter 56 | # 'Unknown IP Addresses' / construct.Switch( 57 | # this['Hardcoded Value 1'], 58 | # { 59 | # '0x1f4' : EMBED_SPEC 60 | # }, 61 | # ), 62 | 'Unknown IP Addresses' / address_struct[4], 63 | # 'Unknown IP Addresses' / IP4Address[4], 64 | construct.Padding(8), 65 | 'Unknown Indicator' / construct.String(0xF), 66 | construct.Padding(2), 67 | 'Number of CPUs' / construct.Int32ul, 68 | 'CPU Mhz' / construct.Int32ul, 69 | 'Total Memory (MB)' / construct.Int32ul, 70 | 'Compromised System Kernel' / construct.CString(), 71 | 'Possible Trojan Version' / construct.CString() 72 | ) 73 | 74 | data = (b'\x01\x00\x00\x00}\x00\x00\x00\x00\xf4\x01\x00\x002\x00\x00\x00\xe8' 75 | b'\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01' 76 | b'\x01\x00\x00\x00\x00\x01\x00\x00\x00\xc0\xa8\x01\r\xc0\xa8\x01\r\xc0' 77 | b'\xa8\x01\r\xc0\xa8\x01\r\xc0\xa8\x01\r\xff\xff\x01\x00\x00\x00\x00\x00' 78 | b'-== Love AV ==-:\x00\x01\x00\x00\x00d\n\x00\x00\xc4\x07\x00\x00' 79 | b'Linux 3.13.0-93-generic\x001:G2.40\x00') 80 | 81 | html_data = construct.html_hex(PACKET, data, depth=1) 82 | 83 | with 
open(os.path.join(os.path.dirname(__file__), 'construct_html.html'), 'r') as fo: 84 | expected_html_data = fo.read() 85 | 86 | assert html_data == expected_html_data 87 | 88 | 89 | def test_base64(): 90 | """Test the Base64 Adapter with bug associated with unicode encoding on build""" 91 | spec = construct.Base64(construct.CString("utf-16le")) 92 | data = b'Y\x00W\x00J\x00j\x00Z\x00A\x00=\x00=\x00\x00\x00' 93 | assert spec.parse(data) == b"abcd" 94 | assert spec.build(b"abcd") == data 95 | 96 | spec = construct.Base64(construct.CString("utf-8")) 97 | data = b'YWJjZA==\x00' 98 | assert spec.parse(data) == b"abcd" 99 | assert spec.build(b"abcd") == data 100 | -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer/report_wordwrap.txt: -------------------------------------------------------------------------------- 1 | ----- File: input_file.bin ----- 2 | Field Value 3 | ------------ ---------------------------------------------------------------- 4 | Parser FooParser 5 | File Path C:/input_file.bin 6 | Description SuperMalware Implant 7 | Architecture 8 | MD5 1e50210a0202497fb79bc38b6ade6c34 9 | SHA1 baf34551fecb48acc3da868eb85e1b6dac9de356 10 | SHA256 1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee 11 | Compile Time 12 | 13 | ---- RSA Private Key ---- 14 | Value 15 | ------------------------------------------------- 16 | Modulus (n): 17 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 18 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 19 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 20 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 21 | f3:b3 22 | Public Exponent (e): 23 | 1234 (0x4d2) 24 | Private Exponent (d): 25 | 1234 (0x4d2) 26 | p: 27 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 28 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 29 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 30 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 31 | f3:b3 32 | q: 33 | 
9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 34 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 35 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 36 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 37 | f3:b3 38 | d mod (p-1): 39 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 40 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 41 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 42 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 43 | f3:b3 44 | d mod (q-1): 45 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 46 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 47 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 48 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 49 | f3:b3 50 | (inverse of q) mod p: 51 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 52 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 53 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 54 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 55 | f3:b3 56 | 57 | ---- RSA Public Key ---- 58 | Value 59 | ------------------------------------------------- 60 | Modulus (n): 61 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 62 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 63 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 64 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 65 | f3:b3 66 | Public Exponent (e): 67 | 1234 (0x4d2) 68 | 69 | ---- User Agent ---- 70 | Value 71 | ---------------------------------------------------------------------------------------------------- 72 | This is a really large user agent that will need to be word wrapped.This is a really large user 73 | agent that will need to be word wrapped.This is a really large user agent that will need to be 74 | word wrapped.This is a really large user agent that will need to be word wrapped.This is a really 75 | large user agent that will need to be word wrapped.This is a really large user agent that will 76 | need to be word wrapped.This is a really large user agent that will need to be word wrapped.This 77 | is a really large user agent that will need 
to be word wrapped.This is a really large user agent 78 | that will need to be word wrapped.This is a really large user agent that will need to be word 79 | wrapped.This is a really large user agent that will need to be word wrapped.This is a really large 80 | user agent that will need to be word wrapped.This is a really large user agent that will need to 81 | be word wrapped.This is a really large user agent that will need to be word wrapped.This is a 82 | really large user agent that will need to be word wrapped.This is a really large user agent that 83 | will need to be word wrapped. 84 | 85 | ----- File Tree ----- 86 | 87 | 88 | -------------------------------------------------------------------------------- /mwcp/utils/construct/dotnet.py: -------------------------------------------------------------------------------- 1 | """Construct helpers for .NET""" 2 | 3 | from .core import * 4 | 5 | 6 | class _DotNetUInt(Construct): 7 | r""" 8 | DotNet encoded unsigned 32-bit integer, where first byte indicates the length of the integer. 
9 | 10 | Example: 11 | 12 | >>> DotNetUInt.build(16) 13 | '\x10' 14 | >>> DotNetUInt.parse(_) 15 | 16 16 | >>> DotNetUInt.build(256) 17 | '\x81\x00' 18 | >>> DotNetUInt.parse(_) 19 | 256 20 | >>> DotNetUInt.build(0xffff) 21 | '\xc0\x00\xff\xff' 22 | >>> DotNetUInt.parse(_) 23 | 65535 24 | """ 25 | def _parse(self, stream, context, path): 26 | b = byte2int(stream_read(stream, 1)) 27 | if b & 0x80 == 0: 28 | num = b 29 | elif b & 0xc0 == 0x80: 30 | num = ((b & 0x3f) << 8) + byte2int(stream_read(stream, 1)) 31 | elif b & 0xe0 == 0xc0: 32 | num = (b & 0x1f) << 24 33 | num += byte2int(stream_read(stream, 1)) << 16 34 | num += byte2int(stream_read(stream, 1)) << 8 35 | num += byte2int(stream_read(stream, 1)) 36 | else: 37 | raise ConstructError('DotNetUInt encountered an invalid string') 38 | return num 39 | 40 | def _build(self, obj, stream, context, path): 41 | if obj < 0: 42 | raise ConstructError("DotNetUInt cannot build from negative number") 43 | if obj > 0x1fffffff: 44 | raise ConstructError("DotNetUInt encountered too large a number") 45 | if obj < 0x80: 46 | stream_write(stream, int2byte(obj), 1) 47 | elif obj < 0x3fff: 48 | stream_write(stream, int2byte((obj >> 8) | 0x80), 1) 49 | stream_write(stream, int2byte(obj & 0xff), 1) 50 | else: 51 | stream_write(stream, int2byte((obj >> 24) | 0xc0), 1) 52 | stream_write(stream, int2byte((obj >> 16) & 0xff), 1) 53 | stream_write(stream, int2byte((obj >> 8) & 0xff), 1) 54 | stream_write(stream, int2byte(obj & 0xff), 1) 55 | 56 | 57 | # using the @singleton decorator seems to break our ability to run doctests. 58 | DotNetUInt = _DotNetUInt() 59 | 60 | 61 | class _DotNetNullString(Construct): 62 | r""" 63 | DotNet null string, different from an empty zero-byte string, encoded as a single 0xff byte. 
64 | 65 | Example: 66 | 67 | >>> repr(DotNetNullString.parse('\xff')) 68 | 'None' 69 | >>> DotNetNullString.build(None) 70 | '\xff' 71 | """ 72 | def _parse(self, stream, context, path): 73 | if stream_read(stream, 1) != '\xff': 74 | raise ConstructError('DotNetNullString encounted an invalid byte.') 75 | return None 76 | 77 | def _build(self, obj, stream, context, path): 78 | stream_write(stream, '\xff', 1) 79 | 80 | def _sizeof(self, context, path): 81 | return 1 82 | 83 | 84 | DotNetNullString = _DotNetNullString() 85 | 86 | 87 | class _DotNetSigToken(Adapter): 88 | r""" 89 | Adapter used to create or read a compressed token used in signatures. The token must be a typedef, 90 | typeref, or typespec token. 91 | 92 | >>> DotNetSigToken.parse('\x81\x42') 93 | 452984912 94 | >>> DotNetSigToken.build(0x01000002) 95 | '\t' 96 | """ 97 | TOKEN_ENCODE = { 98 | 0x02: 0, 99 | 0x01: 1, 100 | 0x1b: 2, 101 | } 102 | 103 | def _encode(self, obj, context, path): 104 | encoded = self.TOKEN_ENCODE.get(obj >> 24, 3) 105 | if encoded is None: 106 | raise ConstructError('DotNetSigToken encountered a token other than typedef, typeref, or typespec') 107 | return ((obj & 0x00ffffff) << 2) | encoded 108 | 109 | def _decode(self, obj, context, path): 110 | if obj & 3 == 3 or obj & 0xfc00000000: 111 | raise ConstructError('DotNetSigToken encountered an invalid typedef, typeref, or typespec token') 112 | return (obj >> 2) | [0x02000000, 0x01000000, 0x1b000000][obj & 3] 113 | 114 | 115 | DotNetSigToken = _DotNetSigToken(DotNetUInt) 116 | -------------------------------------------------------------------------------- /mwcp/utils/elffileutils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Description: Utility for elftools python library. 
3 | """ 4 | 5 | import logging 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | import elftools.elf.elffile as elffile 10 | import io 11 | 12 | 13 | def obtain_elf(file_data): 14 | """ 15 | Given file data, create an elftools.ELFFile object from the data. 16 | 17 | :param file_data: Input ELF file data 18 | 19 | :return: An elftools.ELFFile object or None 20 | """ 21 | try: 22 | elf = elffile.ELFFile(io.BytesIO(file_data)) 23 | return elf 24 | except elffile.ELFError: 25 | logger.debug('An elftools.ELFFile object on the file data could not be created.') 26 | return None 27 | 28 | 29 | def obtain_section(section_name, elf=None, file_data=None): 30 | """ 31 | Obtain the section obtain for a specficied ELF section of a file. 32 | 33 | :param section_name: The name of the section to obtain 34 | :param elf: elftools.ELFFile object 35 | :param file_data: Input file data 36 | 37 | :return: The elftools.Section object, or None. 38 | """ 39 | if file_data: 40 | elf = obtain_elf(file_data) 41 | if elf: 42 | for section in elf.iter_sections(): 43 | if section.name == section_name: 44 | return section 45 | return None 46 | else: 47 | return None 48 | 49 | 50 | def obtain_section_data(section_name, elf=None, file_data=None, min_size=0): 51 | """ 52 | Obtain the data in a specified ELF section of a file. 53 | 54 | :param section_name: The name of the section from which to extract data. 55 | :param elf: elftools.ELFFile object 56 | :param file_data: Input file data 57 | :param min_size: The minimum acceptable size for the section_data 58 | 59 | :return: The PE section data, or None. 
60 | """ 61 | if file_data: 62 | elf = obtain_elf(file_data) 63 | if elf: 64 | section = obtain_section(section_name, elf) 65 | if section: 66 | section_data = section.data() 67 | if len(section_data) > min_size: 68 | return section_data 69 | return None 70 | return None 71 | else: 72 | return None 73 | 74 | 75 | def check_section(section_name, elf=None, file_data=None): 76 | """ 77 | Check if a specified ELF section exists in a file. 78 | 79 | :param section_name: The name of the section from which to extract data. 80 | :param elf: elftools.ELFFile object 81 | :param file_data: Input file data 82 | 83 | :return: True if the section name is observed, False if it is not. 84 | """ 85 | if file_data: 86 | elf = obtain_elf(file_data) 87 | if elf and obtain_section(section_name, elf): 88 | return True 89 | return False 90 | 91 | 92 | def obtain_physical_offset(mem_offset, elf=None, file_data=None): 93 | """ 94 | For an ELF file (in x86), convert a provided memory offset to a raw offset. 95 | 96 | :param mem_offset: The memory offset to convert to a raw offset 97 | :param elf: elftools.ELFFile object 98 | :param file_data: Input file data 99 | 100 | :return: Raw offset, or None. 101 | """ 102 | if file_data: 103 | elf = obtain_elf(file_data) 104 | if elf: 105 | for phy_offset in elf.address_offsets(mem_offset): 106 | return phy_offset 107 | return None 108 | 109 | 110 | def obtain_memory_offset(phy_offset, elf=None, file_data=None): 111 | """ 112 | For an ELF file, convert a provided raw offset to a memory offset. 113 | 114 | :param phy_offset: The raw offset to convert to a memory offset 115 | :param elf: elftools.ELFFile object 116 | :param file_data: Input file data 117 | 118 | :return: Memory offset, or None. 
119 | """ 120 | if file_data: 121 | elf = obtain_elf(file_data) 122 | if elf: 123 | for seg in elf.iter_segments(): 124 | if seg['p_offset'] <= phy_offset < (seg['p_offset'] + seg['p_filesz']): 125 | return phy_offset - seg['p_offset'] + seg['p_vaddr'] 126 | return None 127 | else: 128 | return None 129 | -------------------------------------------------------------------------------- /mwcp/core.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pathlib 3 | from typing import Union, Type 4 | 5 | import mwcp 6 | from mwcp.runner import Runner, YaraRunner 7 | from mwcp.report import Report 8 | from mwcp.parser import Parser 9 | from mwcp import config 10 | from mwcp import metadata 11 | 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def run( 17 | parser: Union[str, Type[Parser]] = None, 18 | file_path: Union[str, pathlib.Path] = None, 19 | data: bytes = None, 20 | *, 21 | output_directory: Union[str, pathlib.Path] = None, 22 | yara_repo: Union[str, pathlib.Path] = None, 23 | recursive: bool = True, 24 | knowledge_base: dict = None, 25 | include_file_data: bool = False, 26 | prefix_output_files: bool = True, 27 | external_strings_report: bool = False, 28 | include_logs: bool = True, 29 | log_level: int = None, 30 | log_filter: logging.Filter = None, 31 | ) -> Report: 32 | """ 33 | Runs a specified parser on a given file path or data. 34 | 35 | :param parser: Name or class of parser to run. 36 | Can be excluded to use YARA matching to determine parser. 37 | (use ":" notation to specify source if necessary e.g. "acme:Foo") 38 | :param file_path: File path to parse 39 | :param data: File data to parse 40 | :param yara_repo: Path to directory of yara signatures. 41 | :param recursive: Whether to recursively match and run parsers for unidentified files. 42 | (Only applicable if given a yara_repo to match files to parsers.) 43 | :param output_directory: 44 | Directory to write out files. 
45 | If not provided, files will not be written out. 46 | :param include_file_data: Whether to include file data in the generated report. 47 | If disabled, only metadata such as the file path, description, and md5 will be included. 48 | :param prefix_output_files: Whether to include a prefix of the first 5 characters 49 | of the md5 on output files. This is to help avoid overwriting multiple 50 | output files with the same name. 51 | :param external_strings_report: Whether to output reported DecodedString elements into a 52 | separate strings report. 53 | :param include_logs: Whether to include error and debug logs in the generated report. 54 | :param log_level: If including logs, the logging level to be collected. 55 | (Defaults to currently set effective log level) 56 | :param log_filter: If including logs, this can be used to pass in a custom filter for the logs. 57 | Should be a valid argument for logging.Handler.addFilter() 58 | 59 | :return: mwcp.Report object containing parse results. 60 | """ 61 | if file_path: 62 | file_path = str(file_path) 63 | 64 | report_config = dict( 65 | output_directory=output_directory, 66 | include_file_data=include_file_data, 67 | prefix_output_files=prefix_output_files, 68 | external_strings_report=external_strings_report, 69 | include_logs=include_logs, 70 | log_level=log_level, 71 | log_filter=log_filter, 72 | knowledge_base=knowledge_base, 73 | ) 74 | if not yara_repo: 75 | yara_repo = config.get("YARA_REPO") 76 | 77 | # Only run YARA runner if repo has been setup and we can benefit from it. 
78 | if yara_repo and (not parser or recursive): 79 | runner = YaraRunner(yara_repo=yara_repo, recursive=recursive, **report_config) 80 | elif parser: 81 | runner = Runner(**report_config) 82 | else: 83 | raise ValueError(f"Must provide either a parser to run or a yara_repo for file matching.") 84 | return runner.run(parser, file_path=file_path, data=data) 85 | 86 | 87 | def schema(id=None) -> dict: 88 | """ 89 | Generates a JSON Schema for a Report object. 90 | NOTE: This is the schema for a single report. Depending on how you use MWCP, 91 | you may get a list of these reports instead. 92 | """ 93 | if id is None: 94 | id = ( 95 | f"https://raw.githubusercontent.com/dod-cyber-crime-center/DC3-MWCP/" 96 | f"{mwcp.__version__}/mwcp/config/schema.json" 97 | ) 98 | schema = { 99 | "$schema": "https://json-schema.org/draft/2019-09/schema", 100 | "$id": id, 101 | } 102 | schema.update(metadata.Report.schema()) 103 | 104 | # "output_text" may also be included if we are running from the server service. 105 | schema["properties"]["output_text"] = { 106 | "type": "string", 107 | "description": "Raw text output from MWCP.", 108 | } 109 | 110 | return schema 111 | -------------------------------------------------------------------------------- /mwcp/tests/test_report.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests Report class. 3 | """ 4 | import logging 5 | import runpy 6 | 7 | import pytest 8 | 9 | import mwcp 10 | from mwcp import metadata 11 | 12 | 13 | @pytest.fixture 14 | def filled_report(report, metadata_items): 15 | """ 16 | Provides a report filled with metadata examples seen above. 
17 | """ 18 | logger = logging.getLogger("test_report") 19 | with report: 20 | for item in metadata_items: 21 | report.add(item) 22 | 23 | logger.info("Test info log") 24 | logger.error("Test error log") 25 | logger.debug("Test debug log") 26 | 27 | report.add_tag("test", "tagging") 28 | 29 | return report 30 | 31 | 32 | def test_report_dict(datadir, filled_report): 33 | expected = runpy.run_path(str(datadir / "report.py"))["report"] 34 | assert filled_report.as_dict() == expected 35 | 36 | 37 | def test_report_json(datadir, filled_report): 38 | expected = (datadir / "report.json").read_text().replace("MWCP_VERSION", mwcp.__version__) 39 | actual = filled_report.as_json() 40 | print(actual) 41 | assert actual == expected 42 | 43 | 44 | def test_split_report(datadir): 45 | """ 46 | Tests split metadata per file. 47 | """ 48 | logger = logging.getLogger("test_split_report") 49 | logging.root.setLevel(logging.INFO) 50 | input_file = mwcp.FileObject(b"some data", file_path="C:/input_file.bin") 51 | report = mwcp.Report(input_file, "FooParser", log_level=logging.INFO) 52 | with report: 53 | logger.info("Info log in input_file.bin") 54 | logger.error("Error log in input_file.bin") 55 | report.add(metadata.Mutex("root_mutex")) 56 | 57 | sub_file = mwcp.FileObject(b"some other data", file_name="sub_file.exe") 58 | report.add(metadata.File.from_file_object(sub_file)) 59 | report.set_file(sub_file) 60 | logger.info("Info log in sub_file.exe") 61 | logger.error("Error log in sub_file.exe") 62 | report.add(metadata.Mutex("sub_mutex")) 63 | 64 | report.add_tag("test", "tagging") 65 | 66 | assert len(report.get()) == 3 67 | 68 | root_metadata = report.get(source=input_file.md5) 69 | assert len(root_metadata) == 2 70 | assert metadata.Mutex("root_mutex") in root_metadata 71 | 72 | sub_metadata = report.get(source=sub_file.md5) 73 | assert len(sub_metadata) == 1 74 | assert metadata.Mutex("sub_mutex") in sub_metadata 75 | 76 | assert report.logs == [ 77 | "[+] Info log in 
input_file.bin", 78 | "[!] Error log in input_file.bin", 79 | "[+] Info log in sub_file.exe", 80 | "[!] Error log in sub_file.exe", 81 | ] 82 | assert report.errors == [ 83 | "[!] Error log in input_file.bin", 84 | "[!] Error log in sub_file.exe", 85 | ] 86 | assert report.get_logs(sub_file) == [ 87 | "[+] Info log in sub_file.exe", 88 | "[!] Error log in sub_file.exe", 89 | ] 90 | assert report.get_logs(sub_file, errors_only=True) == [ 91 | "[!] Error log in sub_file.exe", 92 | ] 93 | 94 | expected = runpy.run_path(str(datadir / "split_report.py"))["split_report"] 95 | assert report.as_list() == expected 96 | 97 | 98 | def test_finalized(report): 99 | """ 100 | Tests that we can't add metadata after it is finalized. 101 | """ 102 | with report: 103 | report.add(metadata.URL("example1.com")) 104 | with pytest.raises(RuntimeError): 105 | report.add(metadata.URL("example2.com")) 106 | 107 | 108 | def test_deduplication(report): 109 | """ 110 | Tests that the same metadata information is dedupped. 111 | """ 112 | with report: 113 | report.add(metadata.URL("example.com")) 114 | report.add(metadata.URL("example.com")) 115 | report.add(metadata.Socket(address="example.com")) 116 | report.add(metadata.Socket(address="example.com")) # equivalent more verbose version. 
117 | report.add(metadata.C2Address(address="example.com")) 118 | 119 | # Set new file source to ensure we dedup across sources (if not split) 120 | res_file = mwcp.FileObject(b"residual data", file_name="res.exe") 121 | report.set_file(res_file) 122 | report.add(metadata.URL("example.com")) 123 | report.add(metadata.Socket(address="example.com")) 124 | 125 | items = report.get() 126 | assert items == [ 127 | metadata.URL("example.com"), 128 | metadata.Network(url=metadata.URL2(url='example.com'), socket=metadata.Socket(address='example.com')), 129 | metadata.Socket(address="example.com"), 130 | metadata.C2Address(address="example.com"), 131 | ] 132 | -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer/report_wordwrap.html: -------------------------------------------------------------------------------- 1 |

File: input_file.bin

2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 |
Field Value
Parser FooParser
File Path C:/input_file.bin
Description SuperMalware Implant
Architecture
MD5 1e50210a0202497fb79bc38b6ade6c34
SHA1 baf34551fecb48acc3da868eb85e1b6dac9de356
SHA256 1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee
Compile Time
17 | 18 |

RSA Private Key

19 | 20 | 21 | 22 | 23 | 24 | 65 | 66 |
Value
Modulus (n):
 25 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 26 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 27 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 28 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 29 |     f3:b3
 30 | Public Exponent (e):
 31 |     1234 (0x4d2)
 32 | Private Exponent (d):
 33 |     1234 (0x4d2)
 34 | p:
 35 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 36 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 37 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 38 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 39 |     f3:b3
 40 | q:
 41 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 42 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 43 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 44 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 45 |     f3:b3
 46 | d mod (p-1):
 47 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 48 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 49 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 50 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 51 |     f3:b3
 52 | d mod (q-1):
 53 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 54 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 55 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 56 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 57 |     f3:b3
 58 | (inverse of q) mod p:
 59 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 60 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 61 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 62 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 63 |     f3:b3
 64 | 
67 | 68 |

RSA Public Key

69 | 70 | 71 | 72 | 73 | 74 | 83 | 84 |
Value
Modulus (n):
 75 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 76 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 77 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 78 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 79 |     f3:b3
 80 | Public Exponent (e):
 81 |     1234 (0x4d2)
 82 | 
85 | 86 |

User Agent

87 | 88 | 89 | 90 | 91 | 92 | 104 | 105 |
Value
This is a really large user agent that will need to be word wrapped.This is a really large user
 93 | agent that will need to be word wrapped.This is a really large user agent that will need to be word
 94 | wrapped.This is a really large user agent that will need to be word wrapped.This is a really large
 95 | user agent that will need to be word wrapped.This is a really large user agent that will need to be
 96 | word wrapped.This is a really large user agent that will need to be word wrapped.This is a really
 97 | large user agent that will need to be word wrapped.This is a really large user agent that will need
 98 | to be word wrapped.This is a really large user agent that will need to be word wrapped.This is a
 99 | really large user agent that will need to be word wrapped.This is a really large user agent that
100 | will need to be word wrapped.This is a really large user agent that will need to be word
101 | wrapped.This is a really large user agent that will need to be word wrapped.This is a really large
102 | user agent that will need to be word wrapped.This is a really large user agent that will need to be
103 | word wrapped.
106 | 107 |

File Tree

108 |
109 | <input_file.bin (1e50210a0202497fb79bc38b6ade6c34) : SuperMalware Implant>
110 | 
111 | 112 | -------------------------------------------------------------------------------- /mwcp/config/__init__.py: -------------------------------------------------------------------------------- 1 | """Stores default configuration values.""" 2 | 3 | import logging 4 | import os 5 | import pathlib 6 | import pkg_resources 7 | 8 | import appdirs 9 | from ruamel.yaml import YAML 10 | from ruamel.yaml.scanner import ScannerError 11 | 12 | from mwcp.exceptions import ConfigError 13 | 14 | 15 | logger = logging.getLogger(__name__) 16 | yaml = YAML() 17 | 18 | 19 | class Config(dict): 20 | 21 | CONFIG_FILE_NAME = "config.yml" 22 | USER_CONFIG_DIR = pathlib.Path(appdirs.user_config_dir("mwcp")) 23 | 24 | # Fields which contain a file or directory path. 25 | PATH_FIELDS = ["LOG_CONFIG_PATH", "TESTCASE_DIR", "MALWARE_REPO", "PARSER_DIR", "PARSER_CONFIG_PATH", "YARA_REPO"] 26 | TESTING_FIELDS = ["TESTCASE_DIR", "MALWARE_REPO"] 27 | 28 | def __init__(self, **kwargs): 29 | super().__init__(**kwargs) 30 | # We are going to manually add the fields.json path because 31 | # the fields.json file is not currently designed to be modified. 32 | self["FIELDS_PATH"] = os.path.abspath(pkg_resources.resource_filename("mwcp.config", "fields.json")) 33 | 34 | def __repr__(self): 35 | return f"Config({super().__repr__()})" 36 | 37 | def clear(self): 38 | """Clears config (and re-adds FIELDS_PATH)""" 39 | super().clear() 40 | self.__init__() 41 | 42 | @property 43 | def user_config_dir(self) -> pathlib.Path: 44 | cfg_dir = self.USER_CONFIG_DIR 45 | cfg_dir.mkdir(parents=True, exist_ok=True) 46 | return cfg_dir 47 | 48 | @property 49 | def user_path(self) -> pathlib.Path: 50 | """Returns the path to the user config file.""" 51 | # Get user directory. 52 | cfg_dir = self.user_config_dir 53 | 54 | # Create a user copy if it doesn't exist. 
55 | cfg_file_path = cfg_dir / self.CONFIG_FILE_NAME 56 | if not cfg_file_path.exists(): 57 | with pkg_resources.resource_stream("mwcp.config", self.CONFIG_FILE_NAME) as default_cfg: 58 | with open(cfg_file_path, "wb") as fp: 59 | fp.write(default_cfg.read()) 60 | 61 | # Also copy over log_config.yml 62 | log_config_path = cfg_dir / "log_config.yml" 63 | if not log_config_path.exists(): 64 | with pkg_resources.resource_stream("mwcp.config", "log_config.yml") as default_log_cfg: 65 | with open(log_config_path, "wb") as fp: 66 | fp.write(default_log_cfg.read()) 67 | 68 | return cfg_file_path 69 | 70 | @property 71 | def pytest_cache_dir(self) -> pathlib.Path: 72 | return self.user_config_dir / ".pytest_cache" 73 | 74 | def load(self, file_path=None, production=False): 75 | """ 76 | Loads configuration file. 77 | 78 | :param file_path: Path to configuration file. (defaults to `config.yml` in user config directory) 79 | :param production: Whether we are loading configuration for a production server. 80 | In this mode, the fields for testing (MALWARE_REPO, TESTCASE_DIR) are ignored. 81 | """ 82 | if not file_path: 83 | file_path = self.user_path 84 | 85 | # Convert str file_path to maintain backwards compatibility with previous function definition 86 | if isinstance(file_path, str): 87 | file_path = pathlib.Path(file_path) 88 | 89 | with open(file_path, "r") as fp: 90 | try: 91 | config = dict(yaml.load(fp)) 92 | except ScannerError as e: 93 | raise ConfigError(f"Error parsing config: {e}") 94 | 95 | # Remove testing fields if in production. 96 | # This lets us continue using the same configuration as in development without exposing testing parameters. 97 | if production: 98 | for key in self.TESTING_FIELDS: 99 | config.pop(key, None) 100 | 101 | # Convert file path into absolute paths. 
102 | directory = str(file_path.parent) 103 | for key, value in config.items(): 104 | if key in self.PATH_FIELDS: 105 | value = os.path.expanduser(value) 106 | value = os.path.expandvars(value) 107 | value = os.path.join(directory, value) 108 | value = os.path.abspath(value) 109 | config[key] = value 110 | self.update(config) 111 | self.validate() 112 | 113 | def validate(self): 114 | """ 115 | Validates configuration. 116 | 117 | :raises ConfigError: If there is an issue with the configuration. 118 | """ 119 | for key, value in self.items(): 120 | if key in self.PATH_FIELDS: 121 | if not pathlib.Path(value).exists(): 122 | raise ConfigError(f"Invalid path for {key}: {value}") 123 | 124 | 125 | _config = Config() 126 | -------------------------------------------------------------------------------- /mwcp/tests/test_issues.py: -------------------------------------------------------------------------------- 1 | """Tests for found bugs/issues.""" 2 | 3 | import csv 4 | import io 5 | import sys 6 | 7 | from click.testing import CliRunner 8 | 9 | import mwcp 10 | from mwcp import cli, metadata 11 | 12 | 13 | def test_csv_row_bug_legacy(tmp_path, test_dir): 14 | """ 15 | Tests bug where first row is formatted different from other rows. 16 | Occurs when outputting csv and input file is a directory. 
17 | """ 18 | runner = CliRunner(mix_stderr=False) 19 | 20 | with runner.isolated_filesystem(tmp_path): 21 | 22 | ret = runner.invoke(cli.main, [ 23 | "parse", "foo", 24 | "--format", "csv", str(test_dir / "*"), 25 | "--no-output-files", 26 | "--legacy", 27 | ]) 28 | print(ret.stdout) 29 | print(ret.stderr, file=sys.stderr) 30 | assert ret.exit_code == 0 31 | 32 | reader = csv.reader(io.StringIO(ret.stdout)) 33 | rows = list(reader) 34 | assert len(rows) == len(test_dir.listdir()) + 1 35 | assert rows[0] == ["scan_date", "inputfilename", "outputfile.name", 36 | "outputfile.description", "outputfile.md5", "address", "debug", "url"] 37 | for i, row in enumerate(rows[1:]): 38 | assert row[0] and row[1] 39 | # Test entries except the timestamp and full file path. 40 | # NOTE: order is not guaranteed due to glob pattern, therefore we are testing all but 41 | # the debug message which contains the input filename. 42 | assert row[2] == "fooconfigtest.txt" 43 | assert row[3] == "example output file" 44 | assert row[4] == "5eb63bbbe01eeed093cb22bb8f5acdc3" 45 | # TODO: Figure out how to guarantee file order. 46 | # assert row[2:] == [ 47 | # "fooconfigtest.txt", 48 | # "example output file", 49 | # "5eb63bbbe01eeed093cb22bb8f5acdc3", 50 | # "127.0.0.1", 51 | # ("[+] File test_{0}.txt identified as Foo.\n" 52 | # "[+] size of inputfile is 23 bytes\n" 53 | # "[+] operating on inputfile test_{0}.txt").format(i), 54 | # "http://127.0.0.1", 55 | # ] 56 | 57 | def test_missing_residual_file_with_UnableToParse(tmpdir, make_sample_parser): 58 | """ 59 | Tests bug where residual file isn't reported if a nested parser raises an UnableToParse error on it and 60 | no other parser picks it up. 61 | 62 | Also tests to ensure misidentified file's description gets reset. 
63 | """ 64 | # language=Python 65 | CODE = """ 66 | from mwcp import FileObject, Parser, UnableToParse 67 | 68 | 69 | class Carrier(Parser): 70 | DESCRIPTION = "TestParser Carrier" 71 | 72 | @classmethod 73 | def identify(cls, file_object): 74 | return file_object.name == "carrier.txt" 75 | 76 | def run(self): 77 | self.logger.info("in Carrier parser") 78 | self.dispatcher.add(FileObject(b"I'm a downloader", file_name="downloader.txt")) 79 | 80 | 81 | class Downloader(Parser): 82 | DESCRIPTION = "TestParser Downloader" 83 | 84 | @classmethod 85 | def identify(cls, file_object): 86 | return file_object.name == "downloader.txt" 87 | 88 | def run(self): 89 | self.logger.info("in Downloader parser") 90 | self.dispatcher.add(FileObject(b"I'm a false implant", file_name="implant.txt")) 91 | self.dispatcher.add(FileObject(b"I'm something else that doesn't get identified.", file_name="other.txt")) 92 | 93 | 94 | class Implant(Parser): 95 | DESCRIPTION = "TestParser Implant" 96 | 97 | @classmethod 98 | def identify(cls, file_object): 99 | return file_object.name == "implant.txt" 100 | 101 | def run(self): 102 | self.logger.info("in Implant parser") 103 | raise UnableToParse("Oops, misidentified.") 104 | """ 105 | # language=Yaml 106 | CONFIG = """ 107 | RootParser: 108 | description: root parser 109 | parsers: 110 | - SubParser 111 | 112 | SubParser: 113 | description: sub parser 114 | parsers: 115 | - .Carrier 116 | - .Downloader 117 | - .Implant 118 | """ 119 | parser_path, config_file = make_sample_parser(parser_name="SubParser", parser_code=CODE, config_text=CONFIG) 120 | mwcp.register_parser_directory(str(parser_path.dirname), config_file_path=str(config_file), source_name="ACME") 121 | 122 | input_file = tmpdir / "carrier.txt" 123 | input_file.write_binary(b"I'm a carrier") 124 | output_directory = tmpdir / "output" 125 | output_directory.mkdir() 126 | 127 | report = mwcp.run("RootParser", file_path=str(input_file), output_directory=output_directory) 128 | 
"""
Visual Basic
"""

import pathlib
import string


def istext(s, threshold=0.30):
    """
    Determine whether the byte string *s* looks like ASCII text.

    Based on the recipe from the Python Cookbook (2nd ed., recipe 1.12):
    www.safaribooksonline.com/library/view/python-cookbook-2nd/0596007973/ch01s12.html

    :param s: input byte string
    :param threshold: maximum allowed fraction of non-printable characters (0 - 1)

    :return: bool
    """
    # Empty data or data containing NUL bytes is never considered text.
    if not s or b"\0" in s:
        return False

    printable = string.printable.encode()
    # Delete every printable character; whatever remains is the "non-text" portion.
    non_text = s.translate(bytes.maketrans(b"", b""), printable)
    # s is 'text' if at most `threshold` of its characters are non-printable.
    return len(non_text) / len(s) <= threshold
from mwcp import Parser, FileObject


class VBScript(Parser):
    """
    Identifies a VBS script.
    """
    DESCRIPTION = "VBScript"

    VB_KEYWORDS = [b"dim ", b"sub ", b"end sub", b"end function", b"createobject("]

    @classmethod
    def identify(cls, file_object):
        """
        Identify VB code based on the existence of specific VBS keywords.

        :param file_object: dispatcher.FileObject object

        :return: bool
        """
        data = file_object.data.lower()
        # Must look like ASCII text before keyword matching is meaningful.
        if not istext(data):
            return False
        return any(keyword in data for keyword in cls.VB_KEYWORDS)


class VBE(Parser):
    """
    Finds and extracts VBE encoded VBScript from file.
    """
    DESCRIPTION = "Encoded VBScript"

    START_TAG = b"#@~^"
    END_TAG = b"==^#~@"

    # Rotating selector: picks which of the 3 alphabet columns decodes each position.
    WHICH = "1231232332321323132311233213233211323231311231321323112331123132"

    @classmethod
    def identify(cls, file_object):
        """
        Check file magic to validate file contains a VBE
        (not just checking first bytes because we could be an ASP file)

        :param dispatcher.FileObject file_object: Input file

        :return bool: If parameters are met
        """
        data = file_object.data
        if cls.START_TAG not in data or cls.END_TAG not in data:
            return False
        # Start tag should be found somewhere in the beginning of file.
        # May not be immediately in the beginning if script is in an ASP.
        return data.index(cls.START_TAG) < 60

    def _generate_alphabet(self):
        # Substitution table: 3 candidate replacement characters per code point.
        table = [chr(i) * 3 for i in range(128)]
        table[32:128] = [
            '.-2', 'Gu0', 'zR!', 'V`)', 'Bq[', 'j^8', '/I3', '&\\=', 'IbX', 'A}:', '4)5', '26e',
            '[ 9', 'v|\\', 'rzV', 'C\x7fs', '8kf', '9cN', 'p3E', 'E+k', 'hhb', 'qQY', 'Ofx',
            '\tv^', 'b1}', 'DdJ', '#Tm', 'uCq', '<<<', '~:`', '>>>', '^~S', '@@@', 'wEB', 'J,\'',
            'a*H', ']tr', '"\'u', 'K71', 'oD7', 'NyM', ';YR', 'L/"', 'PoT', 'g&j', '*rG', '}jd',
            't9-', 'T{ ', '+?\x7f', '-8.', ',wL', '0g]', 'nS~', 'kGl', 'f4o', '5xy', '%]t', '!0C',
            'd#&', 'MZv', 'R[%', 'cl$', '?H+', '{U(', 'xp#', ')iA', '(.4', 'sL\t', 'Y!*', '3$D',
            '\x7fN?', 'mPw', 'U\t;', 'SVU', '|si', ':5a', '_ac', 'eKP', 'FXg', 'X;Q', '1WI',
            'i"O', 'lmF', 'ZMh', 'H%|', '\'(6', '\\Fp', '=Jn', '$2z', 'yA/', '7=_', '`_K', 'QOZ',
            ' B,', '6eW'
        ]
        table[9] = 'Wn{'
        return table

    def decode_vbe(self) -> str:
        """
        Decodes and returns embedded VBE script.
        """
        raw = self.file_object.data

        # Carve out the encoded section between the start/end markers,
        # skipping the 8-byte header after the start tag and the 6-byte footer.
        begin = raw.index(self.START_TAG) + len(self.START_TAG) + 8
        finish = raw.index(self.END_TAG) - 6
        encoded = raw[begin:finish].decode("utf-8")

        # Undo the escape sequences used by the encoder (order preserved).
        for escape, replacement in (('@&', '\x0a'), ('@#', '\x0d'), ('@*', '>'), ('@!', '<'), ('@$', '@')):
            encoded = encoded.replace(escape, replacement)

        # Decode each character through the rotating substitution alphabets;
        # characters >= 128 pass through unchanged.
        table = self._generate_alphabet()
        decoded = [
            table[ord(ch)][int(self.WHICH[i % 64]) - 1] if ord(ch) < 128 else ch
            for i, ch in enumerate(encoded)
        ]
        return "".join(decoded)

    def run(self):
        decoded = self.decode_vbe().encode("utf8")
        # Base filename off original if entire file is encoded piece.
        if self.file_object.data.startswith(self.START_TAG):
            stem = pathlib.Path(self.file_object.name).stem
            new_file = FileObject(decoded, file_name=f"{stem}.vb")
        else:
            new_file = FileObject(decoded, ext=".vb")
        self.dispatcher.add(new_file)


class EncodedASP(VBE):
    """
    Identifies ASP file with VBE.
    """
    DESCRIPTION = "ASP with Encoded VBScript"

    # An ASP wrapper places the VBE start marker directly after the ASP open tag.
    START_TAG = b"<%" + VBE.START_TAG
"""
A central location to store common windows enumerations.
This module will be imported along with 'from mwcp.utils import construct'
"""

from .core import *

# Visible interface. Add the classes and functions you would like to be available for users of construct
# library here.
__all__ = ['RegHive', 'LanguageIdentifier', 'KnownFolderID', 'AlgorithmID']


# Windows registry root-hive handle values (winreg.h HKEY_* predefined keys).
REGHIVES = {
    "HKCR": 0x80000000,
    "HKCU": 0x80000001,
    "HKLM": 0x80000002,
    "HKU": 0x80000003,
    "HKPD": 0x80000004,
    "HKCC": 0x80000005,
    "HKDD": 0x80000006,
}


def RegHive(subcon):
    r"""
    Converts an integer to registry hive enum.

    >>> RegHive(Int32ul).build("HKCU")
    b'\x01\x00\x00\x80'
    >>> str(RegHive(Int32ul).parse(b'\x01\x00\x00\x80'))
    'HKCU'
    """
    return Enum(subcon, **REGHIVES)


# TODO: Extend dictionary to incorporate more languages
LANGUAGEIDENTIFIERS = {
    "English (United States)": 0x409,
    "Korean": 0x412,
    "Chinese (PRC)": 0x804,
}


def LanguageIdentifier(subcon):
    r"""
    Converts an integer to language identifier enum

    >>> LanguageIdentifier(Int32ul).build("English (United States)")
    b'\t\x04\x00\x00'
    >>> str(LanguageIdentifier(Int32ul).parse(b"\x04\x08\x00\x00"))
    'Chinese (PRC)'
    """
    return Enum(subcon, **LANGUAGEIDENTIFIERS)


# CSIDL values identifying Windows special folders (shlobj.h).
CSIDL = {
    'CSIDL_SYSTEM': 37,
    'CSIDL_COMMON_PROGRAMS': 23,
    'CSIDL_PROFILE': 40,
    'CSIDL_ALTSTARTUP': 29,
    'CSIDL_LOCAL_APPDATA': 28,
    'CSIDL_PRINTHOOD': 27,
    'CSIDL_FONTS': 20,
    'CSIDL_PROGRAM_FILES_COMMON': 43,
    'CSIDL_PROGRAM_FILESX86': 42,
    'CSIDL_MYDOCUMENTS': 5,
    'CSIDL_MYVIDEO': 14,
    'CSIDL_PROGRAM_FILES': 38,
    'CSIDL_ADMINTOOLS': 48,
    'CSIDL_COMMON_DOCUMENTS': 46,
    'CSIDL_CONNECTIONS': 49,
    'CSIDL_COMMON_ALTSTARTUP': 30,
    'CSIDL_DRIVES': 17,
    'CSIDL_RESOURCES_LOCALIZED': 57,
    'CSIDL_HISTORY': 34,
    'CSIDL_NETHOOD': 19,
    'CSIDL_CDBURN_AREA': 59,
    'CSIDL_COMMON_DESKTOPDIRECTORY': 25,
    'CSIDL_SYSTEMX86': 41,
    'CSIDL_COMMON_TEMPLATES': 45,
    'CSIDL_MYPICTURES': 39,
    'CSIDL_COMMON_VIDEO': 55,
    'CSIDL_COMMON_STARTMENU': 22,
    'CSIDL_COMMON_FAVORITES': 31,
    'CSIDL_INTERNET_CACHE': 32,
    'CSIDL_WINDOWS': 36,
    'CSIDL_COMMON_PICTURES': 54,
    'CSIDL_COMMON_APPDATA': 35,
    'CSIDL_DESKTOPDIRECTORY': 16,
    'CSIDL_RESOURCES': 56,
    'CSIDL_COMMON_MUSIC': 53,
    'CSIDL_COMMON_OEM_LINKS': 58,
    'CSIDL_NETWORK': 18,
    'CSIDL_COOKIES': 33,
    'CSIDL_COMPUTERSNEARME': 61,
    'CSIDL_COMMON_ADMINTOOLS': 47,
    'CSIDL_APPDATA': 26,
    'CSIDL_TEMPLATES': 21,
    'CSIDL_COMMON_STARTUP': 24,
    'CSIDL_MYMUSIC': 13,
    'CSIDL_PROGRAM_FILES_COMMONX86': 44
}


def KnownFolderID(subcon):
    r"""
    Converts an integer to a CSIDL (KnownFolderID) value

    >>> KnownFolderID(Int32ul).build("CSIDL_SYSTEM")
    b'%\x00\x00\x00'
    >>> str(KnownFolderID(Int32ul).parse(b"\x18\x00\x00\x00"))
    'CSIDL_COMMON_STARTUP'
    """
    return Enum(subcon, **CSIDL)


# CryptoAPI ALG_ID values (wincrypt.h).
ALGIDS = {
    'CALG_DSS_SIGN': 0x00002200,
    'CALG_DES': 0x00006601,
    'CALG_DH_EPHEM': 0x0000aa02,
    'CALG_3DES': 0x00006603,
    'CALG_DESX': 0x00006604,
    'CALG_ECDH': 0x0000aa05,
    'CALG_NO_SIGN': 0x00002000,
    'CALG_DH_SF': 0x0000aa01,
    # Fixed typo: previously misspelled 'CALC_SSL3_SHAMD5'; wincrypt.h defines
    # CALG_SSL3_SHAMD5 = 0x00008008.
    'CALG_SSL3_SHAMD5': 0x00008008,
    'CALG_3DES_112': 0x00006609,
    'CALG_SKIPJACK': 0x0000660a,
    'CALG_HASH_REPLACE_OWF': 0x0000800b,
    'CALG_CYLINK_MEK': 0x0000660c,
    'CALG_MD4': 0x00008002,
    'CALG_AES_128': 0x0000660e,
    'CALG_AES_192': 0x0000660f,
    'CALG_AES_256': 0x00006610,
    'CALG_AES': 0x00006611,
    'CALG_AGREEDKEY_ANY': 0x0000aa03,
    'CALG_SHA1': 0x00008004,
    'CALG_MAC': 0x00008005,
    'CALG_MD2': 0x00008001,
    'CALG_TLS1_MASTER': 0x00004c06,
    'CALG_RSA_SIGN': 0x00002400,
    'CALG_SCHANNEL_ENC_KEY': 0x00004c07,
    'CALG_HMAC': 0x00008009,
    'CALG_TLS1PRF': 0x0000800a,
    'CALG_TEK': 0x0000660b,
    'CALG_SHA_256': 0x0000800c,
    'CALG_SHA_384': 0x0000800d,
    'CALG_SHA_512': 0x0000800e,
    'CALG_HUGHES_MD5': 0x0000a003,
    'CALG_RC4': 0x00006801,
    'CALG_ECDSA': 0x00002203,
    'CALG_RC2': 0x00006602,
    'CALG_SEAL': 0x00006802,
    'CALG_SSL3_MASTER': 0x00004c01,
    'CALG_SCHANNEL_MASTER_HASH': 0x00004c02,
    'CALG_MD5': 0x00008003,
    'CALG_SCHANNEL_MAC_KEY': 0x00004c03,
    # Fixed typo: previously 'CALG_KEY_KEYX'; wincrypt.h defines
    # CALG_KEA_KEYX = 0x0000aa04.
    'CALG_KEA_KEYX': 0x0000aa04,
    'CALG_ECMQV': 0x0000a001,
    'CALG_PCT1_MASTER': 0x00004c04,
    'CALG_RSA_KEYX': 0x0000a400,
    'CALG_OID_INFO_CNG_ONLY': 0xffffffff,
    'CALG_SSL2_MASTER': 0x00004c05,
    'CALG_OID_INFO_PARAMETERS': 0xfffffffe,
}


def AlgorithmID(subcon):
    r"""
    Converts an integer to an AlgorithmID value

    >>> str(AlgorithmID(Int16ul).parse(b"\x00\xa4"))
    'CALG_RSA_KEYX'
    >>> AlgorithmID(Int16ul).build("CALG_RC4")
    b'\x01h'
    """
    return Enum(subcon, **ALGIDS)
# coding=utf-8
"""
Tests the legacy features of mwcp.Reporter object.

These features are now replaced by test_report.py and test_runner.py
"""

import os

import pytest

import mwcp


# Each case: a legacy metadata key, the raw value passed in, and the full set of
# derived fields the legacy reporter is expected to expand it into.
@pytest.mark.parametrize('key,value,expected', [
    ('filepath', br'C:\dir\file.txt', {
        'filepath': [r'C:\dir\file.txt'],
        'filename': ['file.txt'],
        'directory': [r'C:\dir']
    }),
    ('servicedll', br'C:\Windows\Temp\1.tmp', {
        'servicedll': [r'C:\Windows\Temp\1.tmp'],
        'filepath': [r'C:\Windows\Temp\1.tmp'],
        'filename': ['1.tmp'],
        'directory': [r'C:\Windows\Temp']
    }),
    ('c2_url', b'http://[fe80::20c:1234:5678:9abc]:80/badness', {
        'c2_url': ['http://[fe80::20c:1234:5678:9abc]:80/badness'],
        'url': ['http://[fe80::20c:1234:5678:9abc]:80/badness'],
        'urlpath': ['/badness'],
        'c2_socketaddress': [['fe80::20c:1234:5678:9abc', '80', '']],
        'socketaddress': [['fe80::20c:1234:5678:9abc', '80', '']],
        'c2_address': ['fe80::20c:1234:5678:9abc'],
        'address': ['fe80::20c:1234:5678:9abc'],
        'port': [['80', '']]
    }),
    ('url', b'http://127.0.0.1/really/bad?hostname=pwned', {
        'url': ['http://127.0.0.1/really/bad?hostname=pwned'],
        'urlpath': ['/really/bad'],
        'address': ['127.0.0.1']
    }),
    ('proxy', (b'admin', b'pass', b'192.168.1.1', b'80', 'tcp'), {
        'proxy': [['admin', 'pass', '192.168.1.1', '80', 'tcp']],
        'proxy_socketaddress': [['192.168.1.1', '80', 'tcp']],
        'socketaddress': [['192.168.1.1', '80', 'tcp']],
        'proxy_address': ['192.168.1.1'],
        'address': ['192.168.1.1'],
        'port': [['80', 'tcp']],
        'credential': [['admin', 'pass']],
        'password': ['pass'],
        'username': ['admin']
    }),
    ('rsa_private_key', ('0x7', '0xbb', '0x17', '0x11', '0xb', '0x7', '0x3', '0xe'), {
        'rsa_private_key': [['0x7', '0xbb', '0x17', '0x11', '0xb', '0x7', '0x3', '0xe']]
    }),
    # Test auto padding.
    ('rsa_private_key', ('0x7', '0xbb', '0x17', '0x11', '0xb'), {
        'rsa_private_key': [['0x7', '0xbb', '0x17', '0x11', '0xb', '', '', '']]
    }),
    ('other', {b'foo': b'bar', 'biz': 'baz'}, {
        'other': {
            'foo': 'bar',
            'biz': 'baz'
        }
    })
])
def test_add_metadata(key, value, expected):
    report = mwcp.Report()
    with report:
        report.add_metadata(key, value)
    assert report.metadata == expected


def test_other_add_metadata():
    """Tests that adding multiple 'other' keys of same will convert to a list."""
    report = mwcp.Report()
    with report:
        report.add_metadata('other', {b'foo': b'bar', 'biz': 'baz'})
        assert report.metadata == {'other': {'foo': 'bar', 'biz': 'baz'}}
        # Adding the same key again should merge the values into a list.
        report.add_metadata('other', {b'foo': b'boop'})
        assert report.metadata == {'other': {'foo': ['bar', 'boop'], 'biz': 'baz'}}


def test_output_file(tmpdir):
    # Output filenames are prefixed with the first 5 hex digits of the MD5.
    test_file = tmpdir / '9c91e_foo.txt'
    report = mwcp.Report(output_directory=str(tmpdir))
    with report:
        assert report.output_file(b'This is data!', 'foo.txt', description='A foo file') == str(test_file)

        assert test_file.exists()
        assert test_file.read_binary() == b'This is data!'
        assert report.metadata['outputfile'] == [
            ['foo.txt', 'A foo file', '9c91e665b5b7ba5a3066c92dd02d3d7c']
        ]

        # Add file with same name to test name collision code.
        test_file = tmpdir / '4d8cf_foo.txt'
        assert report.output_file(b'More data!', 'foo.txt', description='Another foo file') == str(test_file)

        assert test_file.exists()
        assert test_file.read_binary() == b'More data!'
        assert report.metadata['outputfile'] == [
            ['foo.txt', 'A foo file', '9c91e665b5b7ba5a3066c92dd02d3d7c'],
            ['foo.txt', 'Another foo file', '4d8cfa4b19f5f971b0e6d79250cb1321'],
        ]

    # Test file sanitization
    test_file = tmpdir / '6f1ed_hello.txt'
    report = mwcp.Report(output_directory=str(tmpdir))
    with report:
        assert report.output_file(b'blah', u'héllo!!\x08.txt') == str(test_file)

    assert test_file.exists()
    assert test_file.read_binary() == b'blah'
    # The reported name keeps the original (unsanitized) filename.
    assert report.metadata['outputfile'] == [
        [u'héllo!!\x08.txt', '', '6f1ed002ab5595859014ebf0951522d9']
    ]


def test_print_report(datadir):
    """Tests the text report generation."""
    report = mwcp.Report()
    with report:
        report.add_metadata('proxy', (b'admin', b'pass', b'192.168.1.1', b'80', 'tcp'))
        report.add_metadata('other', {b'foo': 'bar', 'biz': b'baz\x00\x01'})
        report.output_file(b'data', 'file_1.exe', 'example output file')

    print(report.as_text())
    # Compare against the checked-in expected report text fixture.
    assert report.as_text() == (datadir / "report.txt").read_text()


# TODO: Deal with field ordering?
# def test_standard_field_order():
#     """Tests that STANDARD_FIELD_ORDER is updated to the field.json file."""
#     with open(mwcp.config.get("FIELDS_PATH"), "rb") as f:
#         fields = json.load(f)
#
#     ignore_fields = INFO_FIELD_ORDER + ["debug", "other", "outputfile"]
#
#     assert sorted(STANDARD_FIELD_ORDER) == sorted(set(fields.keys()) - set(ignore_fields))
41 | """ 42 | magic = b'MEI\x0C\x0B\x0A\x0B\x0E' 43 | # pyinstaller 2.0 44 | if file_object.data[-24:-24 + len(magic)] == magic: 45 | cookie_spec = construct.Struct( 46 | "magic" / construct.Const(magic), 47 | "package_size" / construct.Int32ub, 48 | "toc_offset" / construct.Int32ub, 49 | "toc_entries" / construct.Int32ub, 50 | "python_version" / construct.Int32ub, 51 | ) 52 | return True, cookie_spec 53 | 54 | # pyinstaller 2.1+ 55 | elif file_object.data[-88:-88+len(magic)] == magic: 56 | cookie_spec = construct.Struct( 57 | "magic" / construct.Const(magic), 58 | "package_size" / construct.Int32ub, 59 | "toc_offset" / construct.Int32ub, 60 | "toc_entries" / construct.Int32ub, 61 | "python_version" / construct.Int32ub, 62 | "python_dll" / construct.String(64), 63 | ) 64 | return True, cookie_spec 65 | 66 | return False 67 | 68 | def extract_entry(self, entry, hdr: bytes) -> Optional[FileObject]: 69 | """ 70 | Extracts file data from table entry and returns it as a FileObject. 71 | """ 72 | if not entry.data: 73 | return 74 | 75 | name = entry.name 76 | data = entry.data 77 | 78 | if entry.type in ('s', 'm', 'M'): # python script/module/package 79 | if entry.type == 's' and entry.data[1:4] != b"\x00\x00\x00": # uncompiled python code 80 | name += ".py" 81 | else: 82 | # it is a marshalled code object 83 | # we need to add the pyc header to the data so it can be decompiled 84 | name += ".pyc" 85 | data = hdr + data 86 | 87 | # TODO: Create a PYZ parser for extracting out individually compressed components. 88 | # This will require determining a way to safely unmarshal data. 
89 | # (PyInstaller/loader/pyimod01_archive.py) 90 | # case 'z': # zlib archive (pyz) 91 | # case 'n': # symbolic link 92 | # case 'b': # binary 93 | # case 'Z': # zlib (pyz) - frozen Python code (zipfile) 94 | # case 'x': # data 95 | # case 'l': # splash resource 96 | 97 | return FileObject(data, file_name=name) 98 | 99 | def run(self, cookie_spec: construct.Struct): 100 | """ 101 | Extract the cookie information in order to extract and parse the table of contents. Identify the .manifest 102 | filename in order to obtain the name of the target script to add to the dispatcher. 103 | """ 104 | cookie_size = cookie_spec.sizeof() 105 | 106 | cookie = cookie_spec.parse(self.file_object.data[-cookie_size:]) 107 | package = self.file_object.data[-cookie.package_size: -cookie_size] 108 | package_spec = construct.Struct( 109 | construct.Padding(cookie.toc_offset), 110 | "toc" / self.TABLE_ENTRY[:], 111 | ) 112 | info = package_spec.parse(package) 113 | 114 | python_version = str(cookie.python_version)[0] + "." + str(cookie.python_version)[1:] 115 | self.report.add(Version(python_version).add_tag("Python")) 116 | 117 | # Extract files base on .manifest files. 118 | pyz = None 119 | target_names = [] 120 | for entry in info.toc: 121 | if entry.name == "PYZ-00.pyz": 122 | pyz = entry 123 | elif entry.name.endswith(".manifest"): 124 | target_names.append(os.path.splitext(entry.name)[0].replace(".exe", '')) 125 | 126 | # Determine header for pyc files. 127 | if pyz: 128 | hdr = pyz.data[4:8] + b'\x00' * 12 129 | elif cookie.python_version >= 37: # PEP 552 -- Deterministic pycs 130 | hdr = b"\x42\x0d\x0d\x0a" + b'\0' * 12 # Bitfield, Timestamp, size 131 | elif cookie.python_version >= 33: 132 | hdr = b"\x42\x0d\x0d\x0a" + b'\0' * 8 # (Timestamp + size) 133 | else: 134 | hdr = b"\x03\xF3\x0D\x0A" + b'\0' * 4 # Timestamp 135 | 136 | # If we had a .manifest, only extract those files. 
137 | if target_names: 138 | for entry in info.toc: 139 | if entry.name in target_names or entry.data.startswith(b"PYZ\x00"): 140 | if file := self.extract_entry(entry, hdr): 141 | self.dispatcher.add(file) 142 | else: 143 | for entry in info.toc: 144 | if file := self.extract_entry(entry, hdr): 145 | self.dispatcher.add(file) 146 | -------------------------------------------------------------------------------- /mwcp/config/fields.txt: -------------------------------------------------------------------------------- 1 | 2 | One of the primary goals of DC3-MWCP is to standardize malware configuration 3 | parser output. To this end, DC3-MWCP enforces a set of predefined fields for 4 | parser output. These fields, including their descriptions and examples can be 5 | viewed by using the -k option of the mwcp.py utility or by viewing 6 | fields.json. Below is an example of parser output using these definitions. 7 | 8 | To ensure data portability, values are encoded as strings. Also, since many 9 | items can have multiple values, most items are specified as lists. Lists are 10 | used to implement tuples which consist of atomic values which are necessarily, 11 | connected. For example, a credential consists of a username and password. In 12 | these tuples, order must be maintained. 13 | 14 | It will be noted that some values are duplicated. For example, the address in a 15 | socketaddress is also stored alone as an address. The framework performs this 16 | duplication to ensure that the data can be used flexibly. This duplication 17 | is allowed to ensure all cases are covered accurately. For example, a password 18 | may be used in malware without an associated or explicit username. In other 19 | cases there may be multiple credentials such that it is necessary to have the 20 | usernames associated with their respective passwords. 
In cases where all the 21 | data for a composite data type is not known, and the tuple provides no 22 | meaningful association of data, typically the less specific type is used. If an 23 | incomplete tuple provides a meaningful connection in multiple data items, then 24 | it should be used. Ex. in the case of a password used without a username, just 25 | the password field would be populated. However, in cases where the servicename 26 | and servicedisplayname are known, they would be combined into an incomplete 27 | tuple to maintain the link between these two items. When possible, the most 28 | comprehensive type is used. If not all values for a tuple are known, the empty 29 | string, "", is used. 30 | 31 | The standardized fields used here seek to encompass the most common 32 | malware configuration items. The goal of this field set is to facilitate 33 | generation of metadata that is comparable between different backdoors. These 34 | fields are necessarily abstract and backdoor parser authors should seek to 35 | follow the descriptions as well as possible. When in doubt, the data should be 36 | included in the standardized fields if it is of the correct data type. For 37 | example, if a domain is used for a domain lookup based port calculation, this 38 | address should be included in the address field. Due to the heavy focus on 39 | malware parameters that are typically mitigated, the c2_address, c2_url, 40 | and c2_socketaddress fields are included. These are duplicative of their 41 | respective general counterparts. It is recognized that these special c2 tagged 42 | items warrant special attention as they are used frequently as mitigation 43 | candidates. A c2_address is an address that is known to be used for command and 44 | control. It will be repeated as an address. It should be clear, however, that 45 | malware configuration does not become intelligence or even a collection of 46 | actionable indicators without vetting. 
47 | 48 | While the abstraction provided by these fields helps make the configuration of 49 | different backdoors more easily comparable, there is no intention to dumb down 50 | the parser output to fully remove backdoor specific context. The point of the 51 | "other" field is to contain backdoor specific key value pairs. These keys are 52 | arbitrary to permit flexibility in describing the peculiarities of individual 53 | malware families. It is through use of these "other" fields that an analyst can 54 | determine how a specific abstract item is used. For example, this allows one to 55 | determine if an address is used for a proxy, for a 56 | connectivity check, for a lookup based port calculation, etc. Hence, it is 57 | common for much or all of the data in the standardized fields to be duplicated 58 | in the "other" field. 59 | 60 | 61 | Example config parser output: 62 | 63 | { 64 | "address": [ 65 | "10.1.1.1", 66 | "192.168.1.1" 67 | ], 68 | "c2_address": [ 69 | "10.1.1.1", 70 | "192.168.1.1" 71 | ], 72 | "debug": [ 73 | "Config Offset: 0x5000", 74 | "Tertiary C2 not found" 75 | ], 76 | "interval": [ 77 | "30" 78 | ], 79 | "missionid": [ 80 | "orgA201502" 81 | ], 82 | "mutex": [ 83 | "ghurlrat94839d" 84 | ], 85 | "other": { 86 | "C2 password": "MMMMchicken8#@", 87 | "Campaign Marker": "orgA201502", 88 | "Enable Keylogger": "True", 89 | "Primary C2": "10.1.1.1|443", 90 | "Secondary C2": "192.168.1.1|443", 91 | "Sleep Timer": "30" 92 | }, 93 | "password": [ 94 | "MMMMchicken8#@" 95 | ], 96 | "port": [ 97 | [ 98 | "443", 99 | "tcp" 100 | ] 101 | ], 102 | "socketaddress": [ 103 | [ 104 | "10.1.1.1", 105 | "443", 106 | "tcp" 107 | ], 108 | [ 109 | "192.168.1.1", 110 | "443", 111 | "tcp" 112 | ], 113 | [ 114 | "192.168.1.1", 115 | "80", 116 | "tcp" 117 | ] 118 | ], 119 | "c2_socketaddress": [ 120 | [ 121 | "10.1.1.1", 122 | "443", 123 | "tcp" 124 | ], 125 | [ 126 | "192.168.1.1", 127 | "443", 128 | "tcp" 129 | ] 130 | ], 131 | "proxy": [ 132 | [ 133 | "admin", 134 
| "pass", 135 | "192.168.1.1", 136 | "80", 137 | "tcp" 138 | ] 139 | ], 140 | "proxy_socketaddress": [ 141 | [ 142 | "192.168.1.1", 143 | "80", 144 | "tcp" 145 | ] 146 | ], 147 | "email_address": [ 148 | "user@bad.com" 149 | ] 150 | } 151 | -------------------------------------------------------------------------------- /mwcp/utils/construct/windows_constants.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # IMAGE_SECTION_HEADER.Characteristics 4 | IMAGE_SCN_TYPE_NO_PAD = 'IMAGE_SCN_TYPE_NO_PAD' 5 | IMAGE_SCN_CNT_CODE = 'IMAGE_SCN_CNT_CODE' 6 | IMAGE_SCN_CNT_INITIALIZED_DATA = 'IMAGE_SCN_CNT_INITIALIZED_DATA' 7 | IMAGE_SCN_CNT_UNINITIALIZED_DATA = 'IMAGE_SCN_CNT_UNINITIALIZED_DATA' 8 | IMAGE_SCN_LNK_OTHER = 'IMAGE_SCN_LNK_OTHER' 9 | IMAGE_SCN_LNK_INFO = 'IMAGE_SCN_LNK_INFO' 10 | IMAGE_SCN_LNK_REMOVE = 'IMAGE_SCN_LNK_REMOVE' 11 | IMAGE_SCN_LNK_COMDAT = 'IMAGE_SCN_LNK_COMDAT' 12 | IMAGE_SCN_NO_DEFER_SPEC_EXC = 'IMAGE_SCN_NO_DEFER_SPEC_EXC' 13 | IMAGE_SCN_GPREL = 'IMAGE_SCN_GPREL' 14 | IMAGE_SCN_MEM_PURGEABLE = 'IMAGE_SCN_MEM_PURGEABLE' 15 | IMAGE_SCN_MEM_LOCKED = 'IMAGE_SCN_MEM_LOCKED' 16 | IMAGE_SCN_MEM_PRELOAD = 'IMAGE_SCN_MEM_PRELOAD' 17 | IMAGE_SCN_ALIGN_1BYTES = 'IMAGE_SCN_ALIGN_1BYTES' 18 | IMAGE_SCN_ALIGN_2BYTES = 'IMAGE_SCN_ALIGN_2BYTES' 19 | IMAGE_SCN_ALIGN_4BYTES = 'IMAGE_SCN_ALIGN_4BYTES' 20 | IMAGE_SCN_ALIGN_8BYTES = 'IMAGE_SCN_ALIGN_8BYTES' 21 | IMAGE_SCN_ALIGN_16BYTES = 'IMAGE_SCN_ALIGN_16BYTES' 22 | IMAGE_SCN_ALIGN_32BYTES = 'IMAGE_SCN_ALIGN_32BYTES' 23 | IMAGE_SCN_ALIGN_64BYTES = 'IMAGE_SCN_ALIGN_64BYTES' 24 | IMAGE_SCN_ALIGN_128BYTES = 'IMAGE_SCN_ALIGN_128BYTES' 25 | IMAGE_SCN_ALIGN_256BYTES = 'IMAGE_SCN_ALIGN_256BYTES' 26 | IMAGE_SCN_ALIGN_512BYTES = 'IMAGE_SCN_ALIGN_512BYTES' 27 | IMAGE_SCN_ALIGN_1024BYTES = 'IMAGE_SCN_ALIGN_1024BYTES' 28 | IMAGE_SCN_ALIGN_2048BYTES = 'IMAGE_SCN_ALIGN_2048BYTES' 29 | IMAGE_SCN_ALIGN_4096BYTES = 'IMAGE_SCN_ALIGN_4096BYTES' 30 | IMAGE_SCN_ALIGN_8192BYTES = 
'IMAGE_SCN_ALIGN_8192BYTES' 31 | IMAGE_SCN_LNK_NRELOC_OVFL = 'IMAGE_SCN_LNK_NRELOC_OVFL' 32 | IMAGE_SCN_MEM_DISCARDABLE = 'IMAGE_SCN_MEM_DISCARDABLE' 33 | IMAGE_SCN_MEM_NOT_CACHED = 'IMAGE_SCN_MEM_NOT_CACHED' 34 | IMAGE_SCN_MEM_NOT_PAGED = 'IMAGE_SCN_MEM_NOT_PAGED' 35 | IMAGE_SCN_MEM_SHARED = 'IMAGE_SCN_MEM_SHARED' 36 | IMAGE_SCN_MEM_EXECUTE = 'IMAGE_SCN_MEM_EXECUTE' 37 | IMAGE_SCN_MEM_READ = 'IMAGE_SCN_MEM_READ' 38 | IMAGE_SCN_MEM_WRITE = 'IMAGE_SCN_MEM_WRITE' 39 | 40 | # IMAGE_OPTIONAL_HEADER.Magic 41 | IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b 42 | IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b 43 | IMAGE_ROM_OPTIONAL_HDR_MAGIC = 0x107 44 | 45 | # IMAGE_OPTIONAL_HEADER.Subsystem 46 | IMAGE_SUBSYSTEM_UNKNOWN = 0 47 | IMAGE_SUBSYSTEM_NATIVE = 1 48 | IMAGE_SUBSYSTEM_WINDOWS_GUI = 2 49 | IMAGE_SUBSYSTEM_WINDOWS_CUI = 3 50 | IMAGE_SUBSYSTEM_OS2_CUI = 5 51 | IMAGE_SUBSYSTEM_POSIX_CUI = 7 52 | IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9 53 | IMAGE_SUBSYSTEM_EFI_APPLICATION = 10 54 | IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11 55 | IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12 56 | IMAGE_SUBSYSTEM_EFI_ROM = 13 57 | IMAGE_SUBSYSTEM_XBOX = 14 58 | IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16 59 | 60 | 61 | # Make default DataDirectory and standard indexes available for convenience. 62 | # WARNING: Make sure you make a copy of DEFAULT_DATA_DIRECTORIES!! 
# Standard indexes into IMAGE_OPTIONAL_HEADER.DataDirectory.
DATA_DIR_INDEX_EXPORTS = 0
DATA_DIR_INDEX_IMPORTS = 1
DATA_DIR_INDEX_RESOURCE = 2
DATA_DIR_INDEX_EXCEPTION = 3
DATA_DIR_INDEX_CERTIFICATE = 4
DATA_DIR_INDEX_BASE_RELOC = 5
DATA_DIR_INDEX_DEBUG = 6
DATA_DIR_INDEX_ARCHITECTURE = 7
DATA_DIR_INDEX_GLOBAL_PTR = 8
DATA_DIR_INDEX_TLS = 9
DATA_DIR_INDEX_LOAD_CONFIG = 10
DATA_DIR_INDEX_BOUND_IMPORT = 11
DATA_DIR_INDEX_IMPORT_ADDRESS = 12
DATA_DIR_INDEX_DELAY_IMPORT_DESCRIPTOR = 13
DATA_DIR_INDEX_CLR_HEADER = 14
# Default (zeroed) 16-entry DataDirectory table.
# Built with a comprehension so each entry is an INDEPENDENT dict.
# The previous form, [dict(VirtualAddress=0, Size=0)] * 16, aliased a single
# dict 16 times: mutating one entry mutated all of them, and copy.deepcopy()
# preserves internal sharing, so even a deep copy of the table kept the bug.
DEFAULT_DATA_DIRECTORIES = [dict(VirtualAddress=0, Size=0) for _ in range(16)]

# IMAGE_OPTIONAL_HEADER.DllCharacteristics
IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE = 'IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE'
IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY = 'IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY'
IMAGE_DLLCHARACTERISTICS_NX_COMPAT = 'IMAGE_DLLCHARACTERISTICS_NX_COMPAT'
IMAGE_DLLCHARACTERISTICS_NO_ISOLATION = 'IMAGE_DLLCHARACTERISTICS_NO_ISOLATION'
IMAGE_DLLCHARACTERISTICS_NO_SEH = 'IMAGE_DLLCHARACTERISTICS_NO_SEH'
IMAGE_DLLCHARACTERISTICS_NO_BIND = 'IMAGE_DLLCHARACTERISTICS_NO_BIND'
IMAGE_DLLCHARACTERISTICS_WDM_DRIVER = 'IMAGE_DLLCHARACTERISTICS_WDM_DRIVER'
IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE = 'IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE'

# IMAGE_FILE_HEADER.Machine
IMAGE_FILE_MACHINE_UNKNOWN = 0x0
IMAGE_FILE_MACHINE_AM33 = 0x1d3
IMAGE_FILE_MACHINE_AMD64 = 0x8664
IMAGE_FILE_MACHINE_ARM = 0x1c0
IMAGE_FILE_MACHINE_ARM64 = 0xaa64
IMAGE_FILE_MACHINE_ARMNT = 0x1c4
IMAGE_FILE_MACHINE_EBC = 0xebc
IMAGE_FILE_MACHINE_I386 = 0x14c
IMAGE_FILE_MACHINE_IA64 = 0x200
IMAGE_FILE_MACHINE_M32R = 0x9041
IMAGE_FILE_MACHINE_MIPS16 = 0x266
IMAGE_FILE_MACHINE_MIPSFPU = 0x366
IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466
IMAGE_FILE_MACHINE_POWERPC = 0x1f0
IMAGE_FILE_MACHINE_POWERPCFP = 0x1f1
IMAGE_FILE_MACHINE_R4000 = 0x166
IMAGE_FILE_MACHINE_RISCV32 = 0x5032 108 | IMAGE_FILE_MACHINE_RISCV64 = 0x5064 109 | IMAGE_FILE_MACHINE_RISCV128 = 0x5128 110 | IMAGE_FILE_MACHINE_SH3 = 0x1a2 111 | IMAGE_FILE_MACHINE_SH3DSP = 0x1a3 112 | IMAGE_FILE_MACHINE_SH4 = 0x1a6 113 | IMAGE_FILE_MACHINE_SH5 = 0x1a8 114 | IMAGE_FILE_MACHINE_THUMB = 0x1c2 115 | IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169 116 | 117 | 118 | # IMAGE_FILE_HEADER characterstics. 119 | IMAGE_FILE_RELOCS_STRIPPED = 'IMAGE_FILE_RELOCS_STRIPPED' 120 | IMAGE_FILE_EXECUTABLE_IMAGE = 'IMAGE_FILE_EXECUTABLE_IMAGE' 121 | IMAGE_FILE_LINE_NUMS_STRIPPED = 'IMAGE_FILE_LINE_NUMS_STRIPPED' 122 | IMAGE_FILE_LOCAL_SYMS_STRIPPED = 'IMAGE_FILE_LOCAL_SYMS_STRIPPED' 123 | IMAGE_FILE_AGGRESIVE_WS_TRIM = 'IMAGE_FILE_AGGRESIVE_WS_TRIM' 124 | IMAGE_FILE_LARGE_ADDRESS_AWARE = 'IMAGE_FILE_LARGE_ADDRESS_AWARE' 125 | IMAGE_FILE_BYTES_REVERSED_LO = 'IMAGE_FILE_BYTES_REVERSED_LO' 126 | IMAGE_FILE_32BIT_MACHINE = 'IMAGE_FILE_32BIT_MACHINE' 127 | IMAGE_FILE_DEBUG_STRIPPED = 'IMAGE_FILE_DEBUG_STRIPPED' 128 | IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 'IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP' 129 | IMAGE_FILE_NET_RUN_FROM_SWAP = 'IMAGE_FILE_NET_RUN_FROM_SWAP' 130 | IMAGE_FILE_SYSTEM = 'IMAGE_FILE_SYSTEM' 131 | IMAGE_FILE_DLL = 'IMAGE_FILE_DLL' 132 | IMAGE_FILE_UP_SYSTEM_ONLY = 'IMAGE_FILE_UP_SYSTEM_ONLY' 133 | IMAGE_FILE_BYTES_REVERSED_HI = 'IMAGE_FILE_BYTES_REVERSED_HI' -------------------------------------------------------------------------------- /mwcp/utils/construct/construct_template.html: -------------------------------------------------------------------------------- 1 | {# 2 | This is the html template used to convert parsed constructs into user-friendly html 3 | that can be used in reports. 4 | 5 | Please see construct_html.py for its use. 6 | #} 7 | 8 | 9 | 10 | 11 | 12 | 80 | 81 | 82 |
83 | 84 | 85 |

86 | 87 |  offset | 88 | {%- for i in range(width) -%} 89 |  {% if i < 16 %} {% endif %}{{'%x'|format(i)}} 90 | {%- endfor -%} 91 |  |  92 | {%- for _ in range(width) -%} 93 | {{'%x'|format(loop.cycle(*range(16)))}} {#- We only have enough space for the first digit -#} 94 | {%- endfor -%} 95 |
96 |  ------ | {{'-- ' * width}} | {{'-' * width}} 97 | {% for offset, hex_line, ascii_line in hex_dump %} 98 |
 {{offset}} | {{hex_line}} | {{ascii_line}} 99 | {% endfor %} 100 |
101 |

102 | 103 | 104 |

 

105 | 107 | 108 | 113 | 118 | 123 | 124 | 125 | {% for offset, (colors, member) in color_map.items()|sort %} 126 | 127 | {# Offset #} 128 | 133 | 134 | {# Name #} 135 | 142 | 143 | {# Value #} 144 | 151 | 152 | {% endfor %} 153 | 154 |
110 |

Offset

112 |
115 |

Name

117 |
120 |

Value

122 |
129 |

130 | {{'%06x' % offset}} 131 |

132 |
136 |

137 | 138 | {{member.name}} 139 | 140 |

141 |
145 |

146 | 147 | {{member.value_str|replace('\n', '
')|replace('\t', '  ')}} 148 |
149 |

150 |
155 |
156 | 157 | -------------------------------------------------------------------------------- /mwcp/stix/report_writer.py: -------------------------------------------------------------------------------- 1 | """ 2 | This serves as the STIX Report Writer. This expands on the same report every time write is called. 3 | A STIX package is generated and returned as a string when serialize is called 4 | """ 5 | 6 | from stix2 import v21 as stix 7 | from stix2.v21 import _Observable 8 | 9 | import mwcp 10 | from mwcp import metadata 11 | from mwcp.report_writers import ReportWriter 12 | 13 | 14 | class STIXWriter(ReportWriter): 15 | """ 16 | Used to create a STIX Bundle that represents one or more MWCP Reports. 17 | Write must be called by each report that should be included in the final result. 18 | Serialize is called once this process is completed to return the STIX Bundle as a string. 19 | """ 20 | def __init__(self, fixed_timestamp: str = None): 21 | # used to ensure we deduplicate objects prior to loading them into the bundle 22 | self._all_objects = {} 23 | # applies a fixed timestamp to all SDOs and SROs for their created and updated times 24 | self.fixed_timestamp = fixed_timestamp 25 | 26 | def write(self, report: metadata.Report): 27 | linked_ids = set() 28 | analysis_data = { 29 | "product": "mwcp", 30 | "version": mwcp.__version__, 31 | "result_name": report.parser, 32 | "allow_custom": True, 33 | "created": self.fixed_timestamp, 34 | "modified": self.fixed_timestamp 35 | } 36 | 37 | note_content = ["Description: " + str(report.input_file.description)] 38 | 39 | # we need to turn the FileObj into a metadata.File to fetch STIX content 40 | file_result = report.input_file.as_stix(None, self.fixed_timestamp) 41 | 42 | for item in file_result.linked_stix: 43 | self._add_stix_object(item) 44 | 45 | for item in file_result.unlinked_stix: 46 | self._add_stix_object(item) 47 | 48 | # the file should always be the first STIX object written 49 | base_file = 
file_result.linked_stix[0] 50 | 51 | analysis_data["sample_ref"] = base_file.id 52 | 53 | if file_result.note_content: 54 | note_content.append(file_result.note_content) 55 | 56 | for element in report.metadata: 57 | result = element.as_stix(base_file, self.fixed_timestamp) 58 | 59 | # Content is loaded to the master note for the File 60 | if result.note_content: 61 | note_content.append(result.note_content) 62 | 63 | # Linked items will be added the result set for the Malware Analysis 64 | for item in result.linked_stix: 65 | linked_ids.add(item.id) 66 | self._add_stix_object(item) 67 | 68 | # Unlinked items are added to the final result, but are not linked within the Malware Analysis. 69 | # Links should happen via relationships or embedded STIX relationships within the objects 70 | for item in result.unlinked_stix: 71 | self._add_stix_object(item) 72 | 73 | # make a single large Note for all Other data which was collected and not otherwise applied 74 | if len(note_content) > 0: 75 | note_params = { 76 | "content": "\n".join(note_content), 77 | "object_refs": [base_file.id], 78 | "created": self.fixed_timestamp, 79 | "modified": self.fixed_timestamp, 80 | "allow_custom": True 81 | } 82 | 83 | if len(file_result.note_labels) > 0: 84 | file_result.note_labels.sort() 85 | note_params["labels"] = file_result.note_labels 86 | 87 | note = stix.Note(**note_params) 88 | self._add_stix_object(note) 89 | 90 | # the malware analysis must be made last since we need the IDs for everything that came out of it 91 | if len(linked_ids) > 0: 92 | refs = list(linked_ids) 93 | refs.sort() 94 | analysis_data["analysis_sco_refs"] = refs 95 | else: 96 | analysis_data["result"] = "unknown" 97 | 98 | if report.tags: 99 | tags = list(report.tags) 100 | tags.sort() 101 | analysis_data["labels"] = tags 102 | 103 | malware_analysis = stix.MalwareAnalysis(**analysis_data) 104 | self._add_stix_object(malware_analysis) 105 | 106 | def serialize(self) -> str: 107 | # Consolidate Notes down to 
avoid needless duplication 108 | note_lookup = {} 109 | to_remove = [] 110 | for idx, item in self._all_objects.items(): 111 | if item.type == "note": 112 | if hasattr(item, "abstract"): 113 | key = item.abstract + item.content 114 | else: 115 | key = item.content 116 | 117 | if hasattr(item, "labels"): 118 | key += " / ".join(item.labels) 119 | 120 | if key in note_lookup: 121 | existing = note_lookup[key] 122 | for ref in item.object_refs: 123 | if ref not in existing.object_refs: 124 | existing.object_refs.append(ref) 125 | to_remove.append(idx) 126 | else: 127 | note_lookup[key] = item 128 | 129 | # remove the duplicate notes 130 | # done outside of the initial loop to avoid messing with for 131 | for idx in to_remove: 132 | self._all_objects.pop(idx) 133 | 134 | values = self._all_objects.values() 135 | if len(values) > 0: 136 | package = stix.Bundle(objects=values, allow_custom=True) 137 | else: 138 | package = stix.Bundle() 139 | 140 | return package.serialize(indent=4) 141 | 142 | def _add_stix_object(self, stix_object: _Observable): 143 | """ 144 | Adds a STIX object to the all objects dictionary and replaces the existing element if the new version has more details 145 | """ 146 | if stix_object.id in self._all_objects: 147 | if len(stix_object.serialize()) > len(self._all_objects[stix_object.id].serialize()): 148 | self._all_objects[stix_object.id] = stix_object 149 | else: 150 | self._all_objects[stix_object.id] = stix_object 151 | -------------------------------------------------------------------------------- /mwcp/utils/construct/ARM.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper constructs for parsing the ARM instruction set. 3 | This module will be imported along with 'from mwcp.utils import construct' 4 | and accessible from the submodule "ARM". (e.g. construct.ARM.LDR) 5 | """ 6 | 7 | from . import core as construct 8 | from .core import this 9 | 10 | from . 
import helpers 11 | from mwcp.utils import elffileutils 12 | 13 | 14 | def _ByteSwapped(subcon, **ctx): 15 | r""" 16 | MODIFIED version of ByteSwapped that allows providing a context. 17 | Swap the byte order within boundaries of the given subcon. 18 | 19 | :param subcon: the subcon on top of byte swapped bytes 20 | :param **ctx: Context passed to subcon.sizeof() 21 | 22 | Example:: 23 | 24 | Int24ul <--> ByteSwapped(Int24ub) 25 | """ 26 | size = subcon.sizeof(**ctx) 27 | return construct.Transformed(subcon, construct.swapbytes, size, construct.swapbytes, size) 28 | 29 | 30 | # Single Data Transfer (LDR, STR) 31 | _ldr_str_inst = construct.BitStruct( 32 | 'cond' / construct.Nibble, 33 | construct.Const(1, construct.BitsInteger(2)), # must be '01' 34 | 'reg_imm_offset' / construct.Bit, # 0 = immediate offset, 1 = register offset 35 | 'pre_post_indexing' / construct.Bit, # 0 = post, 1 = pre 36 | 'up_down' / construct.Bit, # 0 = down, 1 = up 37 | 'byte_word' / construct.Bit, # 0 = word, 1 = byte 38 | 'write_back' / construct.Flag, 39 | 'load_store' / construct.Bit, # 0 = store, 1 = load 40 | 'base_register' / construct.Nibble, 41 | 'src_dest_register' / construct.Nibble, 42 | 'offset' / construct.IfThenElse( 43 | this.reg_imm_offset, 44 | construct.Octet >> construct.Nibble, # shift applied to Rm >> Rm 45 | construct.BitsInteger(12) 46 | ) 47 | ) 48 | 49 | LDR = construct.ExprValidator(_ByteSwapped(_ldr_str_inst, reg_imm_offset=0), this.load_store == 1) 50 | 51 | 52 | # Data Processing 53 | _data_proc_inst = construct.BitStruct( 54 | 'cond' / construct.Nibble, 55 | construct.Const(0, construct.BitsInteger(2)), # must be '00' 56 | 'reg_imm_operand' / construct.Bit, # 0 = immediate, 1 = register 57 | 'opcode' / construct.Enum( 58 | construct.Nibble, 59 | AND=0x0, EOR=0x1, SUB=0x2, RSB=0x3, ADD=0x4, ADC=0x5, SBC=0x6, RSC=0x7, 60 | TST=0x8, TEQ=0x9, CMP=0xA, CMN=0xB, ORR=0xC, MOV=0xD, BIC=0xE, MVN=0xF, 61 | ), 62 | 'set_cond' / construct.Flag, 63 | 'operand_1_reg' / 
def ELFPointer(inst, inst_end, subcon, elf=None):
    r"""
    This is the ARM version of ELFPointer.
    This subconstruct takes two arguments which
    specify the parsed ARM instruction containing an immediate offset in its second operand
    and the end offset (physical) for said instruction.

    The following ARM instructions are currently supported:
        - LDR

    Example: for the instruction "LDR R1, =data_offset"
        spec = Struct(
            'inst' / ARM.LDR,
            'inst_end' / Tell,
            'data' / ARM.ELFPointer(this.inst, this.inst_end, Bytes(100))
        )

        spec = Struct(
            're' / Regex(
                '\x01\x03(?P<data_ldr_inst>.{4})(?P<end>)\x06\x07', data_ldr_inst=ARM.LDR, end=Tell),
            'data' / ARM.ELFPointer(this.re.data_ldr_inst, this.re.end, Bytes(100))
        )

        spec.parse(file_data, elf=elf_object)

    :param inst: a construct.Container or function that represents the assembly instruction
    :param inst_end: an int or a function that represents the location of the end of the instruction.
    :param subcon: the subcon to use at the offset
    :param elf: Optional elftools.ELFFile file object.
        (if not supplied here, this must be supplied during parse()/build())
    """
    def _obtain_literal_pool_mem_offset(ctx):
        """Obtains the memory offset to the entry in the literal pool."""
        # Validate LDR instruction: only a PC-relative load with an immediate
        # offset and no write-back can be resolved statically here.
        _inst = inst(ctx._) if callable(inst) else inst
        if _inst.load_store != 1:
            raise construct.ConstructError('Load/Store bit must be set to 1')
        if _inst.base_register != 15 or _inst.reg_imm_offset == 1:
            raise construct.ConstructError(
                'Only instructions with PC relative addressing is currently supported.')
        if _inst.write_back:
            raise construct.ConstructError('Write back cannot be enabled for PC relative addressing.')
        # According to spec, PC is an address 8 bytes from the start of the instruction.
        # (Which means 4 bytes from end.)
        _elf = elf or ctx._params.elf
        _inst_end = inst_end(ctx._) if callable(inst_end) else inst_end
        # Convert the physical end offset to a memory offset before applying
        # the PC-relative arithmetic.
        _inst_end = elffileutils.obtain_memory_offset(_inst_end, elf=_elf)
        pc = _inst_end + 4
        mem_offset = pc + _inst.offset
        return mem_offset

    # HACK: FocusLast (which is FocusedSeq) will try to create a child context when it performs its parsing.
    # The user will be unaware of this shift and can cause issues if the subcon is dynamic.
    # Therefore, patch the given subcon to use the parent context during parsing.
    # TODO: Embedded() should allow for this functionality!
    class _Embedded(construct.Subconstruct):
        def _parse(self, stream, context, path):
            return self.subcon._parsereport(stream, context._, path)
    subcon = _Embedded(subcon)

    # Use original ELFPointer to create a pointer to the entry in the literal pool, which
    # in turn, is a pointer to the data we actually want.
    return helpers.FocusLast(
        helpers.ELFPointer(_obtain_literal_pool_mem_offset, construct.Int32ul, elf=elf),
        helpers.ELFPointer(this[0], subcon, elf=elf),
    )
    def parse_rsa_cert(self, rsa_data: bytes):
        """
        Given an RSA certificate in DER format, parse it for reportable information.

        :param rsa_data: The RSA data in DER format
        :return: dict of extracted fields; "rsa_cert_modulus", "rsa_pub_exponent"
            and "rsa_cert_sha1" remain None when the public key cannot be decoded.
        """
        self.logger.debug("The RSA Certificate is stored in ASN.1 DER format. Parsing for reportable metadata.")
        cert = asn1_decoder.decode(rsa_data, asn1Spec=rfc2459.Certificate())[0]
        tbs_cert = cert.getComponentByName("tbsCertificate")
        # The public key is stored as a BIT STRING; repack the bits into bytes
        # so it can be decoded as an RSAPublicKey below.
        rsa_key_data = self._from_bits(
            tbs_cert.getComponentByName("subjectPublicKeyInfo").getComponentByName("subjectPublicKey"))
        serial = tbs_cert.getComponentByName("serialNumber")
        issuer = self._parse_rdn(tbs_cert.getComponentByName("issuer")[0])
        subject = self._parse_rdn(tbs_cert.getComponentByName("subject")[0])
        # NOTE(review): only the utcTime choice of Validity is handled; a
        # certificate using generalTime would not parse here — confirm acceptable.
        valid_from = tbs_cert.getComponentByName("validity").getComponentByName("notBefore").getComponentByName(
            "utcTime")
        valid_from_str = datetime.strptime(str(valid_from), "%y%m%d%H%M%SZ").strftime("%Y-%m-%d %H:%M:%S")
        valid_to = tbs_cert.getComponentByName("validity").getComponentByName("notAfter").getComponentByName("utcTime")
        valid_to_str = datetime.strptime(str(valid_to), "%y%m%d%H%M%SZ").strftime("%Y-%m-%d %H:%M:%S")

        info_dict = {"rsa_cert_serial": "0x{:x}".format(int(serial)),
                     "rsa_cert_issuer": "{}".format(issuer),
                     "rsa_cert_subject": subject,
                     "rsa_cert_valid_from": "{}".format(valid_from_str),
                     "rsa_cert_valid_to": "{}".format(valid_to_str),
                     "rsa_cert_modulus": None,
                     "rsa_pub_exponent": None,
                     "rsa_cert_sha1": None}
        # If we fail to extract the Public Key, don't fail the entire thing.
        try:
            rsa_info = asn1_decoder.decode(rsa_key_data, asn1Spec=rfc2437.RSAPublicKey())[0]
            info_dict["rsa_cert_modulus"] = int(rsa_info.getComponentByName("modulus"))
            info_dict["rsa_pub_exponent"] = int(rsa_info.getComponentByName("publicExponent"))
            info_dict["rsa_cert_sha1"] = hashlib.sha1(rsa_data).hexdigest()
        except PyAsn1Error:
            self.logger.debug("Failed to extract RSAPublicKey", exc_info=1)

        return info_dict
128 | for key, value in cert_info.items(): 129 | self.report.add(metadata.Other(key, value)) 130 | 131 | 132 | class PrivateKey(Parser): 133 | DESCRIPTION = "RSA Private Key" 134 | 135 | RSA_PRIV_KEY_RE = re.compile( 136 | br"-----BEGIN RSA PRIVATE KEY-----(?P[^-]*)-----END RSA PRIVATE KEY-----", 137 | re.DOTALL 138 | ) 139 | 140 | @classmethod 141 | def identify(cls, file_object): 142 | return cls.RSA_PRIV_KEY_RE.search(file_object.data) and all( 143 | c in string.printable.encode() for c in file_object.data 144 | ) 145 | 146 | def run(self): 147 | for match in self.RSA_PRIV_KEY_RE.finditer(self.file_object.data): 148 | self.report.add(metadata.RSAPrivateKey.from_PEM(match.group(0).decode())) 149 | -------------------------------------------------------------------------------- /mwcp/utils/custombase64.py: -------------------------------------------------------------------------------- 1 | """ 2 | Custom Base64 related utility 3 | """ 4 | 5 | import base64 6 | import logging 7 | import sys 8 | 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | # Standard alphabet base on size. 14 | _STD_ALPHA = { 15 | 16: b'0123456789ABCDEF', 16 | 32: b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=', 17 | 64: b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=', 18 | } 19 | 20 | 21 | def _validate_alphabet(alphabet, type): 22 | """ 23 | validate the custom alphabet 24 | - 64 or 65 characters 25 | - mappings are unique 26 | """ 27 | if len(alphabet) not in (type, type+1): 28 | raise ValueError('invalid alphabet provided') 29 | 30 | if len(alphabet) != len(set(alphabet)): 31 | raise ValueError('mapping must be unique') 32 | 33 | return 34 | 35 | 36 | def _adjust_pad(alphabet, data, decode): 37 | logger.warning('The padding character has not been specified in the custom alphabet') 38 | 39 | if not (len(data) * 8) % 6: 40 | logger.info('The data does not require the padding character. 
continuing') 41 | return alphabet 42 | 43 | if decode: 44 | for char in data: 45 | if char not in alphabet: 46 | logger.info( 47 | 'The character "{}" does not appear in the alphabet, ' 48 | 'but was found in the encoded data. it will be used as the padding char'.format(char)) 49 | return alphabet + bytes([char]) if isinstance(char, int) else char # support for python 2 or 3 50 | raise ValueError('please provide a padding character to the custom alphabet') 51 | else: 52 | if b'=' not in alphabet: 53 | return alphabet + b'=' 54 | else: 55 | raise ValueError('ERROR: please provide a padding character to the custom alphabet') 56 | 57 | 58 | def _code(data, custom_alpha, size, decode, code_func): 59 | if isinstance(custom_alpha, str): 60 | custom_alpha = custom_alpha.encode() 61 | if isinstance(data, str): 62 | data = data.encode() 63 | _validate_alphabet(custom_alpha, size) 64 | if size != 16 and len(custom_alpha) == size: 65 | _adjust_pad(custom_alpha, data, decode) 66 | std_alpha = _STD_ALPHA[size] 67 | 68 | if decode: 69 | table = bytes.maketrans(custom_alpha, std_alpha) 70 | data = data.translate(table) 71 | return code_func(data) 72 | else: 73 | table = bytes.maketrans(std_alpha, custom_alpha) 74 | data = code_func(data) 75 | return data.translate(table) 76 | 77 | 78 | def b64encode(data, alphabet=None): 79 | """ 80 | Base64 encode 81 | :param data: data. 82 | :param alphabet: custom alphabet or standard alphabet. 83 | :return: base64 encoded data. 84 | 85 | >>> b64encode('hello world') 86 | 'aGVsbG8gd29ybGQ=' 87 | >>> custom_alphabet = b'EFGHQRSTUVWefghijklmnopIJKLMNOPABCDqrstuvwxyXYZabcdz0123456789+/=' 88 | >>> b64encode('hello world', alphabet=custom_alphabet) 89 | 'LSoXMS8BO29dMSj=' 90 | """ 91 | alphabet = alphabet or _STD_ALPHA[64] 92 | return _code(data, alphabet, 64, False, base64.b64encode) 93 | 94 | 95 | def b64decode(data, alphabet=None): 96 | """ 97 | Base64 decode (pads characters if necessary) 98 | :param data: base64 encoded data. 
99 | :param alphabet: custom alphabet or standard alphabet. 100 | :return: base64 decoded data. 101 | 102 | >>> b64decode('aGVsbG8gd29ybGQ=') 103 | 'hello world' 104 | >>> custom_alphabet = b'EFGHQRSTUVWefghijklmnopIJKLMNOPABCDqrstuvwxyXYZabcdz0123456789+/=' 105 | >>> b64decode('LSoXMS8BO29dMSj=', alphabet=custom_alphabet) 106 | 'hello world' 107 | >>> b64decode('LSoXMS8BO29dMSj', alphabet=custom_alphabet) 108 | 'hello world' 109 | """ 110 | alphabet = alphabet or _STD_ALPHA[64] 111 | # Pad the data, if necessary 112 | data += alphabet[len(alphabet)-1:] * ((-len(data)) % 4) 113 | return _code(data, alphabet, 64, True, base64.b64decode) 114 | 115 | 116 | def b32encode(data, alphabet=None): 117 | """ 118 | Base32 encodes 119 | :param data: data 120 | :param alphabet: custom alphabet or standard alphabet. 121 | :return: base32 encoded data. 122 | 123 | >>> b32encode('hello world') 124 | 'NBSWY3DPEB3W64TMMQ======' 125 | >>> custom_alphabet = 'FGHIJQ345RSTUVWXYKLMABCDENOPZ267=' 126 | >>> b32encode('hello world', alphabet=custom_alphabet) 127 | 'VGLCEPIXJGPC6ZMUUY======' 128 | """ 129 | alphabet = alphabet or _STD_ALPHA[32] 130 | return _code(data, alphabet, 32, False, base64.b32encode) 131 | 132 | 133 | def b32decode(data, alphabet=None): 134 | """ 135 | Base32 decode (pads characters if necessary) 136 | :param data: base32 encoded data. 137 | :param alphabet: custom alphabet or standard alphabet. 138 | :return: base32 decoded data. 
139 | 140 | >>> b32decode('NBSWY3DPEB3W64TMMQ======') 141 | 'hello world' 142 | >>> custom_alphabet = 'FGHIJQ345RSTUVWXYKLMABCDENOPZ267=' 143 | >>> b32decode('VGLCEPIXJGPC6ZMUUY======', alphabet=custom_alphabet) 144 | 'hello world' 145 | >>> b32decode('VGLCEPIXJGPC6ZMUUY', alphabet=custom_alphabet) 146 | 'hello world' 147 | """ 148 | alphabet = alphabet or _STD_ALPHA[32] 149 | # Pad the data, if necessary 150 | data += alphabet[len(alphabet)-1:] * ((-len(data)) % 8) 151 | return _code(data, alphabet, 32, True, base64.b32decode) 152 | 153 | 154 | def b16encode(data, alphabet=None): 155 | """ 156 | Base16 encodes 157 | :param data: data 158 | :param alphabet: custom alphabet or standard alphabet. 159 | :return: base16 encoded data. 160 | 161 | >>> b16encode('hello world') 162 | '68656C6C6F20776F726C64' 163 | >>> custom_alphabet = '78BDE0123F459A6C' 164 | >>> b16encode('hello world', alphabet=custom_alphabet) 165 | '131019191CB7221C2B191E' 166 | """ 167 | alphabet = alphabet or _STD_ALPHA[16] 168 | return _code(data, alphabet, 16, False, base64.b16encode) 169 | 170 | 171 | def b16decode(data, alphabet=None): 172 | """ 173 | Base16 decode 174 | :param data: base16 encoded data. 175 | :param alphabet: custom alphabet or standard alphabet. 176 | :return: base16 decoded data. 
177 | 178 | >>> b16decode('68656C6C6F20776F726C64') 179 | 'hello world' 180 | >>> custom_alphabet = '78BDE0123F459A6C' 181 | >>> b16decode('131019191CB7221C2B191E', alphabet=custom_alphabet) 182 | 'hello world' 183 | """ 184 | alphabet = alphabet or _STD_ALPHA[16] 185 | return _code(data, alphabet, 16, True, base64.b16decode) 186 | -------------------------------------------------------------------------------- /mwcp/parser.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import logging 3 | from typing import TYPE_CHECKING, Union, Tuple, Any 4 | import warnings 5 | 6 | # This is here for type hints and autocomplete in PyCharm 7 | # noinspection PyUnreachableCode 8 | if TYPE_CHECKING: 9 | from mwcp import FileObject, Report 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | # A way to create a class properties 15 | # (Adding ABCMeta so, parsers have the freedom to use it.) 16 | class ParserMeta(abc.ABCMeta): 17 | @property 18 | def name(cls): 19 | try: 20 | return cls._name 21 | except AttributeError: 22 | return cls.__name__ 23 | 24 | @name.setter 25 | def name(cls, value): 26 | cls._name = value 27 | 28 | @property 29 | def source(cls): 30 | try: 31 | return cls._source 32 | except AttributeError: 33 | module, _, _ = cls.__module__.partition(".") 34 | return module 35 | 36 | @source.setter 37 | def source(cls, value): 38 | cls._source = value 39 | 40 | def __repr__(cls): 41 | return "<{}>".format(cls.name) 42 | 43 | 44 | class Parser(metaclass=ParserMeta): 45 | """ 46 | Interface for all parser objects. 47 | Either use this as a base for all component parsers, or 48 | inherit this class into a customized base class for all parsers. This class includes some of the required data 49 | used by various other classes. 
50 | """ 51 | 52 | file_object = None # type: FileObject 53 | # This is the description that will be given to the file object during output 54 | # if no description is set in the file_object. This must be overwritten by inherited classes. 55 | DESCRIPTION = None 56 | # This is a tuple of tags that will be added to the file object after identification. 57 | TAGS = () 58 | 59 | # TODO: Deprecate the AUTHOR field? 60 | AUTHOR = "" # Optional 61 | 62 | def __init__(self, file_object, report, dispatcher): 63 | """ 64 | Initializes the Parser. 65 | 66 | :param FileObject file_object: Object containing data about component file. 67 | :param mwcp.Report report: Report object to be filled in. 68 | :param Dispatcher dispatcher: reference to the dispatcher object 69 | """ 70 | if not self.DESCRIPTION: 71 | raise NotImplementedError("Parser class is missing a DESCRIPTION.") 72 | self.file_object = file_object 73 | self.report = report 74 | self.dispatcher = dispatcher 75 | self.logger = logging.getLogger(".".join([self.__class__.__module__, self.__class__.__name__])) 76 | 77 | @property 78 | def reporter(self) -> "Report": 79 | warnings.warn( 80 | "reporter has been renamed to report and is now an instance of mwcp.Report", 81 | DeprecationWarning 82 | ) 83 | return self.report 84 | 85 | @property 86 | def knowledge_base(self) -> dict: 87 | """ 88 | Convenience function for getting knowledge_base. 
89 | """ 90 | return self.report.knowledge_base 91 | 92 | @classmethod 93 | def get_logger(cls): 94 | return logging.getLogger(".".join([cls.__module__, cls.__name__])) 95 | 96 | @classmethod 97 | def iter_subclasses(cls): 98 | """Yields all classes that inherit from this class.""" 99 | for subclass in cls.__subclasses__(): 100 | yield subclass 101 | for _subclass in subclass.iter_subclasses(): 102 | yield _subclass 103 | 104 | @classmethod 105 | def identify(cls, file_object: "FileObject") -> Union[bool, Tuple[bool, Any]]: 106 | """ 107 | Determines if this parser is identified to support the given file_object. 108 | This function must be overwritten in order to support identification. 109 | 110 | The passed in file_object may be modified at this time to provide 111 | a new file_name or description. 112 | (Be aware, that this change will be in affect for future parsers. 113 | Therefore, don't change it if you are returning False or the dispatcher is in greedy mode.) 114 | 115 | :param file_object: file object to use for identification 116 | :type file_object: dispatcher.FileObject 117 | 118 | :return bool: Boolean indicating if this parser supports the file_object 119 | Extra arguments to pass into the run() function can also be provided. 120 | """ 121 | logger.warning("Missing identify() function for: {}.{}".format(cls.__module__, cls.__name__)) 122 | return True # Default to True to keep backwards compatibility for legacy parsers. 123 | 124 | @staticmethod 125 | def unpack_identify(result) -> Tuple[bool, Any]: 126 | """ 127 | Helper function to normalize identify results to always produce a tuple of identification result and extras. 
128 | """ 129 | if isinstance(result, tuple) and isinstance(result[0], bool): 130 | identified, *rest = result 131 | rest = tuple(rest) 132 | else: 133 | identified = bool(result) 134 | rest = tuple() 135 | return (identified, *rest) 136 | 137 | @classmethod 138 | def parse(cls, file_object, report, *run_args, dispatcher=None): 139 | """ 140 | Runs parser on given file_object. 141 | 142 | :param FileObject file_object: Object containing data about component file. 143 | :param mwcp.Report report: reference to report object used to report new metadata. 144 | :param run_args: Extra arguments returned from identify() to pass to run() function. 145 | :param Dispatcher dispatcher: reference to the dispatcher object. (if used) 146 | :return: 147 | """ 148 | if dispatcher: 149 | report.set_file(file_object) 150 | parser_object = cls(file_object, report, dispatcher) 151 | parser_object.run(*run_args) 152 | 153 | # If dispatcher isn't provided, create a dummy one containing only this parser. 154 | # This is necessary to ensure identification is run first. 155 | else: 156 | from mwcp import Dispatcher # Must import here to avoid cyclic import. 157 | 158 | dispatcher = Dispatcher(cls.name, cls.source, author=cls.AUTHOR, description=cls.DESCRIPTION, parsers=[cls]) 159 | dispatcher.parse(file_object, report, *run_args) 160 | 161 | def run(self, *args): 162 | """ 163 | This function can be overwritten. It is called to run the parser. 164 | You don't have to overwrite this method if you only want to identify/output the file. 
165 | :return: 166 | """ 167 | pass 168 | -------------------------------------------------------------------------------- /mwcp/utils/logutil.py: -------------------------------------------------------------------------------- 1 | """Utilities for setting up logging.""" 2 | import copy 3 | import errno 4 | import logging.config 5 | import logging.handlers 6 | import multiprocessing as mp 7 | import os 8 | import platform 9 | import sys 10 | import threading 11 | import traceback 12 | import warnings 13 | from collections import deque 14 | 15 | import appdirs 16 | import yaml 17 | 18 | import mwcp 19 | 20 | # Queue used to send over log messages from child to main process. 21 | # (See mwcp.utils.multi_proc for its use.) 22 | mp_queue = mp.Queue() 23 | 24 | 25 | class LevelCharFilter(logging.Filter): 26 | """Logging filter used to add a 'level_char' format variable.""" 27 | 28 | def filter(self, record): 29 | if record.levelno >= logging.ERROR: 30 | record.level_char = "!" 31 | elif record.levelno >= logging.WARN: 32 | record.level_char = "-" 33 | elif record.levelno >= logging.INFO: 34 | record.level_char = "+" 35 | elif record.levelno >= logging.DEBUG: 36 | record.level_char = "*" 37 | else: 38 | record.level_char = " " 39 | return True 40 | 41 | 42 | class MPRotatingFileHandler(logging.handlers.RotatingFileHandler): 43 | """ 44 | Handle the uncommon case of the log attempting to roll over when 45 | another process has the log open. This only happens on Windows, and 46 | the log ends up being a handful of KBs greater than 1024. Entries 47 | are still written, and the rollover happens if/when the MainProcess is 48 | the only process with the log file open. 49 | """ 50 | 51 | def __init__(self, filename, **kwargs): 52 | # Expand and variables and home directories and make path if it doesn't exist. 53 | filename = os.path.expandvars(os.path.expanduser(filename)) 54 | 55 | # If path is relative, add to standard log directory. 
56 | if not os.path.isabs(filename): 57 | filename = os.path.join(appdirs.user_log_dir("mwcp", appauthor=False), filename) 58 | 59 | directory = os.path.dirname(filename) 60 | if not os.path.exists(directory): 61 | os.makedirs(directory) 62 | super(MPRotatingFileHandler, self).__init__(filename, **kwargs) 63 | 64 | def doRollover(self): 65 | """ 66 | Attempt to roll over to the next log file. If the current file 67 | is locked (Windows issue), keep writing to the original file until 68 | it is unlocked. 69 | 70 | :return: 71 | """ 72 | try: 73 | super(MPRotatingFileHandler, self).doRollover() 74 | except OSError as e: 75 | if not (sys.platform == "win32" and e.errno == errno.EACCES): 76 | raise 77 | 78 | 79 | class MPChildHandler(logging.Handler): 80 | """ 81 | Simple handler for child processes. 82 | 83 | Ensures pickle-ability and sends the record entry to the queue. 84 | """ 85 | 86 | def __init__(self, log_queue): 87 | super(MPChildHandler, self).__init__() 88 | self.queue = log_queue 89 | 90 | def emit(self, record): 91 | if record.exc_info: 92 | record.exc_text = "".join(traceback.format_exception(*record.exc_info)) 93 | record.exc_info = None 94 | 95 | self.queue.put(record) 96 | 97 | 98 | class ListHandler(logging.Handler): 99 | """ 100 | Log to a list, with an optional maximum number of records to store. 101 | 102 | Full records are available with the `records` property, and messages (i.e. 103 | the text of the log entry) at available with the `messages` property. 104 | """ 105 | 106 | def __init__(self, entries=None): 107 | """ 108 | Behaves essentially identical to any other handler. 109 | 110 | The only option is max_entries, to specify the max number of log 111 | entries kept. By default, no limit. 112 | 113 | :param int entries: Maximum number of records to store. 
def start_listener():
    """
    Start the daemon thread that relays child-process log records.

    Records placed on the shared ``mp_queue`` by MPChildHandler instances in
    child processes are pulled off here and dispatched through the normal
    logging machinery. Only runs in the main process.
    """
    if mp.current_process().name != "MainProcess":
        return

    def _mp_log_listener(log_queue):
        # Runs forever; the thread is daemonized below so it dies with the process.
        while True:
            record = log_queue.get()
            _logger = logging.getLogger(record.name)
            # Re-check the level here: child processes may forward everything
            # (see the Windows note in setup_logging).
            if _logger.isEnabledFor(record.levelno):
                _logger.handle(record)

    listener_thread = threading.Thread(target=_mp_log_listener, args=(mp_queue,))
    listener_thread.daemon = True
    listener_thread.start()


def setup_logging(default_level=logging.INFO, queue=None):
    """
    Sets up logging using default log config file or log config file set by 'MWCP_LOG_CFG'

    :param default_level: Default log level to set to if config file fails.
    :param queue: Queue used to pass logs to.
        (Only provided when called from a child process; records are then
        forwarded to the main process instead of being handled locally.)
    """
    if queue:
        # Child-process setup: forward every record to the main process.
        assert mp.current_process().name != "MainProcess"
        logging.root.addHandler(MPChildHandler(queue))
        # If on Windows, allow all records to pass through, this is necessary because Windows
        # subprocesses don't duplicate the global state like posix systems.
        # Therefore, we have to pass all log messages through since effective
        # log level is unknown.
        if "Windows" in platform.platform():
            logging.root.setLevel(logging.DEBUG)
    else:
        # Allow setting log configuration using 'MWCP_LOG_CFG' environment variable.
        log_config = os.getenv("MWCP_LOG_CFG", None)
        if log_config is None:
            log_config = mwcp.config.get("LOG_CONFIG_PATH", None)
        else:
            warnings.warn(
                "Using MWCP_LOG_CFG to set log configuration is deprecated. "
                "Please specify path in the configuration file instead."
            )
        if log_config:
            try:
                with open(log_config, "rt") as f:
                    config = yaml.safe_load(f.read())
                logging.config.dictConfig(config)
            except IOError as e:
                # Fall back to a basic config rather than failing startup.
                warnings.warn("Unable to set log config file: {} with error: {}".format(log_config, e))
                logging.basicConfig(level=default_level)
        else:
            logging.basicConfig(level=default_level)

    # Startup queue listener if we are in the main process.
    start_listener()