├── mwcp ├── stix │ ├── __init__.py │ ├── objects.py │ └── report_writer.py ├── tests │ ├── __init__.py │ ├── test_string_report │ │ ├── strings.txt │ │ └── strings.json │ ├── test_disassembly │ │ ├── strings.exe │ │ ├── Sample.py │ │ └── strings.c │ ├── test_poshdeob.py │ ├── test_runner │ │ ├── yara_repo │ │ │ ├── rule_a.yara │ │ │ ├── rule_b.yara │ │ │ └── sibling_dispatch.yara │ │ ├── Sample.py │ │ └── SiblingDispatch.py │ ├── test_cli │ │ ├── csv_legacy.csv │ │ ├── parse.txt │ │ ├── csv_cli.csv │ │ ├── fb843efb2ffec987db12e72ca75c9ea2.json │ │ └── parse.json │ ├── test_server │ │ └── DecodedStringTestParser.py │ ├── test_report_writer │ │ ├── report_foreign.txt │ │ ├── report_foreign.md │ │ ├── report_foreign.html │ │ ├── report_wordwrap.txt │ │ └── report_wordwrap.html │ ├── test_custombase64.py │ ├── test_legacy_reporter │ │ └── report.txt │ ├── test_string_report.py │ ├── test_testing.py │ ├── test_stix.py │ ├── test_disassembly.py │ ├── test_pecon.py │ ├── test_report │ │ └── split_report.py │ ├── test_report_writer.py │ ├── test_runner.py │ ├── test_construct.py │ ├── test_report.py │ ├── test_issues.py │ └── test_legacy_reporter.py ├── tools │ ├── __init__.py │ ├── server │ │ ├── templates │ │ │ ├── results.html │ │ │ ├── parsers.html │ │ │ ├── base.html │ │ │ └── upload.html │ │ └── __init__.py │ └── update_schema.py ├── resources │ ├── __init__.py │ └── RATDecoders │ │ ├── __init__.py │ │ └── PLACE_PARSERS_HERE ├── utils │ ├── __init__.py │ ├── construct │ │ ├── __init__.py │ │ ├── network.py │ │ ├── datetime_.py │ │ ├── MIPS.py │ │ ├── dotnet.py │ │ ├── windows_enums.py │ │ ├── windows_constants.py │ │ ├── construct_template.html │ │ └── ARM.py │ ├── stringutils.py │ ├── multi_proc.py │ ├── elffileutils.py │ ├── custombase64.py │ └── logutil.py ├── parsers │ ├── __init__.py │ ├── TA.py │ ├── GenericDropper.py │ ├── PDF.py │ ├── PowerShell.py │ ├── Decoy.py │ ├── foo.py │ ├── Archive.py │ ├── ISO.py │ ├── tests │ │ └── foo │ │ │ └── 
f144899b86766688991c5d0d10902f4a.json │ ├── VisualBasic.py │ ├── Python.py │ └── RSA.py ├── exceptions.py ├── __init__.py ├── config │ ├── log_config.yml │ ├── config.yml │ ├── __init__.py │ └── fields.txt ├── parser_config.yml ├── core.py └── parser.py ├── docs └── PythonStyleGuide.md ├── MANIFEST.in ├── setup.cfg ├── noxfile.py ├── .gitignore ├── LICENSE.txt ├── .github └── workflows │ └── workflow.yml └── setup.py /mwcp/stix/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/resources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/resources/RATDecoders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/resources/RATDecoders/PLACE_PARSERS_HERE: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mwcp/tests/test_string_report/strings.txt: -------------------------------------------------------------------------------- 1 | hello 2 | world -------------------------------------------------------------------------------- /mwcp/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """This package is used to store common helper 
utilities for developing parsers.""" 2 | -------------------------------------------------------------------------------- /mwcp/tools/server/templates/results.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | {{ highlight|safe }} 5 | {% endblock %} -------------------------------------------------------------------------------- /mwcp/tests/test_disassembly/strings.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dod-cyber-crime-center/DC3-MWCP/HEAD/mwcp/tests/test_disassembly/strings.exe -------------------------------------------------------------------------------- /docs/PythonStyleGuide.md: -------------------------------------------------------------------------------- 1 | # Python Style Guide 2 | 3 | Use [black](https://github.com/psf/black) with line lengths of 120. 4 | 5 | ```bash 6 | $ pip install black 7 | $ black -l 120 8 | ``` 9 | -------------------------------------------------------------------------------- /mwcp/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Path to parser configuration file. 4 | # (Used when parsers are installed through entry points.) 
5 | config = os.path.join(os.path.dirname(__file__), "..", "parser_config.yml") 6 | -------------------------------------------------------------------------------- /mwcp/tests/test_poshdeob.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests powershell deofuscator 3 | """ 4 | 5 | import doctest 6 | 7 | from mwcp.utils import poshdeob 8 | 9 | 10 | def test_doctests(): 11 | """Tests that the doctests work.""" 12 | results = doctest.testmod(poshdeob) 13 | assert not results.failed 14 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner/yara_repo/rule_a.yara: -------------------------------------------------------------------------------- 1 | 2 | rule Rule_Mapped { 3 | meta: 4 | mwcp = "dc3:foo" 5 | strings: 6 | $str = "mapped" 7 | condition: 8 | all of them 9 | } 10 | 11 | rule Rule_Unmapped { 12 | strings: 13 | $str = "unmapped" 14 | condition: 15 | all of them 16 | } 17 | -------------------------------------------------------------------------------- /mwcp/tests/test_cli/csv_legacy.csv: -------------------------------------------------------------------------------- 1 | scan_date,inputfilename,outputfile.name,outputfile.description,outputfile.md5,a,address,other.field1,other.field2 2 | [TIMESTAMP],file1.exe,"out_name 3 | out_name2","out_desc 4 | out_desc2","out_md5 5 | out_md52",,"https://google.com 6 | ftp://amazon.com",value1,"value2 7 | value3" 8 | [TIMESTAMP],file2.exe,,,,"b 9 | c",,, 10 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md LICENSE.txt 2 | include mwcp/resources/RATDecoders/PLACE_PARSERS_HERE 3 | include mwcp/parser_config.yml 4 | recursive-include mwcp/resources *.json *.txt 5 | recursive-include mwcp/config * 6 | recursive-include mwcp/parsers/tests * 7 | graft mwcp/tests 8 | graft mwcp/tools/server 
9 | include mwcp/utils/construct/construct_template.html 10 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner/yara_repo/rule_b.yara: -------------------------------------------------------------------------------- 1 | 2 | rule FileA { 3 | meta: 4 | mwcp = "Sample.FileA" 5 | strings: 6 | $str = "file a" 7 | condition: 8 | all of them 9 | } 10 | 11 | 12 | rule FileB { 13 | meta: 14 | mwcp = "Sample.FileB" 15 | strings: 16 | $str = "file b" 17 | condition: 18 | all of them 19 | } 20 | 21 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | version = attr:mwcp.__version__ 3 | description = A framework for malware configuration parsers. 4 | long_description_content_type = text/markdown 5 | long_description = file:README.md 6 | 7 | [tool:pytest] 8 | testpaths = mwcp/tests 9 | required_plugins = pytest-datadir pytest-xdist 10 | filterwarnings = 11 | ignore::DeprecationWarning 12 | addopts = 13 | -p no:faulthandler 14 | -------------------------------------------------------------------------------- /mwcp/tools/update_schema.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a script for updating the formal schema file - schema.json 3 | """ 4 | import json 5 | import pathlib 6 | 7 | import mwcp 8 | 9 | 10 | def main(): 11 | schema_json = pathlib.Path(mwcp.__file__).parent / "config" / "schema.json" 12 | 13 | with schema_json.open("w") as fo: 14 | json.dump(mwcp.schema(), fo, indent=4) 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /mwcp/utils/construct/__init__.py: -------------------------------------------------------------------------------- 1 | """This is a wrapper interface to the construct library which adds extra helper 
functions.""" 2 | 3 | # from __future__ import absolute_import 4 | 5 | # Import interface 6 | from .core import * 7 | 8 | from .construct_html import html_hex 9 | from .helpers import * 10 | from .dotnet import * 11 | from .datetime_ import * 12 | from .windows_structures import * 13 | from .network import * 14 | from .windows_enums import * 15 | from . import ARM 16 | from . import MIPS 17 | 18 | -------------------------------------------------------------------------------- /mwcp/tests/test_server/DecodedStringTestParser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sample parser that reports decoded strings. 3 | """ 4 | 5 | from mwcp import metadata, Parser 6 | 7 | 8 | class Implant(Parser): 9 | DESCRIPTION = "Sample Implant" 10 | 11 | @classmethod 12 | def identify(cls, file_object): 13 | return True 14 | 15 | def run(self): 16 | self.report.add(metadata.DecodedString("string A")) 17 | self.report.add(metadata.DecodedString("string B", encryption_key=metadata.EncryptionKey(b"\xde\xad\xbe\xef", "xor"))) 18 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner/Sample.py: -------------------------------------------------------------------------------- 1 | 2 | from mwcp import Parser, FileObject 3 | 4 | 5 | class FileA(Parser): 6 | DESCRIPTION = "File A" 7 | 8 | @classmethod 9 | def identify(cls, file_object): 10 | return b"matches file a" in file_object.data 11 | 12 | def run(self): 13 | self.dispatcher.add(FileObject(b"matches file b")) 14 | 15 | 16 | class FileB(Parser): 17 | DESCRIPTION = "File B" 18 | 19 | @classmethod 20 | def identify(cls, file_object): 21 | return b"matches file b" in file_object.data 22 | -------------------------------------------------------------------------------- /mwcp/tools/server/templates/parsers.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block 
content %} 4 | 5 | 6 | 7 | {% for column in headers %} 8 | 9 | {% endfor %} 10 | 11 | 12 | 13 | {% for parser in parsers %} 14 | 15 | {% for column in parser %} 16 | 17 | {% endfor %} 18 | 19 | {% endfor %} 20 | 21 |
{{ column }}
{{ column }}
22 | {% endblock %} -------------------------------------------------------------------------------- /mwcp/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class MWCPError(Exception): 4 | """ 5 | Base class for custom exceptions thrown by MWCP. 6 | """ 7 | 8 | 9 | class ConfigError(MWCPError): 10 | """ 11 | This exception is thrown if there is an issue with the configuration file. 12 | """ 13 | 14 | 15 | class UnableToParse(MWCPError): 16 | """ 17 | This exception can be thrown if a parser that has been correctly identified has failed to parse 18 | the file and you would like other parsers to be tried. 19 | """ 20 | 21 | 22 | class ValidationError(MWCPError): 23 | """ 24 | This exception can be thrown if validation fails when adding metadata. 25 | """ 26 | 27 | 28 | class ParserNotFoundError(MWCPError): 29 | """ 30 | This exception gets thrown if a parser can't be found. 31 | """ 32 | -------------------------------------------------------------------------------- /mwcp/tools/server/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import flask as f 4 | 5 | from . import server 6 | 7 | 8 | def create_app(extra_config=None): 9 | """ 10 | Create a Flask app instance for the MWCP API. 
11 | 12 | :param dict extra_config: Extra configuration options to add to the app 13 | :return: Flask app for MWCP API 14 | """ 15 | app = f.Flask(__name__) 16 | 17 | app.config.setdefault("MENU_LINKS", []).extend( 18 | [ 19 | {"name": "Upload", "endpoint": "mwcp.upload"}, 20 | {"name": "Parsers", "endpoint": "mwcp.parsers_list"}, 21 | ] 22 | ) 23 | 24 | if extra_config: 25 | app.config.from_mapping(extra_config) 26 | 27 | server.init_app(app) 28 | app.register_blueprint(server.bp) 29 | 30 | return app 31 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner/yara_repo/sibling_dispatch.yara: -------------------------------------------------------------------------------- 1 | /* 2 | Rules for test_yara_runner_sibling_dispatch 3 | */ 4 | 5 | rule Parent { 6 | meta: 7 | mwcp = "SiblingDispatch.Parent" 8 | strings: 9 | $str = "parent" 10 | condition: 11 | all of them 12 | } 13 | 14 | 15 | rule Sibling1 { 16 | meta: 17 | mwcp = "SiblingDispatch.Sibling1" 18 | strings: 19 | $str = "sibling 1" 20 | condition: 21 | all of them 22 | } 23 | 24 | 25 | rule Sibling2 { 26 | meta: 27 | mwcp = "SiblingDispatch.Sibling2" 28 | strings: 29 | $str = "sibling 2" 30 | condition: 31 | all of them 32 | } 33 | 34 | 35 | rule Grandchild { 36 | meta: 37 | mwcp = "SiblingDispatch.Grandchild" 38 | strings: 39 | $str = "grandchild" 40 | condition: 41 | all of them 42 | } 43 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs tests and other routines. 3 | 4 | Usage: 5 | 1. Install "nox" 6 | 2. 
Run "nox" or "nox -s test" 7 | """ 8 | 9 | import nox 10 | 11 | 12 | @nox.session(python="3.10") 13 | def test(session): 14 | """Run pytests""" 15 | session.install("-e", ".[testing]") 16 | session.run("pytest") 17 | 18 | 19 | @nox.session(python="3.10") 20 | def build(session): 21 | """Build source and wheel distribution""" 22 | session.run("python", "setup.py", "sdist") 23 | session.run("python", "setup.py", "bdist_wheel") 24 | 25 | 26 | @nox.session(python=False) 27 | def release_patch(session): 28 | """Generate release patch""" 29 | session.run("mkdir", "-p", "dist", external=True) 30 | with open("./dist/updates.patch", "w") as out: 31 | session.run( 32 | "git", "format-patch", "--stdout", "master", 33 | external=True, 34 | stdout=out 35 | ) 36 | -------------------------------------------------------------------------------- /mwcp/__init__.py: -------------------------------------------------------------------------------- 1 | """Exposes interface for MWCP.""" 2 | 3 | import logging 4 | 5 | # Add null handler to root logger to avoid "no handler" error when this is used as a library 6 | logging.getLogger().addHandler(logging.NullHandler()) 7 | 8 | 9 | from mwcp.config import _config as config 10 | from mwcp.parser import Parser 11 | from mwcp.file_object import FileObject 12 | from mwcp.registry import ( 13 | register_entry_points, register_parser_directory, register_parser_package, 14 | iter_parsers, get_parser_descriptions, set_default_source, 15 | clear as clear_registry, 16 | clear_default_source, 17 | ParserNotFoundError 18 | ) 19 | from mwcp.runner import Runner 20 | from mwcp.report import Report 21 | from mwcp.dispatcher import Dispatcher, UnidentifiedFile 22 | from mwcp.utils.logutil import setup_logging 23 | from mwcp.core import run, schema 24 | from mwcp.exceptions import * 25 | 26 | 27 | __version__ = "3.14.0" 28 | -------------------------------------------------------------------------------- /mwcp/parsers/TA.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Wrapper for Techanarchy RATDecoders using techanarchy_bridge 3 | """ 4 | 5 | import os 6 | from pathlib import Path 7 | 8 | from mwcp import Parser 9 | from mwcp.resources import RATDecoders 10 | 11 | RAT_DECODERS = [decoder.stem for decoder in Path(RATDecoders.__file__).parent.glob("[!_]*.py")] 12 | 13 | 14 | def run(self): 15 | from mwcp.resources import techanarchy_bridge 16 | 17 | name = self.__class__.__name__ 18 | scriptpath = os.path.join(os.path.dirname(RATDecoders.__file__), name + ".py") 19 | techanarchy_bridge.run_decoder(self, scriptpath) 20 | 21 | 22 | # Dynamically declare Parser classes. 23 | for name in RAT_DECODERS: 24 | if name == "TEMPLATE": 25 | continue 26 | klass = type(name, (Parser,), {"DESCRIPTION": name, "run": run, "AUTHOR": "TechAnarchy"}) 27 | klass.__module__ = __name__ # Module originally gets incorrectly set to "abc" 28 | globals()[name] = klass 29 | -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer/report_foreign.txt: -------------------------------------------------------------------------------- 1 | ----- File: input_file.bin ----- 2 | Field Value 3 | ------------ ---------------------------------------------------------------- 4 | Parser FooParser 5 | File Path C:/input_file.bin 6 | Description SuperMalware Implant 7 | Architecture 8 | MD5 1e50210a0202497fb79bc38b6ade6c34 9 | SHA1 baf34551fecb48acc3da868eb85e1b6dac9de356 10 | SHA256 1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee 11 | Compile Time 12 | 13 | ---- Miscellaneous ---- 14 | Key Value 15 | ------ ----------------------------------- 16 | JAPAN ユーザー別サイト 17 | CHINA 简体中文 18 | KOREA 크로스 플랫폼으로 19 | ISRAEL מדורים מבוקשים 20 | EGYPT أفضل البحوث 21 | RUSSIA Десятую Международную 22 | MATH ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i) 23 | FRANCE français langue étrangère 24 | SPAIN mañana olé 25 | 26 | ----- File Tree 
----- 27 | 28 | 29 | -------------------------------------------------------------------------------- /mwcp/tests/test_custombase64.py: -------------------------------------------------------------------------------- 1 | """Tests mwcp.utils.custombase64""" 2 | 3 | from mwcp.utils import custombase64 4 | 5 | 6 | def test_base64(): 7 | custom_alphabet = b'EFGHQRSTUVWefghijklmnopIJKLMNOPABCDqrstuvwxyXYZabcdz0123456789+/=' 8 | assert custombase64.b64encode(b'hello world', custom_alphabet) == b'LSoXMS8BO29dMSj=' 9 | assert custombase64.b64decode(b'LSoXMS8BO29dMSj=', custom_alphabet) == b'hello world' 10 | 11 | 12 | def test_base32(): 13 | custom_alphabet = b'FGHIJQ345RSTUVWXYKLMABCDENOPZ267=' 14 | assert custombase64.b32encode(b'hello world', custom_alphabet) == b'VGLCEPIXJGPC6ZMUUY======' 15 | assert custombase64.b32decode(b'VGLCEPIXJGPC6ZMUUY======', custom_alphabet) == b'hello world' 16 | 17 | 18 | def test_base16(): 19 | custom_alphabet = b'78BDE0123F459A6C' 20 | assert custombase64.b16encode(b'hello world', custom_alphabet) == b'131019191CB7221C2B191E' 21 | assert custombase64.b16decode(b'131019191CB7221C2B191E', custom_alphabet) == b'hello world' 22 | -------------------------------------------------------------------------------- /mwcp/tests/test_legacy_reporter/report.txt: -------------------------------------------------------------------------------- 1 | ---- Credential ---- 2 | Username Password 3 | ---------- ---------- 4 | admin pass 5 | 6 | ---- Network ---- 7 | Tags Address Port Network Protocol Username Password 8 | ------ ----------- ------ ------------------ ---------- ---------- 9 | proxy 192.168.1.1 80 tcp admin pass 10 | 11 | ---- Socket ---- 12 | Tags Address Port Network Protocol 13 | ------ ----------- ------ ------------------ 14 | proxy 192.168.1.1 80 tcp 15 | 16 | ---- Miscellaneous ---- 17 | Key Value 18 | ----- -------------- 19 | foo bar 20 | biz b'baz\x00\x01' 21 | 22 | ---- Residual Files ---- 23 | Filename Description Derivation MD5 Arch 
Compile Time 24 | ---------- ------------------- ------------ -------------------------------- ------ -------------- 25 | file_1.exe example output file 8d777f385d3dfec8815d20f7496026dc 26 | 27 | -------------------------------------------------------------------------------- /mwcp/tests/test_string_report.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests components of string report extension. 3 | """ 4 | 5 | from mwcp import metadata 6 | 7 | 8 | def test_strings(report): 9 | with report: 10 | report.add(metadata.DecodedString("hello")) 11 | report.add(metadata.DecodedString("world", encryption_key=metadata.EncryptionKey(b"\xde\xad\xbe\xef"))) 12 | assert report.strings() == ["hello", "world"] 13 | 14 | 15 | def test_string_report_generation(report, datadir): 16 | report._external_strings_report = True 17 | with report: 18 | report.add(metadata.DecodedString("hello")) 19 | report.add(metadata.DecodedString("world", encryption_key=metadata.EncryptionKey(b"\xde\xad\xbe\xef"))) 20 | string_reports = report.get(metadata.File)[:2] 21 | assert string_reports[0].name.endswith(f"_strings.json") 22 | assert string_reports[1].name.endswith(f"_strings.txt") 23 | assert string_reports[0].data.decode("utf8") == (datadir / "strings.json").read_text() 24 | assert string_reports[1].data.decode("utf8") == (datadir / "strings.txt").read_text() 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .DS_Store 3 | .project 4 | .vscode/ 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # 
PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | 60 | # Sphinx documentation 61 | docs/_build/ 62 | 63 | # PyBuilder 64 | target/ 65 | 66 | # PyCharm 67 | /.idea 68 | 69 | /scratch 70 | TODO.txt -------------------------------------------------------------------------------- /mwcp/tools/server/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | {{ g.title + " |" if g.title else "" }} {{ config.get('SERVICE_NAME', 'DC3-MWCP Service') }} 12 | 13 | 14 |
15 | 23 |

{{ g.title|default(config.get('SERVICE_NAME', 'DC3-MWCP Service')) }}

24 | 25 | {% block content %}{% endblock %} 26 | 27 |
28 | {% block footer %} 29 |

30 | {{ config.get('SERVICE_NAME', 'DC3-MWCP Service') }}. 31 |

32 | {% endblock %} 33 |
34 |
35 | 36 | 37 | -------------------------------------------------------------------------------- /mwcp/utils/stringutils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility used for string conversions. 3 | """ 4 | 5 | import string 6 | import sys 7 | import unicodedata 8 | 9 | 10 | def convert_to_unicode(input_value): 11 | if isinstance(input_value, str): 12 | return input_value 13 | elif isinstance(input_value, bytes): 14 | return str(input_value, encoding="latin1", errors="replace") 15 | else: 16 | return convert_to_unicode(str(input_value)) 17 | 18 | 19 | VALID_FILENAME_CHARS = "-_.() {}{}".format(string.ascii_letters, string.digits).encode("ascii") 20 | 21 | 22 | def sanitize_filename(filename: str) -> str: 23 | """ 24 | Convert given filename to sanitized version that is safe to be used to write to the file system. 25 | """ 26 | filename = convert_to_unicode(filename) 27 | filename = unicodedata.normalize("NFKD", filename) # convert accented characters 28 | filename = convert_to_unicode(bytes(c for c in filename.encode("ascii", "ignore") if c in VALID_FILENAME_CHARS)) 29 | 30 | # If in Windows, remove any `.lnk` extension to prevent issues with the file explorer. 31 | if sys.platform == "win32" and filename.lower().endswith(".lnk"): 32 | filename = filename[:-len(".lnk")] + "_lnk" 33 | 34 | return filename 35 | -------------------------------------------------------------------------------- /mwcp/config/log_config.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | disable_existing_loggers: False # fixes issue with module level loggers 3 | 4 | filters: 5 | # This filter is necessary to use the "%(level_char)s" format variable. 
6 | level_char: 7 | (): mwcp.utils.logutil.LevelCharFilter 8 | 9 | formatters: 10 | simple: 11 | format: "[%(level_char)s] (%(processName)s:%(name)s): %(message)s" 12 | error: 13 | format: " [%(name)s:%(funcName)s():%(lineno)d]: %(message)s" 14 | 15 | handlers: 16 | console: 17 | class: logging.StreamHandler 18 | formatter: simple 19 | filters: [level_char] 20 | stream: ext://sys.stderr 21 | 22 | error_file: 23 | # Custom handler necessary to fix issues that can occur in Windows. 24 | (): mwcp.utils.logutil.MPRotatingFileHandler 25 | level: WARNING 26 | formatter: error 27 | filename: "errors.log" 28 | maxBytes: 10485760 # 10MB 29 | backupCount: 3 30 | encoding: utf8 31 | 32 | null_handler: 33 | class: logging.NullHandler 34 | 35 | mwcp_server: 36 | (): mwcp.utils.logutil.ListHandler 37 | level: INFO 38 | filters: [level_char] 39 | formatter: simple 40 | entries: 1000 41 | 42 | root: 43 | level: INFO 44 | handlers: [console, error_file, mwcp_server] 45 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | This project constitutes a work of the United States Government and is not subject to domestic copyright protection under 17 USC § 105. 2 | 3 | However, because the project utilizes code licensed from contributors and other third parties, it therefore is licensed under the MIT License. http://opensource.org/licenses/mit-license.php. 
Under that license, permission is granted free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the conditions that any appropriate copyright notices and this permission notice are included in all copies or substantial portions of the Software. 4 | 5 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
6 | -------------------------------------------------------------------------------- /mwcp/tests/test_string_report/strings.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "string_report", 3 | "tags": [], 4 | "file": { 5 | "type": "file", 6 | "tags": [], 7 | "name": "input_file.bin", 8 | "description": null, 9 | "md5": "1e50210a0202497fb79bc38b6ade6c34", 10 | "sha1": "baf34551fecb48acc3da868eb85e1b6dac9de356", 11 | "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee", 12 | "architecture": null, 13 | "compile_time": null, 14 | "file_path": "C:/input_file.bin", 15 | "data": null, 16 | "derivation": null 17 | }, 18 | "strings": [ 19 | { 20 | "type": "decoded_string", 21 | "tags": [], 22 | "value": "hello", 23 | "encryption_key": null 24 | }, 25 | { 26 | "type": "decoded_string", 27 | "tags": [], 28 | "value": "world", 29 | "encryption_key": { 30 | "type": "encryption_key", 31 | "tags": [], 32 | "key": "3q2+7w==", 33 | "algorithm": null, 34 | "mode": null, 35 | "iv": null, 36 | "secret": null, 37 | "key_derivation": null 38 | } 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /mwcp/parser_config.yml: -------------------------------------------------------------------------------- 1 | Archive: 2 | description: Archive file types 3 | author: DC3 4 | parsers: 5 | - .Zip 6 | - .Gzip 7 | 8 | Decoy: 9 | description: Generic file types described as a decoy file 10 | author: DC3 11 | parsers: 12 | - .DOC 13 | - .PDF 14 | - .RTF 15 | - .JPG 16 | - .DOCX 17 | - .XLSX 18 | - .PPTX 19 | 20 | foo: 21 | description: example parser that works on any file 22 | author: DC3 23 | parsers: 24 | - .Foo 25 | 26 | GenericDropper: 27 | description: Generic Dropper 28 | author: DC3 29 | parsers: 30 | - .Overlay 31 | - .RSRC 32 | 33 | ISO: .ImageFile 34 | 35 | PDF: .Document 36 | 37 | PowerShell: .Script 38 | 39 | Python: 40 | description: Python artifacts 41 | 
author: DC3 42 | parsers: 43 | - .PyInstaller 44 | 45 | Quarantined: 46 | description: Anti-Virus Quarantined File 47 | author: DC3 48 | parsers: 49 | - .McAfee 50 | - .Defender 51 | - .SymantecQB 52 | - .SymantecSubSDK 53 | - .AhnLab 54 | - .Avast_AVG 55 | 56 | RSA: 57 | description: RSA artifacts 58 | author: DC3 59 | parsers: 60 | - .DigitalCertificate 61 | - .PrivateKey 62 | 63 | VisualBasic: 64 | description: VisualBasic Script 65 | author: DC3 66 | parsers: 67 | - .EncodedASP # must come first 68 | - .VBE 69 | - .VBScript 70 | -------------------------------------------------------------------------------- /mwcp/utils/construct/network.py: -------------------------------------------------------------------------------- 1 | """ 2 | Network constructs 3 | """ 4 | 5 | from .core import * 6 | 7 | 8 | class _MACAddressAdapter(Adapter): 9 | r""" 10 | Adapter used to format a MAC address from a list of 6 bytes 11 | 12 | e.g. 13 | >>> _MACAddressAdapter(Byte[6]).parse(b'\x00\x0c\x29\xd3\x91\xbc') 14 | '00-0c-29-d3-91-bc' 15 | """ 16 | def _encode(self, obj, context, path): 17 | return list(map(chr, obj.split("-"))) 18 | 19 | def _decode(self, obj, context, path): 20 | return '{:02x}-{:02x}-{:02x}-{:02x}-{:02x}-{:02x}'.format(*obj) 21 | 22 | 23 | # A MacAddress parsed from single bytes. 24 | MacAddress = _MACAddressAdapter(Byte[6]) 25 | 26 | 27 | class _IP4AddressAdapter(Adapter): 28 | r""" 29 | Adapter used to format a IP address from a list of four ints. 30 | 31 | e.g. 32 | >>> _IP4AddressAdapter(Byte[4]).parse(b'\x01\x02\x03\x04') 33 | '1.2.3.4' 34 | >>> _IP4AddressAdapter(Int16ul[4]).parse(b'\x01\x00\x02\x00\x03\x00\x04\x00') 35 | '1.2.3.4' 36 | """ 37 | 38 | def _encode(self, obj, context, path): 39 | return list(map(int, obj.split('.'))) 40 | 41 | def _decode(self, obj, context, path): 42 | return '{0}.{1}.{2}.{3}'.format(*obj) 43 | 44 | 45 | # An IP4Address parsed from single bytes. 
46 | IP4Address = _IP4AddressAdapter(Byte[4]) 47 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner/SiblingDispatch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parsers for test_yara_runner_sibling_dispatch 3 | """ 4 | 5 | from mwcp import Parser, FileObject 6 | 7 | 8 | class Parent(Parser): 9 | DESCRIPTION = "Parent" 10 | 11 | @classmethod 12 | def identify(cls, file_object): 13 | return b"parent" in file_object.data 14 | 15 | def run(self): 16 | self.dispatcher.add(FileObject(b"sibling 1")) 17 | self.dispatcher.add(FileObject(b"sibling 2")) 18 | 19 | 20 | class Sibling1(Parser): 21 | DESCRIPTION = "Sibling 1" 22 | 23 | @classmethod 24 | def identify(cls, file_object): 25 | return b"sibling 1" in file_object.data 26 | 27 | 28 | class Sibling2(Parser): 29 | DESCRIPTION = "Sibling 2" 30 | 31 | @classmethod 32 | def identify(cls, file_object): 33 | return b"sibling 2" in file_object.data 34 | 35 | def run(self): 36 | # Testing corner case where we dispatch a file that is a parent of an already processed sibling. 37 | sibling = self.file_object.siblings[0] 38 | assert sibling.description == "Sibling 1" # sanity check 39 | self.dispatcher.add(FileObject(b"grandchild"), parent=sibling) 40 | 41 | 42 | class Grandchild(Parser): 43 | DESCRIPTION = "Grandchild" 44 | 45 | @classmethod 46 | def identify(cls, file_object): 47 | return b"grandchild" in file_object.data 48 | -------------------------------------------------------------------------------- /mwcp/config/config.yml: -------------------------------------------------------------------------------- 1 | 2 | # Path to logging configuration file. 3 | LOG_CONFIG_PATH: ./log_config.yml 4 | 5 | # Overwrites the directory containing JSON test case files for all parsers. 6 | # If not set, the "tests" directory located within the root directory of the respective parser source 7 | # will be used. 
8 | #TESTCASE_DIR: ~/mwcp_tests 9 | 10 | # Directory containing malware samples used for testing. 11 | #MALWARE_REPO: ~/malware_repo 12 | 13 | # Optional extra parser directory to use along with registered parser extension. 14 | # This allows you so use a directory of parsers that aren't officially part of any python package. 15 | #PARSER_DIR: ~/mwcp_parsers 16 | 17 | # Path to the parser_config.yml file to use for the provided extra parser directory above. 18 | # If this is not set, the "config" attribute in the __init__.py of the parser's directory is used instead. 19 | # This must be set if the parser directory doesn't have an __init__.py file! 20 | #PARSER_CONFIG_PATH: ~/mwcp_parsers/parser_config.yml 21 | 22 | # Name (or path) of a default parser source to use if not explicitly defined. 23 | # If this is not set, all sources will be considered. 24 | # (This is useful for enforcing parsers of only a specific source to be used) 25 | #PARSER_SOURCE: acme 26 | 27 | # Directory containing yara signatures. 28 | #YARA_REPO: ~/yara_repo 29 | 30 | # Keep temporary directory created by FileObject.temp_path() 31 | # KEEP_TMP: false 32 | -------------------------------------------------------------------------------- /mwcp/tools/server/templates/upload.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 |
5 |
6 | 7 | 8 | 9 | 14 | 15 |
16 | 17 | 18 |
19 | 20 | 21 |
22 | 23 | 24 |
25 | 26 | 27 |
28 | 29 | 30 |
31 |
32 | {% endblock %} -------------------------------------------------------------------------------- /mwcp/tests/test_cli/parse.txt: -------------------------------------------------------------------------------- 1 | ----- File: test.txt ----- 2 | Field Value 3 | ------------ ---------------------------------------------------------------- 4 | Parser foo 5 | File Path test.txt 6 | Description Foo 7 | Architecture 8 | MD5 fb843efb2ffec987db12e72ca75c9ea2 9 | SHA1 5e90c4c2be31a7a0be133b3dbb4846b0434bc2ab 10 | SHA256 fe5af8c641835c24f3bbc237a659814b96ed64d2898fae4cb3d2c0ac5161f5e9 11 | Compile Time 12 | 13 | ---- Network ---- 14 | Url Protocol Address 15 | ---------------- ---------- --------- 16 | http://127.0.0.1 http 127.0.0.1 17 | 18 | ---- Socket ---- 19 | Address 20 | --------- 21 | 127.0.0.1 22 | 23 | ---- URL ---- 24 | Url Protocol 25 | ---------------- ---------- 26 | http://127.0.0.1 http 27 | 28 | ---- Residual Files ---- 29 | Filename Description Derivation MD5 Arch Compile Time 30 | ----------------- ------------------- -------------------------- -------------------------------- ------ -------------- 31 | fooconfigtest.txt example output file extracted and decompressed 5eb63bbbe01eeed093cb22bb8f5acdc3 32 | 33 | ---- Logs ---- 34 | [+] File test.txt identified as Foo. 
35 | [+] size of inputfile is 23 bytes 36 | [+] test.txt dispatched residual file: fooconfigtest.txt 37 | [+] File fooconfigtest.txt described as example output file 38 | [+] operating on inputfile test.txt 39 | 40 | ----- File Tree ----- 41 | 42 | └── 43 | 44 | 45 | -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer/report_foreign.md: -------------------------------------------------------------------------------- 1 | # File: input_file.bin 2 | | Field | Value | 3 | |:-------------|:-----------------------------------------------------------------| 4 | | Parser | FooParser | 5 | | File Path | C:/input_file.bin | 6 | | Description | SuperMalware Implant | 7 | | Architecture | | 8 | | MD5 | 1e50210a0202497fb79bc38b6ade6c34 | 9 | | SHA1 | baf34551fecb48acc3da868eb85e1b6dac9de356 | 10 | | SHA256 | 1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee | 11 | | Compile Time | | 12 | 13 | ## Miscellaneous 14 | | Key | Value | 15 | |:-------|:------------------------------------| 16 | | JAPAN | ユーザー別サイト | 17 | | CHINA | 简体中文 | 18 | | KOREA | 크로스 플랫폼으로 | 19 | | ISRAEL | מדורים מבוקשים | 20 | | EGYPT | أفضل البحوث | 21 | | RUSSIA | Десятую Международную | 22 | | MATH | ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i) | 23 | | FRANCE | français langue étrangère | 24 | | SPAIN | mañana olé | 25 | 26 | # File Tree 27 | ``` 28 | 29 | ``` 30 | 31 | -------------------------------------------------------------------------------- /mwcp/parsers/GenericDropper.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains parsers for common Dropper types containing embedded file(s) in plaintext 3 | """ 4 | 5 | from mwcp import FileObject, Parser 6 | from mwcp.utils import pefileutils 7 | 8 | 9 | class Overlay(Parser): 10 | DESCRIPTION = "Dropper (Overlay)" 11 | 12 | @classmethod 13 | def identify(cls, file_object): 14 | """ 15 | Validate input file is a PE and there is a 
pefile.PE object starting at the overlay. 16 | """ 17 | if not file_object.pe: 18 | return False 19 | overlay = file_object.pe.get_overlay() 20 | return overlay and pefileutils.obtain_pe(overlay) 21 | 22 | def run(self): 23 | """ 24 | Extract PE file from overlay and add to dispatcher 25 | """ 26 | overlay = self.file_object.pe.get_overlay() 27 | self.dispatcher.add(FileObject(overlay)) 28 | 29 | 30 | class RSRC(Parser): 31 | DESCRIPTION = "Dropper (RSRC)" 32 | 33 | @classmethod 34 | def identify(cls, file_object): 35 | """ 36 | Validate a PE file is in the resources in plaintext 37 | """ 38 | return ( 39 | file_object.pe 40 | and any(pefileutils.obtain_pe(rsrc.data) for rsrc in file_object.resources) 41 | ) 42 | 43 | def run(self): 44 | """ 45 | Extract embedded PE files from resources 46 | 47 | :return: 48 | """ 49 | for rsrc in self.file_object.resources: 50 | file = FileObject(rsrc.data, def_stub=rsrc.fname_stub) 51 | if file.pe: 52 | self.logger.info(f"PE file identified in resource {rsrc.rsrc_entry}") 53 | self.dispatcher.add(file) 54 | -------------------------------------------------------------------------------- /mwcp/utils/multi_proc.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper methods for setting up multiprocessing workers with logging capabilities 3 | """ 4 | 5 | import logging 6 | import multiprocessing as mp 7 | import multiprocessing.pool 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | from mwcp import registry 12 | from mwcp.utils import logutil 13 | 14 | 15 | def initializer(parser_sources, default_source): 16 | """Initializer function that runs at the beginning of each process creation.""" 17 | registry._sources = parser_sources # Propagate registered parser information. 18 | registry._default_source = default_source 19 | 20 | 21 | class TProcess(mp.Process): 22 | """ 23 | Slighted modified subclass of :class:`multiprocessing.Process`. 
24 | 25 | Use this in place of ``Process`` to enable logging in the spawned process. 26 | """ 27 | 28 | def __init__(self, group=None, target=None, name=None, args=(), kwargs=None): 29 | kwargs = kwargs or {} 30 | # NOTE: Forcing group to be None since BaseProcess asserts it to be None. 31 | super(TProcess, self).__init__(group=None, target=target, name=name, args=args, kwargs=kwargs) 32 | self.queue = logutil.mp_queue 33 | 34 | def run(self): 35 | logutil.setup_logging(queue=self.queue) 36 | logger.debug("Setup logger in {}".format(mp.current_process().name)) 37 | super(TProcess, self).run() 38 | 39 | 40 | class TPool(mp.pool.Pool): 41 | """ 42 | Version of :class:`multiprocessing.pool.Pool` that uses :class:`TProcess`. 43 | """ 44 | 45 | Process = TProcess 46 | 47 | def __init__(self, processes=None, maxtasksperchild=None): 48 | """Overwrite to add initializer.""" 49 | super(TPool, self).__init__( 50 | processes=processes, 51 | maxtasksperchild=maxtasksperchild, 52 | initializer=initializer, 53 | initargs=(registry._sources, registry._default_source), 54 | ) 55 | -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer/report_foreign.html: -------------------------------------------------------------------------------- 1 |

File: input_file.bin

2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 |
Field Value
Parser FooParser
File Path C:/input_file.bin
Description SuperMalware Implant
Architecture
MD5 1e50210a0202497fb79bc38b6ade6c34
SHA1 baf34551fecb48acc3da868eb85e1b6dac9de356
SHA256 1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee
Compile Time
17 | 18 |

Miscellaneous

19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
Key Value
JAPAN ユーザー別サイト
CHINA 简体中文
KOREA 크로스 플랫폼으로
ISRAELמדורים מבוקשים
EGYPT أفضل البحوث
RUSSIAДесятую Международную
MATH ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i)
FRANCEfrançais langue étrangère
SPAIN mañana olé
35 | 36 |

File Tree

37 |
38 | <input_file.bin (1e50210a0202497fb79bc38b6ade6c34) : SuperMalware Implant>
39 | 
40 | 41 | -------------------------------------------------------------------------------- /mwcp/tests/test_testing.py: -------------------------------------------------------------------------------- 1 | 2 | import mwcp 3 | from mwcp import testing 4 | 5 | 6 | def test_get_malware_repo_path(tmp_path): 7 | """Tests generating malware repo path.""" 8 | malware_repo = tmp_path / "malware_repo" 9 | malware_repo.mkdir() 10 | mwcp.config["MALWARE_REPO"] = str(malware_repo) 11 | 12 | test_file = tmp_path / "test.txt" 13 | test_file.write_bytes(b"This is some test data!") 14 | testing.add_to_malware_repo(test_file) 15 | 16 | expected_path = malware_repo / "fb84" / "fb843efb2ffec987db12e72ca75c9ea2" 17 | 18 | # Test with hashing a file. 19 | sample_path = testing.get_path_in_malware_repo(test_file) 20 | assert sample_path == expected_path 21 | 22 | # Test with md5 23 | sample_path = testing.get_path_in_malware_repo(md5="fb843efb2ffec987db12e72ca75c9ea2") 24 | assert sample_path == expected_path 25 | 26 | # Test with partial md5 27 | sample_path = testing.get_path_in_malware_repo(md5="fb843e") 28 | assert sample_path == expected_path 29 | 30 | 31 | def test_add_to_malware_repo(tmp_path): 32 | """Tests adding a file to the malware repo.""" 33 | malware_repo = tmp_path / "malware_repo" 34 | malware_repo.mkdir() 35 | test_file = tmp_path / "test.txt" 36 | test_file.write_bytes(b"This is some test data!") 37 | 38 | mwcp.config["MALWARE_REPO"] = str(malware_repo) 39 | sample_path = testing.add_to_malware_repo(test_file) 40 | expected_sample_path = malware_repo / "fb84" / "fb843efb2ffec987db12e72ca75c9ea2" 41 | assert sample_path == expected_sample_path 42 | assert expected_sample_path.exists() 43 | assert expected_sample_path.read_bytes() == test_file.read_bytes() 44 | 45 | 46 | def test_iter_md5s(): 47 | """Tests obtaining md5s for a parser based on test cases""" 48 | mwcp.register_entry_points() 49 | mwcp.config["TESTCASE_DIR"] = None # need to clear any previously set 
testcase_dir from a previous unit test. 50 | assert list(testing.iter_md5s("foo")) == ["f144899b86766688991c5d0d10902f4a"] 51 | assert list(testing.iter_md5s("bogus")) == [] 52 | -------------------------------------------------------------------------------- /mwcp/parsers/PDF.py: -------------------------------------------------------------------------------- 1 | """ 2 | PDF 3 | """ 4 | import re 5 | 6 | from mwcp import Parser, metadata 7 | 8 | 9 | class Document(Parser): 10 | """ 11 | Parses PDF file with some basic metadata extraction. 12 | """ 13 | DESCRIPTION = "PDF Document" 14 | AUTHOR = "DC3" 15 | 16 | IGNORE_DOMAINS = [ 17 | b"www.w3.org", 18 | b"ns.adobe.com", 19 | b"purl.org", 20 | ] 21 | 22 | # 2-6 character protocol -> :// -> Up to 253 alphanumeric, "-", "_", or "." characters, (which should include all 23 | # valid domains or IP addresses) -> Nothing, or a port or "/" -> (For the port or "/") any non-whitespace characters. 24 | URL_RE = re.compile( 25 | b"[a-zA-Z]{2,6}" # scheme 26 | b"://" 27 | b"([\w._\-]+(:[\w._\-]+)?@)?" # user info 28 | b"[\w._\-]{4,253}" # host 29 | b"(:[\d]{1,5})?" # port 30 | b"(/[\w._\-~=%]*)*" # path 31 | b"(\?[\w._\-~=&,%]+)?" # query 32 | b"(#[\w._\-~]+)?" # fragment 33 | ) 34 | EMAIL_RE = re.compile(b"[\w.+-]+@([A-Za-z0-9](|[\w-]{0,61}[A-Za-z0-9])\.)+[A-Za-z]{2,6}") 35 | 36 | @classmethod 37 | def identify(cls, file_object): 38 | return file_object.data.startswith(b"%PDF") and ( 39 | cls.URL_RE.search(file_object.data) 40 | or cls.EMAIL_RE.search(file_object.data) 41 | ) 42 | 43 | def extract_urls(self): 44 | """ 45 | Statically extract URLs embedded in the PDF. 46 | """ 47 | for match in self.URL_RE.finditer(self.file_object.data): 48 | url = match.group() 49 | if not any(domain in url for domain in self.IGNORE_DOMAINS): 50 | self.report.add(metadata.URL(url)) 51 | 52 | def extract_emails(self): 53 | """ 54 | Statically extract URLs embedded in the PDF. 
55 | """ 56 | for match in self.EMAIL_RE.finditer(self.file_object.data): 57 | self.report.add(metadata.EmailAddress(match.group())) 58 | 59 | def run(self): 60 | self.extract_urls() 61 | self.extract_emails() 62 | -------------------------------------------------------------------------------- /mwcp/parsers/PowerShell.py: -------------------------------------------------------------------------------- 1 | 2 | import re 3 | from typing import List 4 | 5 | from mwcp import Parser, metadata 6 | 7 | 8 | class Script(Parser): 9 | """ 10 | Generic parser for pulling suspect URLs from a Powershell script 11 | """ 12 | DESCRIPTION = "PowerShell Script" 13 | AUTHOR = "DC3" 14 | 15 | INVALID_DOMAINS = [ 16 | "ipify.org", 17 | "whatismyipaddress.com" 18 | ] 19 | 20 | URL_REGEX = re.compile( 21 | ( 22 | # HTTP/HTTPS. 23 | b"(https?://)" 24 | b"(([" 25 | # IP address. 26 | b"(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 27 | b"(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 28 | b"(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 29 | b"(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])]|" 30 | # Or domain name. 31 | b"[a-zA-Z0-9.-]+)" 32 | # Optional port. 33 | b"(:\\d+)?" 34 | # URI. 35 | b"(/[()a-zA-Z0-9_:%=/.-]*)?" 36 | ) 37 | ) 38 | 39 | @classmethod 40 | def identify(cls, file_object): 41 | return file_object.name.endswith(".ps1") 42 | 43 | def extract_urls(self, data: bytes) -> List[str]: 44 | """ 45 | Extract URLs using regular expression. 46 | 47 | :param data: Data to search for URLs in 48 | :return: List of extracted URLs (with duplicates removed) 49 | :rtype: list[str] 50 | """ 51 | urls = set() 52 | for match in self.URL_REGEX.finditer(data): 53 | url = match.group().decode() 54 | if not any(invalid in url for invalid in self.INVALID_DOMAINS): 55 | urls.add(url) 56 | return list(urls) 57 | 58 | def run(self): 59 | """ 60 | Presently only search for extract-able URLs. 61 | """ 62 | # General report of URLS. 
63 | urls = self.extract_urls(self.file_object.data) 64 | for url in urls: 65 | self.report.add(metadata.URL(url)) 66 | -------------------------------------------------------------------------------- /mwcp/tests/test_stix.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests STIX Reports. 3 | """ 4 | import logging 5 | import json 6 | 7 | import pytest 8 | 9 | import mwcp 10 | 11 | 12 | class CheatUUID: 13 | """ 14 | Used to provide a mock that overrides the uuid.uuid4 function with something that is deterministic 15 | """ 16 | def __init__(self): 17 | self.counter = 0 18 | 19 | def uuid4(self): 20 | self.counter += 1 21 | return "00000000-0000-4006-9000-{:012d}".format(self.counter) 22 | 23 | 24 | @pytest.fixture 25 | def filled_report(report, metadata_items): 26 | """ 27 | Provides a report filled with metadata examples seen above. 28 | """ 29 | logger = logging.getLogger("test_report") 30 | 31 | with report: 32 | report.input_file.description = "SuperMalware Implant" 33 | 34 | for item in metadata_items: 35 | report.add(item) 36 | 37 | logger.info("Test info log") 38 | logger.error("Test error log") 39 | logger.debug("Test debug log") 40 | 41 | report.add_tag("test", "tagging") 42 | 43 | return report 44 | 45 | 46 | def test_report_stix(datadir, filled_report, mocker): 47 | # Instead of creating UUIDv4s we will auto increment them to allow easier compares 48 | uuid_generator = CheatUUID() 49 | mocker.patch( 50 | 'uuid.uuid4', 51 | uuid_generator.uuid4 52 | ) 53 | 54 | # Writer must be initialized with a fixed time so we can easily compare results 55 | # TODO: Look into using freezegun library. 
56 | actual = filled_report.as_stix(fixed_timestamp="2022-01-01T07:32:00.000Z") 57 | print(actual) 58 | actual = json.loads(actual) 59 | with open(datadir / "report.json", "rt") as input_file: 60 | expected = json.load(input_file) 61 | 62 | # sometimes the ordering of sco_refs will change so this cleans them up 63 | for obj in expected["objects"]: 64 | # always keep the current version of MWCP for the expected result 65 | if obj["type"] == "malware-analysis": 66 | obj["version"] = mwcp.__version__ 67 | 68 | assert actual == expected 69 | -------------------------------------------------------------------------------- /mwcp/tests/test_cli/csv_cli.csv: -------------------------------------------------------------------------------- 1 | MD5,MetaIndex,Category,Field,Value 2 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,parser,foo 3 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,filename,test.txt 4 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,description,Foo 5 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,architecture, 6 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,compile_time, 7 | fb843efb2ffec987db12e72ca75c9ea2,0,Input File,derivation, 8 | fb843efb2ffec987db12e72ca75c9ea2,1,URL,url,http://127.0.0.1 9 | fb843efb2ffec987db12e72ca75c9ea2,1,URL,path, 10 | fb843efb2ffec987db12e72ca75c9ea2,1,URL,query, 11 | fb843efb2ffec987db12e72ca75c9ea2,1,URL,protocol,http 12 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,url.url,http://127.0.0.1 13 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,path, 14 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,query, 15 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,protocol,http 16 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,address,127.0.0.1 17 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,port, 18 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,network_protocol, 19 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,listen, 20 | fb843efb2ffec987db12e72ca75c9ea2,2,Network,credential, 21 | fb843efb2ffec987db12e72ca75c9ea2,3,Socket,address,127.0.0.1 22 
| fb843efb2ffec987db12e72ca75c9ea2,3,Socket,port, 23 | fb843efb2ffec987db12e72ca75c9ea2,3,Socket,network_protocol, 24 | fb843efb2ffec987db12e72ca75c9ea2,3,Socket,listen, 25 | fb843efb2ffec987db12e72ca75c9ea2,4,File,name,fooconfigtest.txt 26 | fb843efb2ffec987db12e72ca75c9ea2,4,File,description,example output file 27 | fb843efb2ffec987db12e72ca75c9ea2,4,File,md5,5eb63bbbe01eeed093cb22bb8f5acdc3 28 | fb843efb2ffec987db12e72ca75c9ea2,4,File,sha1,2aae6c35c94fcfb415dbe95f408b9ce91ee846ed 29 | fb843efb2ffec987db12e72ca75c9ea2,4,File,sha256,b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9 30 | fb843efb2ffec987db12e72ca75c9ea2,4,File,architecture, 31 | fb843efb2ffec987db12e72ca75c9ea2,4,File,compile_time, 32 | fb843efb2ffec987db12e72ca75c9ea2,4,File,file_path, 33 | fb843efb2ffec987db12e72ca75c9ea2,4,File,data, 34 | fb843efb2ffec987db12e72ca75c9ea2,4,File,derivation,extracted and decompressed 35 | 36 | -------------------------------------------------------------------------------- /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | name: Build Pipeline 2 | 3 | on: [ push ] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | # This workflow can be matrixed against multiple Python versions if desired. eg. 
[3.7, 3.8, 3.9, "3.10"] 11 | python-version: [ "3.11" ] 12 | 13 | steps: 14 | # Get the code from the repository to be linted, packaged, and pushed 15 | - name: Get Repo 16 | uses: actions/checkout@v3 17 | 18 | # Setup the Python environment 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | 24 | # Install the packages for linting and building the package 25 | - name: Prepare Build Environment 26 | run: | 27 | pip install -q flake8 twine wheel nox 28 | 29 | # Lint the Python code to check for syntax errors or code smell 30 | - name: Lint with Flake8 31 | run: | 32 | # stop the build if there are Python syntax errors or undefined names 33 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 34 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 35 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 36 | 37 | # Build the distributable package as well as the release patch 38 | - name: Build Objects 39 | if: startsWith(github.ref, 'refs/tags') 40 | run: nox -s build 41 | 42 | # Ensure the objects were packaged correctly and there wasn't an issue with 43 | # the compilation or packaging process. 44 | - name: Check Objects 45 | if: startsWith(github.ref, 'refs/tags') 46 | run: twine check dist/* 47 | 48 | # If this commit is the result of a Git tag, push the wheel and tar packages 49 | # to the PyPi registry 50 | - name: Publish to PyPI 51 | if: startsWith(github.ref, 'refs/tags') 52 | run: twine upload --repository-url https://upload.pypi.org/legacy/ -u __token__ -p ${{ secrets.PYPI_API_TOKEN }} --skip-existing --verbose dist/* -------------------------------------------------------------------------------- /mwcp/parsers/Decoy.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains parsers for common Decoy documents. 
3 | """ 4 | 5 | from mwcp import Parser 6 | 7 | 8 | class Base(Parser): 9 | # Must be implemented in child class 10 | HEADER = None 11 | EXT = None 12 | 13 | @classmethod 14 | def identify(cls, file_object): 15 | """ 16 | Validate the file starts with the file header 17 | """ 18 | if not cls.HEADER: 19 | raise NotImplementedError("[*] HEADER was not set.") 20 | return file_object.data.startswith(cls.HEADER) 21 | 22 | def run(self): 23 | # Update file extension if unknown or generic .bin 24 | if self.EXT and self.file_object.ext in ("", ".bin"): 25 | self.file_object.ext = self.EXT 26 | 27 | 28 | class DOC(Base): 29 | DESCRIPTION = "Decoy Document (.doc)" 30 | 31 | HEADER = b"\xd0\xcf\x11\xe0" 32 | EXT = ".doc" 33 | 34 | 35 | class PDF(Base): 36 | DESCRIPTION = "Decoy Document (.pdf)" 37 | 38 | HEADER = b"%PDF-" 39 | EXT = ".pdf" 40 | 41 | 42 | class RTF(Base): 43 | DESCRIPTION = "Decoy Document (.rtf)" 44 | 45 | HEADER = b"{\\rt" 46 | EXT = ".rtf" 47 | 48 | 49 | class JPG(Base): 50 | DESCRIPTION = "Decoy (.jpg)" 51 | 52 | HEADER = b"\xff\xd8\xff\xe0" 53 | EXT = ".jpg" 54 | 55 | 56 | class XMLDocument(Base): 57 | DESCRIPTION = "Decoy XML Document" 58 | 59 | HEADER = b"PK\x03\x04" 60 | # Must be implemented by child class 61 | RELS_PATH = None 62 | 63 | @classmethod 64 | def identify(cls, file_object): 65 | if not super().identify(file_object): 66 | return False 67 | if cls.RELS_PATH: 68 | return cls.RELS_PATH in file_object.data 69 | else: 70 | return True 71 | 72 | 73 | class DOCX(XMLDocument): 74 | DESCRIPTION = "Decoy Document (.docx)" 75 | 76 | EXT = ".docx" 77 | RELS_PATH = b"word/_rels" 78 | 79 | 80 | class XLSX(XMLDocument): 81 | DESCRIPTION = "Decoy Document (.xlsx)" 82 | 83 | EXT = ".xlsx" 84 | RELS_PATH = b"xl/_rels" 85 | 86 | 87 | class PPTX(XMLDocument): 88 | DESCRIPTION = "Decoy Document (.pptx)" 89 | 90 | EXT = ".pptx" 91 | RELS_PATH = b"ppt/_rels" 92 | -------------------------------------------------------------------------------- 
/mwcp/utils/construct/datetime_.py: -------------------------------------------------------------------------------- 1 | """ 2 | Date/Time constructs 3 | """ 4 | 5 | from __future__ import absolute_import 6 | 7 | import datetime 8 | 9 | from .core import * 10 | 11 | 12 | # TODO: Implement _encode. 13 | class _DateTimeDateDataAdapter(Adapter): 14 | r""" 15 | Adapter for a C# DateTime.dateData object to DateTime format. Obtain the DateTime.Ticks and the DateTime.Kind 16 | property to format datetime. 17 | 18 | 19 | >>> _DateTimeDateDataAdapter(Int64sl).parse(b'\x80\xb4N3\xd1\xd4\xd1H') 20 | '2014-11-23 01:09:01 UTC' 21 | """ 22 | def _decode(self, obj, context, path): 23 | ticks = obj & 0x3fffffffffffffff 24 | kind = (obj >> 62) & 0x03 25 | converted_ticks = datetime.datetime(1, 1, 1) + datetime.timedelta(microseconds=ticks / 10) 26 | if kind == 0: 27 | return converted_ticks.strftime("%Y-%m-%d %H:%M:%S") 28 | elif kind == 1: 29 | return converted_ticks.strftime("%Y-%m-%d %H:%M:%S UTC") 30 | elif kind == 2: 31 | return converted_ticks.strftime("%Y-%m-%d %H:%M:%S Local") 32 | 33 | 34 | DateTimeDateData = _DateTimeDateDataAdapter(Int64sl) 35 | 36 | 37 | # TODO: Implement _encode 38 | class EpochTimeAdapter(Adapter): 39 | r""" 40 | Adapter to convert time_t, EpochTime, to an isoformat 41 | 42 | >>> EpochTimeAdapter(construct.Int32ul, tz=datetime.timezone.utc).parse(b'\xff\x93\x37\x57') 43 | '2016-05-14T21:09:19+00:00' 44 | >>> EpochTimeAdapter(construct.Int32ul).parse(b'\xff\x93\x37\x57') 45 | '2016-05-14T17:09:19' 46 | """ 47 | def __init__(self, subcon, tz=None): 48 | """ 49 | :param tz: Optional timezone object, default is localtime 50 | :param subcon: subcon to parse EpochTime. 
51 | """ 52 | super().__init__(subcon) 53 | self._tz = tz 54 | 55 | def _decode(self, obj, context, path): 56 | try: 57 | return datetime.datetime.fromtimestamp(obj, tz=self._tz).isoformat() 58 | except OSError as e: 59 | raise construct.ConstructError(e) 60 | 61 | 62 | # Add common helpers 63 | EpochTime = EpochTimeAdapter(Int32ul) 64 | EpochTimeUTC = EpochTimeAdapter(construct.Int32ul, tz=datetime.timezone.utc) 65 | -------------------------------------------------------------------------------- /mwcp/parsers/foo.py: -------------------------------------------------------------------------------- 1 | """This is an example parser used to show the different methods of adding data to the reporter.""" 2 | import logging 3 | import os 4 | 5 | from mwcp import Parser, FileObject, metadata 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class Foo(Parser): 11 | DESCRIPTION = "Foo" 12 | 13 | @classmethod 14 | def identify(cls, file_object): 15 | # identifies if the parser can parse the given file. 16 | # checking filename to avoid infinite loop. 17 | return file_object.name != "fooconfigtest.txt" 18 | 19 | def run(self): 20 | # retrieve input file 21 | input_file = self.file_object 22 | 23 | # Pull external information from user or other parsers through knowledge_base 24 | secret = self.knowledge_base.get("secret", None) 25 | if secret: 26 | self.report.add(metadata.Other("secret_using_external_knowledge", secret + "!")) 27 | # Pass in our own information for other parsers. 
28 | self.knowledge_base["encryption_key"] = b"\xde\xad\xbe\xef" 29 | 30 | # standardized metadata 31 | self.report.add(metadata.URL("http://127.0.0.1")) 32 | 33 | # demonstrate access to sample 34 | logger.info(f"size of inputfile is {len(input_file.data)} bytes") 35 | 36 | # other, non-standardized metadata 37 | # also demonstrate use of pefile object 38 | if input_file.pe: 39 | self.report.add(metadata.Other( 40 | "section0", input_file.pe.sections[0].Name.rstrip(b"\x00") 41 | )) 42 | 43 | # Dispatch residual files to also be processed. 44 | self.dispatcher.add(FileObject( 45 | b"hello world", 46 | file_name="fooconfigtest.txt", 47 | description="example output file", 48 | derivation="extracted and decompressed", 49 | )) 50 | # Alternatively we can manually report a residual file without being processed. 51 | if False: 52 | self.report.add(metadata.File( 53 | "fooconfigtest.txt", description="example output file", data=b"hello world" 54 | )) 55 | 56 | # demonstrate use of filename() 57 | logger.info(f"operating on inputfile {input_file.name}") 58 | -------------------------------------------------------------------------------- /mwcp/parsers/Archive.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parsers for archive type files. 3 | """ 4 | 5 | import io 6 | import ntpath 7 | import pathlib 8 | import tarfile 9 | import zipfile 10 | 11 | from mwcp import Parser, FileObject 12 | 13 | 14 | class Zip(Parser): 15 | DESCRIPTION = "Zip Archive File" 16 | 17 | ZIP_HEADER = b"PK" 18 | 19 | @classmethod 20 | def identify(cls, file_object): 21 | """ 22 | Identify as a Zip archive file. 23 | """ 24 | return file_object.data.startswith(cls.ZIP_HEADER) 25 | 26 | def parse_extracted(self, file_name, file_data): 27 | self.dispatcher.add(FileObject(file_data, file_name=file_name)) 28 | 29 | def run(self): 30 | """ 31 | Use the zipfile Python library to extract the contents of a Zip archive. 
32 | """ 33 | self.logger.info("Attempting to extract files from Zip archive.") 34 | try: 35 | z = zipfile.ZipFile(io.BytesIO(self.file_object.data)) 36 | for obj in z.infolist(): 37 | file_data = z.read(obj) 38 | file_name = ntpath.basename(obj.filename) 39 | # see if there is data, before passing to the parse_extracted function 40 | if not len(file_data): 41 | continue 42 | self.parse_extracted(file_name, file_data) 43 | except IOError: 44 | self.logger.exception("Failed to extract Zip archive.") 45 | except zipfile.BadZipfile: 46 | self.logger.exception("Invalid zip file") 47 | 48 | 49 | class Gzip(Parser): 50 | DESCRIPTION = "Gzip Archive file" 51 | 52 | HEADER = b"\x1F\x8B" 53 | 54 | @classmethod 55 | def identify(cls, file_object): 56 | return file_object.data.startswith(cls.HEADER) 57 | 58 | def parse_extracted(self, file_name, file_data): 59 | self.dispatcher.add(FileObject(file_data, file_name=file_name)) 60 | 61 | def run(self): 62 | with self.file_object.open() as fo: 63 | with tarfile.open(fileobj=fo, mode="r:gz") as tar: 64 | for member in tar.getmembers(): 65 | if member.isfile(): 66 | data = tar.extractfile(member).read() 67 | name = pathlib.Path(member.name).name 68 | self.parse_extracted(name, data) 69 | -------------------------------------------------------------------------------- /mwcp/utils/construct/MIPS.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper constructs for parsing the MIPS instruction set. 3 | This module will be imported along with 'from mwcp.utils import construct' 4 | and accessible from the submodule "MIPS". (e.g. 
construct.MIPS.lw) 5 | 6 | reference: github.com/MIPT-ILab/mipt-mips/wiki/MIPS-Instruction-Set 7 | """ 8 | 9 | from .core import * 10 | from .core import this 11 | 12 | 13 | _REGISTERS = { 14 | '$zero': 0, 15 | '$at': 1, 16 | '$v0': 2, '$v1': 3, 17 | '$a0': 4, '$a1': 5, '$a2': 6, '$a3': 7, 18 | '$t0': 8, '$t1': 9, '$t2': 10, '$t3': 11, '$t4': 12, '$t5': 13, '$t6': 14, '$t7': 15, 19 | '$s0': 16, '$s1': 17, '$s2': 18, '$s3': 19, '$s4': 20, '$s5': 21, '$s6': 22, '$s7': 23, 20 | '$t8': 24, '$t9': 25, 21 | '$k0': 26, '$k1': 27, 22 | '$gp': 28, '$sp': 29, '$fp': 30, '$ra': 31, 23 | } 24 | _Register = Enum(BitsInteger(5), **_REGISTERS) 25 | 26 | # I-type instruction 27 | _I_inst = Struct( 28 | *BitStruct( 29 | 'opcode' / Enum( 30 | BitsInteger(6), 31 | # NOTE: Some opcode values are reserved for other instruction formats 32 | # and we should let construct fail if it sees one. 33 | j=0x02, jal=0x03, beq=0x04, bne=0x05, blez=0x06, bgtz=0x07, 34 | addi=0x08, addiu=0x09, slti=0x0A, sltiu=0x0B, andi=0x0C, ori=0x0D, xori=0x0E, lui=0x0F, 35 | beql=0x14, bnel=0x15, blezl=0x16, bgtzl=0x17, 36 | daddi=0x18, daddiu=0x19, ldl=0x1A, ldr=0x1B, jalx=0x1D, 37 | lb=0x20, lh=0x21, lwl=0x22, lw=0x23, lbu=0x24, lhu=0x25, lwr=0x26, lwu=0x27, 38 | sb=0x28, sh=0x29, swl=0x2A, sw=0x2B, sdl=0x2C, sdr=0x2D, swr=0x2E, cache=0x2F, 39 | ll=0x30, lwc1=0x31, lwc2=0x32, pref=0x33, lld=0x34, ldc1=0x35, ldc2=0x36, ld=0x37, 40 | sc=0x38, swc1=0x39, swc2=0x3A, scd=0x3C, sdc1=0x3D, sdc2=0x3E, sd=0x3F, 41 | ), 42 | 'src_register' / _Register, 43 | 'target_register' / _Register, 44 | # 'imm_constant' / construct.BitsInteger(16) 45 | ), 46 | # Need to move immediate outside of BitStruct to create signed number. 
class STIXResult:
    """
    Provides a means to return STIX 2.1 content

    :var linked_stix: An array of STIX objects that should be linked to a parent malware analysis object
    :var unlinked_stix: An array of STIX objects that should not be linked to a parent malware analysis object.
        This can include relationship objects, objects connected by relationship objects,
        and objects with embedded references like Notes
    :var note_content: The content of the note which will be attached to the STIX file object being analyzed by the
        malware analysis
    :var note_labels: The labels of the note which will be attached to the STIX file object being analyzed by the
        malware analysis
    :var fixed_timestamp: Optional timestamp string passed through to generated tag Notes.
    """

    def __init__(self, note_content: str = "", fixed_timestamp: str | None = None):
        self.linked_stix = []
        self.unlinked_stix = []
        self.note_content = note_content
        self.note_labels = []
        self.fixed_timestamp = fixed_timestamp

    def add_linked(self, stix_content):
        """Add STIX content that should be linked to the parent malware analysis object."""
        self.linked_stix.append(stix_content)

    def add_unlinked(self, stix_content):
        """Add STIX content that should not be linked to the parent malware analysis object."""
        self.unlinked_stix.append(stix_content)

    def create_tag_note(self, metadata, stix_content):
        """
        Create a Note carrying the metadata's tags and record it as unlinked content.
        Nothing is added if the metadata produces no note.
        """
        note = metadata.as_stix_tags(stix_content, self.fixed_timestamp)
        if note:
            self.unlinked_stix.append(note)

    def _absorb_note_content(self, other: STIXResult):
        """
        Append the other result's note content, newline-separated.
        Shared by merge() and merge_ref() so the joining rule stays in one place.
        """
        if self.note_content == "":
            self.note_content = other.note_content
        elif other.note_content != "":
            self.note_content += "\n" + other.note_content

    def merge(self, other: STIXResult):
        """
        Merge another result into this one, preserving the linked/unlinked split.

        NOTE(review): note_labels are not merged here (or in merge_ref) — confirm
        whether that is intentional.
        """
        self.linked_stix.extend(other.linked_stix)
        self.unlinked_stix.extend(other.unlinked_stix)
        self._absorb_note_content(other)

    def merge_ref(self, other: STIXResult):
        """
        A merge for when the target is a reference for the current object.
        All of the other result's content (including linked) becomes unlinked.
        """
        self.unlinked_stix.extend(other.linked_stix)
        self.unlinked_stix.extend(other.unlinked_stix)
        self._absorb_note_content(other)
4 | """ 5 | 6 | import dragodis 7 | import rugosa 8 | 9 | from mwcp import metadata, Parser 10 | 11 | 12 | class Implant(Parser): 13 | DESCRIPTION = "Sample Implant" 14 | 15 | @classmethod 16 | def identify(cls, file_object): 17 | return file_object.md5 == "e1b6be6c0c2db8b3d4dca56062ca6301" 18 | 19 | @staticmethod 20 | def xor_decrypt(key, enc_data): 21 | return bytes((x ^ key) for x in enc_data) 22 | 23 | def find_strings(self, dis: dragodis.Disassembler): 24 | """ 25 | Extracts and reports DecodedString objects for the parameters following xor encryption function: 26 | 27 | void encrypt(char *s, char key) 28 | { 29 | while (*s) 30 | *s++ ^= key; 31 | } 32 | """ 33 | emulator = rugosa.Emulator(dis) 34 | pattern = rugosa.re.compile(br"\x8b\x45\x08\x0f\xbe\x08") 35 | for encrypt_func in pattern.find_functions(dis): 36 | self.logger.info("Found XOR encrypt function at: 0x%x", encrypt_func.start) 37 | for call_ea in encrypt_func.calls_to: 38 | self.logger.debug("Tracing 0x%08x", call_ea) 39 | # Extract arguments for call to xor function. 40 | context = emulator.context_at(call_ea) 41 | enc_str_ptr, key = context.get_function_arg_values() 42 | 43 | enc_string_data = rugosa.get_terminated_bytes(dis, enc_str_ptr) 44 | dec_string_data = self.xor_decrypt(key, enc_string_data) 45 | string = rugosa.DecodedString( 46 | dec_data=dec_string_data, 47 | enc_data=enc_string_data, 48 | # data is encrypted in-place, so include string pointer as decoded source. 49 | dec_source=enc_str_ptr, 50 | ) 51 | # Annotate underlying disassembler with decrypted data. 52 | string.patch(dis, rename=False) 53 | 54 | # Report decoded string. 
55 | self.report.add(metadata.DecodedString( 56 | str(string), encryption_key=metadata.EncryptionKey(bytes([key]), "xor") 57 | )) 58 | 59 | def run(self): 60 | with self.file_object.disassembly(report=self.report) as dis: 61 | self.find_strings(dis) 62 | -------------------------------------------------------------------------------- /mwcp/tests/test_disassembly.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests components that use Dragodis disassembly. 3 | """ 4 | 5 | import os 6 | 7 | import pytest 8 | 9 | import mwcp 10 | from mwcp import metadata 11 | from mwcp.tests.test_parsers import _test_parser 12 | 13 | dragodis = pytest.importorskip("dragodis", reason="Dragodis not installed") 14 | 15 | 16 | @pytest.mark.parametrize("backend", ["ida", "ghidra"]) 17 | def test_disassembly(datadir, backend): 18 | """Tests basic disassembly""" 19 | strings_exe = datadir / "strings.exe" 20 | 21 | input_file = mwcp.FileObject.from_path(strings_exe) 22 | try: 23 | with input_file.disassembly(backend) as dis: 24 | insn = dis.get_instruction(0x401000) 25 | assert insn.mnemonic == "push" 26 | except dragodis.NotInstalledError as e: 27 | pytest.skip(e) 28 | 29 | 30 | @pytest.mark.parametrize("backend", ["ida", "ghidra"]) 31 | def test_file_object_disassembly(datadir, backend): 32 | """Tests disassembler project file gets reported when using FileObject.disassembly()""" 33 | strings_exe = datadir / "strings.exe" 34 | 35 | input_file = mwcp.FileObject.from_path(strings_exe) 36 | report = mwcp.Report(input_file, "FooParser") 37 | with report: 38 | try: 39 | with input_file.disassembly(backend, report=report) as dis: 40 | line = dis.get_line(0x401000) 41 | line.set_comment("test comment") 42 | except dragodis.NotInstalledError as e: 43 | pytest.skip(e) 44 | # After we leave disassembly context, we should see the project file in the report. 
@pytest.mark.parametrize("backend", ["ida", "ghidra"])
def test_Sample(pytestconfig, datadir, backend):
    """Tests running the Sample parser."""
    # Register the test parser directory so the Sample parser can be found.
    mwcp.register_parser_directory(str(datadir), source_name="test")
    # Force dragodis to use the backend under test.
    os.environ["DRAGODIS_DISASSEMBLER"] = backend
    input_file_path = datadir / "strings.exe"
    results_path = datadir / "strings.json"

    try:
        _test_parser(pytestconfig, input_file_path, results_path)
    except dragodis.NotInstalledError as e:
        # Backend isn't installed locally; skip rather than fail.
        # pytest.skip() expects a string reason, not an exception object.
        pytest.skip(str(e))
4 | """ 5 | 6 | from setuptools import setup, find_namespace_packages 7 | 8 | setup( 9 | name="mwcp", 10 | author="DC3", 11 | author_email="dc3.tsd@us.af.mil", 12 | keywords="malware", 13 | url="https://github.com/dod-cyber-crime-center/DC3-MWCP/", 14 | packages=find_namespace_packages(), 15 | include_package_data=True, 16 | license='MIT', 17 | classifiers=[ 18 | 'Development Status :: 5 - Production/Stable', 19 | 'Intended Audience :: Developers', 20 | 'License :: OSI Approved :: MIT License', 21 | 'Programming Language :: Python :: 3', 22 | 'Programming Language :: Python :: 3.9', 23 | 'Programming Language :: Python :: 3.10', 24 | 'Programming Language :: Python :: 3.11', 25 | 'Programming Language :: Python :: 3.12', 26 | ], 27 | python_requires=">=3.9", 28 | entry_points={ 29 | 'console_scripts': [ 30 | 'mwcp = mwcp.cli:main', 31 | 'poshdeob = mwcp.utils.poshdeob:main', 32 | 'mwcp_update_legacy_tests = mwcp.tools.update_legacy_tests:main', 33 | ], 34 | 'mwcp.parsers': [ 35 | 'dc3 = mwcp.parsers', 36 | ] 37 | }, 38 | install_requires=[ 39 | 'anytree', 40 | 'appdirs', 41 | 'attrs>=20.3.0', 42 | 'bitarray', 43 | 'cattrs', 44 | 'click>=8.0.1', 45 | 'construct >=2.9.45, <2.11', 46 | 'defusedxml', 47 | 'future', 48 | 'isodate', 49 | 'jinja2', # For construct.html_hex() 50 | 'jsonschema_extractor>=1.0', 51 | 'lief>=0.16.0;python_version>="3.9"', 52 | 'packaging', 53 | 'pandas', 54 | 'pefile>=2019.4.18', 55 | 'pyasn1', 56 | 'pyasn1_modules', 57 | 'pyelftools', 58 | 'pyparsing', 59 | 'pytest>=6.0.0', 60 | 'pytest-datadir', 61 | 'pytest-xdist', 62 | 'pytest-mock', 63 | 'pytest-cov', 64 | 'pyyaml', 65 | 'requests', 66 | 'ruamel.yaml', 67 | 'six', 68 | 'tabulate[widechars]<1.0.0', 69 | 'stix2', 70 | 'yara-python', 71 | # For the server and API 72 | 'flask', 73 | 'pygments', 74 | 75 | # Dependencies for builtin parsers. 
76 | 'pycdlib', 77 | 'pycryptodome', 78 | 'olefile', 79 | ], 80 | extras_require={ 81 | 'dragodis': ['dragodis>=0.2.0'], 82 | 'kordesii': ['kordesii>=2.0.0'], 83 | 'testing': [ 84 | 'jsonschema', 85 | 'dragodis', 86 | 'rugosa', 87 | ], 88 | } 89 | ) 90 | -------------------------------------------------------------------------------- /mwcp/tests/test_disassembly/strings.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | char string01[] = "Idmmn!Vnsme "; 5 | char string02[] = "Vgqv\"qvpkle\"ukvj\"ig{\"2z20"; 6 | char string03[] = "Wkf#rvj`h#aqltm#el{#ivnsp#lufq#wkf#obyz#gld-"; 7 | char string04[] = "Keo$mw$wpvkjc$ej`$ehwk$cmraw$wle`a*"; 8 | char string05[] = "Dfla%gpwkv%mji`v%lk%rjji%fijqm+"; 9 | char string06[] = "Egru&ghb&biau&cgen&ngrc&rnc&irnct("; 10 | char string13[] = "\\cv}3g{v3pargv3qfg3w|}4g3qavrx3g{v3t\x7fr``="; 11 | char string17[] = "C\x7frer7c\x7fr7q{xxs7zve|7~d7cry7~yt\x7frd9"; 12 | char string1a[] = "+()./,-\"#*"; 13 | char string23[] = "`QFBWFsQL@FPPb"; 14 | char string27[] = "tSUdFS"; 15 | char string40[] = "\x01\x13\x10n\x0e\x05\x14"; 16 | char string46[] = "-\",5 , v,tr4v,trv4t,v\x7f,ttt"; 17 | char string73[] = "@AKJDGBA@KJGDBJKAGDC"; 18 | char string75[] = "!\x1d\x10U\x05\x14\x06\x01U\x02\x1c\x19\x19U\x19\x1a\x1a\x1eU\x17\x07\x1c\x12\x1d\x01\x10\x07U\x01\x1a\x18\x1a\x07\x07\x1a\x02["; 19 | char string77[] = "4\x16\x05\x04W\x16\x19\x13W\x15\x02\x04\x04\x12\x04W\x04\x03\x16\x1b\x1b\x12\x13W\x1e\x19W\x04\x16\x19\x13W\x13\x05\x1e\x11\x03\x04Y"; 20 | char string7a[] = ".\x12\x1fZ\x10\x1b\x19\x11\x1f\x0eZ\x12\x0f\x14\x1dZ\x15\x14Z\x0e\x12\x1fZ\x18\x1b\x19\x11Z\x15\x1cZ\x0e\x12\x1fZ\r\x13\x1e\x1fZ\x19\x12\x1b\x13\x08T"; 21 | char string7f[] = "LMFOGHKNLMGFOHKFGNLKHNMLOKGNKGHFGLHKGLMHKGOFNMLHKGFNLMJNMLIJFGNMLOJIMLNGFJHNM";; 22 | 23 | 24 | 25 | void encrypt(char *s, char key) 26 | { 27 | while (*s) 28 | *s++ ^= key; 29 | } 30 | 31 | void decrypt() 32 | { 33 | encrypt(&string01[0], 0x01); 
class ImageFile(Parser):
    DESCRIPTION = "ISO Image File"
    AUTHOR = "DC3"

    MAGIC = b"CD001"
    # Offsets where a volume descriptor's "CD001" identifier may appear
    # (varies with the image's sector size).
    OFFSETS = [0x8001, 0x8801, 0x9001]

    # Path keyword arguments understood by PyCdlib's walk() and
    # get_file_from_iso_fp(), one per supported directory structure extension.
    _PATH_KEYS = ("iso_path", "udf_path", "joliet_path", "rr_path")

    @classmethod
    def identify(cls, file_object):
        """
        Identify as an ISO image by looking for the "CD001" magic at any of
        the known volume descriptor offsets.
        """
        for offset in cls.OFFSETS:
            if file_object.data[offset:offset+len(cls.MAGIC)] == cls.MAGIC:
                return True
        return False

    def walk_handler(self, iso, **kwargs):
        """
        Uses the iso.walk function to walk the ISO image depending on the path
        provided in kwargs, dispatching every file found.

        :param iso: an open PyCdlib instance
        :param kwargs: Keyword arguments to be passed to the iso.walk function.
            Note that this must include either iso_path, udf_path, joliet_path, or
            rr_path; otherwise this function does nothing.
        :return:
        """
        # Resolve which path keyword we were given once, rather than
        # re-testing kwargs for every extracted file.
        path_key = next((key for key in self._PATH_KEYS if key in kwargs), None)
        if path_key is None:
            return

        for dirname, dirlist, filelist in iso.walk(**kwargs):
            for filename in filelist:
                # Avoid a double slash when the file lives in the root directory.
                if dirname == "/":
                    path = dirname + filename
                else:
                    path = dirname + "/" + filename

                filedata = BytesIO()
                iso.get_file_from_iso_fp(filedata, **{path_key: path})

                self.dispatcher.add(FileObject(file_data=filedata.getvalue(), file_name=filename))

    def run(self):
        """
        Walk the ISO image to extract embedded files.

        :return:
        """
        iso = pycdlib.PyCdlib()
        iso.open_fp(BytesIO(self.file_object.data))

        try:
            # The plain ISO9660 namespace is always present; the extensions
            # below are walked only when the image declares them.
            self.walk_handler(iso, iso_path="/")

            if iso.has_udf():
                self.logger.info("UDF extension identified")
                self.walk_handler(iso, udf_path="/")

            if iso.has_joliet():
                self.logger.info("Joliet extension identified")
                self.walk_handler(iso, joliet_path="/")

            if iso.has_rock_ridge():
                self.logger.info("Rock Ridge extension identified")
                self.walk_handler(iso, rr_path="/")
        finally:
            iso.close()
"network", 41 | "tags": [], 42 | "url": { 43 | "type": "url", 44 | "tags": [], 45 | "url": "http://127.0.0.1", 46 | "path": null, 47 | "query": null, 48 | "protocol": "http" 49 | }, 50 | "socket": { 51 | "type": "socket", 52 | "tags": [], 53 | "address": "127.0.0.1", 54 | "port": null, 55 | "network_protocol": null, 56 | "listen": null 57 | }, 58 | "credential": null 59 | }, 60 | { 61 | "type": "socket", 62 | "tags": [], 63 | "address": "127.0.0.1", 64 | "port": null, 65 | "network_protocol": null, 66 | "listen": null 67 | }, 68 | { 69 | "type": "file", 70 | "tags": [], 71 | "name": "fooconfigtest.txt", 72 | "description": "example output file", 73 | "md5": "5eb63bbbe01eeed093cb22bb8f5acdc3", 74 | "sha1": "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", 75 | "sha256": "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9", 76 | "architecture": null, 77 | "compile_time": null, 78 | "file_path": null, 79 | "data": null, 80 | "derivation": "extracted and decompressed" 81 | } 82 | ] 83 | } -------------------------------------------------------------------------------- /mwcp/tests/test_pecon.py: -------------------------------------------------------------------------------- 1 | """ 2 | These are pytest test cases for pecon. 
3 | """ 4 | 5 | from mwcp.utils import pecon 6 | 7 | 8 | default_pe = ( 9 | b'MZ\x90\x00\x03\x00\x00\x00\x04\x00\x00\x00\xff\xff\x00\x00\xb8\x00\x00\x00' 10 | b'\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 11 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 12 | b'\x00\x00\xe0\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 13 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 14 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 15 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 16 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 17 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 18 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 19 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 20 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00PE\x00\x00L\x01\x00' 21 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe0\x00\x0f\x01\x0b\x01' 22 | b'\x01G\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 23 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x02\x00\x00\x01' 24 | b'\x00\x00\x00\x00\x00\x00\x00\x05\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00' 25 | b'\x00\x02\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x10\x00\x00\x10\x00' 26 | b'\x00\x00\x00\x10\x00\x00\x10\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00' 27 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 28 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 29 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 30 | b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 31 | 
def test_reconstruction():
    """A freshly constructed PE exposes sane defaults and builds to the known blob."""
    blank = pecon.PE()

    # Spot-check a few default header fields.
    optional_header = blank.OptionalHeader
    assert blank.DosHeader.e_magic == b'MZ'
    assert blank.SectionTable == []
    assert optional_header.FileAlignment == 512
    assert optional_header.DataDirectory.imports.VirtualAddress == 0

    # Serializing the default object reproduces the reference bytes.
    assert blank.build() == default_pe


def test_parsing():
    """Round-tripping an existing PE is lossless, and header edits stick on rebuild."""
    parsed = pecon.PE(default_pe)
    assert parsed.build() == default_pe

    # Mutate a header field and confirm only those bytes change when rebuilt.
    parsed.DosHeader.e_magic = b'ZM'
    rebuilt = parsed.build()
    assert rebuilt == b'ZM' + default_pe[2:]
Error log in input_file.bin", 13 | ], 14 | "mwcp_version": mwcp.__version__, 15 | "input_file": { 16 | "architecture": None, 17 | "compile_time": None, 18 | "data": None, 19 | "derivation": None, 20 | "description": None, 21 | "file_path": "C:/input_file.bin", 22 | "md5": "1e50210a0202497fb79bc38b6ade6c34", 23 | "name": "input_file.bin", 24 | "sha1": "baf34551fecb48acc3da868eb85e1b6dac9de356", 25 | "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee", 26 | "tags": [], 27 | "type": "file" 28 | }, 29 | "metadata": [ 30 | { 31 | "tags": [], 32 | "type": "mutex", 33 | "value": "root_mutex" 34 | }, 35 | { 36 | "architecture": None, 37 | "compile_time": None, 38 | "data": None, 39 | "derivation": None, 40 | "description": None, 41 | "file_path": None, 42 | "md5": "4844437d5747acd52a54981b48f60c8e", 43 | "name": "sub_file.exe", 44 | "sha1": "7bd8e7cb8e1e8b7b2e94b472422512935c9d4519", 45 | "sha256": "c2b8761db47791e06799e99a698ed4d63cdbdb9f5f16224c90b625b02581350c", 46 | "tags": [], 47 | "type": "file" 48 | } 49 | ], 50 | "parser": None, 51 | "recursive": False, 52 | "external_knowledge": {}, 53 | "tags": ["tagging", "test"], 54 | "type": "report" 55 | }, 56 | { 57 | "errors": [ 58 | "[!] Error log in sub_file.exe", 59 | ], 60 | "logs": [ 61 | "[+] Info log in sub_file.exe", 62 | "[!] 
Error log in sub_file.exe", 63 | ], 64 | "mwcp_version": mwcp.__version__, 65 | "input_file": { 66 | "architecture": None, 67 | "compile_time": None, 68 | "data": None, 69 | "derivation": None, 70 | "description": None, 71 | "file_path": None, 72 | "md5": "4844437d5747acd52a54981b48f60c8e", 73 | "name": "sub_file.exe", 74 | "sha1": "7bd8e7cb8e1e8b7b2e94b472422512935c9d4519", 75 | "sha256": "c2b8761db47791e06799e99a698ed4d63cdbdb9f5f16224c90b625b02581350c", 76 | "tags": [], 77 | "type": "file" 78 | }, 79 | "metadata": [ 80 | { 81 | "tags": [], 82 | "type": "mutex", 83 | "value": "sub_mutex" 84 | } 85 | ], 86 | "parser": None, 87 | "recursive": False, 88 | "external_knowledge": {}, 89 | "tags": ["tagging", "test"], 90 | "type": "report" 91 | } 92 | ] 93 | -------------------------------------------------------------------------------- /mwcp/tests/test_cli/parse.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "report", 4 | "tags": [], 5 | "mwcp_version": "MWCP_VERSION", 6 | "input_file": { 7 | "type": "file", 8 | "tags": [], 9 | "name": "test.txt", 10 | "description": "Foo", 11 | "md5": "fb843efb2ffec987db12e72ca75c9ea2", 12 | "sha1": "5e90c4c2be31a7a0be133b3dbb4846b0434bc2ab", 13 | "sha256": "fe5af8c641835c24f3bbc237a659814b96ed64d2898fae4cb3d2c0ac5161f5e9", 14 | "architecture": null, 15 | "compile_time": null, 16 | "file_path": "test.txt", 17 | "data": null, 18 | "derivation": null 19 | }, 20 | "parser": "foo", 21 | "recursive": true, 22 | "external_knowledge": {}, 23 | "errors": [], 24 | "logs": [ 25 | "[+] File test.txt identified as Foo.", 26 | "[+] size of inputfile is 23 bytes", 27 | "[+] test.txt dispatched residual file: fooconfigtest.txt", 28 | "[+] File fooconfigtest.txt described as example output file", 29 | "[+] operating on inputfile test.txt" 30 | ], 31 | "metadata": [ 32 | { 33 | "type": "url", 34 | "tags": [], 35 | "url": "http://127.0.0.1", 36 | "path": null, 37 | "query": null, 38 | 
"protocol": "http" 39 | }, 40 | { 41 | "type": "network", 42 | "tags": [], 43 | "url": { 44 | "type": "url", 45 | "tags": [], 46 | "url": "http://127.0.0.1", 47 | "path": null, 48 | "query": null, 49 | "protocol": "http" 50 | }, 51 | "socket": { 52 | "type": "socket", 53 | "tags": [], 54 | "address": "127.0.0.1", 55 | "port": null, 56 | "network_protocol": null, 57 | "listen": null 58 | }, 59 | "credential": null 60 | }, 61 | { 62 | "type": "socket", 63 | "tags": [], 64 | "address": "127.0.0.1", 65 | "port": null, 66 | "network_protocol": null, 67 | "listen": null 68 | }, 69 | { 70 | "type": "file", 71 | "tags": [], 72 | "name": "fooconfigtest.txt", 73 | "description": "example output file", 74 | "md5": "5eb63bbbe01eeed093cb22bb8f5acdc3", 75 | "sha1": "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", 76 | "sha256": "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9", 77 | "architecture": null, 78 | "compile_time": null, 79 | "file_path": null, 80 | "data": null, 81 | "derivation": "extracted and decompressed" 82 | } 83 | ] 84 | } 85 | ] 86 | -------------------------------------------------------------------------------- /mwcp/parsers/tests/foo/f144899b86766688991c5d0d10902f4a.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "report", 3 | "tags": [], 4 | "mwcp_version": "3.12.0", 5 | "input_file": { 6 | "type": "file", 7 | "tags": [], 8 | "name": "f144899b86766688991c5d0d10902f4a.bin", 9 | "description": "Foo", 10 | "md5": "f144899b86766688991c5d0d10902f4a", 11 | "sha1": "bdca4e5c28a89d3b5281cc189f5910eaad16395a", 12 | "sha256": "8a599bf73a83c1d32a1b426ed736488ae34991fda6ac4cd27cc0597e21cd8420", 13 | "architecture": null, 14 | "compile_time": null, 15 | "file_path": null, 16 | "data": null, 17 | "derivation": null 18 | }, 19 | "parser": "dc3:foo", 20 | "recursive": false, 21 | "external_knowledge": { 22 | "secret": "hello" 23 | }, 24 | "errors": [], 25 | "logs": [ 26 | "[+] File 
f144899b86766688991c5d0d10902f4a.bin identified as Foo.", 27 | "[+] size of inputfile is 15765 bytes", 28 | "[+] f144899b86766688991c5d0d10902f4a.bin dispatched residual file: fooconfigtest.txt", 29 | "[+] File fooconfigtest.txt described as example output file", 30 | "[+] operating on inputfile f144899b86766688991c5d0d10902f4a.bin" 31 | ], 32 | "metadata": [ 33 | { 34 | "type": "other", 35 | "tags": [], 36 | "key": "secret_using_external_knowledge", 37 | "value": "hello!", 38 | "value_format": "string" 39 | }, 40 | { 41 | "type": "url", 42 | "tags": [], 43 | "url": "http://127.0.0.1", 44 | "path": null, 45 | "query": null, 46 | "protocol": "http" 47 | }, 48 | { 49 | "credential": null, 50 | "socket": { 51 | "type": "socket", 52 | "tags": [], 53 | "address": "127.0.0.1", 54 | "port": null, 55 | "network_protocol": null, 56 | "listen": null 57 | }, 58 | "tags": [], 59 | "type": "network", 60 | "url": { 61 | "type": "url", 62 | "tags": [], 63 | "url": "http://127.0.0.1", 64 | "path": null, 65 | "query": null, 66 | "protocol": "http" 67 | } 68 | }, 69 | { 70 | "type": "socket", 71 | "tags": [], 72 | "address": "127.0.0.1", 73 | "port": null, 74 | "network_protocol": null, 75 | "listen": null 76 | }, 77 | { 78 | "type": "file", 79 | "tags": [], 80 | "name": "fooconfigtest.txt", 81 | "description": "example output file", 82 | "md5": "5eb63bbbe01eeed093cb22bb8f5acdc3", 83 | "sha1": "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", 84 | "sha256": "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9", 85 | "architecture": null, 86 | "compile_time": null, 87 | "file_path": null, 88 | "data": null, 89 | "derivation": "extracted and decompressed" 90 | } 91 | ] 92 | } -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | 4 | from mwcp import metadata 5 | 6 | 7 | 
@pytest.mark.parametrize("text_format,report_name", [ 8 | ("markdown", "report.md"), 9 | ("simple", "report.txt"), 10 | ("html", "report.html"), 11 | ]) 12 | def test_basic(datadir, report, metadata_items, text_format, report_name): 13 | """ 14 | Tests each metadata element to ensure they are presented 15 | nicely in a report. 16 | """ 17 | with report: 18 | report.input_file.description = "SuperMalware Implant" 19 | for item in metadata_items: 20 | report.add(item) 21 | report.add_tag("test", "tagging") 22 | 23 | actual = report.as_text(text_format) 24 | print(actual) 25 | expected = (datadir / report_name).read_text() 26 | assert actual == expected 27 | 28 | 29 | @pytest.mark.parametrize("text_format,report_name", [ 30 | ("markdown", "report_wordwrap.md"), 31 | ("simple", "report_wordwrap.txt"), 32 | ("html", "report_wordwrap.html"), 33 | ]) 34 | def test_wordwrap(datadir, report, text_format, report_name): 35 | with report: 36 | report.input_file.description = "SuperMalware Implant" 37 | large_num = int("123"*50) # Large number that will require word wrapping. 38 | report.add(metadata.RSAPublicKey(1234, large_num)) 39 | report.add(metadata.RSAPrivateKey( 40 | 1234, large_num, 1234, large_num, large_num, large_num, large_num, large_num)) 41 | report.add(metadata.UserAgent("This is a really large user agent that will need to be word wrapped." 
* 16)) 42 | 43 | actual = report.as_text(text_format) 44 | print(actual) 45 | expected = (datadir / report_name).read_text() 46 | assert actual == expected 47 | 48 | 49 | @pytest.mark.parametrize("text_format,report_name", [ 50 | ("markdown", "report_foreign.md"), 51 | ("simple", "report_foreign.txt"), 52 | ("html", "report_foreign.html"), 53 | ]) 54 | def test_foreign_characters(datadir, report, text_format, report_name): 55 | with report: 56 | report.input_file.description = "SuperMalware Implant" 57 | report.add(metadata.Other("JAPAN", "\u30E6\u30FC\u30B6\u30FC\u5225\u30B5\u30A4\u30C8")) 58 | report.add(metadata.Other("CHINA", "\u7B80\u4F53\u4E2D\u6587")) 59 | report.add(metadata.Other("KOREA", "\uD06C\uB85C\uC2A4 \uD50C\uB7AB\uD3FC\uC73C\uB85C")) 60 | report.add(metadata.Other("ISRAEL", "\u05DE\u05D3\u05D5\u05E8\u05D9\u05DD \u05DE\u05D1\u05D5\u05E7\u05E9\u05D9\u05DD")) 61 | report.add(metadata.Other("EGYPT", "\u0623\u0641\u0636\u0644 \u0627\u0644\u0628\u062D\u0648\u062B")) 62 | report.add(metadata.Other( 63 | "RUSSIA", 64 | "\u0414\u0435\u0441\u044F\u0442\u0443\u044E \u041C\u0435\u0436\u0434\u0443\u043D\u0430" 65 | "\u0440\u043E\u0434\u043D\u0443\u044E" 66 | )) 67 | report.add(metadata.Other("MATH", "\u222E E\u22C5da = Q, n \u2192 \u221E, \u2211 f(i) = \u220F g(i)")) 68 | report.add(metadata.Other("FRANCE", "fran\u00E7ais langue \u00E9trang\u00E8re")) 69 | report.add(metadata.Other("SPAIN", "ma\u00F1ana ol\u00E9")) 70 | 71 | actual = report.as_text(text_format) 72 | print(actual) 73 | expected = (datadir / report_name).read_text("utf-8") 74 | assert actual == expected 75 | -------------------------------------------------------------------------------- /mwcp/tests/test_runner.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests mwcp.Runner components. 
3 | """ 4 | import textwrap 5 | 6 | import mwcp 7 | 8 | 9 | def test_running_parser_class(): 10 | from mwcp import Parser 11 | 12 | class TestParser(Parser): 13 | DESCRIPTION = "Test" 14 | 15 | report = mwcp.run(TestParser, data=b"test") 16 | assert report 17 | assert report.parser == "TestParser" 18 | 19 | 20 | def test_yara_runner(datadir): 21 | mwcp.register_entry_points() 22 | 23 | # File should map to foo parser. 24 | report = mwcp.run(data=b"mapped file", yara_repo=datadir / "yara_repo", recursive=False) 25 | assert report 26 | # Report "parser" will be "-" because it was initially unknown, but the parser mapped 27 | # to the input file should be foo. 28 | assert report.parser == "-" 29 | assert report.input_file.description == "Foo" 30 | assert report.input_file.parser.name == "foo.Foo" 31 | 32 | 33 | def test_yara_runner_recursive(datadir): 34 | mwcp.register_parser_directory(str(datadir), source_name="test") 35 | 36 | # Initial file should map to FileA and residual to FileB. 37 | # Recursion detection should take effect. 38 | report = mwcp.run(data=b"matches file a", yara_repo=datadir / "yara_repo", recursive=True) 39 | assert report 40 | assert report.parser == "-" 41 | assert report.input_file.description == "File A" 42 | residual_file = report.input_file.children[0] 43 | assert residual_file.description == "File B" 44 | 45 | # Recursion will not take effect. 46 | report = mwcp.run(data=b"matches file a", yara_repo=datadir / "yara_repo", recursive=False) 47 | assert report 48 | assert report.parser == "-" 49 | assert report.input_file.description == "File A" 50 | residual_file = report.input_file.children[0] 51 | assert residual_file.description == "Unidentified file" 52 | 53 | 54 | def test_yara_runner_sibling_dispatch(datadir): 55 | """ 56 | Tests Github issue #40 where a file doesn't get processed because 57 | it was dispatched with a parent of an already processed sibling. 
58 | """ 59 | mwcp.register_parser_directory(str(datadir), source_name="test") 60 | 61 | # Test running SingleDispatch parser and see if we successfully get the Grandchild to be parsed. 62 | report = mwcp.run(data=b"matches parent", yara_repo=datadir / "yara_repo", recursive=True) 63 | assert report 64 | assert report.parser == "-" 65 | input_file = report.input_file 66 | assert input_file.description == "Parent" 67 | children = input_file.children 68 | assert len(children) == 2 69 | assert children[0].description == "Sibling 1" 70 | assert children[1].description == "Sibling 2" 71 | assert len(children[0].children) == 1 72 | # This was originally unidentified due to not being processed. 73 | assert children[0].children[0].description == "Grandchild" 74 | assert report.file_tree() == textwrap.dedent("""\ 75 | <40b44905ee15a698e22f086c758a3981.bin (40b44905ee15a698e22f086c758a3981) : Parent> 76 | ├── 77 | │ └── <3ca5088d02dfb0fc668a0e2898ec3d93.bin (3ca5088d02dfb0fc668a0e2898ec3d93) : Grandchild> 78 | └── """) 79 | -------------------------------------------------------------------------------- /mwcp/tests/test_construct.py: -------------------------------------------------------------------------------- 1 | """Tests our construct helpers.""" 2 | 3 | import doctest 4 | import os 5 | import sys 6 | 7 | import pytest 8 | 9 | from mwcp.utils import construct 10 | 11 | 12 | # @pytest.mark.xfail( 13 | # raises=ValueError, 14 | # reason="Doctest is producing a 'wrapper loop when unwrapping obj_' error" 15 | # ) 16 | @pytest.mark.parametrize("module", [ 17 | construct.helpers, 18 | construct.datetime_, 19 | construct.network, 20 | construct.windows_enums, 21 | construct.windows_structures, 22 | ]) 23 | def test_helpers(module): 24 | """Tests that the doctests for the helpers work.""" 25 | results = doctest.testmod(module) 26 | assert not results.failed 27 | 28 | 29 | def test_html(): 30 | """Tests the html construct.""" 31 | # Test doctests 32 | results = 
doctest.testmod(construct.construct_html) 33 | assert not results.failed 34 | 35 | # Test with an example 36 | EMBED_SPEC = construct.Struct( 37 | 'a' / construct.IP4Address, 38 | 'b' / construct.IP4Address, 39 | 'c' / construct.IP4Address, 40 | 'd' / construct.IP4Address 41 | ) 42 | 43 | address_struct = construct.Struct( 44 | 'first' / construct.Struct('a' / construct.Byte, 'b' / construct.Byte), 45 | 'second' / construct.Struct('inner2' / construct.Bytes(2)) 46 | # 'internal' / IP4Address 47 | ) 48 | 49 | PACKET = construct.Struct( 50 | construct.Padding(0x9), 51 | 'Hardcoded Value 1' / construct.HexString(construct.Int32ul), 52 | 'Hardcoded Value 2' / construct.HexString(construct.Int32ul), 53 | 'Hardcoded Value 3' / construct.HexString(construct.Int32ul), 54 | construct.Padding(0x17), 55 | 'Compromised Host IP' / construct.IP4Address, # Use IP adapter 56 | # 'Unknown IP Addresses' / construct.Switch( 57 | # this['Hardcoded Value 1'], 58 | # { 59 | # '0x1f4' : EMBED_SPEC 60 | # }, 61 | # ), 62 | 'Unknown IP Addresses' / address_struct[4], 63 | # 'Unknown IP Addresses' / IP4Address[4], 64 | construct.Padding(8), 65 | 'Unknown Indicator' / construct.String(0xF), 66 | construct.Padding(2), 67 | 'Number of CPUs' / construct.Int32ul, 68 | 'CPU Mhz' / construct.Int32ul, 69 | 'Total Memory (MB)' / construct.Int32ul, 70 | 'Compromised System Kernel' / construct.CString(), 71 | 'Possible Trojan Version' / construct.CString() 72 | ) 73 | 74 | data = (b'\x01\x00\x00\x00}\x00\x00\x00\x00\xf4\x01\x00\x002\x00\x00\x00\xe8' 75 | b'\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01' 76 | b'\x01\x00\x00\x00\x00\x01\x00\x00\x00\xc0\xa8\x01\r\xc0\xa8\x01\r\xc0' 77 | b'\xa8\x01\r\xc0\xa8\x01\r\xc0\xa8\x01\r\xff\xff\x01\x00\x00\x00\x00\x00' 78 | b'-== Love AV ==-:\x00\x01\x00\x00\x00d\n\x00\x00\xc4\x07\x00\x00' 79 | b'Linux 3.13.0-93-generic\x001:G2.40\x00') 80 | 81 | html_data = construct.html_hex(PACKET, data, depth=1) 82 | 83 | with 
open(os.path.join(os.path.dirname(__file__), 'construct_html.html'), 'r') as fo: 84 | expected_html_data = fo.read() 85 | 86 | assert html_data == expected_html_data 87 | 88 | 89 | def test_base64(): 90 | """Test the Base64 Adapter with bug associated with unicode encoding on build""" 91 | spec = construct.Base64(construct.CString("utf-16le")) 92 | data = b'Y\x00W\x00J\x00j\x00Z\x00A\x00=\x00=\x00\x00\x00' 93 | assert spec.parse(data) == b"abcd" 94 | assert spec.build(b"abcd") == data 95 | 96 | spec = construct.Base64(construct.CString("utf-8")) 97 | data = b'YWJjZA==\x00' 98 | assert spec.parse(data) == b"abcd" 99 | assert spec.build(b"abcd") == data 100 | -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer/report_wordwrap.txt: -------------------------------------------------------------------------------- 1 | ----- File: input_file.bin ----- 2 | Field Value 3 | ------------ ---------------------------------------------------------------- 4 | Parser FooParser 5 | File Path C:/input_file.bin 6 | Description SuperMalware Implant 7 | Architecture 8 | MD5 1e50210a0202497fb79bc38b6ade6c34 9 | SHA1 baf34551fecb48acc3da868eb85e1b6dac9de356 10 | SHA256 1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee 11 | Compile Time 12 | 13 | ---- RSA Private Key ---- 14 | Value 15 | ------------------------------------------------- 16 | Modulus (n): 17 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 18 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 19 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 20 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 21 | f3:b3 22 | Public Exponent (e): 23 | 1234 (0x4d2) 24 | Private Exponent (d): 25 | 1234 (0x4d2) 26 | p: 27 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 28 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 29 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 30 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 31 | f3:b3 32 | q: 33 | 
9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 34 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 35 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 36 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 37 | f3:b3 38 | d mod (p-1): 39 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 40 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 41 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 42 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 43 | f3:b3 44 | d mod (q-1): 45 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 46 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 47 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 48 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 49 | f3:b3 50 | (inverse of q) mod p: 51 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 52 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 53 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 54 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 55 | f3:b3 56 | 57 | ---- RSA Public Key ---- 58 | Value 59 | ------------------------------------------------- 60 | Modulus (n): 61 | 9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0: 62 | d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78: 63 | ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f: 64 | 84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f: 65 | f3:b3 66 | Public Exponent (e): 67 | 1234 (0x4d2) 68 | 69 | ---- User Agent ---- 70 | Value 71 | ---------------------------------------------------------------------------------------------------- 72 | This is a really large user agent that will need to be word wrapped.This is a really large user 73 | agent that will need to be word wrapped.This is a really large user agent that will need to be 74 | word wrapped.This is a really large user agent that will need to be word wrapped.This is a really 75 | large user agent that will need to be word wrapped.This is a really large user agent that will 76 | need to be word wrapped.This is a really large user agent that will need to be word wrapped.This 77 | is a really large user agent that will need 
to be word wrapped.This is a really large user agent 78 | that will need to be word wrapped.This is a really large user agent that will need to be word 79 | wrapped.This is a really large user agent that will need to be word wrapped.This is a really large 80 | user agent that will need to be word wrapped.This is a really large user agent that will need to 81 | be word wrapped.This is a really large user agent that will need to be word wrapped.This is a 82 | really large user agent that will need to be word wrapped.This is a really large user agent that 83 | will need to be word wrapped. 84 | 85 | ----- File Tree ----- 86 | 87 | 88 | -------------------------------------------------------------------------------- /mwcp/utils/construct/dotnet.py: -------------------------------------------------------------------------------- 1 | """Construct helpers for .NET""" 2 | 3 | from .core import * 4 | 5 | 6 | class _DotNetUInt(Construct): 7 | r""" 8 | DotNet encoded unsigned 32-bit integer, where first byte indicates the length of the integer. 
9 | 10 | Example: 11 | 12 | >>> DotNetUInt.build(16) 13 | '\x10' 14 | >>> DotNetUInt.parse(_) 15 | 16 16 | >>> DotNetUInt.build(256) 17 | '\x81\x00' 18 | >>> DotNetUInt.parse(_) 19 | 256 20 | >>> DotNetUInt.build(0xffff) 21 | '\xc0\x00\xff\xff' 22 | >>> DotNetUInt.parse(_) 23 | 65535 24 | """ 25 | def _parse(self, stream, context, path): 26 | b = byte2int(stream_read(stream, 1)) 27 | if b & 0x80 == 0: 28 | num = b 29 | elif b & 0xc0 == 0x80: 30 | num = ((b & 0x3f) << 8) + byte2int(stream_read(stream, 1)) 31 | elif b & 0xe0 == 0xc0: 32 | num = (b & 0x1f) << 24 33 | num += byte2int(stream_read(stream, 1)) << 16 34 | num += byte2int(stream_read(stream, 1)) << 8 35 | num += byte2int(stream_read(stream, 1)) 36 | else: 37 | raise ConstructError('DotNetUInt encountered an invalid string') 38 | return num 39 | 40 | def _build(self, obj, stream, context, path): 41 | if obj < 0: 42 | raise ConstructError("DotNetUInt cannot build from negative number") 43 | if obj > 0x1fffffff: 44 | raise ConstructError("DotNetUInt encountered too large a number") 45 | if obj < 0x80: 46 | stream_write(stream, int2byte(obj), 1) 47 | elif obj < 0x3fff: 48 | stream_write(stream, int2byte((obj >> 8) | 0x80), 1) 49 | stream_write(stream, int2byte(obj & 0xff), 1) 50 | else: 51 | stream_write(stream, int2byte((obj >> 24) | 0xc0), 1) 52 | stream_write(stream, int2byte((obj >> 16) & 0xff), 1) 53 | stream_write(stream, int2byte((obj >> 8) & 0xff), 1) 54 | stream_write(stream, int2byte(obj & 0xff), 1) 55 | 56 | 57 | # using the @singleton decorator seems to break our ability to run doctests. 58 | DotNetUInt = _DotNetUInt() 59 | 60 | 61 | class _DotNetNullString(Construct): 62 | r""" 63 | DotNet null string, different from an empty zero-byte string, encoded as a single 0xff byte. 
64 | 65 | Example: 66 | 67 | >>> repr(DotNetNullString.parse('\xff')) 68 | 'None' 69 | >>> DotNetNullString.build(None) 70 | '\xff' 71 | """ 72 | def _parse(self, stream, context, path): 73 | if stream_read(stream, 1) != '\xff': 74 | raise ConstructError('DotNetNullString encounted an invalid byte.') 75 | return None 76 | 77 | def _build(self, obj, stream, context, path): 78 | stream_write(stream, '\xff', 1) 79 | 80 | def _sizeof(self, context, path): 81 | return 1 82 | 83 | 84 | DotNetNullString = _DotNetNullString() 85 | 86 | 87 | class _DotNetSigToken(Adapter): 88 | r""" 89 | Adapter used to create or read a compressed token used in signatures. The token must be a typedef, 90 | typeref, or typespec token. 91 | 92 | >>> DotNetSigToken.parse('\x81\x42') 93 | 452984912 94 | >>> DotNetSigToken.build(0x01000002) 95 | '\t' 96 | """ 97 | TOKEN_ENCODE = { 98 | 0x02: 0, 99 | 0x01: 1, 100 | 0x1b: 2, 101 | } 102 | 103 | def _encode(self, obj, context, path): 104 | encoded = self.TOKEN_ENCODE.get(obj >> 24, 3) 105 | if encoded is None: 106 | raise ConstructError('DotNetSigToken encountered a token other than typedef, typeref, or typespec') 107 | return ((obj & 0x00ffffff) << 2) | encoded 108 | 109 | def _decode(self, obj, context, path): 110 | if obj & 3 == 3 or obj & 0xfc00000000: 111 | raise ConstructError('DotNetSigToken encountered an invalid typedef, typeref, or typespec token') 112 | return (obj >> 2) | [0x02000000, 0x01000000, 0x1b000000][obj & 3] 113 | 114 | 115 | DotNetSigToken = _DotNetSigToken(DotNetUInt) 116 | -------------------------------------------------------------------------------- /mwcp/utils/elffileutils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Description: Utility for elftools python library. 
3 | """ 4 | 5 | import logging 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | import elftools.elf.elffile as elffile 10 | import io 11 | 12 | 13 | def obtain_elf(file_data): 14 | """ 15 | Given file data, create an elftools.ELFFile object from the data. 16 | 17 | :param file_data: Input ELF file data 18 | 19 | :return: An elftools.ELFFile object or None 20 | """ 21 | try: 22 | elf = elffile.ELFFile(io.BytesIO(file_data)) 23 | return elf 24 | except elffile.ELFError: 25 | logger.debug('An elftools.ELFFile object on the file data could not be created.') 26 | return None 27 | 28 | 29 | def obtain_section(section_name, elf=None, file_data=None): 30 | """ 31 | Obtain the section obtain for a specficied ELF section of a file. 32 | 33 | :param section_name: The name of the section to obtain 34 | :param elf: elftools.ELFFile object 35 | :param file_data: Input file data 36 | 37 | :return: The elftools.Section object, or None. 38 | """ 39 | if file_data: 40 | elf = obtain_elf(file_data) 41 | if elf: 42 | for section in elf.iter_sections(): 43 | if section.name == section_name: 44 | return section 45 | return None 46 | else: 47 | return None 48 | 49 | 50 | def obtain_section_data(section_name, elf=None, file_data=None, min_size=0): 51 | """ 52 | Obtain the data in a specified ELF section of a file. 53 | 54 | :param section_name: The name of the section from which to extract data. 55 | :param elf: elftools.ELFFile object 56 | :param file_data: Input file data 57 | :param min_size: The minimum acceptable size for the section_data 58 | 59 | :return: The PE section data, or None. 
60 | """ 61 | if file_data: 62 | elf = obtain_elf(file_data) 63 | if elf: 64 | section = obtain_section(section_name, elf) 65 | if section: 66 | section_data = section.data() 67 | if len(section_data) > min_size: 68 | return section_data 69 | return None 70 | return None 71 | else: 72 | return None 73 | 74 | 75 | def check_section(section_name, elf=None, file_data=None): 76 | """ 77 | Check if a specified ELF section exists in a file. 78 | 79 | :param section_name: The name of the section from which to extract data. 80 | :param elf: elftools.ELFFile object 81 | :param file_data: Input file data 82 | 83 | :return: True if the section name is observed, False if it is not. 84 | """ 85 | if file_data: 86 | elf = obtain_elf(file_data) 87 | if elf and obtain_section(section_name, elf): 88 | return True 89 | return False 90 | 91 | 92 | def obtain_physical_offset(mem_offset, elf=None, file_data=None): 93 | """ 94 | For an ELF file (in x86), convert a provided memory offset to a raw offset. 95 | 96 | :param mem_offset: The memory offset to convert to a raw offset 97 | :param elf: elftools.ELFFile object 98 | :param file_data: Input file data 99 | 100 | :return: Raw offset, or None. 101 | """ 102 | if file_data: 103 | elf = obtain_elf(file_data) 104 | if elf: 105 | for phy_offset in elf.address_offsets(mem_offset): 106 | return phy_offset 107 | return None 108 | 109 | 110 | def obtain_memory_offset(phy_offset, elf=None, file_data=None): 111 | """ 112 | For an ELF file, convert a provided raw offset to a memory offset. 113 | 114 | :param phy_offset: The raw offset to convert to a memory offset 115 | :param elf: elftools.ELFFile object 116 | :param file_data: Input file data 117 | 118 | :return: Memory offset, or None. 
119 | """ 120 | if file_data: 121 | elf = obtain_elf(file_data) 122 | if elf: 123 | for seg in elf.iter_segments(): 124 | if seg['p_offset'] <= phy_offset < (seg['p_offset'] + seg['p_filesz']): 125 | return phy_offset - seg['p_offset'] + seg['p_vaddr'] 126 | return None 127 | else: 128 | return None 129 | -------------------------------------------------------------------------------- /mwcp/core.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pathlib 3 | from typing import Union, Type 4 | 5 | import mwcp 6 | from mwcp.runner import Runner, YaraRunner 7 | from mwcp.report import Report 8 | from mwcp.parser import Parser 9 | from mwcp import config 10 | from mwcp import metadata 11 | 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def run( 17 | parser: Union[str, Type[Parser]] = None, 18 | file_path: Union[str, pathlib.Path] = None, 19 | data: bytes = None, 20 | *, 21 | output_directory: Union[str, pathlib.Path] = None, 22 | yara_repo: Union[str, pathlib.Path] = None, 23 | recursive: bool = True, 24 | knowledge_base: dict = None, 25 | include_file_data: bool = False, 26 | prefix_output_files: bool = True, 27 | external_strings_report: bool = False, 28 | include_logs: bool = True, 29 | log_level: int = None, 30 | log_filter: logging.Filter = None, 31 | ) -> Report: 32 | """ 33 | Runs a specified parser on a given file path or data. 34 | 35 | :param parser: Name or class of parser to run. 36 | Can be excluded to use YARA matching to determine parser. 37 | (use ":" notation to specify source if necessary e.g. "acme:Foo") 38 | :param file_path: File path to parse 39 | :param data: File data to parse 40 | :param yara_repo: Path to directory of yara signatures. 41 | :param recursive: Whether to recursively match and run parsers for unidentified files. 42 | (Only applicable if given a yara_repo to match files to parsers.) 43 | :param output_directory: 44 | Directory to write out files. 
45 | If not provided, files will not be written out. 46 | :param include_file_data: Whether to include file data in the generated report. 47 | If disabled, only metadata such as the file path, description, and md5 will be included. 48 | :param prefix_output_files: Whether to include a prefix of the first 5 characters 49 | of the md5 on output files. This is to help avoid overwriting multiple 50 | output files with the same name. 51 | :param external_strings_report: Whether to output reported DecodedString elements into a 52 | separate strings report. 53 | :param include_logs: Whether to include error and debug logs in the generated report. 54 | :param log_level: If including logs, the logging level to be collected. 55 | (Defaults to currently set effective log level) 56 | :param log_filter: If including logs, this can be used to pass in a custom filter for the logs. 57 | Should be a valid argument for logging.Handler.addFilter() 58 | 59 | :return: mwcp.Report object containing parse results. 60 | """ 61 | if file_path: 62 | file_path = str(file_path) 63 | 64 | report_config = dict( 65 | output_directory=output_directory, 66 | include_file_data=include_file_data, 67 | prefix_output_files=prefix_output_files, 68 | external_strings_report=external_strings_report, 69 | include_logs=include_logs, 70 | log_level=log_level, 71 | log_filter=log_filter, 72 | knowledge_base=knowledge_base, 73 | ) 74 | if not yara_repo: 75 | yara_repo = config.get("YARA_REPO") 76 | 77 | # Only run YARA runner if repo has been setup and we can benefit from it. 
78 | if yara_repo and (not parser or recursive): 79 | runner = YaraRunner(yara_repo=yara_repo, recursive=recursive, **report_config) 80 | elif parser: 81 | runner = Runner(**report_config) 82 | else: 83 | raise ValueError(f"Must provide either a parser to run or a yara_repo for file matching.") 84 | return runner.run(parser, file_path=file_path, data=data) 85 | 86 | 87 | def schema(id=None) -> dict: 88 | """ 89 | Generates a JSON Schema for a Report object. 90 | NOTE: This is the schema for a single report. Depending on how you use MWCP, 91 | you may get a list of these reports instead. 92 | """ 93 | if id is None: 94 | id = ( 95 | f"https://raw.githubusercontent.com/dod-cyber-crime-center/DC3-MWCP/" 96 | f"{mwcp.__version__}/mwcp/config/schema.json" 97 | ) 98 | schema = { 99 | "$schema": "https://json-schema.org/draft/2019-09/schema", 100 | "$id": id, 101 | } 102 | schema.update(metadata.Report.schema()) 103 | 104 | # "output_text" may also be included if we are running from the server service. 105 | schema["properties"]["output_text"] = { 106 | "type": "string", 107 | "description": "Raw text output from MWCP.", 108 | } 109 | 110 | return schema 111 | -------------------------------------------------------------------------------- /mwcp/tests/test_report.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests Report class. 3 | """ 4 | import logging 5 | import runpy 6 | 7 | import pytest 8 | 9 | import mwcp 10 | from mwcp import metadata 11 | 12 | 13 | @pytest.fixture 14 | def filled_report(report, metadata_items): 15 | """ 16 | Provides a report filled with metadata examples seen above. 
17 | """ 18 | logger = logging.getLogger("test_report") 19 | with report: 20 | for item in metadata_items: 21 | report.add(item) 22 | 23 | logger.info("Test info log") 24 | logger.error("Test error log") 25 | logger.debug("Test debug log") 26 | 27 | report.add_tag("test", "tagging") 28 | 29 | return report 30 | 31 | 32 | def test_report_dict(datadir, filled_report): 33 | expected = runpy.run_path(str(datadir / "report.py"))["report"] 34 | assert filled_report.as_dict() == expected 35 | 36 | 37 | def test_report_json(datadir, filled_report): 38 | expected = (datadir / "report.json").read_text().replace("MWCP_VERSION", mwcp.__version__) 39 | actual = filled_report.as_json() 40 | print(actual) 41 | assert actual == expected 42 | 43 | 44 | def test_split_report(datadir): 45 | """ 46 | Tests split metadata per file. 47 | """ 48 | logger = logging.getLogger("test_split_report") 49 | logging.root.setLevel(logging.INFO) 50 | input_file = mwcp.FileObject(b"some data", file_path="C:/input_file.bin") 51 | report = mwcp.Report(input_file, "FooParser", log_level=logging.INFO) 52 | with report: 53 | logger.info("Info log in input_file.bin") 54 | logger.error("Error log in input_file.bin") 55 | report.add(metadata.Mutex("root_mutex")) 56 | 57 | sub_file = mwcp.FileObject(b"some other data", file_name="sub_file.exe") 58 | report.add(metadata.File.from_file_object(sub_file)) 59 | report.set_file(sub_file) 60 | logger.info("Info log in sub_file.exe") 61 | logger.error("Error log in sub_file.exe") 62 | report.add(metadata.Mutex("sub_mutex")) 63 | 64 | report.add_tag("test", "tagging") 65 | 66 | assert len(report.get()) == 3 67 | 68 | root_metadata = report.get(source=input_file.md5) 69 | assert len(root_metadata) == 2 70 | assert metadata.Mutex("root_mutex") in root_metadata 71 | 72 | sub_metadata = report.get(source=sub_file.md5) 73 | assert len(sub_metadata) == 1 74 | assert metadata.Mutex("sub_mutex") in sub_metadata 75 | 76 | assert report.logs == [ 77 | "[+] Info log in 
input_file.bin", 78 | "[!] Error log in input_file.bin", 79 | "[+] Info log in sub_file.exe", 80 | "[!] Error log in sub_file.exe", 81 | ] 82 | assert report.errors == [ 83 | "[!] Error log in input_file.bin", 84 | "[!] Error log in sub_file.exe", 85 | ] 86 | assert report.get_logs(sub_file) == [ 87 | "[+] Info log in sub_file.exe", 88 | "[!] Error log in sub_file.exe", 89 | ] 90 | assert report.get_logs(sub_file, errors_only=True) == [ 91 | "[!] Error log in sub_file.exe", 92 | ] 93 | 94 | expected = runpy.run_path(str(datadir / "split_report.py"))["split_report"] 95 | assert report.as_list() == expected 96 | 97 | 98 | def test_finalized(report): 99 | """ 100 | Tests that we can't add metadata after it is finalized. 101 | """ 102 | with report: 103 | report.add(metadata.URL("example1.com")) 104 | with pytest.raises(RuntimeError): 105 | report.add(metadata.URL("example2.com")) 106 | 107 | 108 | def test_deduplication(report): 109 | """ 110 | Tests that the same metadata information is dedupped. 111 | """ 112 | with report: 113 | report.add(metadata.URL("example.com")) 114 | report.add(metadata.URL("example.com")) 115 | report.add(metadata.Socket(address="example.com")) 116 | report.add(metadata.Socket(address="example.com")) # equivalent more verbose version. 
117 | report.add(metadata.C2Address(address="example.com")) 118 | 119 | # Set new file source to ensure we dedup across sources (if not split) 120 | res_file = mwcp.FileObject(b"residual data", file_name="res.exe") 121 | report.set_file(res_file) 122 | report.add(metadata.URL("example.com")) 123 | report.add(metadata.Socket(address="example.com")) 124 | 125 | items = report.get() 126 | assert items == [ 127 | metadata.URL("example.com"), 128 | metadata.Network(url=metadata.URL2(url='example.com'), socket=metadata.Socket(address='example.com')), 129 | metadata.Socket(address="example.com"), 130 | metadata.C2Address(address="example.com"), 131 | ] 132 | -------------------------------------------------------------------------------- /mwcp/tests/test_report_writer/report_wordwrap.html: -------------------------------------------------------------------------------- 1 |

File: input_file.bin

2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 |
Field Value
Parser FooParser
File Path C:/input_file.bin
Description SuperMalware Implant
Architecture
MD5 1e50210a0202497fb79bc38b6ade6c34
SHA1 baf34551fecb48acc3da868eb85e1b6dac9de356
SHA256 1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee
Compile Time
17 | 18 |

RSA Private Key

19 | 20 | 21 | 22 | 23 | 24 | 65 | 66 |
Value
Modulus (n):
 25 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 26 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 27 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 28 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 29 |     f3:b3
 30 | Public Exponent (e):
 31 |     1234 (0x4d2)
 32 | Private Exponent (d):
 33 |     1234 (0x4d2)
 34 | p:
 35 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 36 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 37 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 38 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 39 |     f3:b3
 40 | q:
 41 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 42 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 43 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 44 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 45 |     f3:b3
 46 | d mod (p-1):
 47 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 48 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 49 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 50 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 51 |     f3:b3
 52 | d mod (q-1):
 53 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 54 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 55 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 56 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 57 |     f3:b3
 58 | (inverse of q) mod p:
 59 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 60 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 61 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 62 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 63 |     f3:b3
 64 | 
67 | 68 |

RSA Public Key

69 | 70 | 71 | 72 | 73 | 74 | 83 | 84 |
Value
Modulus (n):
 75 |     9a:10:6f:8d:3e:4b:bb:73:1e:b2:aa:0e:c7:e9:a0:
 76 |     d3:1b:fb:db:5a:ce:26:92:d2:3d:db:2a:95:ae:78:
 77 |     ad:7a:e0:2d:90:73:38:5b:57:72:5a:28:10:f3:1f:
 78 |     84:ff:3b:31:f8:4f:f3:b3:1f:84:ff:3b:31:f8:4f:
 79 |     f3:b3
 80 | Public Exponent (e):
 81 |     1234 (0x4d2)
 82 | 
85 | 86 |

User Agent

87 | 88 | 89 | 90 | 91 | 92 | 104 | 105 |
Value
This is a really large user agent that will need to be word wrapped.This is a really large user
 93 | agent that will need to be word wrapped.This is a really large user agent that will need to be word
 94 | wrapped.This is a really large user agent that will need to be word wrapped.This is a really large
 95 | user agent that will need to be word wrapped.This is a really large user agent that will need to be
 96 | word wrapped.This is a really large user agent that will need to be word wrapped.This is a really
 97 | large user agent that will need to be word wrapped.This is a really large user agent that will need
 98 | to be word wrapped.This is a really large user agent that will need to be word wrapped.This is a
 99 | really large user agent that will need to be word wrapped.This is a really large user agent that
100 | will need to be word wrapped.This is a really large user agent that will need to be word
101 | wrapped.This is a really large user agent that will need to be word wrapped.This is a really large
102 | user agent that will need to be word wrapped.This is a really large user agent that will need to be
103 | word wrapped.
106 | 107 |

File Tree

108 |
109 | <input_file.bin (1e50210a0202497fb79bc38b6ade6c34) : SuperMalware Implant>
110 | 
111 | 112 | -------------------------------------------------------------------------------- /mwcp/config/__init__.py: -------------------------------------------------------------------------------- 1 | """Stores default configuration values.""" 2 | 3 | import logging 4 | import os 5 | import pathlib 6 | import pkg_resources 7 | 8 | import appdirs 9 | from ruamel.yaml import YAML 10 | from ruamel.yaml.scanner import ScannerError 11 | 12 | from mwcp.exceptions import ConfigError 13 | 14 | 15 | logger = logging.getLogger(__name__) 16 | yaml = YAML() 17 | 18 | 19 | class Config(dict): 20 | 21 | CONFIG_FILE_NAME = "config.yml" 22 | USER_CONFIG_DIR = pathlib.Path(appdirs.user_config_dir("mwcp")) 23 | 24 | # Fields which contain a file or directory path. 25 | PATH_FIELDS = ["LOG_CONFIG_PATH", "TESTCASE_DIR", "MALWARE_REPO", "PARSER_DIR", "PARSER_CONFIG_PATH", "YARA_REPO"] 26 | TESTING_FIELDS = ["TESTCASE_DIR", "MALWARE_REPO"] 27 | 28 | def __init__(self, **kwargs): 29 | super().__init__(**kwargs) 30 | # We are going to manually add the fields.json path because 31 | # the fields.json file is not currently designed to be modified. 32 | self["FIELDS_PATH"] = os.path.abspath(pkg_resources.resource_filename("mwcp.config", "fields.json")) 33 | 34 | def __repr__(self): 35 | return f"Config({super().__repr__()})" 36 | 37 | def clear(self): 38 | """Clears config (and re-adds FIELDS_PATH)""" 39 | super().clear() 40 | self.__init__() 41 | 42 | @property 43 | def user_config_dir(self) -> pathlib.Path: 44 | cfg_dir = self.USER_CONFIG_DIR 45 | cfg_dir.mkdir(parents=True, exist_ok=True) 46 | return cfg_dir 47 | 48 | @property 49 | def user_path(self) -> pathlib.Path: 50 | """Returns the path to the user config file.""" 51 | # Get user directory. 52 | cfg_dir = self.user_config_dir 53 | 54 | # Create a user copy if it doesn't exist. 
55 | cfg_file_path = cfg_dir / self.CONFIG_FILE_NAME 56 | if not cfg_file_path.exists(): 57 | with pkg_resources.resource_stream("mwcp.config", self.CONFIG_FILE_NAME) as default_cfg: 58 | with open(cfg_file_path, "wb") as fp: 59 | fp.write(default_cfg.read()) 60 | 61 | # Also copy over log_config.yml 62 | log_config_path = cfg_dir / "log_config.yml" 63 | if not log_config_path.exists(): 64 | with pkg_resources.resource_stream("mwcp.config", "log_config.yml") as default_log_cfg: 65 | with open(log_config_path, "wb") as fp: 66 | fp.write(default_log_cfg.read()) 67 | 68 | return cfg_file_path 69 | 70 | @property 71 | def pytest_cache_dir(self) -> pathlib.Path: 72 | return self.user_config_dir / ".pytest_cache" 73 | 74 | def load(self, file_path=None, production=False): 75 | """ 76 | Loads configuration file. 77 | 78 | :param file_path: Path to configuration file. (defaults to `config.yml` in user config directory) 79 | :param production: Whether we are loading configuration for a production server. 80 | In this mode, the fields for testing (MALWARE_REPO, TESTCASE_DIR) are ignored. 81 | """ 82 | if not file_path: 83 | file_path = self.user_path 84 | 85 | # Convert str file_path to maintain backwards compatibility with previous function definition 86 | if isinstance(file_path, str): 87 | file_path = pathlib.Path(file_path) 88 | 89 | with open(file_path, "r") as fp: 90 | try: 91 | config = dict(yaml.load(fp)) 92 | except ScannerError as e: 93 | raise ConfigError(f"Error parsing config: {e}") 94 | 95 | # Remove testing fields if in production. 96 | # This lets us continue using the same configuration as in development without exposing testing parameters. 97 | if production: 98 | for key in self.TESTING_FIELDS: 99 | config.pop(key, None) 100 | 101 | # Convert file path into absolute paths. 
102 | directory = str(file_path.parent) 103 | for key, value in config.items(): 104 | if key in self.PATH_FIELDS: 105 | value = os.path.expanduser(value) 106 | value = os.path.expandvars(value) 107 | value = os.path.join(directory, value) 108 | value = os.path.abspath(value) 109 | config[key] = value 110 | self.update(config) 111 | self.validate() 112 | 113 | def validate(self): 114 | """ 115 | Validates configuration. 116 | 117 | :raises ConfigError: If there is an issue with the configuration. 118 | """ 119 | for key, value in self.items(): 120 | if key in self.PATH_FIELDS: 121 | if not pathlib.Path(value).exists(): 122 | raise ConfigError(f"Invalid path for {key}: {value}") 123 | 124 | 125 | _config = Config() 126 | -------------------------------------------------------------------------------- /mwcp/tests/test_issues.py: -------------------------------------------------------------------------------- 1 | """Tests for found bugs/issues.""" 2 | 3 | import csv 4 | import io 5 | import sys 6 | 7 | from click.testing import CliRunner 8 | 9 | import mwcp 10 | from mwcp import cli, metadata 11 | 12 | 13 | def test_csv_row_bug_legacy(tmp_path, test_dir): 14 | """ 15 | Tests bug where first row is formatted different from other rows. 16 | Occurs when outputting csv and input file is a directory. 
17 | """ 18 | runner = CliRunner(mix_stderr=False) 19 | 20 | with runner.isolated_filesystem(tmp_path): 21 | 22 | ret = runner.invoke(cli.main, [ 23 | "parse", "foo", 24 | "--format", "csv", str(test_dir / "*"), 25 | "--no-output-files", 26 | "--legacy", 27 | ]) 28 | print(ret.stdout) 29 | print(ret.stderr, file=sys.stderr) 30 | assert ret.exit_code == 0 31 | 32 | reader = csv.reader(io.StringIO(ret.stdout)) 33 | rows = list(reader) 34 | assert len(rows) == len(test_dir.listdir()) + 1 35 | assert rows[0] == ["scan_date", "inputfilename", "outputfile.name", 36 | "outputfile.description", "outputfile.md5", "address", "debug", "url"] 37 | for i, row in enumerate(rows[1:]): 38 | assert row[0] and row[1] 39 | # Test entries except the timestamp and full file path. 40 | # NOTE: order is not guaranteed due to glob pattern, therefore we are testing all but 41 | # the debug message which contains the input filename. 42 | assert row[2] == "fooconfigtest.txt" 43 | assert row[3] == "example output file" 44 | assert row[4] == "5eb63bbbe01eeed093cb22bb8f5acdc3" 45 | # TODO: Figure out how to guarantee file order. 46 | # assert row[2:] == [ 47 | # "fooconfigtest.txt", 48 | # "example output file", 49 | # "5eb63bbbe01eeed093cb22bb8f5acdc3", 50 | # "127.0.0.1", 51 | # ("[+] File test_{0}.txt identified as Foo.\n" 52 | # "[+] size of inputfile is 23 bytes\n" 53 | # "[+] operating on inputfile test_{0}.txt").format(i), 54 | # "http://127.0.0.1", 55 | # ] 56 | 57 | def test_missing_residual_file_with_UnableToParse(tmpdir, make_sample_parser): 58 | """ 59 | Tests bug where residual file isn't reported if a nested parser raises an UnableToParse error on it and 60 | no other parser picks it up. 61 | 62 | Also tests to ensure misidentified file's description gets reset. 
63 | """ 64 | # language=Python 65 | CODE = """ 66 | from mwcp import FileObject, Parser, UnableToParse 67 | 68 | 69 | class Carrier(Parser): 70 | DESCRIPTION = "TestParser Carrier" 71 | 72 | @classmethod 73 | def identify(cls, file_object): 74 | return file_object.name == "carrier.txt" 75 | 76 | def run(self): 77 | self.logger.info("in Carrier parser") 78 | self.dispatcher.add(FileObject(b"I'm a downloader", file_name="downloader.txt")) 79 | 80 | 81 | class Downloader(Parser): 82 | DESCRIPTION = "TestParser Downloader" 83 | 84 | @classmethod 85 | def identify(cls, file_object): 86 | return file_object.name == "downloader.txt" 87 | 88 | def run(self): 89 | self.logger.info("in Downloader parser") 90 | self.dispatcher.add(FileObject(b"I'm a false implant", file_name="implant.txt")) 91 | self.dispatcher.add(FileObject(b"I'm something else that doesn't get identified.", file_name="other.txt")) 92 | 93 | 94 | class Implant(Parser): 95 | DESCRIPTION = "TestParser Implant" 96 | 97 | @classmethod 98 | def identify(cls, file_object): 99 | return file_object.name == "implant.txt" 100 | 101 | def run(self): 102 | self.logger.info("in Implant parser") 103 | raise UnableToParse("Oops, misidentified.") 104 | """ 105 | # language=Yaml 106 | CONFIG = """ 107 | RootParser: 108 | description: root parser 109 | parsers: 110 | - SubParser 111 | 112 | SubParser: 113 | description: sub parser 114 | parsers: 115 | - .Carrier 116 | - .Downloader 117 | - .Implant 118 | """ 119 | parser_path, config_file = make_sample_parser(parser_name="SubParser", parser_code=CODE, config_text=CONFIG) 120 | mwcp.register_parser_directory(str(parser_path.dirname), config_file_path=str(config_file), source_name="ACME") 121 | 122 | input_file = tmpdir / "carrier.txt" 123 | input_file.write_binary(b"I'm a carrier") 124 | output_directory = tmpdir / "output" 125 | output_directory.mkdir() 126 | 127 | report = mwcp.run("RootParser", file_path=str(input_file), output_directory=output_directory) 128 | 
"""
Visual Basic
"""

import pathlib
import string


def istext(s, threshold=0.30):
    """
    Determine whether the byte string *s* looks like ASCII text.

    Based on the recipe from the Python Cookbook (2nd ed., recipe 1.12):
    www.safaribooksonline.com/library/view/python-cookbook-2nd/0596007973/ch01s12.html

    :param s: input byte string
    :param threshold: maximum allowed fraction of non-printable characters (0 - 1)

    :return: bool
    """
    # Empty data or data containing NUL bytes is never considered text.
    if not s or b"\0" in s:
        return False

    printable = string.printable.encode()
    # Delete every printable character; whatever remains is the "non-text" portion.
    non_text = s.translate(bytes.maketrans(b"", b""), printable)
    # s is 'text' if at most `threshold` of its characters are non-printable.
    return len(non_text) / len(s) <= threshold
from mwcp import Parser, FileObject


class VBScript(Parser):
    """
    Identifies a VBS script.
    """
    DESCRIPTION = "VBScript"

    VB_KEYWORDS = [b"dim ", b"sub ", b"end sub", b"end function", b"createobject("]

    @classmethod
    def identify(cls, file_object):
        """
        Identify VB code based on the existence of specific VBS keywords.

        :param file_object: dispatcher.FileObject object

        :return: bool
        """
        data = file_object.data.lower()
        # Must look like ASCII text before keyword matching is meaningful.
        if not istext(data):
            return False
        return any(keyword in data for keyword in cls.VB_KEYWORDS)


class VBE(Parser):
    """
    Finds and extracts VBE encoded VBScript from file.
    """
    DESCRIPTION = "Encoded VBScript"

    START_TAG = b"#@~^"
    END_TAG = b"==^#~@"

    # Rotating selector: picks which of the 3 alphabet columns decodes each position.
    WHICH = "1231232332321323132311233213233211323231311231321323112331123132"

    @classmethod
    def identify(cls, file_object):
        """
        Check file magic to validate file contains a VBE
        (not just checking first bytes because we could be an ASP file)

        :param dispatcher.FileObject file_object: Input file

        :return bool: If parameters are met
        """
        data = file_object.data
        if cls.START_TAG not in data or cls.END_TAG not in data:
            return False
        # Start tag should be found somewhere in the beginning of file.
        # May not be immediately in the beginning if script is in an ASP.
        return data.index(cls.START_TAG) < 60

    def _generate_alphabet(self):
        # Substitution table: 3 candidate replacement characters per code point.
        table = [chr(i) * 3 for i in range(128)]
        table[32:128] = [
            '.-2', 'Gu0', 'zR!', 'V`)', 'Bq[', 'j^8', '/I3', '&\\=', 'IbX', 'A}:', '4)5', '26e',
            '[ 9', 'v|\\', 'rzV', 'C\x7fs', '8kf', '9cN', 'p3E', 'E+k', 'hhb', 'qQY', 'Ofx',
            '\tv^', 'b1}', 'DdJ', '#Tm', 'uCq', '<<<', '~:`', '>>>', '^~S', '@@@', 'wEB', 'J,\'',
            'a*H', ']tr', '"\'u', 'K71', 'oD7', 'NyM', ';YR', 'L/"', 'PoT', 'g&j', '*rG', '}jd',
            't9-', 'T{ ', '+?\x7f', '-8.', ',wL', '0g]', 'nS~', 'kGl', 'f4o', '5xy', '%]t', '!0C',
            'd#&', 'MZv', 'R[%', 'cl$', '?H+', '{U(', 'xp#', ')iA', '(.4', 'sL\t', 'Y!*', '3$D',
            '\x7fN?', 'mPw', 'U\t;', 'SVU', '|si', ':5a', '_ac', 'eKP', 'FXg', 'X;Q', '1WI',
            'i"O', 'lmF', 'ZMh', 'H%|', '\'(6', '\\Fp', '=Jn', '$2z', 'yA/', '7=_', '`_K', 'QOZ',
            ' B,', '6eW'
        ]
        table[9] = 'Wn{'
        return table

    def decode_vbe(self) -> str:
        """
        Decodes and returns embedded VBE script.
        """
        raw = self.file_object.data

        # Carve out the encoded section between the start/end markers,
        # skipping the 8-byte header after the start tag and the 6-byte footer.
        begin = raw.index(self.START_TAG) + len(self.START_TAG) + 8
        finish = raw.index(self.END_TAG) - 6
        encoded = raw[begin:finish].decode("utf-8")

        # Undo the escape sequences used by the encoder (order preserved).
        for escape, replacement in (('@&', '\x0a'), ('@#', '\x0d'), ('@*', '>'), ('@!', '<'), ('@$', '@')):
            encoded = encoded.replace(escape, replacement)

        # Decode each character through the rotating substitution alphabets;
        # characters >= 128 pass through unchanged.
        table = self._generate_alphabet()
        decoded = [
            table[ord(ch)][int(self.WHICH[i % 64]) - 1] if ord(ch) < 128 else ch
            for i, ch in enumerate(encoded)
        ]
        return "".join(decoded)

    def run(self):
        decoded = self.decode_vbe().encode("utf8")
        # Base filename off original if entire file is encoded piece.
        if self.file_object.data.startswith(self.START_TAG):
            stem = pathlib.Path(self.file_object.name).stem
            new_file = FileObject(decoded, file_name=f"{stem}.vb")
        else:
            new_file = FileObject(decoded, ext=".vb")
        self.dispatcher.add(new_file)


class EncodedASP(VBE):
    """
    Identifies ASP file with VBE.
    """
    DESCRIPTION = "ASP with Encoded VBScript"

    # An ASP wrapper places the VBE start marker directly after the ASP open tag.
    START_TAG = b"<%" + VBE.START_TAG
"""
A central location to store common windows enumerations.
This module will be imported along with 'from mwcp.utils import construct'
"""

from .core import *

# Visible interface. Add the classes and functions you would like to be available for users of construct
# library here.
__all__ = ['RegHive', 'LanguageIdentifier', 'KnownFolderID', 'AlgorithmID']


# Windows registry root-hive handle values (winreg.h HKEY_* predefined keys).
REGHIVES = {
    "HKCR": 0x80000000,
    "HKCU": 0x80000001,
    "HKLM": 0x80000002,
    "HKU": 0x80000003,
    "HKPD": 0x80000004,
    "HKCC": 0x80000005,
    "HKDD": 0x80000006,
}


def RegHive(subcon):
    r"""
    Converts an integer to registry hive enum.

    >>> RegHive(Int32ul).build("HKCU")
    b'\x01\x00\x00\x80'
    >>> str(RegHive(Int32ul).parse(b'\x01\x00\x00\x80'))
    'HKCU'
    """
    return Enum(subcon, **REGHIVES)


# TODO: Extend dictionary to incorporate more languages
LANGUAGEIDENTIFIERS = {
    "English (United States)": 0x409,
    "Korean": 0x412,
    "Chinese (PRC)": 0x804,
}


def LanguageIdentifier(subcon):
    r"""
    Converts an integer to language identifier enum

    >>> LanguageIdentifier(Int32ul).build("English (United States)")
    b'\t\x04\x00\x00'
    >>> str(LanguageIdentifier(Int32ul).parse(b"\x04\x08\x00\x00"))
    'Chinese (PRC)'
    """
    return Enum(subcon, **LANGUAGEIDENTIFIERS)


# CSIDL values identifying Windows special folders (shlobj.h).
CSIDL = {
    'CSIDL_SYSTEM': 37,
    'CSIDL_COMMON_PROGRAMS': 23,
    'CSIDL_PROFILE': 40,
    'CSIDL_ALTSTARTUP': 29,
    'CSIDL_LOCAL_APPDATA': 28,
    'CSIDL_PRINTHOOD': 27,
    'CSIDL_FONTS': 20,
    'CSIDL_PROGRAM_FILES_COMMON': 43,
    'CSIDL_PROGRAM_FILESX86': 42,
    'CSIDL_MYDOCUMENTS': 5,
    'CSIDL_MYVIDEO': 14,
    'CSIDL_PROGRAM_FILES': 38,
    'CSIDL_ADMINTOOLS': 48,
    'CSIDL_COMMON_DOCUMENTS': 46,
    'CSIDL_CONNECTIONS': 49,
    'CSIDL_COMMON_ALTSTARTUP': 30,
    'CSIDL_DRIVES': 17,
    'CSIDL_RESOURCES_LOCALIZED': 57,
    'CSIDL_HISTORY': 34,
    'CSIDL_NETHOOD': 19,
    'CSIDL_CDBURN_AREA': 59,
    'CSIDL_COMMON_DESKTOPDIRECTORY': 25,
    'CSIDL_SYSTEMX86': 41,
    'CSIDL_COMMON_TEMPLATES': 45,
    'CSIDL_MYPICTURES': 39,
    'CSIDL_COMMON_VIDEO': 55,
    'CSIDL_COMMON_STARTMENU': 22,
    'CSIDL_COMMON_FAVORITES': 31,
    'CSIDL_INTERNET_CACHE': 32,
    'CSIDL_WINDOWS': 36,
    'CSIDL_COMMON_PICTURES': 54,
    'CSIDL_COMMON_APPDATA': 35,
    'CSIDL_DESKTOPDIRECTORY': 16,
    'CSIDL_RESOURCES': 56,
    'CSIDL_COMMON_MUSIC': 53,
    'CSIDL_COMMON_OEM_LINKS': 58,
    'CSIDL_NETWORK': 18,
    'CSIDL_COOKIES': 33,
    'CSIDL_COMPUTERSNEARME': 61,
    'CSIDL_COMMON_ADMINTOOLS': 47,
    'CSIDL_APPDATA': 26,
    'CSIDL_TEMPLATES': 21,
    'CSIDL_COMMON_STARTUP': 24,
    'CSIDL_MYMUSIC': 13,
    'CSIDL_PROGRAM_FILES_COMMONX86': 44
}


def KnownFolderID(subcon):
    r"""
    Converts an integer to a CSIDL (KnownFolderID) value

    >>> KnownFolderID(Int32ul).build("CSIDL_SYSTEM")
    b'%\x00\x00\x00'
    >>> str(KnownFolderID(Int32ul).parse(b"\x18\x00\x00\x00"))
    'CSIDL_COMMON_STARTUP'
    """
    return Enum(subcon, **CSIDL)


# CryptoAPI ALG_ID values (wincrypt.h).
ALGIDS = {
    'CALG_DSS_SIGN': 0x00002200,
    'CALG_DES': 0x00006601,
    'CALG_DH_EPHEM': 0x0000aa02,
    'CALG_3DES': 0x00006603,
    'CALG_DESX': 0x00006604,
    'CALG_ECDH': 0x0000aa05,
    'CALG_NO_SIGN': 0x00002000,
    'CALG_DH_SF': 0x0000aa01,
    # Fixed typo: previously misspelled 'CALC_SSL3_SHAMD5'; wincrypt.h defines
    # CALG_SSL3_SHAMD5 = 0x00008008.
    'CALG_SSL3_SHAMD5': 0x00008008,
    'CALG_3DES_112': 0x00006609,
    'CALG_SKIPJACK': 0x0000660a,
    'CALG_HASH_REPLACE_OWF': 0x0000800b,
    'CALG_CYLINK_MEK': 0x0000660c,
    'CALG_MD4': 0x00008002,
    'CALG_AES_128': 0x0000660e,
    'CALG_AES_192': 0x0000660f,
    'CALG_AES_256': 0x00006610,
    'CALG_AES': 0x00006611,
    'CALG_AGREEDKEY_ANY': 0x0000aa03,
    'CALG_SHA1': 0x00008004,
    'CALG_MAC': 0x00008005,
    'CALG_MD2': 0x00008001,
    'CALG_TLS1_MASTER': 0x00004c06,
    'CALG_RSA_SIGN': 0x00002400,
    'CALG_SCHANNEL_ENC_KEY': 0x00004c07,
    'CALG_HMAC': 0x00008009,
    'CALG_TLS1PRF': 0x0000800a,
    'CALG_TEK': 0x0000660b,
    'CALG_SHA_256': 0x0000800c,
    'CALG_SHA_384': 0x0000800d,
    'CALG_SHA_512': 0x0000800e,
    'CALG_HUGHES_MD5': 0x0000a003,
    'CALG_RC4': 0x00006801,
    'CALG_ECDSA': 0x00002203,
    'CALG_RC2': 0x00006602,
    'CALG_SEAL': 0x00006802,
    'CALG_SSL3_MASTER': 0x00004c01,
    'CALG_SCHANNEL_MASTER_HASH': 0x00004c02,
    'CALG_MD5': 0x00008003,
    'CALG_SCHANNEL_MAC_KEY': 0x00004c03,
    # Fixed typo: previously 'CALG_KEY_KEYX'; wincrypt.h defines
    # CALG_KEA_KEYX = 0x0000aa04.
    'CALG_KEA_KEYX': 0x0000aa04,
    'CALG_ECMQV': 0x0000a001,
    'CALG_PCT1_MASTER': 0x00004c04,
    'CALG_RSA_KEYX': 0x0000a400,
    'CALG_OID_INFO_CNG_ONLY': 0xffffffff,
    'CALG_SSL2_MASTER': 0x00004c05,
    'CALG_OID_INFO_PARAMETERS': 0xfffffffe,
}


def AlgorithmID(subcon):
    r"""
    Converts an integer to an AlgorithmID value

    >>> str(AlgorithmID(Int16ul).parse(b"\x00\xa4"))
    'CALG_RSA_KEYX'
    >>> AlgorithmID(Int16ul).build("CALG_RC4")
    b'\x01h'
    """
    return Enum(subcon, **ALGIDS)
# coding=utf-8
"""
Tests the legacy features of mwcp.Reporter object.

These features are now replaced by test_report.py and test_runner.py
"""

import os

import pytest

import mwcp


# Each case: a legacy metadata key, the raw value passed in, and the full set of
# derived fields the legacy reporter is expected to expand it into.
@pytest.mark.parametrize('key,value,expected', [
    ('filepath', br'C:\dir\file.txt', {
        'filepath': [r'C:\dir\file.txt'],
        'filename': ['file.txt'],
        'directory': [r'C:\dir']
    }),
    ('servicedll', br'C:\Windows\Temp\1.tmp', {
        'servicedll': [r'C:\Windows\Temp\1.tmp'],
        'filepath': [r'C:\Windows\Temp\1.tmp'],
        'filename': ['1.tmp'],
        'directory': [r'C:\Windows\Temp']
    }),
    ('c2_url', b'http://[fe80::20c:1234:5678:9abc]:80/badness', {
        'c2_url': ['http://[fe80::20c:1234:5678:9abc]:80/badness'],
        'url': ['http://[fe80::20c:1234:5678:9abc]:80/badness'],
        'urlpath': ['/badness'],
        'c2_socketaddress': [['fe80::20c:1234:5678:9abc', '80', '']],
        'socketaddress': [['fe80::20c:1234:5678:9abc', '80', '']],
        'c2_address': ['fe80::20c:1234:5678:9abc'],
        'address': ['fe80::20c:1234:5678:9abc'],
        'port': [['80', '']]
    }),
    ('url', b'http://127.0.0.1/really/bad?hostname=pwned', {
        'url': ['http://127.0.0.1/really/bad?hostname=pwned'],
        'urlpath': ['/really/bad'],
        'address': ['127.0.0.1']
    }),
    ('proxy', (b'admin', b'pass', b'192.168.1.1', b'80', 'tcp'), {
        'proxy': [['admin', 'pass', '192.168.1.1', '80', 'tcp']],
        'proxy_socketaddress': [['192.168.1.1', '80', 'tcp']],
        'socketaddress': [['192.168.1.1', '80', 'tcp']],
        'proxy_address': ['192.168.1.1'],
        'address': ['192.168.1.1'],
        'port': [['80', 'tcp']],
        'credential': [['admin', 'pass']],
        'password': ['pass'],
        'username': ['admin']
    }),
    ('rsa_private_key', ('0x7', '0xbb', '0x17', '0x11', '0xb', '0x7', '0x3', '0xe'), {
        'rsa_private_key': [['0x7', '0xbb', '0x17', '0x11', '0xb', '0x7', '0x3', '0xe']]
    }),
    # Test auto padding.
    ('rsa_private_key', ('0x7', '0xbb', '0x17', '0x11', '0xb'), {
        'rsa_private_key': [['0x7', '0xbb', '0x17', '0x11', '0xb', '', '', '']]
    }),
    ('other', {b'foo': b'bar', 'biz': 'baz'}, {
        'other': {
            'foo': 'bar',
            'biz': 'baz'
        }
    })
])
def test_add_metadata(key, value, expected):
    report = mwcp.Report()
    with report:
        report.add_metadata(key, value)
    assert report.metadata == expected


def test_other_add_metadata():
    """Tests that adding multiple 'other' keys of same will convert to a list."""
    report = mwcp.Report()
    with report:
        report.add_metadata('other', {b'foo': b'bar', 'biz': 'baz'})
        assert report.metadata == {'other': {'foo': 'bar', 'biz': 'baz'}}
        # Adding the same key again should merge the values into a list.
        report.add_metadata('other', {b'foo': b'boop'})
        assert report.metadata == {'other': {'foo': ['bar', 'boop'], 'biz': 'baz'}}


def test_output_file(tmpdir):
    # Output filenames are prefixed with the first 5 hex digits of the MD5.
    test_file = tmpdir / '9c91e_foo.txt'
    report = mwcp.Report(output_directory=str(tmpdir))
    with report:
        assert report.output_file(b'This is data!', 'foo.txt', description='A foo file') == str(test_file)

        assert test_file.exists()
        assert test_file.read_binary() == b'This is data!'
        assert report.metadata['outputfile'] == [
            ['foo.txt', 'A foo file', '9c91e665b5b7ba5a3066c92dd02d3d7c']
        ]

        # Add file with same name to test name collision code.
        test_file = tmpdir / '4d8cf_foo.txt'
        assert report.output_file(b'More data!', 'foo.txt', description='Another foo file') == str(test_file)

        assert test_file.exists()
        assert test_file.read_binary() == b'More data!'
        assert report.metadata['outputfile'] == [
            ['foo.txt', 'A foo file', '9c91e665b5b7ba5a3066c92dd02d3d7c'],
            ['foo.txt', 'Another foo file', '4d8cfa4b19f5f971b0e6d79250cb1321'],
        ]

    # Test file sanitization
    test_file = tmpdir / '6f1ed_hello.txt'
    report = mwcp.Report(output_directory=str(tmpdir))
    with report:
        assert report.output_file(b'blah', u'héllo!!\x08.txt') == str(test_file)

    assert test_file.exists()
    assert test_file.read_binary() == b'blah'
    # The reported name keeps the original (unsanitized) filename.
    assert report.metadata['outputfile'] == [
        [u'héllo!!\x08.txt', '', '6f1ed002ab5595859014ebf0951522d9']
    ]


def test_print_report(datadir):
    """Tests the text report generation."""
    report = mwcp.Report()
    with report:
        report.add_metadata('proxy', (b'admin', b'pass', b'192.168.1.1', b'80', 'tcp'))
        report.add_metadata('other', {b'foo': 'bar', 'biz': b'baz\x00\x01'})
        report.output_file(b'data', 'file_1.exe', 'example output file')

    print(report.as_text())
    # Compare against the checked-in expected report text fixture.
    assert report.as_text() == (datadir / "report.txt").read_text()


# TODO: Deal with field ordering?
# def test_standard_field_order():
#     """Tests that STANDARD_FIELD_ORDER is updated to the field.json file."""
#     with open(mwcp.config.get("FIELDS_PATH"), "rb") as f:
#         fields = json.load(f)
#
#     ignore_fields = INFO_FIELD_ORDER + ["debug", "other", "outputfile"]
#
#     assert sorted(STANDARD_FIELD_ORDER) == sorted(set(fields.keys()) - set(ignore_fields))
41 | """ 42 | magic = b'MEI\x0C\x0B\x0A\x0B\x0E' 43 | # pyinstaller 2.0 44 | if file_object.data[-24:-24 + len(magic)] == magic: 45 | cookie_spec = construct.Struct( 46 | "magic" / construct.Const(magic), 47 | "package_size" / construct.Int32ub, 48 | "toc_offset" / construct.Int32ub, 49 | "toc_entries" / construct.Int32ub, 50 | "python_version" / construct.Int32ub, 51 | ) 52 | return True, cookie_spec 53 | 54 | # pyinstaller 2.1+ 55 | elif file_object.data[-88:-88+len(magic)] == magic: 56 | cookie_spec = construct.Struct( 57 | "magic" / construct.Const(magic), 58 | "package_size" / construct.Int32ub, 59 | "toc_offset" / construct.Int32ub, 60 | "toc_entries" / construct.Int32ub, 61 | "python_version" / construct.Int32ub, 62 | "python_dll" / construct.String(64), 63 | ) 64 | return True, cookie_spec 65 | 66 | return False 67 | 68 | def extract_entry(self, entry, hdr: bytes) -> Optional[FileObject]: 69 | """ 70 | Extracts file data from table entry and returns it as a FileObject. 71 | """ 72 | if not entry.data: 73 | return 74 | 75 | name = entry.name 76 | data = entry.data 77 | 78 | if entry.type in ('s', 'm', 'M'): # python script/module/package 79 | if entry.type == 's' and entry.data[1:4] != b"\x00\x00\x00": # uncompiled python code 80 | name += ".py" 81 | else: 82 | # it is a marshalled code object 83 | # we need to add the pyc header to the data so it can be decompiled 84 | name += ".pyc" 85 | data = hdr + data 86 | 87 | # TODO: Create a PYZ parser for extracting out individually compressed components. 88 | # This will require determining a way to safely unmarshal data. 
89 | # (PyInstaller/loader/pyimod01_archive.py) 90 | # case 'z': # zlib archive (pyz) 91 | # case 'n': # symbolic link 92 | # case 'b': # binary 93 | # case 'Z': # zlib (pyz) - frozen Python code (zipfile) 94 | # case 'x': # data 95 | # case 'l': # splash resource 96 | 97 | return FileObject(data, file_name=name) 98 | 99 | def run(self, cookie_spec: construct.Struct): 100 | """ 101 | Extract the cookie information in order to extract and parse the table of contents. Identify the .manifest 102 | filename in order to obtain the name of the target script to add to the dispatcher. 103 | """ 104 | cookie_size = cookie_spec.sizeof() 105 | 106 | cookie = cookie_spec.parse(self.file_object.data[-cookie_size:]) 107 | package = self.file_object.data[-cookie.package_size: -cookie_size] 108 | package_spec = construct.Struct( 109 | construct.Padding(cookie.toc_offset), 110 | "toc" / self.TABLE_ENTRY[:], 111 | ) 112 | info = package_spec.parse(package) 113 | 114 | python_version = str(cookie.python_version)[0] + "." + str(cookie.python_version)[1:] 115 | self.report.add(Version(python_version).add_tag("Python")) 116 | 117 | # Extract files base on .manifest files. 118 | pyz = None 119 | target_names = [] 120 | for entry in info.toc: 121 | if entry.name == "PYZ-00.pyz": 122 | pyz = entry 123 | elif entry.name.endswith(".manifest"): 124 | target_names.append(os.path.splitext(entry.name)[0].replace(".exe", '')) 125 | 126 | # Determine header for pyc files. 127 | if pyz: 128 | hdr = pyz.data[4:8] + b'\x00' * 12 129 | elif cookie.python_version >= 37: # PEP 552 -- Deterministic pycs 130 | hdr = b"\x42\x0d\x0d\x0a" + b'\0' * 12 # Bitfield, Timestamp, size 131 | elif cookie.python_version >= 33: 132 | hdr = b"\x42\x0d\x0d\x0a" + b'\0' * 8 # (Timestamp + size) 133 | else: 134 | hdr = b"\x03\xF3\x0D\x0A" + b'\0' * 4 # Timestamp 135 | 136 | # If we had a .manifest, only extract those files. 
137 | if target_names: 138 | for entry in info.toc: 139 | if entry.name in target_names or entry.data.startswith(b"PYZ\x00"): 140 | if file := self.extract_entry(entry, hdr): 141 | self.dispatcher.add(file) 142 | else: 143 | for entry in info.toc: 144 | if file := self.extract_entry(entry, hdr): 145 | self.dispatcher.add(file) 146 | -------------------------------------------------------------------------------- /mwcp/config/fields.txt: -------------------------------------------------------------------------------- 1 | 2 | One of the primary goals of DC3-MWCP is to standardize malware configuration 3 | parser output. To this end, DC3-MWCP enforces a set of predefined fields for 4 | parser output. These fields, including their descriptions and examples can be 5 | viewed by using the -k option of the mwcp.py utility or by viewing 6 | fields.json. Below is an example of parser output using these definitions. 7 | 8 | To ensure data portability, values are encoded as strings. Also, since many 9 | items can have multiple values, most items are specified as lists. Lists are 10 | used to implement tuples which consist of atomic values which are necessarily, 11 | connected. For example, a credential consists of a username and password. In 12 | these tuples, order must be maintained. 13 | 14 | It will be noted that some values are duplicated. For example, the address in a 15 | socketaddress is also stored alone as an address. The framework performs this 16 | duplication to ensure that the data can be used flexibly. This duplication 17 | is allowed to ensure all cases are covered accurately. For example, a password 18 | may be used in malware without an associated or explicit username. In other 19 | cases there may be multiple credentials such that it is necessary to have the 20 | usernames associated with their respective passwords. 
In cases where all the 21 | data for a composite data type is not known, and the tuple provides no 22 | meaningful association of data, typically the less specific type is used. If an 23 | incomplete tuple provides a meaningful connection in multiple data items, then 24 | it should be used. Ex. in the case of a password used without a username, just 25 | the password field would be populated. However, in cases where the servicename 26 | and servicedisplayname are known, they would be combined into an incomplete 27 | tuple to maintain the link between these two items. When possible, the most 28 | comprehensive type is used. If not all values for a tuple are known, the empty 29 | string, "", is used. 30 | 31 | The standardized fields used here seek to encompass the most common 32 | malware configuration items. The goal of this field set is to facilitate 33 | generation of metadata that is comparable between different backdoors. These 34 | fields are necessarily abstract and backdoor parser authors should seek to 35 | follow the descriptions as well as possible. When in doubt, the data should be 36 | included in the standardized fields if it is of the correct data type. For 37 | example, if a domain is used for a domain lookup based port calculation, this 38 | address should be included in the address field. Due to the heavy focus on 39 | malware parameters that are typically mitigated, the c2_address, c2_url, 40 | and c2_socketaddress fields are included. These are duplicative of their 41 | respective general counterparts. It is recognized that these special c2 tagged 42 | items warrant special attention as they are used frequently as mitigation 43 | candidates. A c2_address is an address that is known to be used for command and 44 | control. It will be repeated as an address. It should be clear, however, that 45 | malware configuration does not become intelligence or even a collection of 46 | actionable indicators without vetting. 
47 | 48 | While the abstraction provided by these fields helps make the configuration of 49 | different backdoors more easily comparable, there is no intention to dumb down 50 | the parser output to fully remove backdoor specific context. The point of the 51 | "other" field is to contain backdoor specific key value pairs. These keys are 52 | arbitrary to permit flexibility in describing the peculiarities of individual 53 | malware families. It is through use of these "other" fields that an analyst can 54 | determine how a specific abstract item is used. For example, this allows one to 55 | determine if an address is used for a proxy, for a 56 | connectivity check, for a lookup based port calculation, etc. Hence, it is 57 | common for much or all of the data in the standardized fields to be duplicated 58 | in the "other" field. 59 | 60 | 61 | Example config parser output: 62 | 63 | { 64 | "address": [ 65 | "10.1.1.1", 66 | "192.168.1.1" 67 | ], 68 | "c2_address": [ 69 | "10.1.1.1", 70 | "192.168.1.1" 71 | ], 72 | "debug": [ 73 | "Config Offset: 0x5000", 74 | "Tertiary C2 not found" 75 | ], 76 | "interval": [ 77 | "30" 78 | ], 79 | "missionid": [ 80 | "orgA201502" 81 | ], 82 | "mutex": [ 83 | "ghurlrat94839d" 84 | ], 85 | "other": { 86 | "C2 password": "MMMMchicken8#@", 87 | "Campaign Marker": "orgA201502", 88 | "Enable Keylogger": "True", 89 | "Primary C2": "10.1.1.1|443", 90 | "Secondary C2": "192.168.1.1|443", 91 | "Sleep Timer": "30" 92 | }, 93 | "password": [ 94 | "MMMMchicken8#@" 95 | ], 96 | "port": [ 97 | [ 98 | "443", 99 | "tcp" 100 | ] 101 | ], 102 | "socketaddress": [ 103 | [ 104 | "10.1.1.1", 105 | "443", 106 | "tcp" 107 | ], 108 | [ 109 | "192.168.1.1", 110 | "443", 111 | "tcp" 112 | ], 113 | [ 114 | "192.168.1.1", 115 | "80", 116 | "tcp" 117 | ] 118 | ], 119 | "c2_socketaddress": [ 120 | [ 121 | "10.1.1.1", 122 | "443", 123 | "tcp" 124 | ], 125 | [ 126 | "192.168.1.1", 127 | "443", 128 | "tcp" 129 | ] 130 | ], 131 | "proxy": [ 132 | [ 133 | "admin", 134 
| "pass", 135 | "192.168.1.1", 136 | "80", 137 | "tcp" 138 | ] 139 | ], 140 | "proxy_socketaddress": [ 141 | [ 142 | "192.168.1.1", 143 | "80", 144 | "tcp" 145 | ] 146 | ], 147 | "email_address": [ 148 | "user@bad.com" 149 | ] 150 | } 151 | -------------------------------------------------------------------------------- /mwcp/utils/construct/windows_constants.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # IMAGE_SECTION_HEADER.Characteristics 4 | IMAGE_SCN_TYPE_NO_PAD = 'IMAGE_SCN_TYPE_NO_PAD' 5 | IMAGE_SCN_CNT_CODE = 'IMAGE_SCN_CNT_CODE' 6 | IMAGE_SCN_CNT_INITIALIZED_DATA = 'IMAGE_SCN_CNT_INITIALIZED_DATA' 7 | IMAGE_SCN_CNT_UNINITIALIZED_DATA = 'IMAGE_SCN_CNT_UNINITIALIZED_DATA' 8 | IMAGE_SCN_LNK_OTHER = 'IMAGE_SCN_LNK_OTHER' 9 | IMAGE_SCN_LNK_INFO = 'IMAGE_SCN_LNK_INFO' 10 | IMAGE_SCN_LNK_REMOVE = 'IMAGE_SCN_LNK_REMOVE' 11 | IMAGE_SCN_LNK_COMDAT = 'IMAGE_SCN_LNK_COMDAT' 12 | IMAGE_SCN_NO_DEFER_SPEC_EXC = 'IMAGE_SCN_NO_DEFER_SPEC_EXC' 13 | IMAGE_SCN_GPREL = 'IMAGE_SCN_GPREL' 14 | IMAGE_SCN_MEM_PURGEABLE = 'IMAGE_SCN_MEM_PURGEABLE' 15 | IMAGE_SCN_MEM_LOCKED = 'IMAGE_SCN_MEM_LOCKED' 16 | IMAGE_SCN_MEM_PRELOAD = 'IMAGE_SCN_MEM_PRELOAD' 17 | IMAGE_SCN_ALIGN_1BYTES = 'IMAGE_SCN_ALIGN_1BYTES' 18 | IMAGE_SCN_ALIGN_2BYTES = 'IMAGE_SCN_ALIGN_2BYTES' 19 | IMAGE_SCN_ALIGN_4BYTES = 'IMAGE_SCN_ALIGN_4BYTES' 20 | IMAGE_SCN_ALIGN_8BYTES = 'IMAGE_SCN_ALIGN_8BYTES' 21 | IMAGE_SCN_ALIGN_16BYTES = 'IMAGE_SCN_ALIGN_16BYTES' 22 | IMAGE_SCN_ALIGN_32BYTES = 'IMAGE_SCN_ALIGN_32BYTES' 23 | IMAGE_SCN_ALIGN_64BYTES = 'IMAGE_SCN_ALIGN_64BYTES' 24 | IMAGE_SCN_ALIGN_128BYTES = 'IMAGE_SCN_ALIGN_128BYTES' 25 | IMAGE_SCN_ALIGN_256BYTES = 'IMAGE_SCN_ALIGN_256BYTES' 26 | IMAGE_SCN_ALIGN_512BYTES = 'IMAGE_SCN_ALIGN_512BYTES' 27 | IMAGE_SCN_ALIGN_1024BYTES = 'IMAGE_SCN_ALIGN_1024BYTES' 28 | IMAGE_SCN_ALIGN_2048BYTES = 'IMAGE_SCN_ALIGN_2048BYTES' 29 | IMAGE_SCN_ALIGN_4096BYTES = 'IMAGE_SCN_ALIGN_4096BYTES' 30 | IMAGE_SCN_ALIGN_8192BYTES = 
'IMAGE_SCN_ALIGN_8192BYTES' 31 | IMAGE_SCN_LNK_NRELOC_OVFL = 'IMAGE_SCN_LNK_NRELOC_OVFL' 32 | IMAGE_SCN_MEM_DISCARDABLE = 'IMAGE_SCN_MEM_DISCARDABLE' 33 | IMAGE_SCN_MEM_NOT_CACHED = 'IMAGE_SCN_MEM_NOT_CACHED' 34 | IMAGE_SCN_MEM_NOT_PAGED = 'IMAGE_SCN_MEM_NOT_PAGED' 35 | IMAGE_SCN_MEM_SHARED = 'IMAGE_SCN_MEM_SHARED' 36 | IMAGE_SCN_MEM_EXECUTE = 'IMAGE_SCN_MEM_EXECUTE' 37 | IMAGE_SCN_MEM_READ = 'IMAGE_SCN_MEM_READ' 38 | IMAGE_SCN_MEM_WRITE = 'IMAGE_SCN_MEM_WRITE' 39 | 40 | # IMAGE_OPTIONAL_HEADER.Magic 41 | IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b 42 | IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b 43 | IMAGE_ROM_OPTIONAL_HDR_MAGIC = 0x107 44 | 45 | # IMAGE_OPTIONAL_HEADER.Subsystem 46 | IMAGE_SUBSYSTEM_UNKNOWN = 0 47 | IMAGE_SUBSYSTEM_NATIVE = 1 48 | IMAGE_SUBSYSTEM_WINDOWS_GUI = 2 49 | IMAGE_SUBSYSTEM_WINDOWS_CUI = 3 50 | IMAGE_SUBSYSTEM_OS2_CUI = 5 51 | IMAGE_SUBSYSTEM_POSIX_CUI = 7 52 | IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9 53 | IMAGE_SUBSYSTEM_EFI_APPLICATION = 10 54 | IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11 55 | IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12 56 | IMAGE_SUBSYSTEM_EFI_ROM = 13 57 | IMAGE_SUBSYSTEM_XBOX = 14 58 | IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16 59 | 60 | 61 | # Make default DataDirectory and standard indexes available for convenience. 62 | # WARNING: Make sure you make a copy of DEFAULT_DATA_DIRECTORIES!! 
# Standard indexes into IMAGE_OPTIONAL_HEADER.DataDirectory.
DATA_DIR_INDEX_EXPORTS = 0
DATA_DIR_INDEX_IMPORTS = 1
DATA_DIR_INDEX_RESOURCE = 2
DATA_DIR_INDEX_EXCEPTION = 3
DATA_DIR_INDEX_CERTIFICATE = 4
DATA_DIR_INDEX_BASE_RELOC = 5
DATA_DIR_INDEX_DEBUG = 6
DATA_DIR_INDEX_ARCHITECTURE = 7
DATA_DIR_INDEX_GLOBAL_PTR = 8
DATA_DIR_INDEX_TLS = 9
DATA_DIR_INDEX_LOAD_CONFIG = 10
DATA_DIR_INDEX_BOUND_IMPORT = 11
DATA_DIR_INDEX_IMPORT_ADDRESS = 12
DATA_DIR_INDEX_DELAY_IMPORT_DESCRIPTOR = 13
DATA_DIR_INDEX_CLR_HEADER = 14
# Default (zeroed) 16-entry DataDirectory table.
# Built with a comprehension so each entry is an INDEPENDENT dict.
# The previous form, [dict(VirtualAddress=0, Size=0)] * 16, aliased a single
# dict 16 times: mutating one entry mutated all of them, and copy.deepcopy()
# preserves internal sharing, so even a deep copy of the table kept the bug.
DEFAULT_DATA_DIRECTORIES = [dict(VirtualAddress=0, Size=0) for _ in range(16)]

# IMAGE_OPTIONAL_HEADER.DllCharacteristics
IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE = 'IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE'
IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY = 'IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY'
IMAGE_DLLCHARACTERISTICS_NX_COMPAT = 'IMAGE_DLLCHARACTERISTICS_NX_COMPAT'
IMAGE_DLLCHARACTERISTICS_NO_ISOLATION = 'IMAGE_DLLCHARACTERISTICS_NO_ISOLATION'
IMAGE_DLLCHARACTERISTICS_NO_SEH = 'IMAGE_DLLCHARACTERISTICS_NO_SEH'
IMAGE_DLLCHARACTERISTICS_NO_BIND = 'IMAGE_DLLCHARACTERISTICS_NO_BIND'
IMAGE_DLLCHARACTERISTICS_WDM_DRIVER = 'IMAGE_DLLCHARACTERISTICS_WDM_DRIVER'
IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE = 'IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE'

# IMAGE_FILE_HEADER.Machine
IMAGE_FILE_MACHINE_UNKNOWN = 0x0
IMAGE_FILE_MACHINE_AM33 = 0x1d3
IMAGE_FILE_MACHINE_AMD64 = 0x8664
IMAGE_FILE_MACHINE_ARM = 0x1c0
IMAGE_FILE_MACHINE_ARM64 = 0xaa64
IMAGE_FILE_MACHINE_ARMNT = 0x1c4
IMAGE_FILE_MACHINE_EBC = 0xebc
IMAGE_FILE_MACHINE_I386 = 0x14c
IMAGE_FILE_MACHINE_IA64 = 0x200
IMAGE_FILE_MACHINE_M32R = 0x9041
IMAGE_FILE_MACHINE_MIPS16 = 0x266
IMAGE_FILE_MACHINE_MIPSFPU = 0x366
IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466
IMAGE_FILE_MACHINE_POWERPC = 0x1f0
IMAGE_FILE_MACHINE_POWERPCFP = 0x1f1
IMAGE_FILE_MACHINE_R4000 = 0x166
IMAGE_FILE_MACHINE_RISCV32 = 0x5032 108 | IMAGE_FILE_MACHINE_RISCV64 = 0x5064 109 | IMAGE_FILE_MACHINE_RISCV128 = 0x5128 110 | IMAGE_FILE_MACHINE_SH3 = 0x1a2 111 | IMAGE_FILE_MACHINE_SH3DSP = 0x1a3 112 | IMAGE_FILE_MACHINE_SH4 = 0x1a6 113 | IMAGE_FILE_MACHINE_SH5 = 0x1a8 114 | IMAGE_FILE_MACHINE_THUMB = 0x1c2 115 | IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169 116 | 117 | 118 | # IMAGE_FILE_HEADER characterstics. 119 | IMAGE_FILE_RELOCS_STRIPPED = 'IMAGE_FILE_RELOCS_STRIPPED' 120 | IMAGE_FILE_EXECUTABLE_IMAGE = 'IMAGE_FILE_EXECUTABLE_IMAGE' 121 | IMAGE_FILE_LINE_NUMS_STRIPPED = 'IMAGE_FILE_LINE_NUMS_STRIPPED' 122 | IMAGE_FILE_LOCAL_SYMS_STRIPPED = 'IMAGE_FILE_LOCAL_SYMS_STRIPPED' 123 | IMAGE_FILE_AGGRESIVE_WS_TRIM = 'IMAGE_FILE_AGGRESIVE_WS_TRIM' 124 | IMAGE_FILE_LARGE_ADDRESS_AWARE = 'IMAGE_FILE_LARGE_ADDRESS_AWARE' 125 | IMAGE_FILE_BYTES_REVERSED_LO = 'IMAGE_FILE_BYTES_REVERSED_LO' 126 | IMAGE_FILE_32BIT_MACHINE = 'IMAGE_FILE_32BIT_MACHINE' 127 | IMAGE_FILE_DEBUG_STRIPPED = 'IMAGE_FILE_DEBUG_STRIPPED' 128 | IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 'IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP' 129 | IMAGE_FILE_NET_RUN_FROM_SWAP = 'IMAGE_FILE_NET_RUN_FROM_SWAP' 130 | IMAGE_FILE_SYSTEM = 'IMAGE_FILE_SYSTEM' 131 | IMAGE_FILE_DLL = 'IMAGE_FILE_DLL' 132 | IMAGE_FILE_UP_SYSTEM_ONLY = 'IMAGE_FILE_UP_SYSTEM_ONLY' 133 | IMAGE_FILE_BYTES_REVERSED_HI = 'IMAGE_FILE_BYTES_REVERSED_HI' -------------------------------------------------------------------------------- /mwcp/utils/construct/construct_template.html: -------------------------------------------------------------------------------- 1 | {# 2 | This is the html template used to convert parsed constructs into user-friendly html 3 | that can be used in reports. 4 | 5 | Please see construct_html.py for its use. 6 | #} 7 | 8 | 9 | 10 | 11 | 12 | 80 | 81 | 82 |
83 | 84 | 85 |

86 | 87 |  offset | 88 | {%- for i in range(width) -%} 89 |  {% if i < 16 %} {% endif %}{{'%x'|format(i)}} 90 | {%- endfor -%} 91 |  |  92 | {%- for _ in range(width) -%} 93 | {{'%x'|format(loop.cycle(*range(16)))}} {#- We only have enough space for the first digit -#} 94 | {%- endfor -%} 95 |
96 |  ------ | {{'-- ' * width}} | {{'-' * width}} 97 | {% for offset, hex_line, ascii_line in hex_dump %} 98 |
 {{offset}} | {{hex_line}} | {{ascii_line}} 99 | {% endfor %} 100 |
101 |

102 | 103 | 104 |

 

105 | 107 | 108 | 113 | 118 | 123 | 124 | 125 | {% for offset, (colors, member) in color_map.items()|sort %} 126 | 127 | {# Offset #} 128 | 133 | 134 | {# Name #} 135 | 142 | 143 | {# Value #} 144 | 151 | 152 | {% endfor %} 153 | 154 |
110 |

Offset

112 |
115 |

Name

117 |
120 |

Value

122 |
129 |

130 | {{'%06x' % offset}} 131 |

132 |
136 |

137 | 138 | {{member.name}} 139 | 140 |

141 |
145 |

146 | 147 | {{member.value_str|replace('\n', '
')|replace('\t', '  ')}} 148 |
149 |

150 |
155 |
156 | 157 | -------------------------------------------------------------------------------- /mwcp/stix/report_writer.py: -------------------------------------------------------------------------------- 1 | """ 2 | This serves as the STIX Report Writer. This expands on the same report every time write is called. 3 | A STIX package is generated and returned as a string when serialize is called 4 | """ 5 | 6 | from stix2 import v21 as stix 7 | from stix2.v21 import _Observable 8 | 9 | import mwcp 10 | from mwcp import metadata 11 | from mwcp.report_writers import ReportWriter 12 | 13 | 14 | class STIXWriter(ReportWriter): 15 | """ 16 | Used to create a STIX Bundle that represents one or more MWCP Reports. 17 | Write must be called by each report that should be included in the final result. 18 | Serialize is called once this process is completed to return the STIX Bundle as a string. 19 | """ 20 | def __init__(self, fixed_timestamp: str = None): 21 | # used to ensure we deduplicate objects prior to loading them into the bundle 22 | self._all_objects = {} 23 | # applies a fixed timestamp to all SDOs and SROs for their created and updated times 24 | self.fixed_timestamp = fixed_timestamp 25 | 26 | def write(self, report: metadata.Report): 27 | linked_ids = set() 28 | analysis_data = { 29 | "product": "mwcp", 30 | "version": mwcp.__version__, 31 | "result_name": report.parser, 32 | "allow_custom": True, 33 | "created": self.fixed_timestamp, 34 | "modified": self.fixed_timestamp 35 | } 36 | 37 | note_content = ["Description: " + str(report.input_file.description)] 38 | 39 | # we need to turn the FileObj into a metadata.File to fetch STIX content 40 | file_result = report.input_file.as_stix(None, self.fixed_timestamp) 41 | 42 | for item in file_result.linked_stix: 43 | self._add_stix_object(item) 44 | 45 | for item in file_result.unlinked_stix: 46 | self._add_stix_object(item) 47 | 48 | # the file should always be the first STIX object written 49 | base_file = 
file_result.linked_stix[0] 50 | 51 | analysis_data["sample_ref"] = base_file.id 52 | 53 | if file_result.note_content: 54 | note_content.append(file_result.note_content) 55 | 56 | for element in report.metadata: 57 | result = element.as_stix(base_file, self.fixed_timestamp) 58 | 59 | # Content is loaded to the master note for the File 60 | if result.note_content: 61 | note_content.append(result.note_content) 62 | 63 | # Linked items will be added the result set for the Malware Analysis 64 | for item in result.linked_stix: 65 | linked_ids.add(item.id) 66 | self._add_stix_object(item) 67 | 68 | # Unlinked items are added to the final result, but are not linked within the Malware Analysis. 69 | # Links should happen via relationships or embedded STIX relationships within the objects 70 | for item in result.unlinked_stix: 71 | self._add_stix_object(item) 72 | 73 | # make a single large Note for all Other data which was collected and not otherwise applied 74 | if len(note_content) > 0: 75 | note_params = { 76 | "content": "\n".join(note_content), 77 | "object_refs": [base_file.id], 78 | "created": self.fixed_timestamp, 79 | "modified": self.fixed_timestamp, 80 | "allow_custom": True 81 | } 82 | 83 | if len(file_result.note_labels) > 0: 84 | file_result.note_labels.sort() 85 | note_params["labels"] = file_result.note_labels 86 | 87 | note = stix.Note(**note_params) 88 | self._add_stix_object(note) 89 | 90 | # the malware analysis must be made last since we need the IDs for everything that came out of it 91 | if len(linked_ids) > 0: 92 | refs = list(linked_ids) 93 | refs.sort() 94 | analysis_data["analysis_sco_refs"] = refs 95 | else: 96 | analysis_data["result"] = "unknown" 97 | 98 | if report.tags: 99 | tags = list(report.tags) 100 | tags.sort() 101 | analysis_data["labels"] = tags 102 | 103 | malware_analysis = stix.MalwareAnalysis(**analysis_data) 104 | self._add_stix_object(malware_analysis) 105 | 106 | def serialize(self) -> str: 107 | # Consolidate Notes down to 
avoid needless duplication 108 | note_lookup = {} 109 | to_remove = [] 110 | for idx, item in self._all_objects.items(): 111 | if item.type == "note": 112 | if hasattr(item, "abstract"): 113 | key = item.abstract + item.content 114 | else: 115 | key = item.content 116 | 117 | if hasattr(item, "labels"): 118 | key += " / ".join(item.labels) 119 | 120 | if key in note_lookup: 121 | existing = note_lookup[key] 122 | for ref in item.object_refs: 123 | if ref not in existing.object_refs: 124 | existing.object_refs.append(ref) 125 | to_remove.append(idx) 126 | else: 127 | note_lookup[key] = item 128 | 129 | # remove the duplicate notes 130 | # done outside of the initial loop to avoid messing with for 131 | for idx in to_remove: 132 | self._all_objects.pop(idx) 133 | 134 | values = self._all_objects.values() 135 | if len(values) > 0: 136 | package = stix.Bundle(objects=values, allow_custom=True) 137 | else: 138 | package = stix.Bundle() 139 | 140 | return package.serialize(indent=4) 141 | 142 | def _add_stix_object(self, stix_object: _Observable): 143 | """ 144 | Adds a STIX object to the all objects dictionary and replaces the existing element if the new version has more details 145 | """ 146 | if stix_object.id in self._all_objects: 147 | if len(stix_object.serialize()) > len(self._all_objects[stix_object.id].serialize()): 148 | self._all_objects[stix_object.id] = stix_object 149 | else: 150 | self._all_objects[stix_object.id] = stix_object 151 | -------------------------------------------------------------------------------- /mwcp/utils/construct/ARM.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper constructs for parsing the ARM instruction set. 3 | This module will be imported along with 'from mwcp.utils import construct' 4 | and accessible from the submodule "ARM". (e.g. construct.ARM.LDR) 5 | """ 6 | 7 | from . import core as construct 8 | from .core import this 9 | 10 | from . 
import helpers 11 | from mwcp.utils import elffileutils 12 | 13 | 14 | def _ByteSwapped(subcon, **ctx): 15 | r""" 16 | MODIFIED version of ByteSwapped that allows providing a context. 17 | Swap the byte order within boundaries of the given subcon. 18 | 19 | :param subcon: the subcon on top of byte swapped bytes 20 | :param **ctx: Context passed to subcon.sizeof() 21 | 22 | Example:: 23 | 24 | Int24ul <--> ByteSwapped(Int24ub) 25 | """ 26 | size = subcon.sizeof(**ctx) 27 | return construct.Transformed(subcon, construct.swapbytes, size, construct.swapbytes, size) 28 | 29 | 30 | # Single Data Transfer (LDR, STR) 31 | _ldr_str_inst = construct.BitStruct( 32 | 'cond' / construct.Nibble, 33 | construct.Const(1, construct.BitsInteger(2)), # must be '01' 34 | 'reg_imm_offset' / construct.Bit, # 0 = immediate offset, 1 = register offset 35 | 'pre_post_indexing' / construct.Bit, # 0 = post, 1 = pre 36 | 'up_down' / construct.Bit, # 0 = down, 1 = up 37 | 'byte_word' / construct.Bit, # 0 = word, 1 = byte 38 | 'write_back' / construct.Flag, 39 | 'load_store' / construct.Bit, # 0 = store, 1 = load 40 | 'base_register' / construct.Nibble, 41 | 'src_dest_register' / construct.Nibble, 42 | 'offset' / construct.IfThenElse( 43 | this.reg_imm_offset, 44 | construct.Octet >> construct.Nibble, # shift applied to Rm >> Rm 45 | construct.BitsInteger(12) 46 | ) 47 | ) 48 | 49 | LDR = construct.ExprValidator(_ByteSwapped(_ldr_str_inst, reg_imm_offset=0), this.load_store == 1) 50 | 51 | 52 | # Data Processing 53 | _data_proc_inst = construct.BitStruct( 54 | 'cond' / construct.Nibble, 55 | construct.Const(0, construct.BitsInteger(2)), # must be '00' 56 | 'reg_imm_operand' / construct.Bit, # 0 = immediate, 1 = register 57 | 'opcode' / construct.Enum( 58 | construct.Nibble, 59 | AND=0x0, EOR=0x1, SUB=0x2, RSB=0x3, ADD=0x4, ADC=0x5, SBC=0x6, RSC=0x7, 60 | TST=0x8, TEQ=0x9, CMP=0xA, CMN=0xB, ORR=0xC, MOV=0xD, BIC=0xE, MVN=0xF, 61 | ), 62 | 'set_cond' / construct.Flag, 63 | 'operand_1_reg' / 
def ELFPointer(inst, inst_end, subcon, elf=None):
    r"""
    This is the ARM version of ELFPointer.
    This subconstruct takes two arguments which
    specify the parsed ARM instruction containing an immediate offset in its second operand
    and the end offset (physical) for said instruction.

    The following ARM instructions are currently supported:
        - LDR

    Example: for the instruction "LDR R1, =data_offset"
        spec = Struct(
            'inst' / ARM.LDR,
            'inst_end' / Tell,
            'data' / ARM.ELFPointer(this.inst, this.inst_end, Bytes(100))
        )

        spec = Struct(
            're' / Regex(
                '\x01\x03(?P<data_ldr_inst>.{4})(?P<end>)\x06\x07', data_ldr_inst=ARM.LDR, end=Tell),
            'data' / ARM.ELFPointer(this.re.data_ldr_inst, this.re.end, Bytes(100))
        )

        spec.parse(file_data, elf=elf_object)

    :param inst: a construct.Container or function that represents the assembly instruction
    :param inst_end: an int or a function that represents the location of the end of the instruction.
    :param subcon: the subcon to use at the offset
    :param elf: Optional elftools.ELFFile file object.
        (if not supplied here, this must be supplied during parse()/build())
    """
    def _obtain_literal_pool_mem_offset(ctx):
        """Obtains the memory offset to the entry in the literal pool."""
        # Validate LDR instruction: only a PC-relative load with an immediate
        # offset and no write-back can be resolved statically here.
        _inst = inst(ctx._) if callable(inst) else inst
        if _inst.load_store != 1:
            raise construct.ConstructError('Load/Store bit must be set to 1')
        if _inst.base_register != 15 or _inst.reg_imm_offset == 1:
            raise construct.ConstructError(
                'Only instructions with PC relative addressing is currently supported.')
        if _inst.write_back:
            raise construct.ConstructError('Write back cannot be enabled for PC relative addressing.')
        # According to spec, PC is an address 8 bytes from the start of the instruction.
        # (Which means 4 bytes from end.)
        _elf = elf or ctx._params.elf
        _inst_end = inst_end(ctx._) if callable(inst_end) else inst_end
        # Convert the physical end offset to a memory offset before applying
        # the PC-relative arithmetic.
        _inst_end = elffileutils.obtain_memory_offset(_inst_end, elf=_elf)
        pc = _inst_end + 4
        mem_offset = pc + _inst.offset
        return mem_offset

    # HACK: FocusLast (which is FocusedSeq) will try to create a child context when it performs its parsing.
    # The user will be unaware of this shift and can cause issues if the subcon is dynamic.
    # Therefore, patch the given subcon to use the parent context during parsing.
    # TODO: Embedded() should allow for this functionality!
    class _Embedded(construct.Subconstruct):
        def _parse(self, stream, context, path):
            return self.subcon._parsereport(stream, context._, path)
    subcon = _Embedded(subcon)

    # Use original ELFPointer to create a pointer to the entry in the literal pool, which
    # in turn, is a pointer to the data we actually want.
    return helpers.FocusLast(
        helpers.ELFPointer(_obtain_literal_pool_mem_offset, construct.Int32ul, elf=elf),
        helpers.ELFPointer(this[0], subcon, elf=elf),
    )
    def parse_rsa_cert(self, rsa_data: bytes):
        """
        Given an RSA certificate in DER format, parse it for reportable information.

        :param rsa_data: The RSA data in DER format
        :return: dict of extracted fields; "rsa_cert_modulus", "rsa_pub_exponent"
            and "rsa_cert_sha1" remain None when the public key cannot be decoded.
        """
        self.logger.debug("The RSA Certificate is stored in ASN.1 DER format. Parsing for reportable metadata.")
        cert = asn1_decoder.decode(rsa_data, asn1Spec=rfc2459.Certificate())[0]
        tbs_cert = cert.getComponentByName("tbsCertificate")
        # The public key is stored as a BIT STRING; repack the bits into bytes
        # so it can be decoded as an RSAPublicKey below.
        rsa_key_data = self._from_bits(
            tbs_cert.getComponentByName("subjectPublicKeyInfo").getComponentByName("subjectPublicKey"))
        serial = tbs_cert.getComponentByName("serialNumber")
        issuer = self._parse_rdn(tbs_cert.getComponentByName("issuer")[0])
        subject = self._parse_rdn(tbs_cert.getComponentByName("subject")[0])
        # NOTE(review): only the utcTime choice of Validity is handled; a
        # certificate using generalTime would not parse here — confirm acceptable.
        valid_from = tbs_cert.getComponentByName("validity").getComponentByName("notBefore").getComponentByName(
            "utcTime")
        valid_from_str = datetime.strptime(str(valid_from), "%y%m%d%H%M%SZ").strftime("%Y-%m-%d %H:%M:%S")
        valid_to = tbs_cert.getComponentByName("validity").getComponentByName("notAfter").getComponentByName("utcTime")
        valid_to_str = datetime.strptime(str(valid_to), "%y%m%d%H%M%SZ").strftime("%Y-%m-%d %H:%M:%S")

        info_dict = {"rsa_cert_serial": "0x{:x}".format(int(serial)),
                     "rsa_cert_issuer": "{}".format(issuer),
                     "rsa_cert_subject": subject,
                     "rsa_cert_valid_from": "{}".format(valid_from_str),
                     "rsa_cert_valid_to": "{}".format(valid_to_str),
                     "rsa_cert_modulus": None,
                     "rsa_pub_exponent": None,
                     "rsa_cert_sha1": None}
        # If we fail to extract the Public Key, don't fail the entire thing.
        try:
            rsa_info = asn1_decoder.decode(rsa_key_data, asn1Spec=rfc2437.RSAPublicKey())[0]
            info_dict["rsa_cert_modulus"] = int(rsa_info.getComponentByName("modulus"))
            info_dict["rsa_pub_exponent"] = int(rsa_info.getComponentByName("publicExponent"))
            info_dict["rsa_cert_sha1"] = hashlib.sha1(rsa_data).hexdigest()
        except PyAsn1Error:
            self.logger.debug("Failed to extract RSAPublicKey", exc_info=1)

        return info_dict
128 | for key, value in cert_info.items(): 129 | self.report.add(metadata.Other(key, value)) 130 | 131 | 132 | class PrivateKey(Parser): 133 | DESCRIPTION = "RSA Private Key" 134 | 135 | RSA_PRIV_KEY_RE = re.compile( 136 | br"-----BEGIN RSA PRIVATE KEY-----(?P[^-]*)-----END RSA PRIVATE KEY-----", 137 | re.DOTALL 138 | ) 139 | 140 | @classmethod 141 | def identify(cls, file_object): 142 | return cls.RSA_PRIV_KEY_RE.search(file_object.data) and all( 143 | c in string.printable.encode() for c in file_object.data 144 | ) 145 | 146 | def run(self): 147 | for match in self.RSA_PRIV_KEY_RE.finditer(self.file_object.data): 148 | self.report.add(metadata.RSAPrivateKey.from_PEM(match.group(0).decode())) 149 | -------------------------------------------------------------------------------- /mwcp/utils/custombase64.py: -------------------------------------------------------------------------------- 1 | """ 2 | Custom Base64 related utility 3 | """ 4 | 5 | import base64 6 | import logging 7 | import sys 8 | 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | # Standard alphabet base on size. 14 | _STD_ALPHA = { 15 | 16: b'0123456789ABCDEF', 16 | 32: b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=', 17 | 64: b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=', 18 | } 19 | 20 | 21 | def _validate_alphabet(alphabet, type): 22 | """ 23 | validate the custom alphabet 24 | - 64 or 65 characters 25 | - mappings are unique 26 | """ 27 | if len(alphabet) not in (type, type+1): 28 | raise ValueError('invalid alphabet provided') 29 | 30 | if len(alphabet) != len(set(alphabet)): 31 | raise ValueError('mapping must be unique') 32 | 33 | return 34 | 35 | 36 | def _adjust_pad(alphabet, data, decode): 37 | logger.warning('The padding character has not been specified in the custom alphabet') 38 | 39 | if not (len(data) * 8) % 6: 40 | logger.info('The data does not require the padding character. 
continuing') 41 | return alphabet 42 | 43 | if decode: 44 | for char in data: 45 | if char not in alphabet: 46 | logger.info( 47 | 'The character "{}" does not appear in the alphabet, ' 48 | 'but was found in the encoded data. it will be used as the padding char'.format(char)) 49 | return alphabet + bytes([char]) if isinstance(char, int) else char # support for python 2 or 3 50 | raise ValueError('please provide a padding character to the custom alphabet') 51 | else: 52 | if b'=' not in alphabet: 53 | return alphabet + b'=' 54 | else: 55 | raise ValueError('ERROR: please provide a padding character to the custom alphabet') 56 | 57 | 58 | def _code(data, custom_alpha, size, decode, code_func): 59 | if isinstance(custom_alpha, str): 60 | custom_alpha = custom_alpha.encode() 61 | if isinstance(data, str): 62 | data = data.encode() 63 | _validate_alphabet(custom_alpha, size) 64 | if size != 16 and len(custom_alpha) == size: 65 | _adjust_pad(custom_alpha, data, decode) 66 | std_alpha = _STD_ALPHA[size] 67 | 68 | if decode: 69 | table = bytes.maketrans(custom_alpha, std_alpha) 70 | data = data.translate(table) 71 | return code_func(data) 72 | else: 73 | table = bytes.maketrans(std_alpha, custom_alpha) 74 | data = code_func(data) 75 | return data.translate(table) 76 | 77 | 78 | def b64encode(data, alphabet=None): 79 | """ 80 | Base64 encode 81 | :param data: data. 82 | :param alphabet: custom alphabet or standard alphabet. 83 | :return: base64 encoded data. 84 | 85 | >>> b64encode('hello world') 86 | 'aGVsbG8gd29ybGQ=' 87 | >>> custom_alphabet = b'EFGHQRSTUVWefghijklmnopIJKLMNOPABCDqrstuvwxyXYZabcdz0123456789+/=' 88 | >>> b64encode('hello world', alphabet=custom_alphabet) 89 | 'LSoXMS8BO29dMSj=' 90 | """ 91 | alphabet = alphabet or _STD_ALPHA[64] 92 | return _code(data, alphabet, 64, False, base64.b64encode) 93 | 94 | 95 | def b64decode(data, alphabet=None): 96 | """ 97 | Base64 decode (pads characters if necessary) 98 | :param data: base64 encoded data. 
99 | :param alphabet: custom alphabet or standard alphabet. 100 | :return: base64 decoded data. 101 | 102 | >>> b64decode('aGVsbG8gd29ybGQ=') 103 | 'hello world' 104 | >>> custom_alphabet = b'EFGHQRSTUVWefghijklmnopIJKLMNOPABCDqrstuvwxyXYZabcdz0123456789+/=' 105 | >>> b64decode('LSoXMS8BO29dMSj=', alphabet=custom_alphabet) 106 | 'hello world' 107 | >>> b64decode('LSoXMS8BO29dMSj', alphabet=custom_alphabet) 108 | 'hello world' 109 | """ 110 | alphabet = alphabet or _STD_ALPHA[64] 111 | # Pad the data, if necessary 112 | data += alphabet[len(alphabet)-1:] * ((-len(data)) % 4) 113 | return _code(data, alphabet, 64, True, base64.b64decode) 114 | 115 | 116 | def b32encode(data, alphabet=None): 117 | """ 118 | Base32 encodes 119 | :param data: data 120 | :param alphabet: custom alphabet or standard alphabet. 121 | :return: base32 encoded data. 122 | 123 | >>> b32encode('hello world') 124 | 'NBSWY3DPEB3W64TMMQ======' 125 | >>> custom_alphabet = 'FGHIJQ345RSTUVWXYKLMABCDENOPZ267=' 126 | >>> b32encode('hello world', alphabet=custom_alphabet) 127 | 'VGLCEPIXJGPC6ZMUUY======' 128 | """ 129 | alphabet = alphabet or _STD_ALPHA[32] 130 | return _code(data, alphabet, 32, False, base64.b32encode) 131 | 132 | 133 | def b32decode(data, alphabet=None): 134 | """ 135 | Base32 decode (pads characters if necessary) 136 | :param data: base32 encoded data. 137 | :param alphabet: custom alphabet or standard alphabet. 138 | :return: base32 decoded data. 
139 | 140 | >>> b32decode('NBSWY3DPEB3W64TMMQ======') 141 | 'hello world' 142 | >>> custom_alphabet = 'FGHIJQ345RSTUVWXYKLMABCDENOPZ267=' 143 | >>> b32decode('VGLCEPIXJGPC6ZMUUY======', alphabet=custom_alphabet) 144 | 'hello world' 145 | >>> b32decode('VGLCEPIXJGPC6ZMUUY', alphabet=custom_alphabet) 146 | 'hello world' 147 | """ 148 | alphabet = alphabet or _STD_ALPHA[32] 149 | # Pad the data, if necessary 150 | data += alphabet[len(alphabet)-1:] * ((-len(data)) % 8) 151 | return _code(data, alphabet, 32, True, base64.b32decode) 152 | 153 | 154 | def b16encode(data, alphabet=None): 155 | """ 156 | Base16 encodes 157 | :param data: data 158 | :param alphabet: custom alphabet or standard alphabet. 159 | :return: base16 encoded data. 160 | 161 | >>> b16encode('hello world') 162 | '68656C6C6F20776F726C64' 163 | >>> custom_alphabet = '78BDE0123F459A6C' 164 | >>> b16encode('hello world', alphabet=custom_alphabet) 165 | '131019191CB7221C2B191E' 166 | """ 167 | alphabet = alphabet or _STD_ALPHA[16] 168 | return _code(data, alphabet, 16, False, base64.b16encode) 169 | 170 | 171 | def b16decode(data, alphabet=None): 172 | """ 173 | Base16 decode 174 | :param data: base16 encoded data. 175 | :param alphabet: custom alphabet or standard alphabet. 176 | :return: base16 decoded data. 
177 | 178 | >>> b16decode('68656C6C6F20776F726C64') 179 | 'hello world' 180 | >>> custom_alphabet = '78BDE0123F459A6C' 181 | >>> b16decode('131019191CB7221C2B191E', alphabet=custom_alphabet) 182 | 'hello world' 183 | """ 184 | alphabet = alphabet or _STD_ALPHA[16] 185 | return _code(data, alphabet, 16, True, base64.b16decode) 186 | -------------------------------------------------------------------------------- /mwcp/parser.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import logging 3 | from typing import TYPE_CHECKING, Union, Tuple, Any 4 | import warnings 5 | 6 | # This is here for type hints and autocomplete in PyCharm 7 | # noinspection PyUnreachableCode 8 | if TYPE_CHECKING: 9 | from mwcp import FileObject, Report 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | # A way to create a class properties 15 | # (Adding ABCMeta so, parsers have the freedom to use it.) 16 | class ParserMeta(abc.ABCMeta): 17 | @property 18 | def name(cls): 19 | try: 20 | return cls._name 21 | except AttributeError: 22 | return cls.__name__ 23 | 24 | @name.setter 25 | def name(cls, value): 26 | cls._name = value 27 | 28 | @property 29 | def source(cls): 30 | try: 31 | return cls._source 32 | except AttributeError: 33 | module, _, _ = cls.__module__.partition(".") 34 | return module 35 | 36 | @source.setter 37 | def source(cls, value): 38 | cls._source = value 39 | 40 | def __repr__(cls): 41 | return "<{}>".format(cls.name) 42 | 43 | 44 | class Parser(metaclass=ParserMeta): 45 | """ 46 | Interface for all parser objects. 47 | Either use this as a base for all component parsers, or 48 | inherit this class into a customized base class for all parsers. This class includes some of the required data 49 | used by various other classes. 
50 | """ 51 | 52 | file_object = None # type: FileObject 53 | # This is the description that will be given to the file object during output 54 | # if no description is set in the file_object. This must be overwritten by inherited classes. 55 | DESCRIPTION = None 56 | # This is a tuple of tags that will be added to the file object after identification. 57 | TAGS = () 58 | 59 | # TODO: Deprecate the AUTHOR field? 60 | AUTHOR = "" # Optional 61 | 62 | def __init__(self, file_object, report, dispatcher): 63 | """ 64 | Initializes the Parser. 65 | 66 | :param FileObject file_object: Object containing data about component file. 67 | :param mwcp.Report report: Report object to be filled in. 68 | :param Dispatcher dispatcher: reference to the dispatcher object 69 | """ 70 | if not self.DESCRIPTION: 71 | raise NotImplementedError("Parser class is missing a DESCRIPTION.") 72 | self.file_object = file_object 73 | self.report = report 74 | self.dispatcher = dispatcher 75 | self.logger = logging.getLogger(".".join([self.__class__.__module__, self.__class__.__name__])) 76 | 77 | @property 78 | def reporter(self) -> "Report": 79 | warnings.warn( 80 | "reporter has been renamed to report and is now an instance of mwcp.Report", 81 | DeprecationWarning 82 | ) 83 | return self.report 84 | 85 | @property 86 | def knowledge_base(self) -> dict: 87 | """ 88 | Convenience function for getting knowledge_base. 
89 | """ 90 | return self.report.knowledge_base 91 | 92 | @classmethod 93 | def get_logger(cls): 94 | return logging.getLogger(".".join([cls.__module__, cls.__name__])) 95 | 96 | @classmethod 97 | def iter_subclasses(cls): 98 | """Yields all classes that inherit from this class.""" 99 | for subclass in cls.__subclasses__(): 100 | yield subclass 101 | for _subclass in subclass.iter_subclasses(): 102 | yield _subclass 103 | 104 | @classmethod 105 | def identify(cls, file_object: "FileObject") -> Union[bool, Tuple[bool, Any]]: 106 | """ 107 | Determines if this parser is identified to support the given file_object. 108 | This function must be overwritten in order to support identification. 109 | 110 | The passed in file_object may be modified at this time to provide 111 | a new file_name or description. 112 | (Be aware, that this change will be in affect for future parsers. 113 | Therefore, don't change it if you are returning False or the dispatcher is in greedy mode.) 114 | 115 | :param file_object: file object to use for identification 116 | :type file_object: dispatcher.FileObject 117 | 118 | :return bool: Boolean indicating if this parser supports the file_object 119 | Extra arguments to pass into the run() function can also be provided. 120 | """ 121 | logger.warning("Missing identify() function for: {}.{}".format(cls.__module__, cls.__name__)) 122 | return True # Default to True to keep backwards compatibility for legacy parsers. 123 | 124 | @staticmethod 125 | def unpack_identify(result) -> Tuple[bool, Any]: 126 | """ 127 | Helper function to normalize identify results to always produce a tuple of identification result and extras. 
128 | """ 129 | if isinstance(result, tuple) and isinstance(result[0], bool): 130 | identified, *rest = result 131 | rest = tuple(rest) 132 | else: 133 | identified = bool(result) 134 | rest = tuple() 135 | return (identified, *rest) 136 | 137 | @classmethod 138 | def parse(cls, file_object, report, *run_args, dispatcher=None): 139 | """ 140 | Runs parser on given file_object. 141 | 142 | :param FileObject file_object: Object containing data about component file. 143 | :param mwcp.Report report: reference to report object used to report new metadata. 144 | :param run_args: Extra arguments returned from identify() to pass to run() function. 145 | :param Dispatcher dispatcher: reference to the dispatcher object. (if used) 146 | :return: 147 | """ 148 | if dispatcher: 149 | report.set_file(file_object) 150 | parser_object = cls(file_object, report, dispatcher) 151 | parser_object.run(*run_args) 152 | 153 | # If dispatcher isn't provided, create a dummy one containing only this parser. 154 | # This is necessary to ensure identification is run first. 155 | else: 156 | from mwcp import Dispatcher # Must import here to avoid cyclic import. 157 | 158 | dispatcher = Dispatcher(cls.name, cls.source, author=cls.AUTHOR, description=cls.DESCRIPTION, parsers=[cls]) 159 | dispatcher.parse(file_object, report, *run_args) 160 | 161 | def run(self, *args): 162 | """ 163 | This function can be overwritten. It is called to run the parser. 164 | You don't have to overwrite this method if you only want to identify/output the file. 
165 | :return: 166 | """ 167 | pass 168 | -------------------------------------------------------------------------------- /mwcp/utils/logutil.py: -------------------------------------------------------------------------------- 1 | """Utilities for setting up logging.""" 2 | import copy 3 | import errno 4 | import logging.config 5 | import logging.handlers 6 | import multiprocessing as mp 7 | import os 8 | import platform 9 | import sys 10 | import threading 11 | import traceback 12 | import warnings 13 | from collections import deque 14 | 15 | import appdirs 16 | import yaml 17 | 18 | import mwcp 19 | 20 | # Queue used to send over log messages from child to main process. 21 | # (See mwcp.utils.multi_proc for its use.) 22 | mp_queue = mp.Queue() 23 | 24 | 25 | class LevelCharFilter(logging.Filter): 26 | """Logging filter used to add a 'level_char' format variable.""" 27 | 28 | def filter(self, record): 29 | if record.levelno >= logging.ERROR: 30 | record.level_char = "!" 31 | elif record.levelno >= logging.WARN: 32 | record.level_char = "-" 33 | elif record.levelno >= logging.INFO: 34 | record.level_char = "+" 35 | elif record.levelno >= logging.DEBUG: 36 | record.level_char = "*" 37 | else: 38 | record.level_char = " " 39 | return True 40 | 41 | 42 | class MPRotatingFileHandler(logging.handlers.RotatingFileHandler): 43 | """ 44 | Handle the uncommon case of the log attempting to roll over when 45 | another process has the log open. This only happens on Windows, and 46 | the log ends up being a handful of KBs greater than 1024. Entries 47 | are still written, and the rollover happens if/when the MainProcess is 48 | the only process with the log file open. 49 | """ 50 | 51 | def __init__(self, filename, **kwargs): 52 | # Expand and variables and home directories and make path if it doesn't exist. 53 | filename = os.path.expandvars(os.path.expanduser(filename)) 54 | 55 | # If path is relative, add to standard log directory. 
56 | if not os.path.isabs(filename): 57 | filename = os.path.join(appdirs.user_log_dir("mwcp", appauthor=False), filename) 58 | 59 | directory = os.path.dirname(filename) 60 | if not os.path.exists(directory): 61 | os.makedirs(directory) 62 | super(MPRotatingFileHandler, self).__init__(filename, **kwargs) 63 | 64 | def doRollover(self): 65 | """ 66 | Attempt to roll over to the next log file. If the current file 67 | is locked (Windows issue), keep writing to the original file until 68 | it is unlocked. 69 | 70 | :return: 71 | """ 72 | try: 73 | super(MPRotatingFileHandler, self).doRollover() 74 | except OSError as e: 75 | if not (sys.platform == "win32" and e.errno == errno.EACCES): 76 | raise 77 | 78 | 79 | class MPChildHandler(logging.Handler): 80 | """ 81 | Simple handler for child processes. 82 | 83 | Ensures pickle-ability and sends the record entry to the queue. 84 | """ 85 | 86 | def __init__(self, log_queue): 87 | super(MPChildHandler, self).__init__() 88 | self.queue = log_queue 89 | 90 | def emit(self, record): 91 | if record.exc_info: 92 | record.exc_text = "".join(traceback.format_exception(*record.exc_info)) 93 | record.exc_info = None 94 | 95 | self.queue.put(record) 96 | 97 | 98 | class ListHandler(logging.Handler): 99 | """ 100 | Log to a list, with an optional maximum number of records to store. 101 | 102 | Full records are available with the `records` property, and messages (i.e. 103 | the text of the log entry) at available with the `messages` property. 104 | """ 105 | 106 | def __init__(self, entries=None): 107 | """ 108 | Behaves essentially identical to any other handler. 109 | 110 | The only option is max_entries, to specify the max number of log 111 | entries kept. By default, no limit. 112 | 113 | :param int entries: Maximum number of records to store. 
def start_listener():
    """
    Start the daemon thread that relays child-process log records.

    Records placed on the shared ``mp_queue`` by MPChildHandler instances in
    child processes are pulled off here and dispatched through the normal
    logging machinery. Only runs in the main process.
    """
    if mp.current_process().name != "MainProcess":
        return

    def _mp_log_listener(log_queue):
        # Runs forever; the thread is daemonized below so it dies with the process.
        while True:
            record = log_queue.get()
            _logger = logging.getLogger(record.name)
            # Re-check the level here: child processes may forward everything
            # (see the Windows note in setup_logging).
            if _logger.isEnabledFor(record.levelno):
                _logger.handle(record)

    listener_thread = threading.Thread(target=_mp_log_listener, args=(mp_queue,))
    listener_thread.daemon = True
    listener_thread.start()


def setup_logging(default_level=logging.INFO, queue=None):
    """
    Sets up logging using default log config file or log config file set by 'MWCP_LOG_CFG'

    :param default_level: Default log level to set to if config file fails.
    :param queue: Queue used to pass logs to.
        (Only provided when called from a child process; records are then
        forwarded to the main process instead of being handled locally.)
    """
    if queue:
        # Child-process setup: forward every record to the main process.
        assert mp.current_process().name != "MainProcess"
        logging.root.addHandler(MPChildHandler(queue))
        # If on Windows, allow all records to pass through, this is necessary because Windows
        # subprocesses don't duplicate the global state like posix systems.
        # Therefore, we have to pass all log messages through since effective
        # log level is unknown.
        if "Windows" in platform.platform():
            logging.root.setLevel(logging.DEBUG)
    else:
        # Allow setting log configuration using 'MWCP_LOG_CFG' environment variable.
        log_config = os.getenv("MWCP_LOG_CFG", None)
        if log_config is None:
            log_config = mwcp.config.get("LOG_CONFIG_PATH", None)
        else:
            warnings.warn(
                "Using MWCP_LOG_CFG to set log configuration is deprecated. "
                "Please specify path in the configuration file instead."
            )
        if log_config:
            try:
                with open(log_config, "rt") as f:
                    config = yaml.safe_load(f.read())
                logging.config.dictConfig(config)
            except IOError as e:
                # Fall back to a basic config rather than failing startup.
                warnings.warn("Unable to set log config file: {} with error: {}".format(log_config, e))
                logging.basicConfig(level=default_level)
        else:
            logging.basicConfig(level=default_level)

    # Startup queue listener if we are in the main process.
    start_listener()