├── maco
├── __init__.py
├── model
│ ├── __init__.py
│ └── model.py
├── exceptions.py
├── extractor.py
├── base_test.py
├── yara.py
├── collector.py
├── cli.py
└── utils.py
├── demo_extractors
├── __init__.py
├── complex
│ ├── __init__.py
│ ├── complex_utils.py
│ └── complex.py
├── requirements.txt
├── shared.py
├── nothing.py
├── terminator.py
├── elfy.py
└── limit_other.py
├── extractor_setup
├── maco
├── README.md
├── LICENSE.md
└── pyproject.toml
├── model_setup
├── maco
├── README.md
├── LICENSE.md
└── pyproject.toml
├── tests
├── extractors
│ ├── __init__.py
│ ├── bob
│ │ ├── __init__.py
│ │ └── bob.py
│ ├── import_rewriting
│ │ └── __init__.py
│ ├── test_basic.py
│ ├── basic.py
│ └── basic_longer.py
├── requirements.txt
├── data
│ ├── example.txt.cart
│ ├── trigger_complex.txt
│ └── trigger_complex.txt.cart
├── pytest.ini
├── test_cli.py
├── test_extractor.py
├── test_demo_extractors.py
├── test_helpers.py
├── test_parallelism.py
├── test_base_test.py
├── test_detection.py
├── benchmark.py
└── test_model.py
├── requirements.txt
├── .vscode
├── extensions.json
└── settings.json
├── .pre-commit-config.yaml
├── tox.ini
├── LICENSE.md
├── pipelines
├── test.yaml
└── publish.yaml
├── pyproject.toml
├── .gitignore
└── README.md
/maco/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/demo_extractors/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extractor_setup/maco:
--------------------------------------------------------------------------------
1 | ../maco/
--------------------------------------------------------------------------------
/model_setup/maco:
--------------------------------------------------------------------------------
1 | ../maco/
--------------------------------------------------------------------------------
/tests/extractors/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/model_setup/README.md:
--------------------------------------------------------------------------------
1 | ../README.md
--------------------------------------------------------------------------------
/tests/extractors/bob/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/demo_extractors/complex/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extractor_setup/README.md:
--------------------------------------------------------------------------------
1 | ../README.md
--------------------------------------------------------------------------------
/model_setup/LICENSE.md:
--------------------------------------------------------------------------------
1 | ../LICENSE.md
--------------------------------------------------------------------------------
/extractor_setup/LICENSE.md:
--------------------------------------------------------------------------------
1 | ../LICENSE.md
--------------------------------------------------------------------------------
/tests/extractors/import_rewriting/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | GitPython
3 |
--------------------------------------------------------------------------------
/maco/model/__init__.py:
--------------------------------------------------------------------------------
1 | from maco.model.model import * # noqa: F403
2 |
--------------------------------------------------------------------------------
/demo_extractors/requirements.txt:
--------------------------------------------------------------------------------
1 | httpx
2 |
3 | # Install maco from source for testing
4 | ../
5 |
--------------------------------------------------------------------------------
/tests/data/example.txt.cart:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/Maco/HEAD/tests/data/example.txt.cart
--------------------------------------------------------------------------------
/tests/data/trigger_complex.txt:
--------------------------------------------------------------------------------
1 | file to trigger demo extractors
2 |
3 | self_trigger
4 |
5 | Complex
6 | Paradise
7 |
--------------------------------------------------------------------------------
/tests/data/trigger_complex.txt.cart:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CybercentreCanada/Maco/HEAD/tests/data/trigger_complex.txt.cart
--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | minversion = 6.0
3 | addopts = -ra -q -k "not git and not extractors"
4 | testpaths =
5 | tests
6 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cart
2 | pydantic>=2.0.0
3 | tomli >= 1.1.0 ; python_version < "3.11"
4 | uv
5 | yara-x
6 | multiprocess>=0.70.17
7 |
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | "recommendations": [
3 | "ms-python.python",
4 | "charliermarsh.ruff",
5 | "elagil.pre-commit-helper"
6 | ]
7 | }
8 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/astral-sh/ruff-pre-commit
3 | # Ruff version.
4 | rev: v0.9.9
5 | hooks:
6 | # Run the linter.
7 | - id: ruff
8 | args: [--fix, --preview]
9 | # Run the formatter.
10 | - id: ruff-format
11 |
--------------------------------------------------------------------------------
/tests/extractors/bob/bob.py:
--------------------------------------------------------------------------------
1 | """Simple extractor for testing module and submodule with the same name."""
2 |
3 | from maco import extractor
4 |
5 |
class Bob(extractor.Extractor):
    """A simplistic script for testing.

    Exists to exercise loading of a module and submodule that share the
    same name (tests/extractors/bob/bob.py).
    """

    # Minimal required extractor metadata. No yara_rule is declared here;
    # presumably the framework tolerates/derives a default — confirm in extractor.Extractor.
    family = "bob"
    author = "bob"
    last_modified = "2022-06-14"
--------------------------------------------------------------------------------
/demo_extractors/complex/complex_utils.py:
--------------------------------------------------------------------------------
1 | """Example of a complex function invoked by the extractor."""
2 |
3 | from typing import Dict
4 |
5 |
def getdata() -> Dict[str, int]:
    """Stand-in for a complex, long-running support routine.

    Returns:
        (Dict[str, int]): mock results for the demo extractor
    """
    mock_results = dict(result=5)
    return mock_results
13 |
--------------------------------------------------------------------------------
/demo_extractors/shared.py:
--------------------------------------------------------------------------------
1 | """Custom model based on Maco's model."""
2 |
3 | from typing import Optional
4 |
5 | import pydantic
6 |
7 | from maco import model
8 |
9 |
class MyCustomModel(model.ExtractorModel):
    """Custom model based on Maco's model.

    Demonstrates how an extractor can constrain the free-form 'other'
    property of ExtractorModel to a concrete schema.
    """

    class Other(pydantic.BaseModel):
        """Custom 'other' class: the only shape accepted for 'other' below."""

        key1: str
        key2: bool
        key3: int

    # set a custom class here as valid for the 'other' property
    # (overrides the parent model's unconstrained 'other' field)
    other: Optional[Other] = None
22 |
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | """CLI testing."""
2 |
3 | import os
4 | import unittest
5 |
6 | from maco import cli
7 |
8 |
class TestCLI(unittest.TestCase):
    """Test CLI."""

    def test_process_filesystem(self):
        """Run the demo extractors over the bundled test data directory."""
        # resolve paths relative to this test file
        here = os.path.dirname(os.path.abspath(__file__))
        extractors_dir = os.path.abspath(os.path.join(here, "..", "demo_extractors"))
        samples_dir = os.path.join(here, "data")
        counts = cli.process_filesystem(
            extractors_dir,
            samples_dir,
            include=[],
            exclude=[],
            pretty=True,
            force=False,
            include_base64=False,
        )
        # (files seen, files matched, configs produced) — all three expected to be 3
        self.assertEqual(counts, (3, 3, 3))
26 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "[python]": {
3 | "editor.defaultFormatter": "charliermarsh.ruff"
4 | },
5 | "editor.codeActionsOnSave": {
6 | "source.organizeImports": "explicit"
7 | },
8 | "editor.formatOnSave": true,
9 | "editor.rulers": [
10 | 120
11 | ],
12 | "editor.tabSize": 4,
13 | "editor.wordWrap": "wordWrapColumn",
14 | "editor.wordWrapColumn": 120,
15 | "files.insertFinalNewline": true,
16 | "files.trimFinalNewlines": true,
17 | "files.trimTrailingWhitespace": true,
18 | "pre-commit-helper.runOnSave": "all hooks",
19 | "python.testing.pytestArgs": [
20 | "tests"
21 | ],
22 | "python.testing.pytestEnabled": true,
23 | "ruff.lint.enable": true,
24 | "ruff.configuration": "pyproject.toml",
25 | "ruff.lint.preview": true
26 | }
27 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py38,py39,py310,py311,py312,parallelism,style
3 | [testenv]
4 | # install testing framework
5 | deps =
6 | pytest
7 | -r requirements.txt
8 | -r tests/requirements.txt
9 | # run the tests
10 | commands = python -m pytest tests/ -p no:cacheprovider --durations=10 -ra -q -k "not git and not extractors and not parallelism" -vv -W ignore::DeprecationWarning
11 |
12 | [testenv:style]
13 | # install testing framework
14 | deps =
15 | ruff
16 | # run the tests
17 | commands =
18 | ruff format --check
19 | ruff check
20 |
21 | [testenv:parallelism]
22 | # install parallel testing framework
23 | deps =
24 | pytest
25 | pytest-xdist
26 | -r requirements.txt
27 | -r tests/requirements.txt
28 | # run parallel tests
29 | commands = python -m pytest tests/test_parallelism.py -p no:cacheprovider -n 4 -vv -W ignore::DeprecationWarning
30 |
--------------------------------------------------------------------------------
/maco/exceptions.py:
--------------------------------------------------------------------------------
1 | """Exception classes for extractors."""
2 |
3 |
# Can be raised by extractors to abort analysis of a sample
# ie. Can abort if preliminary checks at start of run indicate the file shouldn't be analyzed by extractor
class AnalysisAbortedException(Exception):
    """Raised when extractors voluntarily abort analysis of a sample."""

    pass
10 |
11 |
# Signals a failure while discovering/importing extractor modules
class ExtractorLoadError(Exception):
    """Raised when extractors cannot be loaded."""

    pass
16 |
17 |
# Subclasses ValueError so callers validating extractor definitions can catch either type
class InvalidExtractor(ValueError):
    """Raised when an extractor is invalid."""

    pass
22 |
23 |
class NoHitException(Exception):
    """Raised when the YARA rule of an extractor doesn't hit."""

    pass
28 |
29 |
# NOTE(review): this name shadows Python's builtin SyntaxError inside this module.
# Renaming would break callers that catch maco.exceptions.SyntaxError, so it is kept as-is;
# within this module, refer to the builtin explicitly via builtins.SyntaxError if ever needed.
class SyntaxError(Exception):
    """Raised when there's a syntax error in the YARA rule."""

    pass
34 |
--------------------------------------------------------------------------------
/tests/extractors/test_basic.py:
--------------------------------------------------------------------------------
1 | """Test basic extractors."""
2 |
3 | import io
4 | import os
5 |
6 | from maco import base_test
7 |
8 |
class TestBasicLonger(base_test.BaseTest):
    """Test that an extractor containing the name of another extractor works properly."""

    name = "BasicLonger"
    path = os.path.join(__file__, "..")

    def test_run(self):
        """Extract from a buffer that triggers the BasicLonger rule."""
        sample = io.BytesIO(b"BasicLonger")
        result = self.extract(sample)
        self.assertEqual(result["family"], "basic_longer")
19 |
20 |
class TestBasic(base_test.BaseTest):
    """Test that an extractor containing the name of another extractor works properly."""

    name = "Basic"
    path = os.path.join(__file__, "..")

    def test_run(self):
        """Extract from a buffer that triggers the Basic rule."""
        sample = io.BytesIO(b"Basic")
        result = self.extract(sample)
        self.assertEqual(result["family"], "basic")
31 |
--------------------------------------------------------------------------------
/demo_extractors/nothing.py:
--------------------------------------------------------------------------------
1 | """Demo extractor that returns nothing."""
2 |
3 | from io import BytesIO
4 | from typing import List
5 |
6 | from maco import extractor, yara
7 |
8 |
class Nothing(extractor.Extractor):
    """Returns no extracted data."""

    family = "nothing"
    author = "blue"
    last_modified = "2022-06-14"
    yara_rule = """
        rule Nothing
        {
            strings:
                $self_trigger = "Nothing"

            condition:
                $self_trigger
        }
        """

    def run(self, stream: BytesIO, matches: List[yara.Match]):
        """Run the analysis process.

        Args:
            stream (BytesIO): file object from disk/network/memory.
            matches (List[yara.Match]): yara rule matches
        """
        # deliberately produce no configuration for this sample
        return None
35 |
--------------------------------------------------------------------------------
/demo_extractors/terminator.py:
--------------------------------------------------------------------------------
1 | """Example extractor that terminates early during extraction."""
2 |
3 | from io import BytesIO
4 | from typing import List, Optional
5 |
6 | from maco import extractor, model, yara
7 | from maco.exceptions import AnalysisAbortedException
8 |
9 |
class Terminator(extractor.Extractor):
    """Terminates early during extraction."""

    family = "terminator"
    author = "skynet"
    last_modified = "1997-08-29"

    def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
        """Run the analysis process but terminate early.

        Args:
            stream (BytesIO): file object from disk/network/memory.
            matches (List[yara.Match]): yara rule matches

        Raises:
            AnalysisAbortedException: Extractor has decided to terminate early
        """
        # signal to the framework that this extractor cannot handle the sample
        raise AnalysisAbortedException("I can't run on this sample")
29 |
--------------------------------------------------------------------------------
/tests/extractors/basic.py:
--------------------------------------------------------------------------------
1 | """Basic extractor."""
2 |
3 | from io import BytesIO
4 | from typing import List
5 |
6 | from maco import extractor, model, yara
7 |
8 |
class Basic(extractor.Extractor):
    """A simplistic script for testing."""

    family = "basic"
    author = "blue"
    last_modified = "2022-06-14"
    yara_rule = """
        rule Basic
        {
            strings:
                $self_trigger = "Basic"

            condition:
                $self_trigger
        }
        """

    def run(self, stream: BytesIO, matches: List[yara.Match]) -> model.ExtractorModel:
        """Run the extractor.

        Returns:
            (model.ExtractorModel): Results from extractor
        """
        # populate the standard model with fixed values for test verification
        result = model.ExtractorModel(family="basic")
        result.campaign_id.append("12345")
        result.other = {"key1": "key1", "key2": True, "key3": 45}
        return result
39 |
--------------------------------------------------------------------------------
/tests/extractors/basic_longer.py:
--------------------------------------------------------------------------------
1 | """Basic longer extractor."""
2 |
3 | from io import BytesIO
4 | from typing import List
5 |
6 | from maco import extractor, model, yara
7 |
8 |
class BasicLonger(extractor.Extractor):
    """A simplistic script for testing."""

    family = "basic_longer"
    author = "blue"
    last_modified = "2022-06-14"
    yara_rule = """
        rule BasicLonger
        {
            strings:
                $self_trigger = "BasicLonger"

            condition:
                $self_trigger
        }
        """

    def run(self, stream: BytesIO, matches: List[yara.Match]) -> model.ExtractorModel:
        """Run the extractor.

        Returns:
            (model.ExtractorModel): Results from extractor
        """
        # populate the standard model with fixed values for test verification
        result = model.ExtractorModel(family="basic_longer")
        result.campaign_id.append("12345")
        result.other = {"key1": "key1", "key2": True, "key3": 45}
        return result
38 |
--------------------------------------------------------------------------------
/demo_extractors/elfy.py:
--------------------------------------------------------------------------------
1 | """Demo extractor that targets ELF files."""
2 |
3 | from io import BytesIO
4 | from typing import List, Optional
5 |
6 | from maco import extractor, model, yara
7 |
8 |
class Elfy(extractor.Extractor):
    """Check basic elf property."""

    family = "elfy"
    author = "blue"
    last_modified = "2022-06-14"
    yara_rule = """
        import "elf"

        rule Elfy
        {
            condition:
                elf.number_of_sections > 50
        }
        """

    def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
        """Run the analysis process.

        Args:
            stream (BytesIO): file object from disk/network/memory.
            matches (List[yara.Match]): yara rule matches

        Returns:
            (Optional[model.ExtractorModel]): model of results
        """
        result = model.ExtractorModel(family=self.family)
        # campaign_id defaults to an empty list; record the sample size as a mock value
        content = stream.read()
        result.campaign_id.append(str(len(content)))
        return result
41 |
--------------------------------------------------------------------------------
/model_setup/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "setuptools_scm>=8"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "maco-model"
7 | dynamic = ["version"]
8 | description = "This package contains only the Pydantic model for Maco."
9 | dependencies = ["pydantic>=2.0.0"]
10 | requires-python = ">=3.8"
11 | authors = [{ name = "sl-govau" }]
12 | maintainers = [{ name = "cccs-rs" }]
13 | readme = "README.md"
14 | license = { file = "LICENSE.md" }
15 |
16 | classifiers = [
17 | "Development Status :: 5 - Production/Stable",
18 | "Intended Audience :: Developers",
19 |
20 | "Topic :: Software Development :: Libraries :: Python Modules",
21 |
22 | "License :: OSI Approved :: MIT License",
23 |
24 | "Programming Language :: Python :: 3.8",
25 | "Programming Language :: Python :: 3.9",
26 | "Programming Language :: Python :: 3.10",
27 | "Programming Language :: Python :: 3.11",
28 | "Programming Language :: Python :: 3.12",
29 | ]
30 |
31 |
32 | [project.urls]
33 | Repository = "https://github.com/CybercentreCanada/Maco"
34 | Issues = "https://github.com/CybercentreCanada/Maco/issues"
35 |
36 | [tool.setuptools_scm]
37 | root = ".."
38 |
39 | [tool.setuptools]
40 | packages = ["maco.model"]
41 |
--------------------------------------------------------------------------------
/extractor_setup/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "setuptools_scm>=8"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "maco-extractor"
7 | description = "This package contains the essentials for creating Maco extractors and using them at runtime."
8 | dynamic = ["version"]
9 | dependencies = ["pydantic>=2.0.0", "yara-x"]
10 | requires-python = ">=3.8"
11 | authors = [{ name = "sl-govau" }]
12 | maintainers = [{ name = "cccs-rs" }]
13 | readme = "README.md"
14 | license = { file = "LICENSE.md" }
15 |
16 | classifiers = [
17 | "Development Status :: 5 - Production/Stable",
18 | "Intended Audience :: Developers",
19 |
20 | "Topic :: Software Development :: Libraries :: Python Modules",
21 |
22 | "License :: OSI Approved :: MIT License",
23 |
24 | "Programming Language :: Python :: 3.8",
25 | "Programming Language :: Python :: 3.9",
26 | "Programming Language :: Python :: 3.10",
27 | "Programming Language :: Python :: 3.11",
28 | "Programming Language :: Python :: 3.12",
29 | ]
30 |
31 |
32 | [project.urls]
33 | Repository = "https://github.com/CybercentreCanada/Maco"
34 | Issues = "https://github.com/CybercentreCanada/Maco/issues"
35 |
36 | [tool.setuptools_scm]
37 | root = ".."
38 |
39 | [tool.setuptools]
40 | packages = ["maco.model"]
41 | py-modules = ["maco.extractor", "maco.yara", "maco.exceptions"]
42 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Crown Copyright, Government of Canada (Canadian Centre for Cyber Security / Communications Security Establishment) and Government of Australia (Australian Cyber Security Centre / Australian Signals Directorate)
4 |
5 | Copyright title to all 3rd party software distributed with maco is held by the respective copyright holders as noted in those files. Users are asked to read the 3rd Party Licenses referenced with those assets.
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8 |
9 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
10 |
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
12 |
--------------------------------------------------------------------------------
/tests/test_extractor.py:
--------------------------------------------------------------------------------
1 | """Extractor testing."""
2 |
3 | import unittest
4 |
5 | from maco import extractor
6 |
7 |
class TestExtractor(unittest.TestCase):
    """Test extractor."""

    def test_bad(self):
        """Test bad extractor.

        Exercises validation performed at instantiation time: missing metadata
        must raise InvalidExtractor, while valid (even mismatched-name) yara
        rules are accepted.
        """

        # a minimal valid extractor: family/author/last_modified are sufficient
        class Tmp(extractor.Extractor):
            family = "smell_ya_later"
            author = "me"
            last_modified = "yeah"

        Tmp()  # must construct without error

        # clearing 'family' invalidates an otherwise-valid subclass
        class Tmp1(Tmp):
            family = None

        self.assertRaises(extractor.InvalidExtractor, Tmp1)

        # direct subclass with no metadata except an explicit None author
        class Tmp1(extractor.Extractor):
            author = None

        self.assertRaises(extractor.InvalidExtractor, Tmp1)

        # direct subclass with version explicitly cleared
        class Tmp1(extractor.Extractor):
            version = None

        self.assertRaises(extractor.InvalidExtractor, Tmp1)

        # a yara_rule that is not valid YARA source must be rejected
        class Tmp1(Tmp):
            yara_rule: str = "t"

        self.assertRaises(extractor.InvalidExtractor, Tmp1)

        # a syntactically valid rule whose name differs from the class is accepted
        class Tmp1(Tmp):
            yara_rule = """
            rule DifferentName
            {
                condition:
                    true
            }
            """

        Tmp1()

        # multiple rules in one yara_rule are accepted too
        class Tmp1(Tmp):
            yara_rule = """
            rule Tmp1
            {
                condition:
                    true
            }
            rule OtherName
            {
                condition:
                    true
            }
            """

        Tmp1()
67 |
--------------------------------------------------------------------------------
/pipelines/test.yaml:
--------------------------------------------------------------------------------
1 | name: tests
2 |
3 | trigger: ["*"]
4 | pr: ["*"]
5 |
6 | pool:
7 | vmImage: "ubuntu-22.04"
8 |
9 | jobs:
10 | - job: style_test
11 | strategy:
12 | matrix:
13 | Python3_12:
14 | python.version: "3.12"
15 | timeoutInMinutes: 10
16 |
17 | steps:
18 | - task: UsePythonVersion@0
19 | displayName: Set python version
20 | inputs:
21 | versionSpec: "$(python.version)"
22 |
23 | - script: |
24 | python -m pip install -U tox
25 | displayName: Install tox
26 |
27 | - script: |
28 | python -m tox -e style
29 | displayName: "Run style tests"
30 |
31 | - job: run_test
32 | strategy:
33 | matrix:
34 | Python3_8:
35 | python.version: "3.8"
36 | Python3_9:
37 | python.version: "3.9"
38 | Python3_10:
39 | python.version: "3.10"
40 | Python3_11:
41 | python.version: "3.11"
42 | Python3_12:
43 | python.version: "3.12"
44 | timeoutInMinutes: 10
45 |
46 | steps:
47 | - task: UsePythonVersion@0
48 | displayName: Set python version
49 | inputs:
50 | versionSpec: "$(python.version)"
51 |
52 | - script: |
53 | runtests=true
54 | if [ ! -d "$(pwd)/tests" ]; then
55 | echo "No tests found"
56 | runtests=false
57 | else
58 | python -m pip install -U tox
59 | fi
60 | echo "##vso[task.setvariable variable=runtests;]$runtests"
61 | displayName: Install tox
62 |
63 | - script: |
64 | python -m tox -e py
65 | displayName: "Run tests"
66 | condition: and(succeeded(), eq(variables.runtests, true))
67 |
--------------------------------------------------------------------------------
/demo_extractors/limit_other.py:
--------------------------------------------------------------------------------
1 | """Demo extractor to show the usage of the other field in the model."""
2 |
3 | from io import BytesIO
4 | from typing import List, Optional
5 |
6 | from demo_extractors import shared
7 | from maco import extractor, model, yara
8 |
9 |
class LimitOther(extractor.Extractor):
    """An example of how the 'other' dictionary can be limited in a custom way."""

    family = "limit_other"
    author = "blue"
    last_modified = "2022-06-14"
    yara_rule = """
        rule LimitOther
        {
            strings:
                $self_trigger = "LimitOther"

            condition:
                $self_trigger
        }
        """

    def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
        """Run the analysis process.

        Args:
            stream (BytesIO): file object from disk/network/memory.
            matches (List[yara.Match]): yara rule matches

        Returns:
            (Optional[model.ExtractorModel]): model of results

        Raises:
            Exception: if the httpx library is not installed
        """
        # import httpx at runtime so we can test that requirements.txt is installed dynamically without breaking
        # the tests that do direct importing
        import httpx

        # reference the library so an auto linter does not delete the import
        if not httpx.__name__:
            raise Exception("wow I really want to use this library in a useful way")

        # MyCustomModel constrains what may be stored in the 'other' dict
        result = shared.MyCustomModel(family="specify_other")
        result.campaign_id.append("12345")
        result.other = result.Other(key1="key1", key2=True, key3=45)
        return result
55 |
--------------------------------------------------------------------------------
/pipelines/publish.yaml:
--------------------------------------------------------------------------------
1 | name: publish
2 |
3 | trigger:
4 | branches:
5 | exclude:
6 | - '*'
7 | tags:
8 | include: ["v*"]
9 | pr: none
10 |
11 | pool:
12 | vmImage: "ubuntu-22.04"
13 |
14 | jobs:
15 | - job: test
16 | displayName: Test
17 | strategy:
18 | matrix:
19 | Python38:
20 | python.version: '3.8'
21 | Python39:
22 | python.version: '3.9'
23 | Python310:
24 | python.version: '3.10'
25 | Python311:
26 | python.version: '3.11'
27 | Python312:
28 | python.version: '3.12'
29 | steps:
30 | - task: UsePythonVersion@0
31 | displayName: 'Use Python $(python.version)'
32 | inputs:
33 | versionSpec: '$(python.version)'
34 |
35 | - script: |
36 | set -x
37 |
38 | python -m pip install -U tox
39 | python -m tox -e py
40 |
41 | - job: build_and_deploy
42 | dependsOn: test
43 | displayName: Build and Deploy
44 | variables:
45 | - group: deployment-information
46 |
47 | steps:
48 | - task: UsePythonVersion@0
49 | displayName: 'Use Python 3.9'
50 | inputs:
51 | versionSpec: '3.9'
52 |
53 | - script: |
54 | set -x
55 | python -m pip install -U build
56 | python -m build
57 | ls dist
58 | displayName: Build (Full)
59 |
60 | - script: |
61 | set -x
62 | cd model_setup
63 | python -m build --outdir ../dist
64 | ls ../dist
65 | displayName: Build (Model Only)
66 |
67 | - script: |
68 | set -x
69 | cd extractor_setup
70 | python -m build --outdir ../dist
71 | ls ../dist
72 | displayName: Build (Extractor Essentials)
73 |
74 | - script: |
75 | set -xv # Echo commands before they are run
76 | sudo env "PATH=$PATH" python -m pip install --no-cache-dir twine
77 | ls dist
78 | twine upload --skip-existing dist/*
79 | displayName: Deploy to PyPI
80 | env:
81 | TWINE_USERNAME: $(twineUsername)
82 | TWINE_PASSWORD: $(twinePassword)
83 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "setuptools_scm>=8"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "maco"
7 | description = "Maco is a framework for creating and using malware configuration extractors."
8 | dynamic = ["version", "readme", "dependencies"]
9 | requires-python = ">=3.8"
10 | authors = [{ name = "sl-govau" }]
11 | maintainers = [{ name = "cccs-rs" }]
12 | license = { file = "LICENSE.md" }
13 |
14 | classifiers = [
15 | "Development Status :: 5 - Production/Stable",
16 | "Intended Audience :: Developers",
17 |
18 | "Topic :: Software Development :: Libraries :: Python Modules",
19 |
20 | "License :: OSI Approved :: MIT License",
21 |
22 | "Programming Language :: Python :: 3.8",
23 | "Programming Language :: Python :: 3.9",
24 | "Programming Language :: Python :: 3.10",
25 | "Programming Language :: Python :: 3.11",
26 | "Programming Language :: Python :: 3.12",
27 | ]
28 |
29 | [project.scripts]
30 | maco = "maco.cli:main"
31 |
32 | [project.urls]
33 | Repository = "https://github.com/CybercentreCanada/Maco"
34 | Issues = "https://github.com/CybercentreCanada/Maco/issues"
35 |
36 | [tool.setuptools_scm]
37 |
38 | [tool.setuptools.dynamic]
39 | readme = { file = ["README.md"], content-type = "text/markdown" }
40 | dependencies = { file = ["requirements.txt"] }
41 |
42 | [tool.setuptools.packages.find]
43 | where = ["."]
44 | exclude = ["test", "tests", "extractors", "model_setup", "extractor_setup"]
45 |
46 | [tool.ruff]
47 | line-length = 120
48 |
49 | [tool.ruff.format]
50 | docstring-code-format = true
51 |
52 | [tool.ruff.lint]
53 | # Add the `line-too-long` rule to the enforced rule set. By default, Ruff omits rules that
54 | # overlap with the use of a formatter, like Black, but we can override this behavior by
55 | # explicitly adding the rule.
56 | extend-select = ["E501", "D", "DOC"]
57 | ignore = ["D104"]
58 | preview = true
59 |
60 | [tool.ruff.lint.pydocstyle]
61 | convention = "google"
62 |
--------------------------------------------------------------------------------
/tests/test_demo_extractors.py:
--------------------------------------------------------------------------------
1 | """Test demo extractors."""
2 |
3 | import os
4 | import unittest
5 |
6 | from maco import cli
7 | from maco.collector import Collector
8 |
9 |
class TestDemoExtractors(unittest.TestCase):
    """Exercise the demo extractors shipped with the repository."""

    def test_complex(self):
        """Run the Complex demo extractor end-to-end via the CLI helper."""
        sample = os.path.normpath(os.path.join(__file__, "../data/trigger_complex.txt"))
        demo_collector = Collector(os.path.join(__file__, "../../demo_extractors"))
        # all five demo extractors should be discovered
        self.assertEqual(
            set(demo_collector.extractors.keys()),
            {"Elfy", "Nothing", "Complex", "LimitOther", "Terminator"},
        )

        with open(sample, "rb") as handle:
            result = cli.process_file(
                demo_collector,
                sample,
                handle,
                pretty=True,
                force=False,
                include_base64=False,
            )
        # only the Complex extractor should produce output for this sample
        expected = {
            "Complex": {
                "family": "complex",
                "version": "5",
                "decoded_strings": sorted(["Paradise", "Complex"]),
                "binaries": [
                    {
                        "datatype": "payload",
                        "encryption": {"algorithm": "something"},
                        "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
                        "size": 9,
                        "hex_sample": "736F6D652064617461",
                    }
                ],
                "http": [
                    {
                        "protocol": "https",
                        "hostname": "blarg5.com",
                        "path": "/malz/64",
                        "usage": "c2",
                    }
                ],
                "encryption": [{"algorithm": "sha256"}],
            }
        }
        self.assertEqual(result, expected)
59 |
--------------------------------------------------------------------------------
/tests/test_helpers.py:
--------------------------------------------------------------------------------
1 | """Test helper functions."""
2 |
3 | import io
4 | import os
5 | import unittest
6 |
7 | from maco import collector
8 |
9 | path_extractors = "../../demo_extractors"
10 |
11 |
class TestHelpersFindExtractors(unittest.TestCase):
    """Test finding extractors."""

    def test_find_extractors(self):
        """Collector should discover exactly the expected demo extractors."""
        location = os.path.join(__file__, path_extractors)
        found = collector.Collector(location)
        self.assertEqual(len(found.extractors), 4)
        self.assertEqual(
            set(found.extractors.keys()),
            {"Complex", "Elfy", "LimitOther", "Nothing"},
        )
25 |
26 |
class TestHelpersCompileYara(unittest.TestCase):
    """Test YARA rule compilation."""

    def test_compile_yara(self):
        """Every yara rule defined by the demo extractors should be compiled and collected."""
        location = os.path.join(__file__, path_extractors)
        found = collector.Collector(location)
        expected_rules = {"Elfy", "Complex", "ComplexSubtext", "Nothing", "ComplexAlt", "LimitOther", "Terminator"}
        self.assertEqual({rule.identifier for rule in found.rules}, expected_rules)
38 |
39 |
class TestHelpersAnalyseStream(unittest.TestCase):
    """Test analyzing a stream."""

    def setUp(self):
        """Create a collector over the demo extractors for each test."""
        self.m = collector.Collector(os.path.join(__file__, path_extractors))

    def test_analyse_stream(self):
        """Empty input yields no result; non-empty input yields the full model."""
        self.assertEqual(self.m.extract(io.BytesIO(b""), "Complex"), None)

        resp = self.m.extract(io.BytesIO(b"data"), "Complex")
        expected = {
            "family": "complex",
            "version": "5",
            "binaries": [
                {
                    "datatype": "payload",
                    "data": b"some data",
                    "encryption": {"algorithm": "something"},
                }
            ],
            "http": [
                {
                    "protocol": "https",
                    "hostname": "blarg5.com",
                    "path": "/malz/4",
                    "usage": "c2",
                }
            ],
            "encryption": [{"algorithm": "sha256"}],
        }
        self.assertEqual(resp, expected)
79 |
--------------------------------------------------------------------------------
/tests/test_parallelism.py:
--------------------------------------------------------------------------------
1 | """Test extractor loading and import rewriting when executed in parallel."""
2 |
3 | import os
4 |
5 | from maco.collector import Collector
6 | import unittest
7 |
8 |
class TestParallelism(unittest.TestCase):
    """Test parallel loading of maco extractors.

    This test only makes sense when run in parallel -- running a single instance will not test the affected areas.
    pytest-xdist needs to be installed to run these tests in parallel, use the -n flag to specify how many processes.
    2 or 4 is a reasonable number for the four test cases here.
    python -m pytest tests/test_parallelism.py -n 2
    """

    # determine path to test extractor
    working_dir = os.path.join(os.path.dirname(__file__), "extractors/import_rewriting")
    assert os.path.isdir(working_dir)

    # this value may need to be increased to ensure the errors occur, depending on your test system
    repetitions = 5

    def _check_repeated_loading(self):
        """Repeatedly load the extractor; shared body for the four parallel test cases.

        If the extractor isn't overwritten by a concurrent worker, it loads every time;
        otherwise Collector raises an ExtractorLoadError because the extractor file is empty.
        """
        for _ in range(self.repetitions):
            collector = Collector(self.working_dir, create_venv=False)
            self.assertListEqual(list(collector.extractors.keys()), ["Importer"])

    # four identical test methods so pytest-xdist can schedule them on separate workers
    def test_parallelism_1(self):
        """Test for one pytest-xdist worker."""
        self._check_repeated_loading()

    def test_parallelism_2(self):
        """Test for one pytest-xdist worker."""
        self._check_repeated_loading()

    def test_parallelism_3(self):
        """Test for one pytest-xdist worker."""
        self._check_repeated_loading()

    def test_parallelism_4(self):
        """Test for one pytest-xdist worker."""
        self._check_repeated_loading()
60 |
--------------------------------------------------------------------------------
/demo_extractors/complex/complex.py:
--------------------------------------------------------------------------------
1 | """Demo complex extractor."""
2 |
3 | from io import BytesIO
4 | from typing import List, Optional
5 |
6 | from demo_extractors.complex import complex_utils
7 | from maco import extractor, model, yara
8 |
9 |
class Complex(extractor.Extractor):
    """This script has multiple yara rules and coverage of the data model."""

    family = "complex"
    author = "blue"
    last_modified = "2022-06-14"
    yara_rule = """
    private rule ComplexSubtext
    {
        strings:
            $self_trigger = "self_trigger"
        condition:
            $self_trigger
    }
    rule Complex
    {
        strings:
            $self_trigger = "Complex"
            $my_hex_string = { E2 34 A1 C8 23 FB }

        condition:
            ($self_trigger or $my_hex_string) and ComplexSubtext
    }
    rule ComplexAlt
    {
        strings:
            $self_trigger = "Paradise"

        condition:
            $self_trigger
    }
    """

    def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
        """Run the analysis process.

        Args:
            stream (BytesIO): file object from disk/network/memory.
            matches (List[yara.Match]): yara rule matches

        Returns:
            (Optional[model.ExtractorModel]): model of results

        """
        self.logger.info("starting run")
        self.logger.debug(f"{[x.rule for x in matches]=}")
        contents = stream.read()
        if not contents:
            return
        # this is where you would do some processing on the file
        size = len(contents)
        suffix = complex_utils.getdata()["result"]
        self.logger.debug("got data from lib")
        # example - accessing yara strings
        # NOTE: keep the name `strings` -- it is embedded in the debug output below
        strings = sorted(
            {inst.plaintext().decode("utf8") for m in matches for s in m.strings for inst in s.instances}
        )
        self.logger.debug(f"{strings=}")
        # construct model of results
        result = model.ExtractorModel(family=self.family)
        result.version = "5"
        result.decoded_strings = strings
        result.http.append(
            result.Http(
                protocol="https",
                hostname=f"blarg{suffix}.com",
                path=f"/malz/{size}",
                usage="c2",
            )
        )
        result.encryption.append(result.Encryption(algorithm="sha256"))
        result.binaries.append(
            result.Binary(
                data=b"some data",
                datatype=result.Binary.TypeEnum.payload,
                encryption=result.Binary.Encryption(algorithm="something"),
            )
        )
        return result
88 |
--------------------------------------------------------------------------------
/maco/extractor.py:
--------------------------------------------------------------------------------
1 | """Base class for an extractor script."""
2 |
3 | import logging
4 | import textwrap
5 | from typing import BinaryIO, List, Optional, Union
6 |
7 | from maco import model, yara
8 | from maco.exceptions import InvalidExtractor
9 |
# fallback rule used when an extractor defines no yara_rule of its own;
# `{name}` is filled with the extractor class name via str.format (the doubled
# braces escape format) and the rule matches every input (condition: true)
DEFAULT_YARA_RULE = """
rule {name}
{{
    condition:
        true
}}
"""
17 |
18 |
class Extractor:
    """Base class for an analysis extractor with common entrypoint and metadata.

    Override this docstring with a good description of your extractor.
    """

    family: Union[str, List[str]] = None  # family or families of malware that is detected by the extractor
    author: str = None  # author of the extractor (name@organisation)
    last_modified: str = None  # last modified date (YYYY-MM-DD)
    sharing: str = "TLP:WHITE"  # who can this be shared with?
    yara_rule: str = None  # yara rule that we filter inputs with
    reference: str = None  # link to malware report or other reference information
    logger: logging.Logger = None  # logger for use when debugging

    def __init__(self) -> None:
        """Initialise the extractor.

        Validates the subclass metadata and compiles its yara rule(s).

        Raises:
            InvalidExtractor: When the extractor is invalid (missing metadata,
                a yara syntax error, no rules defined, or a global rule).
        """
        self.name = name = type(self).__name__
        self.logger = logging.getLogger(f"maco.extractor.{name}")
        self.logger.debug(f"initialise '{name}'")
        if not self.family or not self.author or not self.last_modified:
            raise InvalidExtractor("must set family, author, last_modified")
        # if author does not set a yara rule, match on everything
        if not self.yara_rule:
            self.yara_rule = DEFAULT_YARA_RULE.format(name=name)
        # unindent the yara rule from triple quoted string
        # this is for friendly printing, yara handles the rule ok either way
        self.yara_rule = textwrap.dedent(self.yara_rule)
        # check yara rules conform to expected structure
        # we throw away these compiled rules as we need all rules in system compiled together
        try:
            self.yara_compiled = yara.compile(source=self.yara_rule)
        except yara.SyntaxError as e:
            raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
        # need to track which plugin owns the rules
        self.yara_rule_names = [x.identifier for x in self.yara_compiled]
        if not len(list(self.yara_compiled)):
            raise InvalidExtractor(f"{name} must define at least one yara rule")
        for x in self.yara_compiled:
            if x.is_global:
                raise InvalidExtractor(f"{x.identifier} yara rule must not be global")

    def run(self, stream: BinaryIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
        """Run the analysis process and return the extracted configuration.

        Subclasses must override this method.

        Args:
            stream (BinaryIO): file object from disk/network/memory.
            matches (List[yara.Match]): yara rule matches, including locations of matched strings.

        Returns:
            (Optional[model.ExtractorModel]): model of results, or None if nothing was extracted.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError()
71 |
--------------------------------------------------------------------------------
/maco/base_test.py:
--------------------------------------------------------------------------------
1 | """Foundation for unit testing an extractor.
2 |
3 | Example:
4 | from maco import base_test
5 | class TestExample(base_test.BaseTest):
6 | name = "Example"
7 | path = os.path.join(__file__, "../../extractors")
8 | def test_run(self):
9 | data = b"data with Example information"
10 | ret = self.extract(io.BytesIO(data))
11 | self.assertEqual(ret["family"], "example")
12 | """
13 |
14 | import importlib
15 | import io
16 | import os
17 | import unittest
18 |
19 | import cart
20 |
21 | from maco import collector
22 | from maco.exceptions import NoHitException
23 |
24 |
25 | class BaseTest(unittest.TestCase):
26 | """Base test class."""
27 |
28 | name: str = None # name of the extractor
29 | # folder and/or file where extractor is.
30 | # I recommend something like os.path.join(__file__, "../../extractors")
31 | # if your extractors are in a folder 'extractors' next to a folder of tests
32 | path: str = None
33 | create_venv: bool = False
34 |
35 | @classmethod
36 | def setUpClass(cls) -> None:
37 | """Initialization of class.
38 |
39 | Raises:
40 | Exception: when name or path is not set.
41 | """
42 | if not cls.name or not cls.path:
43 | raise Exception("name and path must be set")
44 | cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
45 | return super().setUpClass()
46 |
47 | def test_default_metadata(self):
48 | """Require extractor to be loadable and valid."""
49 | self.assertIn(self.name, self.c.extractors)
50 | self.assertEqual(len(self.c.extractors), 1)
51 |
52 | def extract(self, stream):
53 | """Return results for running extractor over stream, including yara check.
54 |
55 | Raises:
56 | NoHitException: when yara rule doesn't hit.
57 | """
58 | runs = self.c.match(stream)
59 | if not runs:
60 | raise NoHitException("no yara rule hit")
61 | resp = self.c.extract(stream, self.name)
62 | return resp
63 |
64 | @classmethod
65 | def _get_location(cls) -> str:
66 | """Return path to child class that implements this class."""
67 | # import child module
68 | module = cls.__module__
69 | i = importlib.import_module(module)
70 | # get location to child module
71 | return i.__file__
72 |
73 | @classmethod
74 | def load_cart(cls, filepath: str) -> io.BytesIO:
75 | """Load and unneuter a test file (likely malware) into memory for processing.
76 |
77 | Args:
78 | filepath (str): Path to carted sample
79 |
80 | Returns:
81 | (io.BytesIO): Buffered stream containing the un-carted sample
82 |
83 | Raises:
84 | FileNotFoundError: if the path to the sample doesn't exist
85 | """
86 | # it is nice if we can load files relative to whatever is implementing base_test
87 | dirpath = os.path.split(cls._get_location())[0]
88 | # either filepath is absolute, or should be loaded relative to child of base_test
89 | filepath = os.path.join(dirpath, filepath)
90 | if not os.path.isfile(filepath):
91 | raise FileNotFoundError(filepath)
92 | with open(filepath, "rb") as f:
93 | unpacked = io.BytesIO()
94 | # just bubble exceptions if it isn't cart
95 | cart.unpack_stream(f, unpacked)
96 | # seek to start of the unneutered stream
97 | unpacked.seek(0)
98 | return unpacked
99 |
--------------------------------------------------------------------------------
/tests/test_base_test.py:
--------------------------------------------------------------------------------
1 | """Base testing."""
2 |
3 | import io
4 | import os
5 |
6 | from demo_extractors.complex import complex, complex_utils
7 | from maco import base_test
8 |
9 |
class TestLimitOther(base_test.BaseTest):
    """Test that limit_other extractor can be used in base environment."""

    name = "LimitOther"
    path = os.path.join(__file__, "../../demo_extractors")

    def test_load_cart(self):
        """Test loading a cart file."""
        contents = self.load_cart("data/example.txt.cart").read()
        self.assertEqual(contents, b"LimitOther\n")

    def test_extract(self):
        """Tests that we can run an extractor through maco."""
        result = self.extract(self.load_cart("data/example.txt.cart"))
        self.assertEqual(result["family"], "specify_other")
        self.assertEqual(result["campaign_id"], ["12345"])
26 |
27 |
class TestComplex(base_test.BaseTest):
    """Test that complex extractor can be used in base environment."""

    name = "Complex"
    path = os.path.join(__file__, "../../demo_extractors")
    create_venv = False

    def test_extract(self):
        """Tests that we can run an extractor through maco."""
        result = self.extract(self.load_cart("data/trigger_complex.txt.cart"))
        self.assertEqual(result["family"], "complex")
        self.assertEqual(result["version"], "5")

    def test_subfunction(self):
        """Tests that we can import directly from the extractor module and run a function."""
        self.assertEqual(complex_utils.getdata(), {"result": 5})

    def test_manual_extract(self):
        """Tests that we can instantiate and run the extractor class directly."""
        # the yara rule should be available on both the class and an instance
        self.assertGreater(len(complex.Complex.yara_rule), 100)
        inst = complex.Complex()
        self.assertGreater(len(inst.yara_rule), 100)

        outcome = inst.run(io.BytesIO(b"my malwarez"), [])
        self.assertEqual(outcome.family, "complex")
55 |
56 |
class TestComplexVenv(base_test.BaseTest):
    """Test that complex extractor can be used in full venv isolation."""

    name = "Complex"
    path = os.path.join(__file__, "../../demo_extractors")
    create_venv = True

    def test_extract(self):
        """Tests that we can run an extractor through maco."""
        result = self.extract(self.load_cart("data/trigger_complex.txt.cart"))
        self.assertEqual(result["family"], "complex")
        self.assertEqual(result["version"], "5")
69 |
70 |
class TestTerminator(base_test.BaseTest):
    """Test that terminator extractor can be used in base environment."""

    name = "Terminator"
    path = os.path.join(__file__, "../../demo_extractors")
    create_venv = False

    def test_extract(self):
        """Tests that we can run an extractor through maco."""
        # the terminator extractor intentionally yields no result
        self.assertEqual(self.extract(self.load_cart("data/trigger_complex.txt.cart")), None)
82 |
83 |
class TestTerminatorVenv(base_test.BaseTest):
    """Test that terminator extractor can be used in full venv isolation."""

    name = "Terminator"
    path = os.path.join(__file__, "../../demo_extractors")
    # unlike TestTerminator, run the extractor in its own virtual environment
    create_venv = True

    def test_extract(self):
        """Tests that we can run an extractor through maco."""
        ret = self.extract(self.load_cart("data/trigger_complex.txt.cart"))
        self.assertEqual(ret, None)
95 |
--------------------------------------------------------------------------------
/tests/test_detection.py:
--------------------------------------------------------------------------------
1 | """Test detection of extractors."""
2 |
3 | import os
4 | import sys
5 |
6 | import pytest
7 |
8 | from maco.collector import Collector
9 |
# snapshot of the modules loaded before any extractors are imported
INIT_MODULES = list(sys.modules.keys())
# directory containing this test file
TESTS_DIR = os.path.dirname(__file__)

# extractor names expected to be discovered in the CAPESandbox community repository
CAPE_EXTRACTORS = [
    "AgentTesla",
    "AsyncRAT",
    "AuroraStealer",
    "Azorult",
    "BitPaymer",
    "BlackDropper",
    "Blister",
    "BruteRatel",
    "BumbleBee",
    "Carbanak",
    "CobaltStrikeBeacon",
    "CobaltStrikeStager",
    "DCRat",
    "DarkGate",
    "DoppelPaymer",
    "DridexLoader",
    "Fareit",
    "Formbook",
    "GuLoader",
    "IcedID",
    "IcedIDLoader",
    "KoiLoader",
    "Latrodectus",
    "LokiBot",
    "Lumma",
    "NanoCore",
    "Nighthawk",
    "Njrat",
    "Oyster",
    "PhemedroneStealer",
    "PikaBot",
    "PlugX",
    "QakBot",
    "QuasarRAT",
    "Quickbind",
    "RedLine",
    "Remcos",
    "Rhadamanthys",
    "SmokeLoader",
    "Socks5Systemz",
    "SparkRAT",
    "SquirrelWaffle",
    "Stealc",
    "Strrat",
    "VenomRAT",
    "WarzoneRAT",
    "XWorm",
    "XenoRAT",
    "Zloader",
]
64 |
65 |
@pytest.mark.parametrize(
    "repository_url, extractors, python_minor, branch",
    [
        ("https://github.com/jeFF0Falltrades/rat_king_parser", ["RKPMACO"], 10, None),
        ("https://github.com/CAPESandbox/community", CAPE_EXTRACTORS, 10, None),
    ],
    ids=("jeFF0Falltrades/rat_king_parser", "CAPESandbox/community"),
)
def test_public_projects(repository_url: str, extractors: list, python_minor: int, branch: str):
    """Test compatibility with public projects.

    Ensure that any changes we make doesn't break usage of public projects
    which can affect downstream systems using like library (ie. Assemblyline).

    Args:
        repository_url (str): Git repository to clone.
        extractors (list): Extractor names expected to be discovered.
        python_minor (int): Minimum Python 3 minor version the project supports.
        branch (str): Branch to clone, or None for the default branch.
    """
    # guard clause: skip early on interpreters older than the project supports
    # (module-level `import sys` is already in scope; the old function-level
    # re-import was redundant)
    if sys.version_info < (3, python_minor):
        pytest.skip("Unsupported Python version")

    # deferred imports: keep collection cheap when the test is skipped
    from tempfile import TemporaryDirectory

    from git import Repo

    with TemporaryDirectory() as working_dir:
        project_name = repository_url.rsplit("/", 1)[1]
        extractor_dir = os.path.join(working_dir, project_name)
        # shallow clone is enough -- we only inspect the working tree
        Repo.clone_from(repository_url, extractor_dir, depth=1, branch=branch)

        collector = Collector(extractor_dir, create_venv=True)
        assert set(extractors) == set(collector.extractors.keys())
94 |
95 |
def test_module_confusion():
    """Test module confusion."""
    import shutil
    from tempfile import TemporaryDirectory

    import git

    # ensure that the git import is kept
    assert git.__name__

    # Directories that have the same name as the Python module, shouldn't cause confusion on loading the right module
    assert Collector(os.path.join(__file__, "../extractors/bob")).extractors["Bob"]
    assert Collector(os.path.join(__file__, "../extractors")).extractors["Bob"]

    # Existing packages shouldn't interfere with loading extractors from directories with similar names
    with TemporaryDirectory() as ex_copy:
        copy_ex_dir = f"{ex_copy}/git"
        shutil.copytree(f"{TESTS_DIR}/extractors", copy_ex_dir, dirs_exist_ok=True)
        bob = Collector(copy_ex_dir).extractors["Bob"]
        assert bob and os.path.exists(bob["module_path"])
119 |
--------------------------------------------------------------------------------
/tests/benchmark.py:
--------------------------------------------------------------------------------
1 | """Benchmarking tests."""
2 |
3 | import os
4 | import timeit
5 |
6 | from demo_extractors.complex import complex
7 | from maco import base_test
8 |
# instance of extractor for synthetic comparison to maco
# (calls the extractor class directly, bypassing the collector machinery)
instance = complex.Complex()
11 |
12 |
class LocalBaseTest(base_test.BaseTest):
    """Shared setup for the benchmark test variants."""

    name = "Complex"
    path = os.path.join(__file__, "../../demo_extractors")
    create_venv = False

    @classmethod
    def setUpClass(cls) -> None:
        """Load the carted sample once for the whole class."""
        super().setUpClass()
        cls.input_file = cls.load_cart("data/trigger_complex.txt.cart")
        cls.input_file.seek(0)
26 |
27 |
class TestComplexSynthetic(LocalBaseTest):
    """Test extractors work bypassing maco."""

    def test_extract(self):
        """Run yara and the extractor directly against the sample."""
        self.input_file.seek(0)
        contents = self.input_file.read()
        self.input_file.seek(0)
        # run yara rules against sample
        hits = instance.yara_compiled.match(data=contents)
        self.assertEqual(len(hits), 2)
        outcome = instance.run(self.input_file, [])
        self.assertEqual(outcome.family, "complex")
41 |
42 |
class TestComplexNoVenv(LocalBaseTest):
    """Test extractors work without full venv isolation."""

    def test_extract(self):
        """Test extraction without a virtual environment."""
        self.input_file.seek(0)
        result = self.extract(self.input_file)
        self.assertEqual(result["family"], "complex")
        self.assertEqual(result["version"], "5")
52 |
53 |
class TestComplexVenv(LocalBaseTest):
    """Test extractors work when run with virtual environments."""

    # override the base class: isolate the extractor in its own venv
    create_venv = True

    def test_extract(self):
        """Test extraction with a virtual environment."""
        self.input_file.seek(0)
        result = self.extract(self.input_file)
        self.assertEqual(result["family"], "complex")
        self.assertEqual(result["version"], "5")
65 |
66 |
def make_synthetic():
    """Build an initialised synthetic test case for timeit.

    Returns:
        SyntheticTest
    """
    TestComplexSynthetic.setUpClass()
    case = TestComplexSynthetic()
    case.setUp()
    return case
77 |
78 |
def make_no_venv():
    """Build an initialised no-venv test case for timeit.

    Returns:
        Test without virtual environment isolation
    """
    TestComplexNoVenv.setUpClass()
    case = TestComplexNoVenv()
    case.setUp()
    return case
89 |
90 |
def make_venv():
    """Build an initialised venv test case for timeit.

    Returns:
        Test with virtual environment isolation
    """
    TestComplexVenv.setUpClass()
    case = TestComplexVenv()
    case.setUp()
    return case
101 |
102 |
if __name__ == "__main__":
    trials = 1000
    print(f"num trials: {trials}")
    print("results are number of seconds to execute total number of trials")
    # (label, timeit setup) pairs -- collapses three near-identical timeit calls
    # into one loop; output text and ordering are unchanged
    benchmarks = [
        (
            "synthetic comparison (directly import and execute extractor)",
            "from __main__ import make_synthetic; tc=make_synthetic()",
        ),
        ("maco no venv isolation", "from __main__ import make_no_venv; tc=make_no_venv()"),
        ("maco venv isolation", "from __main__ import make_venv; tc=make_venv()"),
    ]
    for label, setup in benchmarks:
        print(label)
        print(timeit.timeit("tc.test_extract()", setup=setup, number=trials))
131 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 | ### VisualStudioCode ###
163 | .vscode/*
164 | !.vscode/settings.json
165 | !.vscode/tasks.json
166 | !.vscode/launch.json
167 | !.vscode/extensions.json
168 | !.vscode/*.code-snippets
169 |
170 | # Local History for Visual Studio Code
171 | .history/
172 |
173 | # Built Visual Studio Code Extensions
174 | *.vsix
175 |
176 | ### VisualStudioCode Patch ###
177 | # Ignore all local history of files
178 | .history
179 | .ionide
180 |
--------------------------------------------------------------------------------
/maco/yara.py:
--------------------------------------------------------------------------------
1 | """yara-python facade that uses yara-x."""
2 |
3 | import re
4 | from collections import namedtuple
5 | from itertools import cycle
6 | from typing import Dict, List, Union
7 |
8 | import yara_x
9 |
10 | from maco.exceptions import SyntaxError
11 |
12 | RULE_ID_RE = re.compile("(\w+)? ?rule (\w+)")
13 |
14 |
15 | # Create interfaces that resembles yara-python (but is running yara-x under the hood)
class StringMatchInstance:
    """Single occurrence of a yara string match within the scanned data."""

    def __init__(self, match: yara_x.Match, file_content: bytes):
        """Capture the matched bytes and xor key from a yara-x match."""
        start = match.offset
        self.offset = start
        self.matched_length = match.length
        self.matched_data = file_content[start : start + match.length]
        self.xor_key = match.xor_key

    def plaintext(self) -> bytes:
        """Plaintext of the matched data.

        Returns:
            (bytes): Plaintext of the matched cipher text
        """
        if self.xor_key:
            # undo the xor the sample applied, repeating the recovered key
            key_stream = cycle(self.xor_key)
            return bytes(byte ^ key for byte, key in zip(self.matched_data, key_stream))
        # no xor key recovered: the matched data is already plaintext
        return self.matched_data
37 |
38 |
class StringMatch:
    """All occurrences of a single matched YARA pattern (string)."""

    def __init__(self, pattern: yara_x.Pattern, file_content: bytes):
        """Record the pattern identifier and each of its match occurrences."""
        self.identifier = pattern.identifier
        self.instances = []
        self._is_xor = False
        # single pass: build instances and detect any XOR-keyed occurrence
        for occurrence in pattern.matches:
            self.instances.append(StringMatchInstance(occurrence, file_content))
            if occurrence.xor_key:
                self._is_xor = True

    def is_xor(self):
        """Checks if string match is xor'd.

        Returns:
            (bool): True if match is xor'd
        """
        return self._is_xor
55 |
56 |
class Match:
    """Result of a single YARA rule hitting on the scanned data."""

    def __init__(self, rule: yara_x.Rule, file_content: bytes):
        """Copy rule identity, tags, metadata and string matches from a yara-x rule."""
        self.rule = rule.identifier
        self.namespace = rule.namespace
        self.tags = list(rule.tags) or []
        # accumulate metadata values per key so duplicate keys don't overwrite each other
        collected_meta = dict()
        for key, value in rule.metadata:
            collected_meta.setdefault(key, []).append(value)
        self.meta = collected_meta
        self.strings = [StringMatch(p, file_content) for p in rule.patterns]
70 |
71 |
class Rules:
    """Compiled YARA rules plus a scanner, mimicking the yara-python API."""

    def __init__(self, source: str = None, sources: Dict[str, str] = None):
        """Initializes Rules.

        Raises:
            SyntaxError: Raised when there's a syntax error in the YARA rule.
        """
        Rule = namedtuple("Rule", "identifier namespace is_global")
        if source:
            # a single source is compiled under the "default" namespace
            sources = {"default": source}

        try:
            self._rules = []
            compiler = yara_x.Compiler(relaxed_re_syntax=True)
            for namespace, rule_text in sources.items():
                compiler.new_namespace(namespace)
                # record each rule declaration found in the source text
                for rule_type, rule_id in RULE_ID_RE.findall(rule_text):
                    self._rules.append(
                        Rule(namespace=namespace, identifier=rule_id, is_global=rule_type == "global")
                    )
                compiler.add_source(rule_text)
            self.scanner = yara_x.Scanner(compiler.build())
        except yara_x.CompileError as e:
            raise SyntaxError(e)

    def __iter__(self):
        """Iterate over rules.

        Yields:
            YARA rules
        """
        yield from self._rules

    def match(self, filepath: str = None, data: Union[bytes, bytearray] = None) -> List[Match]:
        """Performs a scan to check for YARA rules matches based on the file, either given by path or buffer.

        Returns:
            (List[Match]): A list of YARA matches.
        """
        if filepath:
            # a file path takes precedence over a passed buffer
            with open(filepath, "rb") as handle:
                data = handle.read()

        if isinstance(data, bytearray):
            # the scanner expects immutable bytes
            data = bytes(data)

        scan_results = self.scanner.scan(data)
        return [Match(rule, data) for rule in scan_results.matching_rules]
121 |
122 |
def compile(source: str = None, sources: Dict[str, str] = None) -> Rules:
    """Compiles YARA rules from source or from sources.

    Returns:
        (Rules): a Rules object
    """
    return Rules(source=source, sources=sources)
130 |
--------------------------------------------------------------------------------
/maco/collector.py:
--------------------------------------------------------------------------------
1 | """Convenience functions for discovering your extractors."""
2 |
3 | import inspect
4 | import logging
5 | import logging.handlers
6 | import os
7 | import sys
8 | from tempfile import NamedTemporaryFile
9 | from types import ModuleType
10 | from typing import Any, BinaryIO, Dict, List, TypedDict, Union
11 |
12 | from multiprocess import Manager, Process, Queue
13 | from pydantic import BaseModel
14 |
15 | from maco import extractor, model, utils, yara
16 | from maco.exceptions import AnalysisAbortedException, ExtractorLoadError
17 |
18 | logger = logging.getLogger("maco.lib.helpers")
19 |
20 |
def _verify_response(resp: Union[BaseModel, dict]) -> Dict:
    """Enforce types and verify properties, and remove defaults.

    Args:
        resp (Union[BaseModel, dict])): results from extractor

    Returns:
        (Dict): results from extractor after verification
    """
    if not resp:
        return None
    # validate against the extractor's own model first, which may add
    # restrictions (e.g. on the 'other' dictionary)
    resp_type = type(resp)
    if resp_type != model.ExtractorModel and hasattr(resp_type, "model_validate"):
        resp = resp_type.model_validate(resp)
    # then validate against the common ExtractorModel
    validated = model.ExtractorModel.model_validate(resp)
    # round-trip through the model so sets are coerced into the expected list types
    coerced = model.ExtractorModel(**validated.model_dump())
    # serialize, dropping fields left at their default values
    return coerced.model_dump(exclude_defaults=True)
44 |
45 |
class ExtractorMetadata(TypedDict):
    """Descriptive metadata reported by an extractor."""

    family: str  # malware family the extractor targets
    author: str  # who wrote the extractor
    last_modified: str  # date the extractor was last changed
    sharing: str  # sharing designation
    description: str  # human-readable summary of the extractor
54 |
55 |
class ExtractorRegistration(TypedDict):
    """Record describing a single discovered extractor."""

    venv: str  # virtual environment associated with the extractor, if any
    module_path: str  # filesystem path of the module containing the extractor
    module_name: str  # importable (dotted) name of that module
    extractor_class: str  # name of the extractor class within the module
    metadata: ExtractorMetadata  # extractor-supplied descriptive metadata
64 |
65 |
class Collector:
    """Discover and load extractors from file system."""

    def __init__(
        self,
        path_extractors: str,
        include: List[str] = None,
        exclude: List[str] = None,
        create_venv: bool = False,
        skip_install: bool = False,
    ):
        """Discover and load extractors from file system.

        Args:
            path_extractors (str): file or directory path to search for extractors
            include (List[str]): if set, only register extractor classes with these names
            exclude (List[str]): if set, skip extractor classes with these names
            create_venv (bool): create virtual environments for extractors
                (only applied when path_extractors is a directory)
            skip_install (bool): skip installation of extractor dependencies

        Raises:
            ExtractorLoadError: when no extractors are found
        """
        # maco requires the extractor to be imported directly, so ensure they are available on the path
        full_path_extractors = os.path.abspath(path_extractors)
        full_path_above_extractors = os.path.dirname(full_path_extractors)
        # Modify the PATH so we can recognize this new package on import
        if full_path_extractors not in sys.path:
            sys.path.insert(1, full_path_extractors)
        if full_path_above_extractors not in sys.path:
            sys.path.insert(1, full_path_above_extractors)

        path_extractors = os.path.realpath(path_extractors)
        self.path: str = path_extractors
        self.extractors: Dict[str, ExtractorRegistration] = {}

        # Manager-backed dicts let the child discovery process report results back to us
        with Manager() as manager:
            extractors = manager.dict()
            namespaced_rules = manager.dict()

            def extractor_module_callback(module: ModuleType, venv: str):
                # Invoked (in the child process) for each imported module; registers
                # every member that passes maco extractor validation.
                members = inspect.getmembers(module, predicate=utils.maco_extractor_validation)
                for member in members:
                    name, member = member
                    if exclude and name in exclude:
                        # Module is part of the exclusion list, skip
                        logger.debug(f"exclude excluded '{name}'")
                        return

                    if include and name not in include:
                        # Module wasn't part of the inclusion list, skip
                        logger.debug(f"include excluded '{name}'")
                        return

                    # initialise and register
                    logger.debug(f"register '{name}'")
                    extractors[name] = dict(
                        venv=venv,
                        module_path=module.__file__,
                        module_name=member.__module__,
                        extractor_class=member.__name__,
                        metadata={
                            "family": member.family,
                            "author": member.author,
                            "last_modified": member.last_modified,
                            "sharing": member.sharing,
                            "description": member.__doc__,
                        },
                    )
                    # fall back to the default rule template when the extractor supplies no rule
                    namespaced_rules[name] = member.yara_rule or extractor.DEFAULT_YARA_RULE.format(name=name)

            # multiprocess logging is awkward - set up a queue to ensure we can log
            logging_queue = Queue()
            queue_handler = logging.handlers.QueueListener(logging_queue, *logging.getLogger().handlers)
            queue_handler.start()

            # Find the extractors within the given directory
            # Execute within a child process to ensure main process interpreter is kept clean
            p = Process(
                target=utils.proxy_logging,
                args=(
                    logging_queue,
                    utils.import_extractors,
                    extractor_module_callback,
                ),
                kwargs=dict(
                    root_directory=path_extractors,
                    scanner=yara.compile(source=utils.MACO_YARA_RULE),
                    # venv creation only makes sense for a directory of extractors
                    create_venv=create_venv and os.path.isdir(path_extractors),
                    skip_install=skip_install,
                ),
            )
            p.start()
            p.join()

            # stop multiprocess logging
            queue_handler.stop()
            logging_queue.close()

            # snapshot the manager-backed dict into a plain dict before the manager shuts down
            self.extractors = dict(extractors)
            if not self.extractors:
                raise ExtractorLoadError("no extractors were loaded")
            logger.debug(f"found extractors {list(self.extractors.keys())}\n")

            # compile yara rules gathered from extractors
            self.rules = yara.compile(sources=dict(namespaced_rules))

    def match(self, stream: BinaryIO) -> Dict[str, List[yara.Match]]:
        """Return extractors that should run based on yara rules.

        Args:
            stream (BinaryIO): stream to scan (position is reset to 0 afterwards)

        Returns:
            (Dict[str, List[yara.Match]]): map of extractor name (rule namespace) to the
                rule matches that hit, or None when no rules matched
        """
        # execute yara rules on file to find extractors we should run
        # yara can't run on a stream so we give it a bytestring
        matches = self.rules.match(data=stream.read())
        stream.seek(0)
        if not matches:
            return
        # get all rules that hit for each extractor
        runs = {}
        for match in matches:
            runs.setdefault(match.namespace, []).append(match)

        return runs

    def extract(
        self,
        stream: BinaryIO,
        extractor_name: str,
    ) -> Dict[str, Any]:
        """Run extractor with stream and verify output matches the model.

        Args:
            stream (BinaryIO): Binary stream to analyze
            extractor_name (str): Name of extractor to analyze stream

        Returns:
            (Dict[str, Any]): Results from extractor, or None if analysis was aborted
        """
        # NOTE: this local shadows the module-level `extractor` import within this method
        extractor = self.extractors[extractor_name]
        try:
            # Run extractor on a copy of the sample
            with NamedTemporaryFile() as sample_path:
                sample_path.write(stream.read())
                sample_path.flush()
                # enforce types and verify properties, and remove defaults
                return _verify_response(
                    utils.run_extractor(
                        sample_path.name,
                        module_name=extractor["module_name"],
                        extractor_class=extractor["extractor_class"],
                        module_path=extractor["module_path"],
                        venv=extractor["venv"],
                    )
                )
        except AnalysisAbortedException:
            # Extractor voluntarily aborted analysis of sample
            return
        except Exception:
            # caller can deal with the exception
            raise
        finally:
            # make sure to reset where we are in the file
            # otherwise follow on extractors are going to read 0 bytes
            stream.seek(0)
221 |
--------------------------------------------------------------------------------
/maco/cli.py:
--------------------------------------------------------------------------------
1 | """CLI example of how extractors can be executed."""
2 |
3 | import argparse
4 | import base64
5 | import binascii
6 | import hashlib
7 | import io
8 | import json
9 | import logging
10 | import os
11 | import sys
12 | from importlib.metadata import version
13 | from typing import BinaryIO, List, Tuple
14 |
15 | import cart
16 |
17 | from maco import collector
18 |
19 | logger = logging.getLogger("maco.lib.cli")
20 |
21 |
def process_file(
    collected: collector.Collector,
    path_file: str,
    stream: BinaryIO,
    *,
    pretty: bool,
    force: bool,
    include_base64: bool,
):
    """Run the collected extractors over a single file stream and log results.

    Args:
        collected (collector.Collector): a Collector instance
        path_file (str): path to sample to be analyzed
        stream (BinaryIO): binary stream to be analyzed
        pretty (bool): Pretty print the JSON output
        force (bool): Run all extractors regardless of YARA rule match
        include_base64 (bool): include base64'd data in output

    Returns:
        (dict): The output from the extractors analyzing the sample
    """
    # transparently unpack neutered (cart) samples
    unpacked = io.BytesIO()
    try:
        cart.unpack_stream(stream, unpacked)
        stream = unpacked
    except Exception:
        # invalid/malformed cart - keep analysing the original stream
        pass
    # unpack will read some bytes either way so reset position
    stream.seek(0)

    if force:
        # execute all extractors with no yara information
        # note - extractors may rely on a yara hit so this may cause errors
        runs = {name: [] for name in collected.extractors.keys()}
    else:
        # find extractors that should run based on yara rules
        runs = collected.match(stream)
    if not runs:
        return

    logger.info(f"path: {path_file}")
    results = {}
    for extractor_name, hits in runs.items():
        logger.info(f"run {extractor_name} extractor from rules {[x.rule for x in hits]}")
        resp = None
        try:
            resp = collected.extract(stream, extractor_name)
        except Exception as e:
            logger.exception(f"extractor error with {path_file} ({e})")
        if resp:
            # replace raw bytes with printable summaries so results serialize as json
            for binary in resp.get("binaries", []):
                raw = binary["data"]
                binary["sha256"] = hashlib.sha256(raw).hexdigest()
                # number of bytes in the binary
                binary["size"] = len(raw)
                # small sample of first part of binary
                binary["hex_sample"] = binascii.hexlify(raw[:32]).decode("utf8").upper()
                if include_base64:
                    # this can be large
                    binary["base64"] = base64.b64encode(raw).decode("utf8")
                # do not print raw bytes to console
                binary.pop("data")
        results[extractor_name] = resp
        logger.info(json.dumps(resp, indent=2 if pretty else None))
        logger.info("")

    return results
97 |
98 |
def process_filesystem(
    path_extractors: str,
    path_samples: str,
    include: List[str],
    exclude: List[str],
    *,
    pretty: bool,
    force: bool,
    include_base64: bool,
    create_venv: bool = False,
    skip_install: bool = False,
) -> Tuple[int, int, int]:
    """Process filesystem with extractors and print results of extraction.

    Args:
        path_extractors (str): path to extractors
        path_samples (str): path to a sample file or directory tree of samples
        include (List[str]): extractor names to run exclusively
        exclude (List[str]): extractor names to skip
        pretty (bool): pretty print json output
        force (bool): run all extractors regardless of yara rule match
        include_base64 (bool): include base64 encoded binary data in output
        create_venv (bool): create venvs for every requirements.txt found
        skip_install (bool): skip installing extractor Python dependencies

    Returns:
        (Tuple[int, int, int]): Total number of analysed files, yara hits and successful maco extractions.
    """
    if force:
        logger.warning("force execute will cause errors if an extractor requires a yara rule hit during execution")
    collected = collector.Collector(
        path_extractors, include=include, exclude=exclude, create_venv=create_venv, skip_install=skip_install
    )

    logger.info(f"extractors loaded: {[x for x in collected.extractors.keys()]}\n")
    # only the registration values are needed here, not the extractor names
    for extractor in collected.extractors.values():
        extractor_meta = extractor["metadata"]
        logger.info(
            f"{extractor_meta['family']} by {extractor_meta['author']}"
            f" {extractor_meta['last_modified']} {extractor_meta['sharing']}"
            f"\n{extractor_meta['description']}\n"
        )

    num_analysed = 0
    num_hits = 0
    num_extracted = 0
    if os.path.isfile(path_samples):
        # analyse a single file
        walker = [("", None, [path_samples])]
    elif os.path.isdir(path_samples):
        # load files from directory tree
        walker = os.walk(path_samples)
    else:
        logger.error(f"not file or folder: {path_samples}")
        # sys.exit rather than the site-injected builtin exit(), which is
        # intended for interactive use only
        sys.exit(2)
    try:
        base_directory = os.path.abspath(path_samples)
        for path, _, files in walker:
            for file in files:
                num_analysed += 1
                path_file = os.path.abspath(os.path.join(path, file))
                if not path_file.startswith(base_directory):
                    logger.error(f"Attempted path traversal detected: {path_file}")
                    continue

                try:
                    with open(path_file, "rb") as stream:
                        resp = process_file(
                            collected,
                            path_file,
                            stream,
                            pretty=pretty,
                            force=force,
                            include_base64=include_base64,
                        )
                        if resp:
                            num_hits += 1
                            if any(x for x in resp.values()):
                                num_extracted += 1
                except Exception as e:
                    logger.exception(f"file error with {path_file} ({e})")
                    continue
    finally:
        # always report totals, even if the walk raised
        logger.info("")
        logger.info(f"{num_analysed} analysed, {num_hits} hits, {num_extracted} extracted")
    return num_analysed, num_hits, num_extracted
176 |
177 |
def _build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the maco CLI."""
    parser = argparse.ArgumentParser(description="Run extractors over samples.")
    parser.add_argument("extractors", type=str, help="path to extractors")
    parser.add_argument("samples", type=str, help="path to samples")
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="print debug logging. -v extractor info, -vv extractor debug, -vvv cli debug",
    )
    parser.add_argument("--pretty", action="store_true", help="pretty print json output")
    parser.add_argument(
        "--base64",
        action="store_true",
        help="Include base64 encoded binary data in output "
        "(can be large, consider printing to file rather than console)",
    )
    parser.add_argument("--logfile", type=str, help="file to log output")
    parser.add_argument("--include", type=str, help="comma separated extractors to run")
    parser.add_argument("--exclude", type=str, help="comma separated extractors to not run")
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="ignore yara rules and execute all extractors",
    )
    parser.add_argument(
        "--create_venv",
        action="store_true",
        help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). "
        "This runs much slower than the alternative but may be necessary "
        "when there are many extractors with conflicting dependencies.",
    )
    parser.add_argument(
        "--force_install",
        action="store_true",
        help="Force installation of Python dependencies for extractors (in both host and virtual environments).",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"version: {version('maco')}",
        help="Show version of MACO",
    )
    return parser


def _configure_logging(args) -> None:
    """Set up console (and optional file) logging from the parsed CLI arguments."""
    # library logging: debug only with -vvv
    logger_lib = logging.getLogger("maco.lib")
    logger_lib.setLevel(logging.DEBUG if args.verbose > 2 else logging.INFO)
    console_lib = logging.StreamHandler(sys.stdout)
    console_lib.setLevel(logging.DEBUG)
    logger_lib.addHandler(console_lib)

    # extractor logging: default warnings, -v info, -vv (or more) debug
    logger_ex = logging.getLogger("maco.extractor")
    verbosity_levels = {0: logging.WARNING, 1: logging.INFO}
    logger_ex.setLevel(verbosity_levels.get(args.verbose, logging.DEBUG))
    console_ex = logging.StreamHandler(sys.stdout)
    console_ex.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        fmt="%(asctime)s, [%(levelname)s] %(module)s.%(funcName)s: %(message)s", datefmt="%Y-%m-%d (%H:%M:%S)"
    )
    console_ex.setFormatter(formatter)
    logger_ex.addHandler(console_ex)

    # optionally log everything to file
    if args.logfile:
        logger_file = logging.getLogger("maco")
        # NOTE(review): matches original behaviour - the lib logger is forced to DEBUG
        # here, presumably so lib debug records reach the file handler; confirm intent
        logger_lib.setLevel(logging.DEBUG)
        file_handler = logging.FileHandler(args.logfile)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger_file.addHandler(file_handler)


def main():
    """CLI entry point: parse arguments, configure logging and process samples."""
    args = _build_parser().parse_args()
    included = args.include.split(",") if args.include else []
    excluded = args.exclude.split(",") if args.exclude else []

    _configure_logging(args)

    process_filesystem(
        args.extractors,
        args.samples,
        included,
        excluded,
        pretty=args.pretty,
        force=args.force,
        include_base64=args.base64,
        create_venv=args.create_venv,
        skip_install=not args.force_install,
    )
272 |
273 |
274 | if __name__ == "__main__":
275 | main()
276 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Maco - Malware config extractor framework
2 |
3 | ## Maco is a framework for malware config extractors.
4 |
5 | It aims to solve two problems:
6 |
7 | - Define a standardized ontology (or model) for extractor output. This greatly helps when databasing extracted values.
8 | - Provide a standard way of identifying which parsers to run and how to execute them.
9 |
10 | ## Maco components
11 |
12 | - `model.py`
13 | - A data model for the common output of an extractor
14 | - `extractor.py`
15 | - Base class for extractors to implement
16 | - `collector.py`
17 | - Utilities for loading and running extractors
18 | - `cli.py`
19 | - A CLI tool `maco` to assist with running your extractors locally
20 | - `base_test.py`
21 | - Assist with writing unit tests for your extractors
22 |
23 | **Note: If you're interested in using only the model in your project, you can `pip install maco-model` which is a smaller package containing only the model definition**
24 |
25 | ## Project Integrations 🛠️
26 |
27 | This framework is actively being used by:
28 |
29 | | Project | Description | License |
30 | | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
31 | |
| A malware analysis platform that uses the MACO model to export malware configuration extractions into a parseable, machine-friendly format | [](https://github.com/CybercentreCanada/assemblyline/blob/main/LICENSE.md) |
32 | | [configextractor-py](https://github.com/CybercentreCanada/configextractor-py) | A tool designed to run extractors from multiple frameworks and uses the MACO model for output harmonization | [](https://github.com/CybercentreCanada/configextractor-py/blob/main/LICENSE.md) |
33 | |
| A robust, multiprocessing-capable, multi-family RAT config parser/extractor that is compatible with MACO | [](https://github.com/jeFF0Falltrades/rat_king_parser/blob/master/LICENSE) |
34 | |
| A parser/extractor repository containing MACO extractors that's authored by the CAPE community but is integrated in [CAPE](https://github.com/kevoreilly/CAPEv2) deployments.
**Note: These MACO extractors wrap and parse the original CAPE extractors.** | [](https://github.com/kevoreilly/CAPEv2/blob/master/LICENSE) |
35 |
36 | ## Model Example
37 |
38 | See [the model definition](https://github.com/CybercentreCanada/Maco/blob/0f447a66de5e5ce8770ef3fe2325aec002842e63/maco/model.py#L127) for all the supported fields.
39 | You can use the model independently of the rest of the framework.
40 | This is still useful for compatibility between systems!
41 |
42 | ```python
43 | from maco import model
44 | # 'family' is the only required property on the model
45 | output = model.ExtractorModel(family="wanabee")
46 | output.version = "2019" # variant first found in 2019
47 | output.category.extend([model.CategoryEnum.cryptominer, model.CategoryEnum.clickfraud])
48 | output.http.append(model.ExtractorModel.Http(protocol="https",
49 | uri="https://bad-domain.com/c2_payload",
50 | usage="c2"))
51 | output.tcp.append(model.ExtractorModel.Connection(server_ip="127.0.0.1",
52 | usage="ransom"))
53 | output.campaign_id.append("859186-3224-9284")
54 | output.inject_exe.append("explorer.exe")
55 | output.binaries.append(
56 | output.Binary(
57 | data=b"sam I am",
58 | datatype=output.Binary.TypeEnum.config,
59 | encryption=output.Binary.Encryption(
60 | algorithm="rot26",
61 | mode="block",
62 | ),
63 | )
64 | )
65 | # data about the malware that doesn't fit the model
66 | output.other["author_lunch"] = "green eggs and ham"
67 | output.other["author_lunch_time"] = "3pm"
68 | print(output.model_dump(exclude_defaults=True))
69 |
70 | # Generated model
71 | {
72 | 'family': 'wanabee',
73 | 'version': '2019',
74 | 'category': ['cryptominer', 'clickfraud'],
75 | 'campaign_id': ['859186-3224-9284'],
76 | 'inject_exe': ['explorer.exe'],
77 | 'other': {'author_lunch': 'green eggs and ham', 'author_lunch_time': '3pm'},
78 | 'http': [{'uri': 'https://bad-domain.com/c2_payload', 'usage': 'c2', 'protocol': 'https'}],
79 | 'tcp': [{'server_ip': '127.0.0.1', 'usage': 'ransom'}],
80 | 'binaries': [{
81 | 'datatype': 'config', 'data': b'sam I am',
82 | 'encryption': {'algorithm': 'rot26', 'mode': 'block'}
83 | }]
84 | }
85 | ```
86 |
87 | And you can create model instances from dictionaries:
88 |
89 | ```python
90 | from maco import model
91 | output = {
92 | "family": "wanabee2",
93 | "version": "2022",
94 | "ssh": [
95 | {
96 | "username": "wanna",
97 | "password": "bee2",
98 | "hostname": "10.1.10.100",
99 | }
100 | ],
101 | }
102 | print(model.ExtractorModel(**output))
103 |
104 | # Generated model
105 | family='wanabee2' version='2022' category=[] attack=[] capability_enabled=[]
106 | capability_disabled=[] campaign_id=[] identifier=[] decoded_strings=[]
107 | password=[] mutex=[] pipe=[] sleep_delay=None inject_exe=[] other={}
108 | binaries=[] ftp=[] smtp=[] http=[]
109 | ssh=[SSH(username='wanna', password='bee2', hostname='10.1.10.100', port=None, usage=None)]
110 | proxy=[] dns=[] tcp=[] udp=[] encryption=[] service=[] cryptocurrency=[]
111 | paths=[] registry=[]
112 | ```
113 |
114 | ## Extractor Example
115 |
116 | The following extractor will trigger on any file with more than 50 ELF sections,
117 | and set some properties in the model.
118 |
119 | Your extractors will do a better job of finding useful information than this one!
120 |
121 | ```python
122 | class Elfy(extractor.Extractor):
123 | """Check basic elf property."""
124 |
125 | family = "elfy"
126 | author = "blue"
127 | last_modified = "2022-06-14"
128 | yara_rule = """
129 | import "elf"
130 |
131 | rule Elfy
132 | {
133 | condition:
134 | elf.number_of_sections > 50
135 | }
136 | """
137 |
138 | def run(
139 | self, stream: BytesIO, matches: List[yara.Match]
140 | ) -> Optional[model.ExtractorModel]:
141 | # return config model formatted results
142 | ret = model.ExtractorModel(family=self.family)
143 | # the list for campaign_id already exists and is empty, so we just add an item
144 | ret.campaign_id.append(str(len(stream.read())))
145 | return ret
146 | ```
147 |
148 | ## Writing Extractors
149 |
150 | There are several examples that use Maco in the '`demo_extractors`' folder.
151 |
152 | Some things to keep in mind:
153 |
154 | - The Yara rule names must be prefixed with the extractor class name.
155 | - e.g. Class 'MyScript' has Yara rules named 'MyScriptDetect1' and 'MyScriptDetect2', not 'Detect1'
156 | - You can load other scripts contained within the same folder via a Python relative import
157 | - See `complex.py` for details
158 | - You can standardise your usage of the '`other`' dict
159 | - This is optional, see `limit_other.py` for details
160 | - Consider instead making a PR with the properties you are frequently using
161 |
162 | # Requirements
163 |
164 | Python 3.8+.
165 |
166 | Install this package with `pip install maco`.
167 |
168 | All required Python packages are in the `requirements.txt`.
169 |
170 | # CLI Usage
171 |
172 | ```bash
173 | > maco --help
174 | usage: maco [-h] [-v] [--pretty] [--base64] [--logfile LOGFILE] [--include INCLUDE] [--exclude EXCLUDE] [-f] [--create_venv] extractors samples
175 |
176 | Run extractors over samples.
177 |
178 | positional arguments:
179 | extractors path to extractors
180 | samples path to samples
181 |
182 | optional arguments:
183 | -h, --help show this help message and exit
184 | -v, --verbose print debug logging. -v extractor info, -vv extractor debug, -vvv cli debug
185 | --pretty pretty print json output
186 | --base64 Include base64 encoded binary data in output (can be large, consider printing to file rather than console)
187 | --logfile LOGFILE file to log output
188 | --include INCLUDE comma separated extractors to run
189 | --exclude EXCLUDE comma separated extractors to not run
190 | -f, --force ignore yara rules and execute all extractors
191 | --create_venv Creates venvs for every requirements.txt found (only applies when extractor path is a directory)
192 | ```
193 |
194 | ## CLI output example
195 |
196 | The CLI is helpful for using your extractors in a standalone system, such as in a reverse engineering environment.
197 |
198 | ```bash
199 | > maco demo_extractors/ /usr/lib --include Complex
200 | extractors loaded: ['Complex']
201 |
202 | complex by blue 2022-06-14 TLP:WHITE
203 | This script has multiple yara rules and coverage of the data model.
204 |
205 | path: /usr/lib/udev/hwdb.bin
206 | run Complex extractor from rules ['ComplexAlt']
207 | {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
208 | "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
209 | "encryption": {"algorithm": "something"}}],
210 | "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/9956330", "usage": "c2"}],
211 | "encryption": [{"algorithm": "sha256"}]}
212 |
213 | path: /usr/lib/udev/hwdb.d/20-OUI.hwdb
214 | run Complex extractor from rules ['ComplexAlt']
215 | {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
216 | "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
217 | "encryption": {"algorithm": "something"}}],
218 | "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/1986908", "usage": "c2"}],
219 | "encryption": [{"algorithm": "sha256"}]}
220 |
221 | path: /usr/lib/udev/hwdb.d/20-usb-vendor-model.hwdb
222 | run Complex extractor from rules ['ComplexAlt']
223 | {"family": "complex", "version": "5", "decoded_strings": ["Paradise"],
224 | "binaries": [{"datatype": "payload", "size": 9, "hex_sample": "736F6D652064617461", "sha256": "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee",
225 | "encryption": {"algorithm": "something"}}],
226 | "http": [{"protocol": "https", "hostname": "blarg5.com", "path": "/malz/1257481", "usage": "c2"}],
227 | "encryption": [{"algorithm": "sha256"}]}
228 |
229 |
230 | 15884 analysed, 3 hits, 3 extracted
231 | ```
232 |
233 | The demo extractors are designed to trigger when run over the '`demo_extractors`' folder.
234 |
235 | e.g. `maco demo_extractors demo_extractors`
236 |
237 | # Contributions
238 |
239 | Please use ruff to format and lint PRs; unformatted code may cause PR test failures.
240 |
241 | Ruff will attempt to fix most issues, but some may require manual resolution.
242 |
243 | ```
244 | pip install ruff
245 | ruff format
246 | ruff check --fix
247 | ```
248 |
--------------------------------------------------------------------------------
/tests/test_model.py:
--------------------------------------------------------------------------------
1 | """Model validation testing."""
2 |
3 | import unittest
4 | from typing import Dict
5 |
6 | from pydantic import ValidationError
7 |
8 | from maco import collector, model
9 |
10 |
class TestModelObject(unittest.TestCase):
    """Test the model object."""

    # Show full diffs when assertions on the large config dicts fail.
    maxDiff = None

    def test_model_invalid(self):
        """Test invalid model."""
        # family not supplied
        self.assertRaises(ValidationError, model.ExtractorModel)

        ret = model.ExtractorModel(family="octopus")
        # invalid property
        self.assertRaises(ValueError, setattr, *(ret, "invalid", 12345))
        # invalid type
        ret.sleep_delay = "test"
        self.assertRaises(ValidationError, collector._verify_response, ret)

    def test_model_object_1(self):
        """Test the model object with basic requirements."""
        # object example
        tmp = model.ExtractorModel(family="scuba")
        tmp.campaign_id.append("5467")
        self.verify(tmp, {"family": "scuba", "campaign_id": ["5467"]})

    def test_model_object_2(self):
        """Test the model object with more data."""
        # Populate a wide range of model fields, then check the serialized
        # output drops empty/None values (e.g. category, attack, None kwargs).
        em = model.ExtractorModel
        tmp = model.ExtractorModel(
            family="scuba",
            version="lotso_stuff",
            category=[],
            attack=[],
            capability_enabled=[],
            capability_disabled=[],
            campaign_id=["32"],
            identifier=["uxuduxuduxuudux"],
            decoded_strings=["there", "are", "some", "strings"],
            password=["hunter2"],
            mutex=["YEAH"],
            pipe=["xiod"],
            sleep_delay=45000,
            sleep_delay_jitter=2500,
            inject_exe=["Teams.exe"],
            other={"misc_data": {"nested": 5}},
            binaries=[
                em.Binary(
                    datatype=None,
                    data=b"\x10\x20\x30\x40",
                    other={
                        "datatype": ["payload"],
                        "extension": [".invalid"],
                        "label": ["xor 0x04 at 0x2130-0x2134"],
                        "some_junk": [1, 2, 3, 4, 5, 6],
                    },
                    encryption=em.Binary.Encryption(
                        algorithm="alxor",
                        public_key=None,
                        key=None,
                        provider=None,
                        mode=None,
                        iv=None,
                        seed=None,
                        nonce=None,
                        constants=[],
                        usage="binary",
                    ),
                ),
                em.Binary(
                    datatype=None,
                    data=b"\x50\x60\x70\x80",
                    other={"datatype": ["payload"]},
                    # a single binary may carry a list of encryption layers
                    encryption=[
                        em.Binary.Encryption(
                            algorithm="alxor",
                            public_key=None,
                            key=None,
                            provider=None,
                            mode=None,
                            iv=None,
                            seed=None,
                            nonce=None,
                            constants=[],
                            usage="binary",
                        ),
                        em.Binary.Encryption(
                            algorithm="RC4",
                            public_key=None,
                            key=None,
                            provider=None,
                            mode=None,
                            iv=None,
                            seed=None,
                            nonce=None,
                            constants=[],
                            usage="binary",
                        ),
                    ],
                ),
            ],
            ftp=[
                em.FTP(
                    username=None,
                    password=None,
                    hostname="somewhere",
                    port=None,
                    path=None,
                    usage="c2",
                )
            ],
            smtp=[
                em.SMTP(
                    username=None,
                    password=None,
                    hostname="here.com",
                    port=None,
                    mail_to=[],
                    mail_from=None,
                    subject=None,
                    usage="upload",
                )
            ],
            http=[
                em.Http(
                    uri=None,
                    protocol="https",
                    username=None,
                    password=None,
                    hostname="blarg.com",
                    port=None,
                    path="/malz",
                    query=None,
                    fragment=None,
                    user_agent=None,
                    method=None,
                    headers=None,
                    max_size=None,
                    usage="c2",
                )
            ],
            ssh=[
                em.SSH(
                    username=None,
                    password=None,
                    hostname="bad.malware",
                    port=None,
                    usage="download",
                )
            ],
            proxy=[
                em.Proxy(
                    protocol=None,
                    username=None,
                    password=None,
                    hostname="192.168.0.80",
                    port=None,
                    usage="tunnel",
                )
            ],
            icmp=[
                em.ICMP(
                    type=None,
                    code=None,
                    header="DEADBEEF",
                    hostname="192.168.0.80",
                    usage="c2",
                )
            ],
            dns=[em.DNS(ip="123.21.21.21", port=None, usage="other")],
            tcp=[
                em.Connection(
                    client_ip=None,
                    client_port=None,
                    server_ip="73.21.32.43",
                    server_domain=None,
                    server_port=None,
                    usage="c2",
                )
            ],
            udp=[
                em.Connection(
                    client_ip=None,
                    client_port=None,
                    server_ip="73.21.32.43",
                    server_domain=None,
                    server_port=None,
                    usage="c2",
                )
            ],
            encryption=[
                em.Encryption(
                    algorithm="alxor",
                    public_key=None,
                    key=None,
                    provider=None,
                    mode=None,
                    iv=None,
                    seed=None,
                    nonce=None,
                    constants=[],
                    usage="binary",
                )
            ],
            service=[
                em.Service(
                    dll=None,
                    name="DeviceMonitorSvc",
                    display_name="DeviceMonitorSvc",
                    description="Device Monitor Service",
                )
            ],
            cryptocurrency=[
                em.Cryptocurrency(
                    coin="APE",
                    address="689fdh658790d6dr987yth84iyth7er8gtrfohyt9",
                    ransom_amount=None,
                    usage="miner",
                )
            ],
            paths=[
                em.Path(path="C:/Windows/system32", usage="install"),
                em.Path(path="C:/user/USERNAME/xxxxx/xxxxx/", usage="logs"),
                em.Path(path="\\here\\is\\some\\place", usage="install"),
            ],
            registry=[
                em.Registry(key="HKLM_LOCAL_USER/some/location/to/key", usage="store_data"),
                em.Registry(key="HKLM_LOCAL_USER/system/location", usage="read"),
            ],
        )
        self.verify(
            tmp,
            {
                "family": "scuba",
                "version": "lotso_stuff",
                "campaign_id": ["32"],
                "identifier": ["uxuduxuduxuudux"],
                "decoded_strings": ["there", "are", "some", "strings"],
                "password": ["hunter2"],
                "mutex": ["YEAH"],
                "pipe": ["xiod"],
                "sleep_delay": 45000,
                "sleep_delay_jitter": 2500,
                "icmp": [{"header": "DEADBEEF", "hostname": "192.168.0.80", "usage": "c2"}],
                "inject_exe": ["Teams.exe"],
                "other": {"misc_data": {"nested": 5}},
                "binaries": [
                    {
                        "data": b"\x10 0@",
                        "other": {
                            "datatype": ["payload"],
                            "extension": [".invalid"],
                            "label": ["xor 0x04 at 0x2130-0x2134"],
                            "some_junk": [1, 2, 3, 4, 5, 6],
                        },
                        "encryption": {"algorithm": "alxor", "usage": "binary"},
                    },
                    {
                        "data": b"P`p\x80",
                        "other": {"datatype": ["payload"]},
                        "encryption": [
                            {"algorithm": "alxor", "usage": "binary"},
                            {"algorithm": "RC4", "usage": "binary"},
                        ],
                    },
                ],
                "ftp": [{"hostname": "somewhere", "usage": "c2"}],
                "smtp": [{"hostname": "here.com", "usage": "upload"}],
                "http": [
                    {
                        "protocol": "https",
                        "hostname": "blarg.com",
                        "path": "/malz",
                        "usage": "c2",
                    }
                ],
                "ssh": [{"hostname": "bad.malware", "usage": "download"}],
                "proxy": [{"hostname": "192.168.0.80", "usage": "tunnel"}],
                "dns": [{"ip": "123.21.21.21", "usage": "other"}],
                "tcp": [{"server_ip": "73.21.32.43", "usage": "c2"}],
                "udp": [{"server_ip": "73.21.32.43", "usage": "c2"}],
                "encryption": [{"algorithm": "alxor", "usage": "binary"}],
                "service": [
                    {
                        "name": "DeviceMonitorSvc",
                        "display_name": "DeviceMonitorSvc",
                        "description": "Device Monitor Service",
                    }
                ],
                "cryptocurrency": [
                    {
                        "coin": "APE",
                        "address": "689fdh658790d6dr987yth84iyth7er8gtrfohyt9",
                        "usage": "miner",
                    }
                ],
                "paths": [
                    {"path": "C:/Windows/system32", "usage": "install"},
                    {"path": "C:/user/USERNAME/xxxxx/xxxxx/", "usage": "logs"},
                    {"path": "\\here\\is\\some\\place", "usage": "install"},
                ],
                "registry": [
                    {
                        "key": "HKLM_LOCAL_USER/some/location/to/key",
                        "usage": "store_data",
                    },
                    {"key": "HKLM_LOCAL_USER/system/location", "usage": "read"},
                ],
            },
        )

    def verify(self, in1, in2: Dict) -> None:
        """Verify the returned data matches the schema.

        Args:
            in1: Model instance to pass through the collector's verification.
            in2 (Dict): Expected serialized form of the model.
        """
        resp = collector._verify_response(in1)
        self.assertEqual(resp, in2)
324 |
325 |
class TestModelDict(unittest.TestCase):
    """Test verifying dicts against the schema."""

    def test_model_1(self):
        """Test the model object with basic requirements."""
        # dict example
        self.verify(
            {
                "family": "scuba",
                "version": "30-01-2023",
                "http": [
                    {
                        "protocol": "https",
                        "hostname": "blarg.com",
                        "path": "/malz",
                        "usage": "c2",
                    }
                ],
            }
        )

    def test_model_2(self):
        """Test the model object with more data."""
        # dict example large
        self.maxDiff = None

        self.verify(
            {
                "family": "scuba",
                "version": "lotso_stuff",
                "binaries": [
                    {
                        # raw bytes literal: the backslashes are literal characters here
                        "data": rb"\x10\x20\x30\x40",
                        "encryption": {"algorithm": "alxor", "usage": "binary"},
                        "other": {
                            "datatype": ["payload"],
                            "extension": [".invalid"],
                            "label": ["xor 0x04 at 0x2130-0x2134"],
                            "some_junk": [1, 2, 3, 4, 5, 6],
                        },
                    },
                    {
                        "data": rb"\x50\x60\x70\x80",
                        "encryption": [
                            {"algorithm": "alxor", "usage": "binary"},
                            {"algorithm": "RC4", "usage": "binary"},
                        ],
                        "other": {
                            "datatype": ["payload"],
                        },
                    },
                ],
                "ftp": [{"hostname": "somewhere", "usage": "c2"}],
                "smtp": [{"hostname": "here.com", "usage": "upload"}],
                "http": [
                    {
                        "protocol": "https",
                        "hostname": "blarg.com",
                        "path": "/malz",
                        "usage": "c2",
                    }
                ],
                "ssh": [{"hostname": "bad.malware", "usage": "download"}],
                "proxy": [{"hostname": "192.168.0.80", "usage": "tunnel"}],
                "dns": [{"ip": "123.21.21.21", "usage": "other"}],
                "tcp": [{"server_ip": "73.21.32.43", "usage": "c2"}],
                "udp": [{"server_ip": "73.21.32.43", "usage": "c2"}],
                "encryption": [{"algorithm": "alxor", "usage": "binary"}],
                "service": [
                    {
                        "name": "DeviceMonitorSvc",
                        "display_name": "DeviceMonitorSvc",
                        "description": "Device Monitor Service",
                    }
                ],
                "cryptocurrency": [
                    {
                        "coin": "APE",
                        "address": "689fdh658790d6dr987yth84iyth7er8gtrfohyt9",
                        "usage": "miner",
                    }
                ],
                "paths": [
                    {"path": "C:/Windows/system32", "usage": "install"},
                    {"path": "C:/user/USERNAME/xxxxx/xxxxx/", "usage": "logs"},
                    {"path": "\\here\\is\\some\\place", "usage": "install"},
                ],
                "registry": [
                    {
                        "key": "HKLM_LOCAL_USER/some/location/to/key",
                        "usage": "store_data",
                    },
                    {"key": "HKLM_LOCAL_USER/system/location", "usage": "read"},
                ],
                "campaign_id": ["32"],
                "identifier": ["uxuduxuduxuudux"],
                "decoded_strings": ["there", "are", "some", "strings"],
                "password": ["hunter2"],
                "mutex": ["YEAH"],
                "pipe": ["xiod"],
                "sleep_delay": 45000,
                "inject_exe": ["Teams.exe"],
                "other": {"misc_data": {"nested": 5}},
            }
        )

    def verify(self, config: Dict) -> None:
        """Validate the dict against the model and check it round-trips unchanged.

        Args:
            config (Dict): Raw configuration dict to validate.
        """
        tmp = model.ExtractorModel.model_validate(config)
        resp = collector._verify_response(tmp)
        self.assertEqual(resp, config)
437 |
--------------------------------------------------------------------------------
/maco/utils.py:
--------------------------------------------------------------------------------
1 | """Common utilities shared between the MACO collector and configextractor-py."""
2 |
3 | import importlib
4 | import inspect
5 | import json
6 | import logging
7 | import logging.handlers
8 | import os
9 | import re
10 | import shutil
11 | import subprocess
12 | import sys
13 | import tempfile
14 | from importlib.machinery import SourceFileLoader
15 |
16 | from multiprocess import Process, Queue
17 |
18 | from maco import yara
19 |
20 | if sys.version_info >= (3, 11):
21 | import tomllib
22 | else:
23 | import tomli as tomllib
24 |
25 | from base64 import b64decode
26 | from copy import deepcopy
27 | from glob import glob
28 | from logging import Logger
29 | from types import ModuleType
30 | from typing import Callable, Dict, List, Tuple, Union
31 |
32 | from uv import find_uv_bin
33 |
34 | from maco import model
35 | from maco.exceptions import AnalysisAbortedException
36 | from maco.extractor import Extractor
37 |
38 | logger = logging.getLogger("maco.lib.utils")
39 |
# Name of the per-directory virtual environment created for extractors.
VENV_DIRECTORY_NAME = ".venv"

# Patterns used to rewrite relative imports ("from .. import x") into absolute
# imports when repairing extractor packages (see scan_for_extractors).
RELATIVE_FROM_RE = re.compile(rb"from (\.+)")
RELATIVE_FROM_IMPORT_RE = re.compile(rb"from (\.+) import")

# Resolve the uv binary once; used for both package installs and venv creation.
UV_BIN = find_uv_bin()

PIP_CMD = f"{UV_BIN} pip"
VENV_CREATE_CMD = f"{UV_BIN} venv"
49 |
50 |
class Base64Decoder(json.JSONDecoder):
    """JSON decoder that revives base64-encoded binary data.

    Counterpart of the ``Base64Encoder`` embedded in ``VENV_SCRIPT``: any object
    serialized as ``{"__class__": "bytes", "data": "<base64>"}`` is decoded back
    into ``bytes``; all other objects pass through untouched.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the decoder with the bytes-reviving object hook."""
        # super() avoids the legacy explicit-self call and the awkward
        # positional-after-keyword argument interleaving.
        super().__init__(*args, object_hook=self.object_hook, **kwargs)

    def object_hook(self, obj):
        """Hook to decode base64 encoded binary data.

        Returns:
            bytes for serialized bytes markers; otherwise the dict unchanged.
        """
        if "__class__" not in obj:
            return obj
        # avoid shadowing the builtin `type` with a local name
        if obj["__class__"] == "bytes":
            return b64decode(obj["data"])
        return obj
66 |
67 |
# Template script executed by run_extractor() inside an extractor's virtual
# environment; the {placeholders} are filled in via str.format(). It runs the
# extractor against the sample and writes JSON results to {output_path},
# wrapping any bytes values in a base64 marker understood by Base64Decoder.
VENV_SCRIPT = """
import importlib
import json
import os
import sys
import logging

try:
    # Respect cases where the extractor is tied to certain version of yara-python for processing
    import yara
except:
    # Otherwise fallback to MACO's interface for yara-python==4.5.x
    from maco import yara

from base64 import b64encode

# ensure we have a logger to stderr
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
sh = logging.StreamHandler()
logger.addHandler(sh)
sh.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    fmt="%(asctime)s, [%(levelname)s] %(module)s.%(funcName)s: %(message)s", datefmt="%Y-%m-%d (%H:%M:%S)"
)
sh.setFormatter(formatter)

parent_package_path = "{parent_package_path}"
sys.path.insert(1, parent_package_path)
mod = importlib.import_module("{module_name}")

class Base64Encoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, bytes):
            return dict(__class__="bytes", data=b64encode(o).decode())
        return json.JSONEncoder.default(self, o)
matches = []
if mod.{module_class}.yara_rule:
    matches = yara.compile(source=mod.{module_class}.yara_rule).match("{sample_path}")
result = mod.{module_class}().run(open("{sample_path}", 'rb'), matches=matches)

with open("{output_path}", 'w') as fp:
    if not result:
        json.dump(dict(), fp)
    else:
        try:
            json.dump(result.model_dump(exclude_defaults=True, exclude_none=True), fp, cls=Base64Encoder)
        except AttributeError:
            # venv likely has an older version of Pydantic < 2 installed
            json.dump(result.dict(exclude_defaults=True, exclude_none=True), fp, cls=Base64Encoder)
"""

# YARA rule used to shortlist Python files that look like MACO extractors:
# they reference the maco package and define a class subclassing Extractor.
MACO_YARA_RULE = r"""
rule MACO {
    meta:
        desc = "Used to match on Python files that contain MACO extractors"
    strings:
        $from = "from maco"
        $import = "import maco"
        $extractor = "Extractor"
        $class = /class \w+\(([a-zA-Z.]+)?Extractor\)\:/
    condition:
        ($from or $import) and $extractor and $class
}
"""
134 |
135 |
def maco_extractor_validation(module: ModuleType) -> bool:
    """Validation function for extractors.

    Args:
        module (ModuleType): Candidate object discovered during registration.

    Returns:
        (bool): True if extractor belongs to MACO, False otherwise.
    """
    if inspect.isclass(module):
        # 'author' has to be implemented otherwise will raise an exception according to MACO
        # bool() ensures the declared return type: previously the truthy author
        # string itself was returned instead of True.
        return bool(hasattr(module, "author") and module.author)
    return False
146 |
147 |
def maco_extract_rules(module: Extractor) -> str:
    """Extracts YARA rules from extractor.

    Args:
        module (Extractor): Extractor to pull the rule source from.

    Returns:
        (str): YARA rules
    """
    rule_source = module.yara_rule
    return rule_source
155 |
156 |
def scan_for_extractors(root_directory: str, scanner: yara.Rules, logger: Logger) -> Tuple[List[str], List[str]]:
    """Looks for extractors using YARA rules.

    Recursively walks ``root_directory``, rewriting relative imports in package
    files to absolute imports, and collecting every Python file that matches
    the extractor YARA rules.

    Args:
        root_directory (str): Root directory containing extractors
        scanner (yara.Rules): Scanner to look for extractors using YARA rules
        logger (Logger): Logger to use

    Returns:
        Tuple[List[str], List[str]]: Returns a list of extractor directories and extractor files
            NOTE(review): the first element is actually a ``set`` — confirm no caller relies on list semantics.

    """
    extractor_dirs = set([root_directory])
    extractor_files = []

    def scan_and_repair(directory, package=None):
        # Recursive worker: `package` is the name of the outermost package that
        # contains `directory`, once one has been identified.
        nodes = os.listdir(directory)

        if "__init__.py" in nodes and not package and "-" not in os.path.basename(directory):
            # Perhaps we've found the outermost package?
            package = os.path.basename(directory)

        for node in nodes:
            path = os.path.join(directory, node)
            if node == VENV_DIRECTORY_NAME:
                # Ignore looking for extractors within packages
                continue
            elif not node.endswith(".py") and os.path.isfile(path):
                # Ignore scanning non-Python files
                continue
            elif node in ["setup.py"]:
                # Ignore setup files and markers for package directories
                continue
            elif "test" in node:
                # Ignore test files
                continue
            elif "deprecated" in node:
                # Ignore deprecated files
                continue

            if os.path.isfile(os.path.join(directory, node)):
                # Scan Python file for potential extractors
                if package:
                    # Inspect the contents and look for any relative import issues
                    with open(path, "rb") as f:
                        data = f.read()

                    # Replace any relative importing with absolute
                    changed_imports = False
                    curr_dir = os.path.dirname(path)
                    # NOTE(review): assumes POSIX "/" separators — confirm behaviour on Windows
                    split = curr_dir.split("/")[::-1]
                    for pattern in [RELATIVE_FROM_IMPORT_RE, RELATIVE_FROM_RE]:
                        for match in pattern.findall(data):
                            # depth == number of leading dots in the relative import
                            depth = match.count(b".")
                            abspath = ".".join(split[depth - 1 : split.index(package) + 1][::-1])
                            abspath += "." if pattern == RELATIVE_FROM_RE else ""
                            data = data.replace(f"from {match.decode()}".encode(), f"from {abspath}".encode(), 1)
                            changed_imports = True

                    # only write extractor files if imports were changed
                    if changed_imports:
                        with open(path, "wb") as f:
                            f.write(data)

                if scanner.match(path):
                    # Add directory to list of hits for venv creation
                    extractor_dirs.add(directory)
                    extractor_files.append(os.path.realpath(path))
            else:
                scan_and_repair(path, package)

    # Search for extractors using YARA rules
    logger.info("Searching for prospective extractors based on YARA rules..")
    scan_and_repair(root_directory)

    return extractor_dirs, extractor_files
233 |
234 |
def _install_required_packages(create_venv: bool, directories: List[str], python_version: str, logger: Logger):
    """Install extractor dependencies, optionally into per-directory virtual environments.

    For each extractor directory, walks upward (stopping at the parent of the
    shallowest directory) looking for ``requirements.txt``/``pyproject.toml``
    and installing what it finds with uv.

    Args:
        create_venv (bool): Create/update a ``.venv`` per directory instead of
            installing into the current environment.
        directories (List[str]): Directories containing extractors.
        python_version (str): Python version to use when creating virtual environments.
        logger (Logger): Logger to use.

    Returns:
        List[str]: Paths of virtual environments that were created/updated.
    """
    venvs = []
    env = deepcopy(os.environ)
    stop_directory = os.path.dirname(sorted(directories)[0])
    # Track directories that we've already visited
    visited_dirs = []
    for current_dir in directories:
        # Recurse backwards through the directory structure to look for package requirements
        while current_dir != stop_directory and current_dir not in visited_dirs:
            req_files = list({"requirements.txt", "pyproject.toml"}.intersection(set(os.listdir(current_dir))))
            if req_files:
                # create a virtual environment, otherwise directly install into current env
                if create_venv:
                    venv_path = os.path.join(current_dir, VENV_DIRECTORY_NAME)
                    logger.info(f"Updating virtual environment {venv_path}")
                    env.update({"VIRTUAL_ENV": venv_path})
                    # Create a virtual environment for the directory
                    if not os.path.exists(venv_path):
                        cmd = f"{VENV_CREATE_CMD} --python {python_version}"
                        subprocess.run(cmd.split(" ") + [venv_path], capture_output=True, env=env)

                # Install/Update the packages in the environment
                install_command = PIP_CMD.split(" ") + ["install"]
                # When running locally, only install packages to required spec.
                # This prevents issues during maco development and building extractors against local libraries.
                if create_venv:
                    # when running in custom virtual environment, always upgrade packages.
                    install_command.extend(["--upgrade", "--no-cache"])

                # Update the pip install command depending on where the dependencies are coming from
                if "requirements.txt" in req_files:
                    # Perform a pip install using the requirements flag
                    install_command.extend(["--requirements", "requirements.txt"])
                elif "pyproject.toml" in req_files:
                    # Assume we're dealing with a project directory
                    pyproject_command = ["--editable", "."]

                    # Check to see if there are optional dependencies required
                    with open(os.path.join(current_dir, "pyproject.toml"), "rb") as f:
                        parsed_toml_project = tomllib.load(f).get("project", {})
                    for dep_name, dependencies in parsed_toml_project.get("optional-dependencies", {}).items():
                        # Look for the dependency that hints at use of MACO for the extractors
                        if "maco" in " ".join(dependencies):
                            pyproject_command = [f".[{dep_name}]"]
                            break

                    install_command.extend(pyproject_command)

                # Always require maco-extractor to be installed
                install_command.append("maco-extractor")
                logger.debug(f"Install command: {' '.join(install_command)} [{current_dir}]")
                # this uses VIRTUAL_ENV to control usage of a virtual environment
                p = subprocess.run(
                    install_command,
                    cwd=current_dir,
                    capture_output=True,
                    env=env,
                )
                if p.returncode != 0 and b"is being installed using the legacy" not in p.stderr:
                    logger.error(f"Error installing into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
                else:
                    # "legacy install" warnings are treated as success. The previous
                    # `continue` here never advanced `current_dir` nor marked it
                    # visited, so the same failing install was retried forever.
                    logger.debug(f"Installed dependencies into venv:\n{p.stdout.decode()}\n{p.stderr.decode()}")
                if create_venv:
                    venvs.append(venv_path)

                # Cleanup any build directories that are the product of package installation
                expected_build_path = os.path.join(current_dir, "build")
                if os.path.exists(expected_build_path):
                    shutil.rmtree(expected_build_path)

            # Add directories to our visited list and check the parent of this directory on the next loop
            visited_dirs.append(current_dir)
            current_dir = os.path.dirname(current_dir)
    return venvs
312 |
313 |
def find_and_insert_venv(path: str, venvs: List[str]) -> Tuple[str, str]:
    """Finds the closest virtual environment to the extractor and inserts it into the PATH.

    Args:
        path (str): Path of extractor
        venvs (List[str]): List of virtual environments

    Returns:
        (Tuple[str, str]): Virtual environment and site-packages path that's closest to the
            extractor, or ``(None, None)`` when no virtual environment contains it.
    """
    venv = None
    for candidate in sorted(venvs, reverse=True):
        candidate_parent = os.path.dirname(candidate)
        if path.startswith(f"{candidate_parent}/"):
            # Found the virtual environment that's the closest to extractor.
            # (Previously the loop variable kept the *last* candidate even when
            # nothing matched, so an unrelated venv could be returned.)
            venv = candidate
            break

    if not venv:
        return None, None

    # Insert the venv's site-packages into the PATH temporarily to load the module.
    # Initialize first: glob may find nothing, which used to raise NameError.
    site_package = None
    for site_package in glob(os.path.join(venv, "lib/python*/site-packages")):
        if site_package not in sys.path:
            sys.path.insert(2, site_package)
        break

    return venv, site_package
342 |
343 |
def register_extractor_module(
    extractor_source_file: str,
    module_name: str,
    venvs: List[str],
    extractor_module_callback: Callable[[ModuleType, str], None],
    logger: Logger,
):
    """Register the extractor module in isolation.

    Args:
        extractor_source_file (str): Path to source file of extractor
        module_name (str): The name of the module relative to the package directory
        venvs (List[str]): List of virtual environments
        extractor_module_callback (Callable[[ModuleType, str], None]): Callback used to register extractors
        logger (Logger): Logger to use

    """
    # Pre-initialize so the finally block can't hit unbound locals if
    # find_and_insert_venv raises before assignment.
    venv = site_packages = None
    try:
        logger.info(f"Inspecting '{extractor_source_file}' for extractors..")
        venv, site_packages = find_and_insert_venv(extractor_source_file, venvs)
        loader = SourceFileLoader(
            module_name,
            extractor_source_file,
        )
        extractor_module_callback(loader.load_module(), venv)
    finally:
        # Cleanup virtual environment that was loaded into PATH
        if venv and site_packages in sys.path:
            sys.path.remove(site_packages)
373 |
374 |
def register_extractors(
    current_directory: str,
    venvs: List[str],
    extractor_files: List[str],
    extractor_module_callback: Callable[[ModuleType, str], None],
    logger: Logger,
):
    """Register extractors found within the current directory.

    Each candidate file is loaded in its own child process via
    ``register_extractor_module`` so extractors cannot interfere with each
    other (or this process) during import.

    Args:
        current_directory (str): Current directory to register extractors found
        venvs (List[str]): List of virtual environments
        extractor_files (List[str]): List of extractor files found
        extractor_module_callback (Callable[[ModuleType, str], None]): Callback used to register extractors
        logger (Logger): Logger to use
    """
    package_name = os.path.basename(current_directory)
    parent_directory = os.path.dirname(current_directory)
    if venvs and package_name in sys.modules:
        # this may happen as part of testing if some part of the extractor code was directly imported
        logger.warning(
            f"Looks like {package_name} is already loaded. "
            "If your maco extractor overlaps an existing package name this could cause problems."
        )

    try:
        # Modify the PATH so we can recognize this new package on import
        sys.path.insert(1, current_directory)
        sys.path.insert(1, parent_directory)

        # Load the potential extractors directly from the source file
        registration_processes = []
        for extractor_source_file in extractor_files:
            # Derive a dotted module name from the path relative to the parent
            # directory (strip the trailing ".py").
            module_name = extractor_source_file.replace(f"{parent_directory}/", "").replace("/", ".")[:-3]
            p = Process(
                target=register_extractor_module,
                args=(extractor_source_file, module_name, venvs, extractor_module_callback, logger),
            )
            p.start()
            registration_processes.append(p)

        # Wait for every child registration process to finish
        for p in registration_processes:
            p.join()

    finally:
        # Cleanup changes made to PATH
        sys.path.remove(parent_directory)
        sys.path.remove(current_directory)
423 |
424 |
def proxy_logging(queue: Queue, callback: Callable[[ModuleType, str], None], *args, **kwargs):
    """Route a child process's log records through *queue*, then run *callback*.

    A QueueHandler is attached to the root logger so the parent process can
    collect records; the configured logger is passed to the callback via the
    ``logger`` keyword argument.
    """
    root_logger = logging.getLogger()
    queue_handler = logging.handlers.QueueHandler(queue)
    queue_handler.setLevel(logging.DEBUG)
    root_logger.addHandler(queue_handler)
    callback(*args, logger=root_logger, **kwargs)
432 |
433 |
434 | def import_extractors(
435 | extractor_module_callback: Callable[[ModuleType, str], bool],
436 | *,
437 | root_directory: str,
438 | scanner: yara.Rules,
439 | create_venv: bool,
440 | logger: Logger,
441 | python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
442 | skip_install: bool = False,
443 | ):
444 | """Import extractors in a given directory.
445 |
446 | Args:
447 | extractor_module_callback (Callable[[ModuleType, str], bool]): Callback used to register extractors
448 | root_directory (str): Root directory to look for extractors
449 | scanner (yara.Rules): Scanner to look for extractors that match YARA rule
450 | create_venv (bool): Create/Use virtual environments
451 | logger (Logger): Logger to use
452 | python_version (str): Version of python to use when creating virtual environments
453 | skip_install (bool): Skip installation of Python dependencies for extractors
454 | """
455 | extractor_dirs, extractor_files = scan_for_extractors(root_directory, scanner, logger)
456 |
457 | logger.info(f"Extractor files found based on scanner ({len(extractor_files)}).")
458 | logger.debug(extractor_files)
459 |
460 | if not skip_install:
461 | # Install packages into the current environment or dynamically created virtual environments
462 | venvs = _install_required_packages(create_venv, extractor_dirs, python_version, logger)
463 | else:
464 | # Look for pre-existing virtual environments, if any
465 | logger.info("Checking for pre-existing virtual environment(s)..")
466 | venvs = [
467 | os.path.join(root, VENV_DIRECTORY_NAME)
468 | for root, dirs, _ in os.walk(root_directory)
469 | if VENV_DIRECTORY_NAME in dirs
470 | ]
471 |
472 | # With the environment prepared, we can now hunt for the extractors and register them
473 | logger.info("Registering extractors..")
474 | register_extractors(root_directory, venvs, extractor_files, extractor_module_callback, logger)
475 |
476 |
477 | # holds cached extractors when not running in venv mode
478 | _loaded_extractors: Dict[str, Extractor] = {}
479 |
480 |
def run_extractor(
    sample_path,
    module_name,
    extractor_class,
    module_path,
    venv,
    venv_script=VENV_SCRIPT,
    json_decoder=Base64Decoder,
) -> Union[Dict[str, dict], model.ExtractorModel]:
    """Runs the maco extractor against sample either in current process or child process.

    Args:
        sample_path (str): Path to sample
        module_name (str): Name of extractor module
        extractor_class (str): Name of extractor class in module
        module_path (str): Path to Python module containing extractor
        venv (str): Path to virtual environment associated to extractor
        venv_script (str): Script to run extractor in a virtual environment
        json_decoder (Base64Decoder): Decoder used for JSON

    Raises:
        AnalysisAbortedException: Raised when extractor voluntarily terminates execution
        Exception: Raised when extractor raises an exception

    Returns:
        Union[Dict[str, dict], model.ExtractorModel]: Results from extractor
    """
    if not venv:
        key = f"{module_name}_{extractor_class}"
        if key not in _loaded_extractors:
            # dynamic import of extractor
            try:
                # Add the correct directory to the PATH before attempting to load the extractor.
                # Strips "/<module_name as path>.py": each dot in the dotted module name maps to
                # one path separator, so the length arithmetic holds for nested modules.
                import_path = module_path[: -4 - len(module_name)]
                sys.path.insert(1, import_path)
                mod = importlib.import_module(module_name)
                extractor_cls = mod.__getattribute__(extractor_class)
                extractor = extractor_cls()

                # Add to cache
                _loaded_extractors[key] = extractor
            finally:
                sys.path.pop(1)

        else:
            # retrieve cached extractor
            extractor = _loaded_extractors[key]
        # Default to no matches so extractors without a compiled rule still run
        # (previously `matches` was unbound when yara_compiled was falsy,
        # whereas VENV_SCRIPT already defaults to an empty list).
        matches = []
        if extractor.yara_compiled:
            matches = extractor.yara_compiled.match(sample_path)
        # Close the sample handle once the extractor finishes (it used to leak)
        with open(sample_path, "rb") as sample:
            loaded = extractor.run(sample, matches=matches)
    else:
        # execute extractor in child process with separate virtual environment
        # Write temporary script in the same directory as extractor to resolve relative imports
        python_exe = os.path.join(venv, "bin", "python")
        dirname = os.path.dirname(module_path)
        with tempfile.NamedTemporaryFile("w", dir=dirname, suffix=".py") as script:
            with tempfile.NamedTemporaryFile() as output:
                # Directory containing the top-level package of the extractor
                parent_package_path = dirname.rsplit(module_name.split(".", 1)[0], 1)[0]
                root_directory = module_path[:-3].rsplit(module_name.split(".", 1)[1].replace(".", "/"))[0]

                script.write(
                    venv_script.format(
                        parent_package_path=parent_package_path,
                        module_name=module_name,
                        module_class=extractor_class,
                        sample_path=sample_path,
                        output_path=output.name,
                    )
                )
                script.flush()
                cwd = root_directory
                custom_module = script.name[:-3].replace(root_directory, "").replace("/", ".")

                if custom_module.startswith("src."):
                    # src layout found, which means the actual module content is within 'src' directory
                    custom_module = custom_module[4:]
                    cwd = os.path.join(cwd, "src")

                # run the maco extractor in full venv process isolation (slow)
                proc = subprocess.run(
                    [python_exe, "-m", custom_module],
                    cwd=cwd,
                    capture_output=True,
                )
                stderr = proc.stderr.decode()
                try:
                    # Load results and return them
                    output.seek(0)
                    loaded = json.load(output, cls=json_decoder)
                except Exception as e:
                    # If there was an error raised during runtime, then propagate
                    delim = f'File "{module_path}"'
                    exception = stderr
                    if delim in exception:
                        # Trim the traceback down to the frames from the extractor itself
                        exception = f"{delim}{exception.split(delim, 1)[1]}"
                    if "maco.exceptions.AnalysisAbortedException" in exception:
                        # Extractor voluntarily terminated, re-raise exception to be handled by collector
                        raise AnalysisAbortedException(
                            exception.split("maco.exceptions.AnalysisAbortedException: ")[-1]
                        )
                    else:
                        # print extractor logging at error level
                        logger.error(f"maco extractor raised exception, stderr:\n{stderr}")
                        raise Exception(exception) from e
        # ensure that extractor logging is available
        logger.info(f"maco extractor stderr:\n{stderr}")
    return loaded
588 |
--------------------------------------------------------------------------------
/maco/model/model.py:
--------------------------------------------------------------------------------
1 | """Malware config extractor output model."""
2 |
3 | from enum import Enum
4 | from typing import Any, Dict, List, Optional, Union
5 |
6 | from pydantic import BaseModel, ConfigDict
7 |
8 |
class ForbidModel(BaseModel):
    """Base model for all maco output models.

    Forbids extra properties so that unmodelled data must go in the explicit
    'other' field instead of silently extending the schema.
    'use_enum_values=True' stores enum fields as their plain string values,
    which keeps serialised output JSON-friendly.
    """

    model_config = ConfigDict(extra="forbid", use_enum_values=True)
13 |
14 |
class ConnUsageEnum(str, Enum):
    """Purpose of the connection."""

    c2 = "c2"  # issue commands to malware
    upload = "upload"  # get data out of the network
    download = "download"  # fetch dynamic config, second stage, etc
    propagate = "propagate"  # spread through the network
    tunnel = "tunnel"  # communicate through the network
    ransom = "ransom"  # payment
    decoy = "decoy"  # decoy connections to obfuscate malicious traffic
    other = "other"  # catch-all for purposes not listed above
26 |
27 |
class Encryption(ForbidModel):
    """Encryption usage."""

    class UsageEnum(str, Enum):
        """Purpose of the encryption."""

        config = "config"  # config is stored encrypted
        communication = "communication"  # network traffic is encrypted
        binary = "binary"  # the binary (or part of it) is encrypted
        ransom = "ransom"  # victim data is encrypted for ransom
        other = "other"

    algorithm: Optional[str] = None  # name of the encryption algorithm
    public_key: Optional[str] = None
    key: Optional[str] = None  # private key or symmetric key
    provider: Optional[str] = None  # encryption library used. openssl, homebrew, etc.

    mode: Optional[str] = None  # block vs stream
    # base 64'd binary data for these details?
    # TODO to confirm usage of these different properties
    iv: Optional[str] = None  # initialisation vector
    seed: Optional[str] = None
    nonce: Optional[str] = None
    constants: List[str] = []

    usage: Optional[UsageEnum] = None
54 |
55 |
class CategoryEnum(str, Enum):
    """Category of the malware."""

    # Software that shows you extra promotions that you cannot control as you use your PC.
    # You wouldn't see the extra ads if you didn't have adware installed.
    adware = "adware"

    # Malware related to an Advanced Persistent Threat (APT) group.
    apt = "apt"

    # A backdoor Trojan gives malicious users remote control over the infected computer.
    # They enable the author to do anything they wish on the infected computer including
    # sending, receiving, launching and deleting files, displaying data and rebooting the computer.
    # Backdoor Trojans are often used to unite a group of victim computers to form a botnet or
    # zombie network that can be used for criminal purposes.
    backdoor = "backdoor"

    # Trojan Banker programs are designed to steal your account data for online banking systems,
    # e-payment systems and credit or debit cards.
    banker = "banker"

    # A malware variant that modifies the boot sectors of a hard drive, including the Master Boot Record (MBR)
    # and Volume Boot Record (VBR).
    bootkit = "bootkit"

    # A malicious bot is self-propagating malware designed to infect a host and connect back to a central server
    # or servers that act as a command and control (C&C) center for an entire network of compromised devices,
    # or botnet.
    bot = "bot"

    # A browser hijacker is defined as a form of unwanted software that modifies a web browser's settings without
    # the user's permission. The result is the placement of unwanted advertising into the browser,
    # and possibly the replacement of an existing home page or search page with the hijacker page.
    browser_hijacker = "browser_hijacker"

    # Trojan bruteforcers try to brute force websites in order to achieve something else
    # (e.g. finding WordPress websites with default credentials).
    bruteforcer = "bruteforcer"

    # A type of trojan that can use your PC to 'click' on websites or applications.
    # They are usually used to make money for a malicious hacker by clicking on online advertisements
    # and making it look like the website gets more traffic than it does.
    # They can also be used to skew online polls, install programs on your PC, or make unwanted software
    # appear more popular than it is.
    clickfraud = "clickfraud"

    # Cryptocurrency mining malware.
    cryptominer = "cryptominer"

    # These programs conduct DoS (Denial of Service) attacks against a targeted web address.
    # By sending multiple requests from your computer and several other infected computers,
    # the attack can overwhelm the target address leading to a denial of service.
    ddos = "ddos"

    # Trojan Downloaders can download and install new versions of malicious programs in the target system.
    downloader = "downloader"

    # These programs are used by hackers in order to install malware or to prevent the detection of malicious programs.
    dropper = "dropper"

    # Exploit kits are programs that contain data or code that takes advantage of a vulnerability
    # within an application that is running in the target system.
    exploitkit = "exploitkit"

    # Trojan FakeAV programs simulate the activity of antivirus software.
    # They are designed to extort money in return for the detection and removal of threats, even though the
    # threats that they report are actually non-existent.
    fakeav = "fakeav"

    # A type of tool that can be used to allow and maintain unauthorized access to your PC.
    hacktool = "hacktool"

    # A program that collects your personal information, such as your browsing history,
    # and uses it without adequate consent.
    infostealer = "infostealer"

    # A keylogger monitors and logs every keystroke it can identify.
    # Once installed, the virus either keeps track of all the keys and stores the information locally,
    # after which the hacker needs physical access to the computer to retrieve the information,
    # or the logs are sent over the internet back to the hacker.
    keylogger = "keylogger"

    # A program that loads another application / memory space.
    loader = "loader"

    # A type of malware that hides its code and purpose to make it more difficult for
    # security software to detect or remove it.
    obfuscator = "obfuscator"

    # Point-of-sale malware is usually a type of malware that is used by cybercriminals to target point of sale (POS)
    # and payment terminals with the intent to obtain credit card and debit card information.
    pos = "pos"

    # This type of trojan allows unauthorized parties to use the infected computer as a proxy server
    # to access the Internet anonymously.
    proxy = "proxy"

    # A program that can be used by a remote hacker to gain access and control of an infected machine.
    rat = "rat"

    # This type of malware can modify data in the target computer so the operating system
    # will stop running correctly or the data is no longer accessible.
    # The criminal will only restore the computer state or data after a ransom is paid to them
    # (mostly using cryptocurrency).
    ransomware = "ransomware"

    # A reverse proxy is a server that receives requests from the internet and forwards them to a small set of servers.
    reverse_proxy = "reverse_proxy"

    # Rootkits are designed to conceal certain objects or activities in the system.
    # Often their main purpose is to prevent malicious programs being detected
    # in order to extend the period in which programs can run on an infected computer.
    rootkit = "rootkit"

    # This type of malware scans the internet / network(s) / system(s) / service(s) to collect information.
    # That information could be used later to perpetrate a cyber attack.
    scanner = "scanner"

    # Scareware is a form of malware which uses social engineering to cause shock, anxiety,
    # or the perception of a threat in order to manipulate users into buying unwanted software.
    scareware = "scareware"

    # Malware that sends spam.
    spammer = "spammer"

    # Generic or unknown Trojan.
    trojan = "trojan"

    # A generic computer virus.
    virus = "virus"

    # A type of malware that destroys data.
    wiper = "wiper"

    # A web shell is a script that can be uploaded to a web server to enable remote administration of the machine.
    webshell = "webshell"

    # A type of malware that spreads to other PCs.
    worm = "worm"
195 |
196 |
class ExtractorModel(ForbidModel):
    r"""Captured config/iocs, unpacked binaries and other malware properties from a robo-analyst.

    This model defines common fields for output of a script targeting a specific malware family.
    Usage of this model will allow for easier sharing of scripts between different authors and systems.
    The model will not define fields for all data that can be extracted from a binary, only the most common.
    This is to make it easier for authors to understand and use the model.

    This model can have new fields added in the future if they become more common,
    but the intent is to avoid removing or modifying existing fields, for backwards compatibility.

    Where data does not fit with the current model, the 'others' field should be used.
    Contents in this field is not defined by the model and verification/normalisation is up to
    the author and whatever systems run the scripts.
    If many decoders define similar data in the 'others' field, that field should be migrated to this model.

    The model must be kept relatively flat, with nested lists of dictionaries to be avoided.
    This is to make queries simpler to write in sql, elasticsearch and other storage systems.

    Malware and systems that investigate malware can do pretty much anything.
    This model needs to be simple and flexible to make sharing easy.
    Some things should be out of scope for this model.
    Responsibility for these things are up to authors and systems that use this model.

    Out of scope
    * Verifying anything in the 'others' dict, including that it is json-compatible.
        * We don't know anything about the structure
        * checking is json compatible requires dumping to json string, which can be slow
    * Connecting specific config items to malware behaviour catalog
        * i.e. "Persistence::Modify Registry" with 'registry' item from model (SYSTEM\ControlSet001\Services\)
        * due to complexity and normalisation difficulties
        * much malware behaviour is not related to specific config items
    * Normalisation/verification of individual properties
        * i.e. lowercase filepaths - some filesystems are case sensitive
        * i.e. checking registry hives match known - not enough SME and too complex for a simple model
        * generally, this quickly becomes complex (validating a fully defined http item)
        * calling systems are probably performing their own validation anyway
    * requiring specific properties to be set
        * i.e. if http item is defined, requiring hostname to be set
        * Some use cases always seem to exist where a property should not be set
    """

    family: Union[str, List[str]]  # family or families of malware that was detected
    version: Optional[str] = None  # version/variant of malware
    # NOTE: mutable defaults ([] / {}) are safe on pydantic models - field defaults
    # are copied for each instance, unlike plain Python function defaults.
    category: List[CategoryEnum] = []  # capability/purpose of the malware
    attack: List[str] = []  # mitre att&ck reference ids, e.g. 'T1129'

    #
    # simple config properties
    #

    # capabilities of the malware enabled/disabled in config
    # note these are probably malware-specific capabilities so no attempt to normalise has been made
    # note - av/sandbox detection should be noted by 'detect_'
    capability_enabled: List[str] = []
    capability_disabled: List[str] = []

    campaign_id: List[str] = []  # Server/Campaign Id for malware
    identifier: List[str] = []  # UUID/Identifiers for deployed instance
    decoded_strings: List[str] = []  # decoded strings from within malware
    password: List[str] = []  # Any password extracted from the binary
    mutex: List[str] = []  # mutex to prevent multiple instances
    pipe: List[str] = []  # pipe name used for communication
    sleep_delay: Optional[int] = None  # time to sleep/delay execution (milliseconds)
    # additional time applied to sleep_delay (milliseconds).
    # Jitter implementations can vary but usually it is a value from which a random number is generated and
    # added/subtracted to the sleep_delay to make behaviour more unpredictable
    sleep_delay_jitter: Optional[int] = None
    inject_exe: List[str] = []  # name of executable to inject into

    # configuration or clustering/research data that doesn't fit the other fields
    # * rarely used by decoders or specific to one decoder
    # to prevent key explosion, the keys must not be dynamically generated
    # e.g. api_imports, api_checksums, num_imports, import_hash + many more
    # data stored here must always be JSON-serialisable
    other: Dict[str, Any] = {}

    #
    # embedded binary data
    #
    class Binary(ForbidModel):
        """Binary data extracted by decoder."""

        class TypeEnum(str, Enum):
            """Type of binary data."""

            payload = "payload"  # contained within the original file
            config = "config"  # sometimes malware uses json/formatted text for config
            other = "other"

        datatype: Optional[TypeEnum] = None  # what the binary data is used for
        data: bytes  # binary data, not json compatible

        # other information for the extracted binary rather than the config
        # data stored here must always be JSON-serialisable
        # e.g. filename, extension, relationship label
        other: Dict[str, Any] = {}

        # convenience for ret.encryption.append(ret.Encryption(**properties))
        # (pydantic models accept keyword arguments only).
        # Defined as a subclass because that is the only way to make the module-level
        # Encryption type reachable from here without pydantic trying to parse it as a field.
        class Encryption(Encryption):
            """Encryption usage."""

            pass

        encryption: Union[List[Encryption], Encryption, None] = None  # encryption information for the binary

    binaries: List[Binary] = []

    #
    # communication protocols
    #
    class FTP(ForbidModel):
        """Usage of FTP connection."""

        # credentials and location of server
        username: Optional[str] = None
        password: Optional[str] = None
        hostname: Optional[str] = None
        port: Optional[int] = None

        path: Optional[str] = None  # remote path accessed on the server

        usage: Optional[ConnUsageEnum] = None

    ftp: List[FTP] = []

    class SMTP(ForbidModel):
        """Usage of SMTP."""

        # credentials and location of server
        username: Optional[str] = None
        password: Optional[str] = None
        hostname: Optional[str] = None
        port: Optional[int] = None

        mail_to: List[str] = []  # receivers
        mail_from: Optional[str] = None  # sender
        subject: Optional[str] = None

        usage: Optional[ConnUsageEnum] = None

    smtp: List[SMTP] = []  # SMTP server for malware

    class Http(ForbidModel):
        """Usage of HTTP connection."""

        # malware sometimes does weird stuff with uris so we don't want to force
        # authors to break the uri into username, hostname, path, etc.
        # as we lose that information.
        # e.g. extra '?' or '/' when unnecessary.
        # or something that is technically an invalid uri but still works
        uri: Optional[str] = None

        # on the other hand we might not have enough info to construct a uri
        protocol: Optional[str] = None  # http,https
        username: Optional[str] = None
        password: Optional[str] = None
        hostname: Optional[str] = None  # (A host/hostname can be an IP, domain or hostname)
        port: Optional[int] = None
        path: Optional[str] = None
        query: Optional[str] = None
        fragment: Optional[str] = None

        user_agent: Optional[str] = None  # user agent sent by malware
        method: Optional[str] = None  # get put delete etc
        headers: Optional[Dict[str, str]] = None  # custom/additional HTTP headers
        max_size: Optional[int] = None

        usage: Optional[ConnUsageEnum] = None

    http: List[Http] = []

    class SSH(ForbidModel):
        """Usage of ssh connection."""

        username: Optional[str] = None
        password: Optional[str] = None
        hostname: Optional[str] = None
        port: Optional[int] = None

        usage: Optional[ConnUsageEnum] = None

    ssh: List[SSH] = []

    class Proxy(ForbidModel):
        """Usage of proxy connection."""

        protocol: Optional[str] = None  # socks5,http
        username: Optional[str] = None
        password: Optional[str] = None
        hostname: Optional[str] = None
        port: Optional[int] = None

        usage: Optional[ConnUsageEnum] = None

    proxy: List[Proxy] = []

    class ICMP(ForbidModel):
        """Usage of ICMP."""

        type: Optional[int] = None  # ICMP message type
        code: Optional[int] = None  # ICMP message code
        header: Optional[str] = None  # Some malware uses non-standard header fields
        hostname: Optional[str] = None

        usage: Optional[ConnUsageEnum] = None

    icmp: List[ICMP] = []

    #
    # inter process communication (IPC)
    #
    class IPC(ForbidModel):
        """Usage of named pipe communications."""

        # A record stored on disk, or a record synthesized on demand by a file
        # server, which can be accessed by multiple processes.
        file: Optional[List[str]] = None
        # Data sent over a network interface, either to a different process on
        # the same computer or to another computer on the network. Stream
        # oriented (TCP; data written through a socket requires formatting to
        # preserve message boundaries) or more rarely message-oriented (UDP,
        # SCTP).
        socket: Optional[List[str]] = None
        # Similar to an internet socket, but all communication occurs within
        # the kernel. Domain sockets use the file system as their address
        # space. Processes reference a domain socket as an inode, and multiple
        # processes can communicate with one socket.
        unix_domain_socket: Optional[List[str]] = None
        # A file mapped to RAM and can be modified by changing memory
        # addresses directly instead of outputting to a stream. This shares
        # the same benefits as a standard file.
        memory_mapped_file: Optional[Union[bytes, List[str]]] = None
        # A data stream similar to a socket, but which usually preserves
        # message boundaries. Typically implemented by the operating system,
        # they allow multiple processes to read and write to the message queue
        # without being directly connected to each other.
        message_queue: Optional[List[str]] = None
        # A unidirectional data channel using standard input and output. Data
        # written to the write-end of the pipe is buffered by the operating
        # system until it is read from the read-end of the pipe. Two-way
        # communication between processes can be achieved by using two pipes
        # in opposite "directions".
        anonymous_pipe: Optional[List[str]] = None
        # A pipe that is treated like a file. Instead of using standard input
        # and output as with an anonymous pipe, processes write to and read
        # from a named pipe, as if it were a regular file.
        named_pipe: Optional[List[str]] = None
        # The process names involved in the IPC communication
        process_names: Optional[List[str]] = None
        # Multiple processes are given access to the same block of memory,
        # which creates a shared buffer for the processes to communicate with
        # each other.
        shared_memory: Optional[bytes] = None
        usage: Optional[ConnUsageEnum] = None

    ipc: List[IPC] = []  # Inter-Process Communications (similar to 'pipe' but more detailed)

    class DNS(ForbidModel):
        """Direct usage of DNS."""

        class RecordTypeEnum(str, Enum):
            """DNS record types."""

            A = "A"
            AAAA = "AAAA"
            AFSDB = "AFSDB"
            APL = "APL"
            CAA = "CAA"
            CDNSKEY = "CDNSKEY"
            CDS = "CDS"
            CERT = "CERT"
            CNAME = "CNAME"
            CSYNC = "CSYNC"
            DHCID = "DHCID"
            DLV = "DLV"
            DNAME = "DNAME"
            DNSKEY = "DNSKEY"
            DS = "DS"
            EUI48 = "EUI48"
            EUI64 = "EUI64"
            HINFO = "HINFO"
            HIP = "HIP"
            HTTPS = "HTTPS"
            IPSECKEY = "IPSECKEY"
            KEY = "KEY"
            KX = "KX"
            LOC = "LOC"
            MX = "MX"
            NAPTR = "NAPTR"
            NS = "NS"
            NSEC = "NSEC"
            NSEC3 = "NSEC3"
            NSEC3PARAM = "NSEC3PARAM"
            OPENPGPKEY = "OPENPGPKEY"
            PTR = "PTR"
            RRSIG = "RRSIG"
            RP = "RP"
            SIG = "SIG"
            SMIMEA = "SMIMEA"
            SOA = "SOA"
            SRV = "SRV"
            SSHFP = "SSHFP"
            SVCB = "SVCB"
            TA = "TA"
            TKEY = "TKEY"
            TLSA = "TLSA"
            TSIG = "TSIG"
            TXT = "TXT"
            URI = "URI"
            ZONEMD = "ZONEMD"

        ip: Optional[str] = None  # address of the DNS server queried
        port: Optional[int] = None  # The default value is 53
        hostname: Optional[str] = None  # This is the query hostname
        record_type: Optional[RecordTypeEnum] = None  # The DNS record type that is queried
        usage: Optional[ConnUsageEnum] = None

    dns: List[DNS] = []  # custom DNS address to use for name resolution

    class Connection(ForbidModel):
        """Generic TCP/UDP usage."""

        client_ip: Optional[str] = None
        client_port: Optional[int] = None
        server_ip: Optional[str] = None
        server_domain: Optional[str] = None
        server_port: Optional[int] = None

        usage: Optional[ConnUsageEnum] = None

    tcp: List[Connection] = []
    udp: List[Connection] = []

    #
    # complex configuration properties
    #
    # convenience for ret.encryption.append(ret.Encryption(**properties))
    # (pydantic models accept keyword arguments only).
    # Defined as a subclass because that is the only way to make the module-level
    # Encryption type reachable from here without pydantic trying to parse it as a field.
    class Encryption(Encryption):
        """Encryption usage."""

        pass

    encryption: List[Encryption] = []

    class Service(ForbidModel):
        """OS service usage by malware."""

        dll: Optional[str] = None  # dll that the service is loaded from
        name: Optional[str] = None  # service/driver name for persistence
        display_name: Optional[str] = None  # display name for service
        description: Optional[str] = None  # description for service

    service: List[Service] = []

    class Cryptocurrency(ForbidModel):
        """Cryptocoin usage (ransomware/miner)."""

        class UsageEnum(str, Enum):
            """Cryptocoin usage."""

            ransomware = "ransomware"  # request money to unlock
            miner = "miner"  # use gpu/cpu to mint coins
            other = "other"

        coin: Optional[str] = None  # BTC,ETH,USDT,BNB, etc
        address: Optional[str] = None  # wallet address
        ransom_amount: Optional[float] = None  # number of coins required (if hardcoded)

        # required - a cryptocurrency entry is meaningless without its purpose
        usage: UsageEnum

    cryptocurrency: List[Cryptocurrency] = []

    class Path(ForbidModel):
        """Path used by malware."""

        class UsageEnum(str, Enum):
            """Purpose of the path."""

            c2 = "c2"  # file/folder issues commands to malware
            config = "config"  # config is loaded from this path
            install = "install"  # install directory/filename for malware
            plugins = "plugins"  # load new capability from this directory
            logs = "logs"  # location to log activity
            storage = "storage"  # location to store/backup copied files
            other = "other"

        # C:\User\tmp\whatever.txt or /some/unix/folder/path
        path: str
        usage: Optional[UsageEnum] = None

    paths: List[Path] = []  # files/directories used by malware

    class Registry(ForbidModel):
        """Registry usage by malware."""

        class UsageEnum(str, Enum):
            """Registry usage."""

            persistence = "persistence"  # stay alive
            store_data = "store_data"  # generated encryption keys or config
            store_payload = "store_payload"  # malware hidden in registry key
            read = "read"  # read system registry keys
            other = "other"

        key: str  # full registry key, e.g. hive + path
        usage: Optional[UsageEnum] = None

    registry: List[Registry] = []
607 |
--------------------------------------------------------------------------------