├── requirements.txt
├── .coveragerc
├── tests
└── test_malwarebazaar_mcp.py
├── README.md
├── .gitignore
├── LICENSE
└── malwarebazaar_mcp.py
/requirements.txt:
--------------------------------------------------------------------------------
1 | python-dotenv==1.1.0
2 | mcp[cli]==1.6.0
3 | httpx==0.28.1
4 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
[run]
branch = True
source =
    .
omit =
    tests/*
    */__init__.py

[report]
# Each entry is a regular expression; a matching line is excluded from the report.
exclude_lines =
    pragma: no cover
    # Match the main guard regardless of the quote style used in the source.
    if __name__ == .__main__.:
    @abstractmethod
14 |
--------------------------------------------------------------------------------
/tests/test_malwarebazaar_mcp.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest.mock import patch, AsyncMock
3 | from malwarebazaar_mcp import get_info, get_file
4 |
5 |
class TestMalwareBazaarMCP(unittest.IsolatedAsyncioTestCase):
    """
    Unit tests for functions in the malwarebazaar_mcp module,
    focusing on asynchronous behaviors and response handling.
    """

    @patch("malwarebazaar_mcp.make_mb_request", new_callable=AsyncMock)
    async def test_get_info_success(self, mock_request):
        """
        Test that get_info returns expected formatted result when the
        query status is 'ok' and the response contains sample metadata.
        """
        mock_request.return_value = {
            "query_status": "ok",
            "data": [{"sha256_hash": "abc123", "first_seen": "2023-01-01"}],
        }
        result = await get_info(sha256="abc123")
        self.assertIn("abc123", result)
        self.assertIn("Malware Sample Metadata", result)
        self.assertIn("```text", result)

    @patch("malwarebazaar_mcp.make_mb_request", new_callable=AsyncMock)
    async def test_get_info_hash_not_found(self, mock_request):
        """
        Test that get_info returns an appropriate error message when
        the hash is not found in the MalwareBazaar database.
        """
        mock_request.return_value = {"query_status": "hash_not_found"}
        result = await get_info(sha256="deadbeef")
        self.assertIn("❌ Error: `hash_not_found`", result)

    @patch("malwarebazaar_mcp.make_mb_request", new_callable=AsyncMock)
    async def test_get_info_invalid_selector(self, mock_request):
        """
        Test that get_info returns an error message when the provided
        selector is invalid or unsupported, without contacting the API.
        """
        result = await get_info(selector="invalid", sha256="")
        self.assertIn("❌ Error: `selector` is not valid", result)
        # The selector is rejected before any request is built.
        mock_request.assert_not_awaited()

    @patch("malwarebazaar_mcp.httpx.AsyncClient.post")
    async def test_get_file_success_zip(self, mock_post):
        """
        Test that get_file correctly handles a successful ZIP file response
        from MalwareBazaar, including content-type and password message.
        """
        import os
        import tempfile

        # get_file saves into the current working directory, so run the test
        # inside a throwaway directory instead of littering the repository.
        original_cwd = os.getcwd()
        scratch_dir = tempfile.TemporaryDirectory()
        # Cleanups run last-in-first-out: leave the directory, then delete it.
        self.addCleanup(scratch_dir.cleanup)
        self.addCleanup(os.chdir, original_cwd)
        os.chdir(scratch_dir.name)

        class MockResponse:
            status_code = 200
            headers = {"content-type": "application/zip", "content-length": "12345"}
            content = b"PK...mockzipcontent..."

            def json(self):
                return {}

        mock_post.return_value = MockResponse()
        result = await get_file("abc123")
        self.assertIn("Saved to:", result)
        self.assertIn("Password: 'infected'", result)
        self.assertIn("```text", result)
66 |
# Allow running this test module directly in addition to unittest discovery.
if __name__ == "__main__":
    unittest.main()
69 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MalwareBazaar_MCP
2 | An AI-driven MCP server that autonomously interfaces with Malware Bazaar, delivering real-time threat intel and sample metadata for authorized cybersecurity research workflows.
3 |
4 |
5 |
6 | # MCP Tools
7 | #### `get_recent`: Get up to 10 most recent samples from MalwareBazaar.
8 | #### `get_info`: Get detailed metadata about a specific malware sample.
9 | #### `get_file`: Download a malware sample from MalwareBazaar.
10 | #### `get_taginfo`: Get malware samples associated with a specific tag.
11 |
12 |
13 |
14 | # Step 1: Create a MalwareBazaar APIKEY
15 | https://auth.abuse.ch/user/me
16 |
17 | # Step 2: Create `.env`
18 | ```bash
19 | MALWAREBAZAAR_API_KEY=
20 | ```
21 |
22 | # Step 3a: Create Virtual Env & Install Requirements - MAC/Linux
23 | ```bash
24 | curl -LsSf https://astral.sh/uv/install.sh | sh
25 | cd MalwareBazaar_MCP
26 | uv init .
27 | uv venv
28 | source .venv/bin/activate
29 | uv pip install -r requirements.txt
30 | ```
31 |
32 | # Step 3b: Create Virtual Env & Install Requirements - Windows
33 | ```bash
34 | powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
35 | cd MalwareBazaar_MCP
36 | uv init .
37 | uv venv
38 | .venv\Scripts\activate
39 | uv pip install -r requirements.txt
40 | ```
41 |
42 | # Step 4a: Add Config to the MCP Client - MAC/Linux
43 | ```json
44 | {
45 | "mcpServers": {
46 | "malwarebazaar": {
47 | "description": "Malware Bazaar MCP Server",
48 | "command": "/Users/XXX/.local/bin/uv",
49 | "args": [
50 | "--directory",
51 | "/Users/XXX/Documents/MalwareBazaar_MCP",
52 | "run",
53 | "malwarebazaar_mcp.py"
54 | ]
55 | }
56 | }
57 | }
58 | ```
59 |
60 | # Step 4b: Add Config to the MCP Client - Windows
61 | ```json
62 | {
63 | "mcpServers": {
64 | "malwarebazaar": {
65 | "description": "Malware Bazaar MCP Server",
66 | "command": "uv",
67 | "args": [
68 | "--directory",
69 | "C:\\Users\\XXX\\Documents\\MalwareBazaar_MCP",
70 | "run",
71 | "malwarebazaar_mcp.py"
72 | ]
73 | }
74 | }
75 | }
76 | ```
77 |
78 | # Step 5: Run MCP Server
79 | ```bash
80 | uv run malwarebazaar_mcp.py
81 | ```
82 |
83 | # Step 6: Run MCP Client & Query
84 | ```
85 | Help me understand the latest hash from Malware Bazaar.
86 | ```
87 |
88 | # Step 7: Run Tests
89 | ```
90 | python -m unittest discover -s tests
91 |
92 | uv pip install coverage==7.8.0
93 | coverage run --branch -m unittest discover -s tests
94 | coverage report -m
95 | coverage html
96 | open htmlcov/index.html # MAC
97 | xdg-open htmlcov/index.html # Linux
98 | start htmlcov\index.html # Windows
99 | coverage erase
100 | ```
101 |
102 |
103 |
104 | # License
105 | [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0)
106 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 |
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 |
143 | # Rope project settings
144 | .ropeproject
145 |
146 | # mkdocs documentation
147 | /site
148 |
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 |
154 | # Pyre type checker
155 | .pyre/
156 |
157 | # pytype static type analyzer
158 | .pytype/
159 |
160 | # Cython debug symbols
161 | cython_debug/
162 |
163 | # PyCharm
164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | # and can be added to the global gitignore or merged into this file. For a more nuclear
167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 |
170 | # Ruff stuff:
171 | .ruff_cache/
172 |
173 | # PyPI configuration file
174 | .pypirc
175 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/malwarebazaar_mcp.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """
4 | MalwareBazaar MCP Server
5 |
6 | This script defines a local MCP (Model Context Protocol) server using FastMCP to query and analyze
7 | threat intelligence data from MalwareBazaar (https://bazaar.abuse.ch/). It provides tools to retrieve
8 | recent samples, fetch detailed metadata, download files, and query by tag, using the MalwareBazaar API.
9 |
10 | The script is intended to be run as an MCP tool within a larger agent system and communicate via stdio
11 | transport.
12 |
13 | Environment:
14 | - MALWAREBAZAAR_API_KEY: API key for authenticating with MalwareBazaar.
15 | - DEBUG_MB (optional): Set to "1" to enable debug output.
16 |
17 | Usage:
18 | $ uv run malwarebazaar_mcp.py
19 |
20 | Returns:
21 | Formatted results or error messages based on the executed subcommand.
22 | """
23 |
24 | from typing import Any
25 | import os
26 | import sys
27 | import json
28 | import base64
29 | import pprint
30 | import httpx
31 | import re
32 | from dotenv import load_dotenv
33 | from mcp.server.fastmcp import FastMCP
34 |
# Initialize FastMCP server; the @mcp.tool() decorators below register on it.
mcp = FastMCP("MalwareBazaar MCP")

# Constants
# Single endpoint that every query in this module POSTs to.
MBAZ_API_URL = "https://mb-api.abuse.ch/api/v1/"
# Sent as the User-Agent header on every request.
USER_AGENT = "malwarebazaar-mcp/1.5"

# Load API key from environment
load_dotenv()
RAW_KEY = os.getenv("MALWAREBAZAAR_API_KEY")
# Surrounding whitespace is stripped; a blank or unset key becomes falsy.
API_KEY = RAW_KEY.strip() if RAW_KEY else None
# Debug output is enabled only when DEBUG_MB is exactly "1".
DEBUG = os.getenv("DEBUG_MB") == "1"

# Ensure the API key is set before making any requests
# NOTE: this runs at import time, so importing the module without a key raises.
if not API_KEY:
    raise RuntimeError("MALWAREBAZAAR_API_KEY is missing (.env or config env block)")
async def make_mb_request(payload: dict[str, Any]) -> dict[str, Any] | None:
    """
    Sends a POST request to the MalwareBazaar API with the specified payload.

    All diagnostics go to stderr. Request/response dumps are emitted only
    when DEBUG is enabled, and the Auth-Key header is redacted in them.

    Parameters:
        payload (dict): The dictionary of form fields to send with the request.

    Returns:
        dict | None: Parsed JSON response from the API if successful, None otherwise
        (transport error, non-2xx status, or a body that is not valid JSON).
    """
    headers = {
        "User-Agent": USER_AGENT,
        "Accept": "application/json",
        "Auth-Key": API_KEY,
    }

    async with httpx.AsyncClient(timeout=30.0) as client:
        try:
            request = client.build_request(
                "POST", MBAZ_API_URL, headers=headers, data=payload
            )
            if DEBUG:
                print("\n>>> --- MalwareBazaar request ---", file=sys.stderr)
                # Never write the API key to logs, even in debug mode.
                safe_headers = {
                    name: "<redacted>" if name.lower() == "auth-key" else value
                    for name, value in request.headers.items()
                }
                pprint.pprint(safe_headers, stream=sys.stderr, width=120)
                preview = base64.b64encode(request.content[:200]).decode()
                print(">>> body preview (b64):", preview, file=sys.stderr)

            response = await client.send(request)
            raw = await response.aread()

            if DEBUG:
                print(
                    ">>> RAW RESPONSE:",
                    raw.decode(errors="replace")[:1000],
                    file=sys.stderr,
                )
                print(">>> status :", response.status_code, file=sys.stderr)
                print(">>> headers:", dict(response.headers), file=sys.stderr)

            response.raise_for_status()
            try:
                return json.loads(raw)
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                # A body that is not decodable JSON is reported as such rather
                # than as a generic request failure.
                print(
                    f"JSON decode failed: {e}\nRaw content: {raw[:200]}",
                    file=sys.stderr,
                )
                return None
        except Exception as e:
            # Boundary handler: callers rely on None instead of an exception.
            print(
                f"MalwareBazaar request failed: {e}\nRequest: {payload}",
                file=sys.stderr,
            )
            return None
108 |
109 |
# Preferred display order for sample metadata; field names follow the
# MalwareBazaar sample-info response. Keys not listed here are still shown,
# under the "Additional Fields" section.
_DETAILED_KEY_ORDER = (
    "sha256_hash",
    "sha3_384_hash",
    "sha1_hash",
    "md5_hash",
    "first_seen",
    "last_seen",
    "file_name",
    "file_size",
    "file_type_mime",
    "file_type",
    "reporter",
    "origin_country",
    "anonymous",
    "signature",
    "imphash",
    "tlsh",
    "telfhash",
    "gimphash",
    "ssdeep",
    "dhash_icon",
    "comment",
    "tags",
    "code_sign",
    "delivery_method",
    "intelligence",
    "file_information",
    "ole_information",
    "yara_rules",
    "vendor_intel",
    "comments",
)


def _format_value(value: Any) -> str:
    """Render dicts/lists as indented JSON and everything else via str()."""
    if isinstance(value, (dict, list)):
        return json.dumps(value, indent=2)
    return str(value)


def format_detailed(sample: dict) -> str:
    """
    Formats and returns a multiline string of key metadata fields from a malware sample,
    prioritizing a fixed key order and appending any additional fields not recognized.

    Parameters:
        sample (dict): Dictionary containing the sample metadata.

    Returns:
        str: Formatted multiline text block with one "key: value" entry per field.
        Known fields come first in a fixed order; unknown fields follow, sorted by
        name, under a "-- Additional Fields --" header. Empty input yields "".
    """
    lines = [
        f"{key}: {_format_value(sample[key])}"
        for key in _DETAILED_KEY_ORDER
        if key in sample
    ]
    extra_keys = sorted(set(sample) - set(_DETAILED_KEY_ORDER))
    if extra_keys:
        lines.append("\n-- Additional Fields --")
        lines.extend(f"{key}: {_format_value(sample[key])}" for key in extra_keys)
    return "\n".join(lines)
144 |
145 |
def format_basic(sample: dict) -> str:
    """
    Builds the one-line summary used in sample listings.

    Parameters:
        sample (dict): Dictionary containing the sample metadata.

    Returns:
        str: The `sha256_hash` and `first_seen` values separated by a space;
        a missing field is rendered as "None".
    """
    sha256_hash = sample.get("sha256_hash")
    first_seen = sample.get("first_seen")
    return f"{sha256_hash} {first_seen}"
157 |
158 |
@mcp.tool()
async def get_recent(selector: str = "time") -> str:
    """
    Retrieves up to 10 of the most recently submitted malware samples.

    Parameters:
        selector (str): Only accepted value is "time" (default).

    Returns:
        str: Formatted result of basic metadata for recent samples, or a
        message describing why no samples could be listed.
    """
    if selector != "time":
        return f"\u274c Error: `get_recent()` only supports `selector='time'`. Got: `{selector}`.\nUse `get_info()` to query by hash."

    data = await make_mb_request({"query": "get_recent", "selector": selector})
    if not data or data.get("query_status") == "no_results":
        return "No recent samples returned by MalwareBazaar."
    if data.get("query_status") != "ok":
        return f"\u274c Unexpected response: `{data.get('query_status')}`."

    # An "ok" response with a missing or null "data" field is treated as empty
    # rather than raising.
    samples = (data.get("data") or [])[:10]
    if not samples:
        return "No recent samples returned by MalwareBazaar."

    formatted = [format_basic(sample) for sample in samples]
    return (
        "### Recent Malware Samples\n"
        "Retrieved using `get_recent()`\n\n"
        "```text\n" + "\n".join(formatted) + "\n```"
    )
186 |
187 |
@mcp.tool()
async def get_info(selector: str = "", sha256: str = "") -> str:
    """
    Retrieves full metadata for a single malware sample given a SHA256, SHA1, or MD5 hash.

    Parameters:
        selector (str): Optional. Accepted only as an alternative way to pass a
            64-character hex hash when `sha256` is empty; any other non-empty
            value is rejected.
        sha256 (str): The full hash of the sample to query.

    Returns:
        str: Full formatted metadata response from MalwareBazaar, or an error message.
    """
    if not sha256 and re.fullmatch(r"[a-fA-F0-9]{64}", selector):
        sha256 = selector
    elif selector:
        return "\u274c Error: `selector` is not valid for `get_info()`. Did you mean to use `get_recent()`?"

    if not sha256:
        return "\u274c Error: No valid hash provided. Please pass `sha256=`."

    data = await make_mb_request({"query": "get_info", "hash": sha256})
    # make_mb_request returns None on failure, so check before calling .get().
    if not data:
        return "\u274c Error: No data received from MalwareBazaar."

    status = data.get("query_status")
    if status in ("hash_not_found", "illegal_hash", "no_hash_provided"):
        return f"\u274c Error: `{status}`."
    if status != "ok":
        return f"\u274c Unexpected response: `{status}`."

    # Guard against an "ok" response that carries no sample entries.
    samples = data.get("data") or []
    if not samples:
        return "\u274c Error: MalwareBazaar returned no sample data."

    return (
        f"### Malware Sample Metadata for `{sha256}`\n\n"
        "Retrieved using `get_info()`\n\n"
        "```text\n" + format_detailed(samples[0]) + "\n```"
    )
224 |
225 |
def _save_sample_archive(sha256: str, content: bytes) -> str:
    """Write the ZIP bytes to `<cwd>/<sha256>.zip` and return a result message."""
    download_dir = os.path.abspath(os.getcwd())
    file_path = os.path.join(download_dir, f"{sha256}.zip")
    temp_path = f"{file_path}.tmp"
    try:
        # Write to a temp name first so a partial download never appears
        # under the final file name.
        with open(temp_path, "wb") as f:
            f.write(content)
        # os.replace overwrites an existing archive on every platform;
        # os.rename fails on Windows when the target already exists.
        os.replace(temp_path, file_path)
        if os.path.exists(file_path):
            return (
                "### File Download Successful\n\n"
                "```text\n"
                f"Saved to: {file_path}\nSize: {os.path.getsize(file_path):,} bytes\nPassword: 'infected'\n"
                "```"
            )
        return "File saved but verification failed"
    except OSError as e:
        # Best-effort removal of the partial temp file; the save error is
        # what gets reported to the caller.
        try:
            if os.path.exists(temp_path):
                os.remove(temp_path)
        except OSError:
            pass
        return f"File save error: {str(e)}"


@mcp.tool()
async def get_file(sha256: str) -> str:
    """
    Downloads a malware sample archive by SHA256 hash.

    The archive is saved as `<sha256>.zip` in the current working directory.
    Diagnostics are written to stderr (and only when DEBUG is enabled) because
    stdout carries the MCP stdio transport.

    Parameters:
        sha256 (str): SHA256 hash of the desired sample (hexadecimal).

    Returns:
        str: Result message indicating file path, size, or error.
    """
    # The hash becomes part of a file name, so reject anything that is not
    # purely hexadecimal (blocks path separators and ".." components).
    if not isinstance(sha256, str) or not re.fullmatch(r"[a-fA-F0-9]+", sha256):
        return "\u274c Error: `sha256` must be a hexadecimal hash."

    payload = {"query": "get_file", "sha256_hash": sha256}
    headers = {
        "User-Agent": USER_AGENT,
        "Accept": "application/octet-stream",
        "Auth-Key": API_KEY,
    }

    if DEBUG:
        print(f"\n=== DEBUG: Attempting download for {sha256} ===", file=sys.stderr)

    async with httpx.AsyncClient(timeout=60.0) as client:
        try:
            response = await client.post(MBAZ_API_URL, headers=headers, data=payload)
            if response.status_code != 200:
                return f"HTTP Error {response.status_code}"

            content = response.content
            if DEBUG:
                print(f"First 16 bytes: {content[:16].hex()}", file=sys.stderr)

            # ZIP archives start with the "PK" signature.
            if content.startswith(b"PK"):
                return _save_sample_archive(sha256, content)
            # Otherwise the API may have answered with a JSON error document.
            if b"query_status" in content:
                try:
                    return f"API Error: {response.json().get('query_status')}"
                except (ValueError, AttributeError):
                    return "API Error (malformed JSON)"
            return "Unexpected response format"
        except Exception as e:
            # Boundary handler: tool callers get a message, not a traceback.
            return f"Request failed: {str(e)}"
280 |
281 |
@mcp.tool()
async def get_taginfo(tag: str, limit: int = 100) -> str:
    """
    Lists MalwareBazaar samples that carry the given tag.

    Parameters:
        tag (str): The keyword tag to filter malware samples by.
        limit (int): Maximum number of results to retrieve (default 100);
            values above 1000 are capped at 1000.

    Returns:
        str: A formatted list of results or an error string.
    """
    # Cap the requested size at the documented maximum.
    limit = min(limit, 1000)

    response = await make_mb_request(
        {"query": "get_taginfo", "tag": tag, "limit": limit}
    )
    if not response:
        return "\u274c Error: No data received from MalwareBazaar."

    if response.get("query_status") != "ok":
        return f"\u274c Error: `{response.get('query_status', 'Unknown error')}`."

    matches = response.get("data", [])
    if not matches:
        return f"No malware samples found for tag `{tag}`."

    listing = "\n".join(format_basic(entry) for entry in matches)
    return (
        f"### Samples for Tag `{tag}`\n\n"
        f"Retrieved using `get_taginfo()` with limit `{limit}`.\n\n"
        "```text\n" + listing + "\n```"
    )
312 |
313 |
# When executed as a script, serve the registered tools over the stdio transport.
if __name__ == "__main__":
    mcp.run(transport="stdio")
316 |
--------------------------------------------------------------------------------