├── requirements.txt ├── .coveragerc ├── tests └── test_malwarebazaar_mcp.py ├── README.md ├── .gitignore ├── LICENSE └── malwarebazaar_mcp.py /requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv==1.1.0 2 | mcp[cli]==1.6.0 3 | httpx==0.28.1 4 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = 4 | . 5 | omit = 6 | tests/* 7 | */__init__.py 8 | 9 | [report] 10 | exclude_lines = 11 | pragma: no cover 12 | if __name__ == '__main__': 13 | @abstractmethod 14 | -------------------------------------------------------------------------------- /tests/test_malwarebazaar_mcp.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import patch, AsyncMock 3 | from malwarebazaar_mcp import get_info, get_file 4 | 5 | 6 | class TestMalwareBazaarMCP(unittest.IsolatedAsyncioTestCase): 7 | """ 8 | Unit tests for functions in the malwarebazaar_mcp module, 9 | focusing on asynchronous behaviors and response handling. 10 | """ 11 | 12 | @patch("malwarebazaar_mcp.make_mb_request", new_callable=AsyncMock) 13 | async def test_get_info_success(self, mock_request): 14 | """ 15 | Test that get_info returns expected formatted result when the 16 | query status is 'ok' and the response contains sample metadata. 
17 | """ 18 | mock_request.return_value = { 19 | "query_status": "ok", 20 | "data": [{"sha256_hash": "abc123", "first_seen": "2023-01-01"}], 21 | } 22 | result = await get_info(sha256="abc123") 23 | self.assertIn("abc123", result) 24 | self.assertIn("Malware Sample Metadata", result) 25 | self.assertIn("```text", result) 26 | 27 | @patch("malwarebazaar_mcp.make_mb_request", new_callable=AsyncMock) 28 | async def test_get_info_hash_not_found(self, mock_request): 29 | """ 30 | Test that get_info returns an appropriate error message when 31 | the hash is not found in the MalwareBazaar database. 32 | """ 33 | mock_request.return_value = {"query_status": "hash_not_found"} 34 | result = await get_info(sha256="deadbeef") 35 | self.assertIn("❌ Error: `hash_not_found`", result) 36 | 37 | @patch("malwarebazaar_mcp.make_mb_request", new_callable=AsyncMock) 38 | async def test_get_info_invalid_selector(self, mock_request): 39 | """ 40 | Test that get_info returns an error message when the provided 41 | selector is invalid or unsupported. 42 | """ 43 | result = await get_info(selector="invalid", sha256="") 44 | self.assertIn("❌ Error: `selector` is not valid", result) 45 | 46 | @patch("malwarebazaar_mcp.httpx.AsyncClient.post") 47 | async def test_get_file_success_zip(self, mock_post): 48 | """ 49 | Test that get_file correctly handles a successful ZIP file response 50 | from MalwareBazaar, including content-type and password message. 51 | """ 52 | class MockResponse: 53 | status_code = 200 54 | headers = {"content-type": "application/zip", "content-length": "12345"} 55 | content = b"PK...mockzipcontent..." 
56 | 57 | def json(self): 58 | return {} 59 | 60 | mock_post.return_value = MockResponse() 61 | result = await get_file("abc123") 62 | self.assertIn("Saved to:", result) 63 | self.assertIn("Password: 'infected'", result) 64 | self.assertIn("```text", result) 65 | 66 | 67 | if __name__ == "__main__": 68 | unittest.main() 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MalwareBazaar_MCP 2 | An AI-driven MCP server that autonomously interfaces with Malware Bazaar, delivering real-time threat intel and sample metadata for authorized cybersecurity research workflows. 3 | 4 |
5 | 6 | # MCP Tools 7 | #### `get_recent`: Get up to 10 most recent samples from MalwareBazaar. 8 | #### `get_info`: Get detailed metadata about a specific malware sample. 9 | #### `get_file`: Download a malware sample from MalwareBazaar. 10 | #### `get_taginfo`: Get malware samples associated with a specific tag. 11 | 12 |
13 | 14 | # Step 1: Create a MalwareBazaar API Key 15 | https://auth.abuse.ch/user/me 16 | 17 | # Step 2: Create `.env` 18 | ```bash 19 | MALWAREBAZAAR_API_KEY= 20 | ``` 21 | 22 | # Step 3a: Create Virtual Env & Install Requirements - MAC/Linux 23 | ```bash 24 | curl -LsSf https://astral.sh/uv/install.sh | sh 25 | cd MalwareBazaar_MCP 26 | uv init . 27 | uv venv 28 | source .venv/bin/activate 29 | uv pip install -r requirements.txt 30 | ``` 31 | 32 | # Step 3b: Create Virtual Env & Install Requirements - Windows 33 | ```bash 34 | powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" 35 | cd MalwareBazaar_MCP 36 | uv init . 37 | uv venv 38 | .venv\Scripts\activate 39 | uv pip install -r requirements.txt 40 | ``` 41 | 42 | # Step 4a: Add Config to the MCP Client - MAC/Linux 43 | ```json 44 | { 45 | "mcpServers": { 46 | "malwarebazaar": { 47 | "description": "Malware Bazaar MCP Server", 48 | "command": "/Users/XXX/.local/bin/uv", 49 | "args": [ 50 | "--directory", 51 | "/Users/XXX/Documents/MalwareBazaar_MCP", 52 | "run", 53 | "malwarebazaar_mcp.py" 54 | ] 55 | } 56 | } 57 | } 58 | ``` 59 | 60 | # Step 4b: Add Config to the MCP Client - Windows 61 | ```json 62 | { 63 | "mcpServers": { 64 | "malwarebazaar": { 65 | "description": "Malware Bazaar MCP Server", 66 | "command": "uv", 67 | "args": [ 68 | "--directory", 69 | "C:\\Users\\XXX\\Documents\\MalwareBazaar_MCP", 70 | "run", 71 | "malwarebazaar_mcp.py" 72 | ] 73 | } 74 | } 75 | } 76 | ``` 77 | 78 | # Step 5: Run MCP Server 79 | ```bash 80 | uv run malwarebazaar_mcp.py 81 | ``` 82 | 83 | # Step 6: Run MCP Client & Query 84 | ``` 85 | Help me understand the latest hash from Malware Bazaar. 
86 | ``` 87 | 88 | # Step 7: Run Tests 89 | ``` 90 | python -m unittest discover -s tests 91 | 92 | uv pip install coverage==7.8.0 93 | coverage run --branch -m unittest discover -s tests 94 | coverage report -m 95 | coverage html 96 | open htmlcov/index.html # MAC 97 | xdg-open htmlcov/index.html # Linux 98 | start htmlcov\index.html # Windows 99 | coverage erase 100 | ``` 101 | 102 |
103 | 104 | # License 105 | [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0) 106 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version 
control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /malwarebazaar_mcp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | MalwareBazaar MCP Server 5 | 6 | This script defines a local MCP (Model Context Protocol) server using FastMCP to query and analyze 7 | threat intelligence data from MalwareBazaar (https://bazaar.abuse.ch/). It provides tools to retrieve 8 | recent samples, fetch detailed metadata, download files, and query by tag, using the MalwareBazaar API. 9 | 10 | The script is intended to be run as an MCP tool within a larger agent system and communicate via stdio 11 | transport. 12 | 13 | Environment: 14 | - MALWAREBAZAAR_API_KEY: API key for authenticating with MalwareBazaar. 15 | - DEBUG_MB (optional): Set to "1" to enable debug output. 16 | 17 | Usage: 18 | $ uv run malwarebazaar_mcp.py 19 | 20 | Returns: 21 | Formatted results or error messages based on the executed subcommand. 
from typing import Any
import os
import sys
import json
import base64
import pprint
import httpx
import re
from dotenv import load_dotenv
from mcp.server.fastmcp import FastMCP

# Initialize FastMCP server
mcp = FastMCP("MalwareBazaar MCP")

# Constants
MBAZ_API_URL = "https://mb-api.abuse.ch/api/v1/"
USER_AGENT = "malwarebazaar-mcp/1.5"
MAX_RECENT_SAMPLES = 10
MAX_TAG_LIMIT = 1000

# Preferred display order for `format_detailed()`. Any field a sample carries
# that is not listed here is still printed, under "-- Additional Fields --".
# NOTE(review): names follow the MalwareBazaar `get_info` response as publicly
# documented -- confirm against the live API.
DETAILED_KEY_ORDER = (
    "sha256_hash",
    "sha3_384_hash",
    "sha1_hash",
    "md5_hash",
    "first_seen",
    "last_seen",
    "file_name",
    "file_size",
    "file_type_mime",
    "file_type",
    "reporter",
    "origin_country",
    "anonymous",
    "signature",
    "imphash",
    "tlsh",
    "telfhash",
    "gimphash",
    "ssdeep",
    "dhash_icon",
    "comment",
    "archive_pw",
    "tags",
    "code_sign",
    "delivery_method",
    "intelligence",
    "file_information",
    "ole_information",
    "yara_rules",
    "vendor_intel",
    "comments",
)

# Load API key from environment
load_dotenv()
RAW_KEY = os.getenv("MALWAREBAZAAR_API_KEY")
API_KEY = RAW_KEY.strip() if RAW_KEY else None
DEBUG = os.getenv("DEBUG_MB") == "1"

# Ensure the API key is set before making any requests
if not API_KEY:
    raise RuntimeError("MALWAREBAZAAR_API_KEY is missing (.env or config env block)")


def _debug(*parts: Any) -> None:
    """
    Prints diagnostics to stderr when DEBUG_MB=1.

    The server is started with `transport="stdio"`, so stdout is the channel
    the MCP client reads; diagnostics must never be written there.
    """
    if DEBUG:
        print(*parts, file=sys.stderr)


async def make_mb_request(payload: dict[str, Any]) -> dict[str, Any] | None:
    """
    Sends a POST request to the MalwareBazaar API with the specified payload.

    Parameters:
        payload (dict): The dictionary of form fields to send with the request.

    Returns:
        dict | None: Parsed JSON response from the API if successful, None otherwise
        (transport error, non-2xx status, or a body that is not valid JSON).
    """
    headers = {
        "User-Agent": USER_AGENT,
        "Accept": "application/json",
        "Auth-Key": API_KEY,
    }

    async with httpx.AsyncClient(timeout=30.0) as client:
        try:
            request = client.build_request(
                "POST", MBAZ_API_URL, headers=headers, data=payload
            )
            if DEBUG:
                print("\n>>> --- MalwareBazaar request ---", file=sys.stderr)
                # Never write the API key to logs, even in debug mode.
                safe_headers = {
                    name: "<redacted>" if name.lower() == "auth-key" else value
                    for name, value in request.headers.items()
                }
                pprint.pprint(safe_headers, stream=sys.stderr, width=120)
                preview = base64.b64encode(request.content[:200]).decode()
                print(">>> body preview (b64):", preview, file=sys.stderr)

            response = await client.send(request)
            raw = await response.aread()

            if DEBUG:
                print(
                    ">>> RAW RESPONSE:",
                    raw.decode(errors="replace")[:1000],
                    file=sys.stderr,
                )
                print(">>> status :", response.status_code, file=sys.stderr)
                print(">>> headers:", dict(response.headers), file=sys.stderr)

            response.raise_for_status()
            try:
                return json.loads(raw)
            except json.JSONDecodeError as e:
                print(
                    f"JSON decode failed: {e}\nRaw content: {raw[:200]}",
                    file=sys.stderr,
                )
                return None
        except Exception as e:
            # Tool boundary: report on stderr and let callers handle None.
            print(
                f"MalwareBazaar request failed: {e}\nRequest: {payload}",
                file=sys.stderr,
            )
            return None


def _format_value(value: Any) -> str:
    """Renders containers as indented JSON and everything else via str()."""
    if isinstance(value, (dict, list)):
        return json.dumps(value, indent=2)
    return str(value)


def format_detailed(sample: dict) -> str:
    """
    Formats and returns a multiline string of key metadata fields from a malware sample,
    prioritizing a fixed key order and appending any additional fields not recognized.

    Parameters:
        sample (dict): Dictionary containing the sample metadata.

    Returns:
        str: Formatted multiline text block.
    """
    lines = [
        f"{key}: {_format_value(sample[key])}"
        for key in DETAILED_KEY_ORDER
        if key in sample
    ]
    extra_keys = sorted(set(sample) - set(DETAILED_KEY_ORDER))
    if extra_keys:
        lines.append("\n-- Additional Fields --")
        lines.extend(f"{key}: {_format_value(sample[key])}" for key in extra_keys)
    return "\n".join(lines)


def format_basic(sample: dict) -> str:
    """
    Formats and returns a one-line summary string with hash and timestamp.

    Parameters:
        sample (dict): Dictionary containing the sample metadata.

    Returns:
        str: SHA256 hash and first seen timestamp.
    """
    return f"{sample.get('sha256_hash')} {sample.get('first_seen')}"


@mcp.tool()
async def get_recent(selector: str = "time") -> str:
    """
    Retrieves up to 10 of the most recently submitted malware samples.

    Parameters:
        selector (str): Only accepted value is "time" (default).

    Returns:
        str: Formatted result of basic metadata for recent samples.
    """
    if selector != "time":
        return f"\u274c Error: `get_recent()` only supports `selector='time'`. Got: `{selector}`.\nUse `get_info()` to query by hash."

    data = await make_mb_request({"query": "get_recent", "selector": selector})
    if not data or data.get("query_status") == "no_results":
        return "No recent samples returned by MalwareBazaar."
    if data.get("query_status") != "ok":
        return f"\u274c Unexpected response: `{data.get('query_status')}`."

    # An "ok" status without a usable "data" list is treated as no results
    # instead of raising KeyError inside the tool.
    samples = (data.get("data") or [])[:MAX_RECENT_SAMPLES]
    if not samples:
        return "No recent samples returned by MalwareBazaar."
    formatted = [format_basic(sample) for sample in samples]
    return (
        "### Recent Malware Samples\n"
        "Retrieved using `get_recent()`\n\n"
        "```text\n" + "\n".join(formatted) + "\n```"
    )


@mcp.tool()
async def get_info(selector: str = "", sha256: str = "") -> str:
    """
    Retrieves full metadata for a single malware sample given a SHA256, SHA1, or MD5 hash.

    Parameters:
        selector (str): Optional. Used for routing or alternative hash input; a
            64-character hex value is accepted as the hash when `sha256` is empty.
        sha256 (str): The full hash of the sample to query.

    Returns:
        str: Full formatted metadata response from MalwareBazaar, or an error message.
    """
    if not sha256 and re.fullmatch(r"[a-fA-F0-9]{64}", selector):
        sha256 = selector
    elif selector:
        return "\u274c Error: `selector` is not valid for `get_info()`. Did you mean to use `get_recent()`?"

    if not sha256:
        return "\u274c Error: No valid hash provided. Please pass `sha256=`."

    data = await make_mb_request({"query": "get_info", "hash": sha256})
    if not data:
        # make_mb_request() returns None on failure; calling .get() on it
        # would raise instead of reporting the problem to the client.
        return "\u274c Error: No data received from MalwareBazaar."
    if data.get("query_status") in [
        "hash_not_found",
        "illegal_hash",
        "no_hash_provided",
    ]:
        return f"\u274c Error: `{data.get('query_status', 'Unknown error')}`."
    if data.get("query_status") != "ok":
        return f"\u274c Unexpected response: `{data.get('query_status')}`."

    samples = data.get("data") or []
    if not samples:
        return "\u274c Error: MalwareBazaar returned no sample data."
    return (
        f"### Malware Sample Metadata for `{sha256}`\n\n"
        "Retrieved using `get_info()`\n\n"
        "```text\n" + format_detailed(samples[0]) + "\n```"
    )


def _save_archive(sha256: str, content: bytes) -> str:
    """
    Writes a downloaded archive to `<cwd>/<sha256>.zip` and returns the result message.

    The bytes are written to a `.tmp` sibling first and then moved into place,
    so an interrupted write never leaves a truncated `.zip` behind.
    """
    file_path = os.path.join(os.path.abspath(os.getcwd()), f"{sha256}.zip")
    temp_path = f"{file_path}.tmp"
    try:
        with open(temp_path, "wb") as f:
            f.write(content)
        # os.replace() overwrites an existing target on every platform;
        # os.rename() raises on Windows when the file already exists.
        os.replace(temp_path, file_path)
        size = os.path.getsize(file_path)
    except OSError as e:
        try:
            os.remove(temp_path)
        except OSError:
            pass  # nothing left to clean up
        return f"File save error: {str(e)}"
    return (
        "### File Download Successful\n\n"
        "```text\n"
        f"Saved to: {file_path}\nSize: {size:,} bytes\nPassword: 'infected'\n"
        "```"
    )


@mcp.tool()
async def get_file(sha256: str) -> str:
    """
    Downloads a malware sample archive by SHA256 hash.

    Parameters:
        sha256 (str): SHA256 hash of the desired sample.

    Returns:
        str: Result message indicating file path, size, or error.
    """
    sha256 = (sha256 or "").strip()
    # The hash becomes part of a file name, so only hex digits are accepted;
    # this rejects path separators and "..". Length is left to the API.
    if not re.fullmatch(r"[a-fA-F0-9]+", sha256):
        return "\u274c Error: `sha256` must be a hexadecimal hash."

    payload = {"query": "get_file", "sha256_hash": sha256}
    headers = {
        "User-Agent": USER_AGENT,
        "Accept": "application/octet-stream",
        "Auth-Key": API_KEY,
    }

    _debug(f"\n=== DEBUG: Attempting download for {sha256} ===")

    async with httpx.AsyncClient(timeout=60.0) as client:
        try:
            response = await client.post(MBAZ_API_URL, headers=headers, data=payload)
        except Exception as e:
            return f"Request failed: {str(e)}"

    if response.status_code != 200:
        return f"HTTP Error {response.status_code}"

    content = response.content
    _debug(f"First 16 bytes: {content[:16].hex()}")

    # ZIP archives start with the "PK" magic bytes.
    if content.startswith(b"PK"):
        return _save_archive(sha256, content)

    if b"query_status" in content:
        try:
            error = response.json()
        except ValueError:
            return "API Error (malformed JSON)"
        if not isinstance(error, dict):
            return "API Error (malformed JSON)"
        return f"API Error: {error.get('query_status')}"
    return "Unexpected response format"


@mcp.tool()
async def get_taginfo(tag: str, limit: int = 100) -> str:
    """
    Retrieves a list of malware samples from MalwareBazaar tagged with a specific keyword.

    Parameters:
        tag (str): The keyword tag to filter malware samples by.
        limit (int): Maximum number of results to retrieve (default 100, max 1000).
            Values outside 1..1000 are clamped into that range.

    Returns:
        str: A formatted list of results or an error string.
    """
    limit = max(1, min(limit, MAX_TAG_LIMIT))
    payload = {"query": "get_taginfo", "tag": tag, "limit": limit}
    data = await make_mb_request(payload)
    if not data:
        return "\u274c Error: No data received from MalwareBazaar."
    if data.get("query_status") != "ok":
        return f"\u274c Error: `{data.get('query_status', 'Unknown error')}`."
    samples = data.get("data", [])
    if not samples:
        return f"No malware samples found for tag `{tag}`."

    formatted = [format_basic(sample) for sample in samples]
    return (
        f"### Samples for Tag `{tag}`\n\n"
        f"Retrieved using `get_taginfo()` with limit `{limit}`.\n\n"
        "```text\n" + "\n".join(formatted) + "\n```"
    )


if __name__ == "__main__":
    mcp.run(transport="stdio")