├── requirements.txt ├── README.md ├── .gitignore └── search.py /requirements.txt: -------------------------------------------------------------------------------- 1 | requests -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # malware-bazaar-advanced-search 2 | 3 | Script to chain search parameters for MalwareBazaar 4 | 5 | ## usage 6 | 7 | This tool can be used to quickly find samples in MalwareBazaar (MB) by extending the functionality of the default search syntax with `-s, --search`. It does this by allowing the user to supply multiple filters in one search string, then pulls the results of each filter and cross-references them against each other. It can also be used to download samples returned by a search with `--download-all`, or individual samples with the `--get-file` switch. 8 | 9 | The goal of this tool is for it to be fairly intuitive if the operator is familiar with MB search syntax. 10 | 11 | No API key is required. 12 | 13 | **Download LNK files tagged with "CobaltStrike"** 14 | 15 | `python.exe .\search.py -s "file_type:lnk signature:CobaltStrike" --download-all` 16 | 17 | **Download Specific Hash** 18 | 19 | `python.exe .\search.py --get-file HASH` 20 | 21 | ## Known Issues 22 | 23 | - `yara` search query does not work as expected so it is not supported 24 | - `issuer_cn` search query is not supported due to common names often having spaces, breaking the logic 25 | - Using extremely common parameters mixed with extremely specific ones can cause missed results. To validate, just use the specific parameter. 26 | - e.g. it is very common for a sample to have "exe" as a tag, and since the script can only return the latest 1000 results, if this tag is combined with a very specific parameter, like serial number, it will likely incorrectly return no results 27 | 28 | ## API Limit 29 | 30 | It is recommended to understand the MB API limits before use. 
31 | 32 | https://bazaar.abuse.ch/faq/#api-limit 33 | 34 | ## Tool Showcase 35 | 36 | My Medium post about the tool 37 | 38 | https://montysecurity.medium.com/hunting-cobalt-strike-lnk-loaders-f3c407a991c0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Tool Specific 2 | samples/ 3 | *.zip 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/#use-with-ide 114 | .pdm.toml 115 | 116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ 158 | 159 | # PyCharm 160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 162 | # and can be added to the global gitignore or merged into this file. For a more nuclear 163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
"""MalwareBazaar Advanced Search.

Chain several MalwareBazaar (MB) search filters in one search string, e.g.
``file_type:lnk signature:CobaltStrike``.  Every filter is sent to the API
as its own query, and only the SHA256 hashes present in *all* result sets
are reported (and optionally downloaded).
"""

import argparse
import json
import os
import re
from time import sleep

import requests

api_url = "https://mb-api.abuse.ch/api/v1/"

# Seconds to wait on the API; without a timeout a stalled connection would
# block the script forever.
REQUEST_TIMEOUT = 60

# Upper bound advertised by the --limit help text.
MAX_LIMIT = 1000

# Directory that --download-all writes into.
SAMPLES_DIR = "samples"

# Maps a search operator typed by the user to the API "query" name.  The
# operator itself doubles as the name of the POST field carrying the value.
# "yara" (not working) and "issuer_cn" (values commonly contain spaces, which
# the space-separated search string cannot express) are deliberately absent.
KEY_CONVERSION = {
    "tag": "get_taginfo",
    "file_type": "get_file_type",
    "signature": "get_siginfo",
    "clamav": "get_clamavinfo",
    "serial_number": "get_certificate",
    "imphash": "get_imphash",
    "tlsh": "get_tlsh",
    "telfhash": "get_telfhash",
    "gimphash": "get_gimphash",
    "dhash_icon": "get_dhash_icon",
}

_SHA256_RE = re.compile(r"[0-9a-f]{64}")


def _json_error_status(content):
    """Return the ``query_status`` of a JSON error body, or None if not JSON.

    NOTE(review): assumes the API reports download errors as a JSON object
    while successful downloads are raw archive bytes -- confirm against the
    MalwareBazaar API documentation.
    """
    # Only sniff the first bytes so a large binary archive is never parsed.
    if not content[:64].lstrip().startswith(b"{"):
        return None
    try:
        payload = json.loads(content)
    except ValueError:  # JSONDecodeError and UnicodeDecodeError both derive from it
        return None
    if isinstance(payload, dict):
        return str(payload.get("query_status", "unknown"))
    return None


def download_hash(hash, directory="."):  # noqa: A002 - parameter name kept for callers
    """Download one sample and save it as ``<hash>_pw_infected.zip``.

    Args:
        hash: SHA256 hash of the sample (any case).
        directory: Existing directory to write the archive into; defaults to
            the current working directory, as before.

    Returns:
        True if an archive was written, False otherwise.  Nothing is written
        when the hash is malformed, the request fails, or the API answers
        with a JSON error instead of file content.
    """
    sha256 = str(hash).lower()
    # The hash becomes part of a file name, so reject anything that is not
    # exactly 64 hex characters before touching the file system.
    if not _SHA256_RE.fullmatch(sha256):
        print(f"[!] {hash} is not a valid SHA256 hash")
        return False

    request_data = {
        "query": "get_file",
        "sha256_hash": sha256,
    }
    try:
        download_request = requests.post(
            url=api_url, data=request_data, timeout=REQUEST_TIMEOUT
        )
        download_request.raise_for_status()
    except requests.RequestException as err:
        print(f"[!] Download of {sha256} failed: {err}")
        return False

    content = download_request.content
    error_status = _json_error_status(content)
    if error_status is not None:
        # Previously the JSON error body was saved to disk as a ".zip".
        print(f"[!] Download of {sha256} failed: {error_status}")
        return False

    target = os.path.join(directory, str(hash) + "_pw_infected.zip")
    with open(target, "wb") as f:
        f.write(content)
    return True


def convert_search_string(search_string, limit=1000):
    """Translate a search string into a list of API request payloads.

    Args:
        search_string: Whitespace-separated ``operator:value`` terms.
        limit: Value sent as the ``limit`` field of every query.

    Returns:
        A list with one payload dict per term, or None (after printing the
        reason) if any term uses an unknown operator or has no value.
    """
    filters_converted = []
    # split() without an argument also tolerates repeated/leading/trailing
    # whitespace, which split(" ") turned into empty, invalid terms.
    for term in search_string.split():
        # partition() keeps any further ":" inside the value intact.
        key, separator, value = term.partition(":")
        if key not in KEY_CONVERSION:
            print(f"[!] {key} not a valid search operator")
            return None
        if not separator or not value:
            print(f"[!] {term} is missing a value (expected operator:value)")
            return None
        filters_converted.append(
            {
                "query": KEY_CONVERSION[key],
                key: value,
                "limit": limit,
            }
        )
    return filters_converted


def _run_query(query):
    """POST one payload to the API and return its result list ([] on failure)."""
    try:
        mb_request = requests.post(url=api_url, data=query, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print("[!] Search Failed")
        print(err)
        return []

    try:
        mb_response = json.loads(mb_request.text)
    except json.JSONDecodeError:
        print("[!] JSON Failed To Load")
        print(mb_request.text)
        return []

    if not isinstance(mb_response, dict) or str(mb_response.get("query_status")) != "ok":
        print("[!] Search Failed")
        print(mb_request.text)
        return []
    return mb_response.get("data") or []


def search_mb(filters_coverted):
    """Run every payload against the API.

    Args:
        filters_coverted: Payload dicts as built by ``convert_search_string``
            (parameter spelling kept for backward compatibility).

    Returns:
        A list with exactly one result list per payload, in order.  A query
        that fails or yields no results contributes an empty list, so the
        later cross-reference correctly ends up with no matches instead of
        silently ignoring that filter.
    """
    result_sets = []
    for index, query in enumerate(filters_coverted):
        if index:
            # Pause between requests -- presumably to stay within the API
            # rate limit referenced in the README.
            sleep(1)
        print(f"[+] Parsing {query}")
        result_sets.append(_run_query(query))
    return result_sets


def _intersect_hashes(data):
    """Return the set of SHA256 hashes present in every result set of *data*."""
    hash_sets = [
        {str(result["sha256_hash"]) for result in result_set if "sha256_hash" in result}
        for result_set in data
    ]
    if not hash_sets:
        return set()
    return set.intersection(*hash_sets)


def parse_results(data, download_all=False):
    """Report, and optionally download, the samples matching every filter.

    Args:
        data: List of result lists as returned by ``search_mb``.
        download_all: When true, save every match into ``samples/``.

    Returns:
        The matching SHA256 hashes as a sorted list.
    """
    matches = sorted(_intersect_hashes(data))

    if not matches:
        print("[+] No Matches Found")
    else:
        print(f"[+] Found {len(matches)} matches")

    if download_all:
        print("[+] Downloading Files to samples/")
        if matches:
            os.makedirs(SAMPLES_DIR, exist_ok=True)

    for sha256 in matches:
        print(f"[+] SHA256: {sha256}")
        if download_all:
            # Write straight into the target directory instead of chdir-ing
            # back and forth, which left the process in the wrong directory
            # whenever a download raised.
            download_hash(sha256, directory=SAMPLES_DIR)

    if download_all:
        print("[+] Finished Downloading Files")
    return matches


def _build_parser():
    """Build the command-line argument parser."""
    parser = argparse.ArgumentParser(description="MalwareBazaar Advanced Search")
    parser.add_argument("-s", "--search", type=str, help="Search String")
    parser.add_argument("-l", "--limit", default=1000, type=int, help="Upper limit of number of hashes to pull per search (default: 1000) (max: 1000)")
    parser.add_argument("--get-file", type=str, help="download this file hash")
    parser.add_argument("--download-all", action="store_true", help="Download all files from a search and put them in a directory called 'samples'")
    return parser


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; None means ``sys.argv[1:]``.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if not 1 <= args.limit <= MAX_LIMIT:
        parser.error(f"--limit must be between 1 and {MAX_LIMIT}")

    if args.get_file and args.search:
        print("[!] Cannot use --get-file and --search at the same time")
        return

    if args.get_file:
        download_hash(args.get_file)
        return

    if not args.search:
        parser.print_help()
        return

    print(f"[+] Searching {args.search}")
    filters_converted = convert_search_string(args.search, limit=args.limit)
    if filters_converted is None:
        # The reason was already printed by convert_search_string.
        return
    if not filters_converted:
        print("[!] No search filters supplied")
        return
    data = search_mb(filters_converted)
    parse_results(data, download_all=args.download_all)


if __name__ == "__main__":
    main()