├── .gitignore ├── README.md ├── modules └── asyncrat_extract_config │ ├── asyncrat_extract_config.py │ └── dnlib.dll ├── monitor └── main.py ├── requirements.txt └── sources └── malwareBazaar.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore local rules 2 | *.yar 3 | 4 | # Ignore samples 5 | samples/ 6 | 7 | *.json 8 | *.txt 9 | 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | cover/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | .pybuilder/ 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # IPython 92 | profile_default/ 93 | ipython_config.py 94 | 95 | # pyenv 96 | # For a library or package, you might want to ignore these files since the code is 97 | # intended to run in multiple environments; otherwise, check them in: 98 | # .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # poetry 108 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 109 | # This is especially recommended for binary packages to ensure reproducibility, and is more 110 | # commonly ignored for libraries. 111 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 112 | #poetry.lock 113 | 114 | # pdm 115 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 116 | #pdm.lock 117 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 118 | # in version control. 119 | # https://pdm.fming.dev/#use-with-ide 120 | .pdm.toml 121 | 122 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 123 | __pypackages__/ 124 | 125 | # Celery stuff 126 | celerybeat-schedule 127 | celerybeat.pid 128 | 129 | # SageMath parsed files 130 | *.sage.py 131 | 132 | # Environments 133 | .env 134 | .venv 135 | env/ 136 | venv/ 137 | ENV/ 138 | env.bak/ 139 | venv.bak/ 140 | 141 | # Spyder project settings 142 | .spyderproject 143 | .spyproject 144 | 145 | # Rope project settings 146 | .ropeproject 147 | 148 | # mkdocs documentation 149 | /site 150 | 151 | # mypy 152 | .mypy_cache/ 153 | .dmypy.json 154 | dmypy.json 155 | 156 | # Pyre type checker 157 | .pyre/ 158 | 159 | # pytype static type analyzer 160 | .pytype/ 161 | 162 | # Cython debug symbols 163 | cython_debug/ 164 | 165 | # PyCharm 166 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 167 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 168 | # and can be added to the global gitignore or merged into this file. For a more nuclear 169 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 170 | #.idea/ 171 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YaraMonitor 2 | 3 | YaraMonitor is a tool to continuously ingest, analyze, and alert on malware samples given a set of yara rules. The main design philosophy behind it is extensibility: it can monitor additional sources so long as ingestion from that source can be automated via Python. 4 | 5 | It currently monitors: 6 | - MalwareBazaar recent uploads 7 | 8 | ## Structure 9 | 10 | - `rules` = place yara rules here, do not create sub-folders 11 | - `sources` = python scripts to ingest data from various sources, must be called by `monitor/main.py` in the `run_ingestion()` function 12 | - `samples` = scripts in `sources` download samples to this directory, in a subfolder for each script (created at run time) 13 | - `modules` = extra Python code to process samples, run after non-matching samples are removed 14 | 15 | ## Caution 16 | 17 | The `samples` directory will contain malware (with `.infected` extensions). 18 | 19 | ## Requirements 20 | 21 | - Python 22 | - `pip install -r requirements.txt` 23 | 24 | ## Usage 25 | 26 | - `monitor/main.py` is intended to be run for long periods of time, polling MalwareBazaar every hour for new uploads 27 | - It is recommended to use this within a Python venv 28 | - By default, matches are only written to stdout. You can use `-d, --discord` to supply a Discord webhook and it will send a message with the rule that matched and the path to the sample (it does not upload any files to Discord) 29 | 30 | ``` 31 | git clone https://github.com/montysecurity/yaramonitor.git 32 | cd yaramonitor 33 | # [Activate Python venv, optional, recommended] 34 | python -m pip install -r requirements.txt 35 | python monitor/main.py 36 | ``` 37 | 38 | - When running `main.py`, do it from the root folder of the repo (`yaramonitor`) by running `python monitor/main.py` (running it from any other directory will cause the program to be unable to find the `samples` directory) 39 | 40 | ## What happens when I run main.py 41 | 42 | 1. Remove all files from the `samples` directory (optional, only if `-w, --wipe` is supplied) 43 | 2. Poll MalwareBazaar and download all samples that were uploaded in the last hour, storing them in `samples/MalwareBazaar` 44 | 3. 
Loop through all files just downloaded, unzip them, delete the zip file, and scan them with all yara rules in `rules` 45 | - If a sample matches a rule, send an alert (print to screen and/or Discord Webhook) 46 | - If a sample has no matches, delete it 47 | 4. Sleep for 1 minute and repeat 48 | 49 | ## Intelligent Handling of Samples 50 | 51 | So long as the program is running and memory is retained, the following are true: 52 | 53 | - Samples that have a yara match are not deleted 54 | - The process will not re-download samples that have already been downloaded and scanned, even if they have already been deleted 55 | - The process will not re-scan samples that have already been scanned with the yara rules 56 | 57 | ## Modules 58 | 59 | Modules are meant to extend the analytical capabilities of the tool. For example, there is a module to extract configs from AsyncRAT and store them locally as JSON (inspired by https://github.com/embee-research/Asyncrat-Venom-Dcrat-Config-Extractor). It can be called using `python.exe monitor/main.py -m "asyncrat_extract_config"` (this module requires Windows) 60 | 61 | For future module development, the intention is for the module name used in the parameter to be exactly the same name as the folder it resides in. -------------------------------------------------------------------------------- /modules/asyncrat_extract_config/asyncrat_extract_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config Extractor For Asyncrat and variants (dcrat/Venomrat) 3 | 4 | Author: Matthew 5 | Twitter: @Embee_Research 6 | 7 | Samples: 8 | Async: 4b63a22def3589977211ff8749091f61d446df02cfc07066b78d3302c034b0cc 9 | Venom: 2941774e26232818b739deff45e59a32247a4a5c8d1d4e4aca517a6f5ed5055f 10 | Dcrat: ed7cd05b950c11d49a3a36f6fe35e672e088499a91f7263740ee8b79f74224e9 11 | 12 | The portion of this code that obtains the byte-based aes256.salt was heavily inspired 13 | by the OALabs StormKitty post. 14 | https://research.openanalysis.net/dot%20net/static%20analysis/stormkitty/dnlib/python/research/2021/07/14/dot_net_static_analysis.html 15 | 16 | 17 | Usage: `asyncrat_extract_config.py asyncrat.bin` 18 | (Ensure that you have a copy of dnlib.dll in the same directory as this script) 19 | 20 | """ 21 | import clr,os,base64,binascii,hmac,hashlib,sys,json 22 | from time import sleep 23 | 24 | # the extractor only works on Windows; suppress stderr noise there 25 | if os.name == 'nt': 26 | class DevNull: 27 | def write(self, msg): 28 | pass 29 | sys.stderr = DevNull() 30 | else: 31 | print("[!] asyncrat_extract_config Module not supported on this OS; requires a Windows OS") 32 | exit() 33 | 34 | current_dir = os.getcwd() 35 | #print(os.getcwd()) 36 | os.chdir("modules/asyncrat_extract_config/") 37 | current_dir = os.getcwd() 38 | #Open dnlib.dll from current directory 39 | clr.AddReference(current_dir + "/dnlib.dll") 40 | from dnlib.DotNet import ModuleDefMD 41 | from dnlib.DotNet.Emit import OpCodes 42 | from Crypto.Cipher import AES 43 | from backports.pbkdf2 import pbkdf2_hmac 44 | 45 | #read the 1st argument containing filename to open 46 | try: 47 | #filename = current_dir + "\\" + sys.argv[1] 48 | #print(current_dir) 49 | #print(sys.argv[1]) 50 | filename = sys.argv[1] 51 | #print("Loading File: " + filename) 52 | module = ModuleDefMD.Load(filename) 53 | except Exception as e: 54 | #print("Unable to open file. Please ensure you have entered a filename as an argument") 55 | sys.exit(1) 56 | 57 | #Temporarily read file so that sha256 can be calculated. 
58 | try: 59 | f = open(filename, "rb") 60 | data = f.read() 61 | f.close() 62 | sha_256 = "".join(x for x in str(hashlib.sha256(data).hexdigest())) 63 | #print("SHA256: " + sha_256) 64 | except: 65 | pass 66 | 67 | # Name of Class containing configuration values 68 | class_name = "Client.Settings" 69 | # placeholders for storing data 70 | values = [] 71 | name_mappings = {} 72 | in_field = False 73 | 74 | target_type = module.Find(class_name, isReflectionName=True) 75 | if target_type: 76 | # Enumerate methods looking for constructors 77 | constructors = [m for m in target_type.Methods if m.Name in (".cctor", ".ctor")] 78 | for constructor in constructors: 79 | if constructor.HasBody: 80 | # Enumerate constructor instructions (IL) 81 | instructions = list(constructor.Body.Instructions) 82 | ################## ORIGINAL CODE FROM embee-research ############################# 83 | #for instruction in constructor.Body.Instructions: 84 | # print(instruction) 85 | # #get encrypted string 86 | # if "ldstr" in str(instruction): 87 | # field_value = str(instruction).split(" ")[-1] 88 | # field_value = field_value.strip("\"") 89 | # in_field = True 90 | # #Get field name from IL instructions 91 | # if "stsfld" in str(instruction) and in_field: 92 | # fieldname = str(instruction).split()[-1] 93 | # name_mappings[fieldname] = field_value 94 | # in_field = False 95 | ################################################################################ 96 | i = 0 97 | for instruction in list(reversed(instructions)): 98 | if "stsfld" in str(instruction): 99 | if "ldstr" in str(list(reversed(instructions))[i+1]): 100 | field_name = str(f"{str(instruction).split(': stsfld')[1]}".strip("\n")) 101 | field_value = str(list(reversed(instructions))[i+1]).split('"')[1].strip('"') 102 | name_mappings[field_name] = field_value 103 | i += 1 104 | 105 | #Get AES encryption key from settings 106 | for i in name_mappings.keys(): 107 | if "key" in i.lower(): 108 | settings_key = name_mappings[i] 109 | 110 | def get_salt_from_bin(): 111 | #extract salt from "Client.Algorithm.Aes256" Class 112 | #This is needed to properly decrypt data 113 | salt = "" 114 | class_name = "Client.Algorithm.Aes256" 115 | target_type = module.Find(class_name, isReflectionName=True) 116 | #Enumerate constructors for string based salt (Dcrat,Venomrat) 117 | if target_type: 118 | constructors = [m for m in target_type.Methods if m.Name in (".cctor", ".ctor")] 119 | #for m in constructors: 120 | # print(m) 121 | for constructor in constructors: 122 | #Enumerate constructor IL for "ldstr" operation, extract the argument 123 | #containing the salt value 124 | if constructor.HasBody: 125 | for instruction in constructor.Body.Instructions: 126 | if "ldstr" in str(instruction): 127 | salt = str(instruction).replace("\"","").split(" ")[-1] 128 | return salt.encode('utf-8') 129 | #For asyncrat, extract the byte array based salt 130 | #this is heavily based on the StormKitty analysis by OALabs 131 | for mtype in module.GetTypes(): 132 | #Skip type if no methods or no Body 133 | if not mtype.HasMethods: 134 | continue 135 | for method in mtype.Methods: 136 | if not method.HasBody: 137 | continue 138 | if not method.Body.HasInstructions: 139 | continue 140 | #If valid body, enumerate instructions for reference to salt (typically stsfld) 141 | for ptr in range(len(method.Body.Instructions)): 142 | instruction = method.Body.Instructions[ptr] 143 | if "stsfld" in str(instruction): 144 | #If stsfld found, walk backwards to find ldtoken reference to byte array 
salt 145 | if "Aes256::Salt" in str(instruction): 146 | for i in range(1,5): 147 | if method.Body.Instructions[ptr-i].OpCode == OpCodes.Ldtoken: 148 | #print(method.Body.Instructions[ptr-i]) 149 | mm = method.Body.Instructions[ptr-i] 150 | token = mm.Operand.MDToken 151 | #If token found, locate the initial value of the byte array token 152 | #This is also borrowed from StormKitty analysis by OALabs 153 | for mtype in module.GetTypes(): 154 | if mtype.get_HasFields(): 155 | for field in mtype.get_Fields(): 156 | if field.get_MDToken() == token: 157 | try: 158 | out = bytes(field.get_InitialValue()) 159 | #print(out) 160 | return out 161 | except: 162 | continue 163 | return None 164 | 165 | #Function for deriving AES Key from salt 166 | def derive_aes_key(key,salt,keysize): 167 | key = bytearray(base64.b64decode(key)) 168 | salt = bytes(salt) 169 | key = pbkdf2_hmac("SHA1", key, salt, 50000, keysize) 170 | return key 171 | 172 | #Function for deriving IV from initial authkey 173 | def generate_iv(authkey, enc): 174 | data = base64.b64decode(enc) 175 | data = data[32:] 176 | out = hmac.new(authkey, data,hashlib.sha256).hexdigest() 177 | return out[0:32] 178 | 179 | #Function for performing the primary AES decryption 180 | def aes_decrypt(enc,key,iv): 181 | iv = bytes.fromhex(iv) 182 | enc = base64.b64decode(enc) 183 | cipher = AES.new(key, AES.MODE_CBC, iv) 184 | return cipher.decrypt(enc) 185 | 186 | #Generate AES Keys from salt. 187 | salt = get_salt_from_bin() 188 | this_key = derive_aes_key(settings_key,salt,32) 189 | auth_key = derive_aes_key(settings_key,salt,96) 190 | auth_key = auth_key[32:] 191 | 192 | new_dict = {} 193 | 194 | #Enumerate encrypted config and decrypt/print as appropriate 195 | valid_decryption = False 196 | for name in reversed(name_mappings.keys()): 197 | try: 198 | enc = name_mappings[name] 199 | iv = generate_iv(auth_key,enc) 200 | result = aes_decrypt(enc,this_key, iv) 201 | inlen = len(base64.b64decode(enc)) 202 | out = "" 203 | for i in result[48:]: 204 | out += chr(i) 205 | out2 = "".join(letter for letter in out if letter.isprintable()) 206 | if len(out) < 100: 207 | valid_decryption = True 208 | new_dict[str(name.split("::")[1]).replace("_", "")] = out2 209 | except: 210 | continue 211 | 212 | if valid_decryption: 213 | new_dict["SHA256"] = sha_256 214 | print(f"[+] Extracted AsyncRAT Config: {new_dict}") 215 | fname = "asyncrat_configs.txt" 216 | 217 | f = open(fname, "a+") 218 | f.write(str(json.dumps(new_dict))) 219 | f.write("\n") 220 | f.close() 221 | 222 | sleep(1) 223 | f = open(fname, "r") 224 | lines = f.readlines() 225 | f.close() 226 | if len(lines) != len(set(lines)): 227 | os.remove(fname) 228 | f = open(fname, "a") 229 | for line in set(lines): 230 | f.write(line) 231 | f.close() -------------------------------------------------------------------------------- /modules/asyncrat_extract_config/dnlib.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montysecurity/YaraMonitor/0d2d6e17fff492874edf83c9c557c49aabac55c9/modules/asyncrat_extract_config/dnlib.dll -------------------------------------------------------------------------------- /monitor/main.py: -------------------------------------------------------------------------------- 1 | import os, sys, yara, argparse, hashlib, subprocess 2 | from discord_webhook import DiscordWebhook 3 | from time import sleep 4 | from tqdm import tqdm 5 | 6 | sys.path.insert(1, "sources") 7 | 8 | parser = 
argparse.ArgumentParser(description="YaraMonitor: Monitor malware sources with a set of Yara rules") 9 | parser.add_argument("-d", "--discord", type=str, default=None, help="Send results to Discord Webhook provided") 10 | parser.add_argument("-w", "--wipe", action="store_true", help="Remove existing files from samples directory when the script starts") 11 | parser.add_argument("-l", "--low-storage-mode", action="store_true", help="Automatically delete all samples, regardless of match") 12 | parser.add_argument("-m", "--module", type=str, default=None, help="Invoke module code on samples") 13 | parser.add_argument("-s", "--scan-only", action='store_true', default=False, help="Skip ingestion, just run yara rules and modules") 14 | #parser.add_argument("--module-only", default=False, action='store_true', help="only run modules") 15 | args = parser.parse_args() 16 | discord = args.discord 17 | wipe = args.wipe 18 | auto_delete_all = args.low_storage_mode 19 | module = args.module 20 | scan_only = args.scan_only 21 | #module_only = args.module_only 22 | 23 | def run_ingestion(): 24 | import malwareBazaar 25 | malwareBazaar.main() 26 | 27 | def index_rules(): 28 | files = set() 29 | for file in os.listdir("rules"): 30 | files.add(file) 31 | return files 32 | 33 | def load_rule(file): 34 | with open(file) as f: 35 | src = f.read() 36 | rule = yara.compile(source=src) 37 | return rule 38 | 39 | def index_samples(): 40 | files = set() 41 | try: 42 | os.listdir("samples") 43 | except FileNotFoundError: 44 | os.makedirs("samples") 45 | for dir in os.listdir("samples"): 46 | for file in os.listdir(f"samples/{str(dir)}"): 47 | files.add(str(f"samples/{str(dir)}/{str(file)}")) 48 | return files 49 | 50 | def rename_samples(): 51 | samples = index_samples() 52 | for sample in samples: 53 | split_name = str(sample).split(".") 54 | base_name = split_name[0] 55 | old_extension = split_name[1] 56 | if old_extension != "infected": 57 | try: 58 | os.rename(sample, f"{base_name}.infected") 59 | except FileExistsError: 60 | os.remove(sample) 61 | 62 | def alert(message): 63 | print(message) 64 | if discord is not None: 65 | DiscordWebhook(url=discord, content=message).execute() 66 | 67 | def remove_samples(samples_matched): 68 | samples = index_samples() 69 | for sample in samples: 70 | hash_object = hashlib.sha256(sample.encode()) 71 | hex_dig = hash_object.hexdigest() 72 | if hex_dig not in samples_matched: 73 | print(f"[+] Removing {str(sample)}") 74 | os.remove(sample) 75 | 76 | def main(): 77 | # variables used for modules 78 | asyncrat_scanned_samples = set() 79 | 80 | i = 0 81 | global samples_scanned 82 | samples_scanned = set() 83 | global samples_matched 84 | samples_matched = set() 85 | if wipe: 86 | remove_samples(samples_matched) 87 | while True: 88 | i += 1 89 | if i > 100: 90 | alert(message="[+] Still Running YaraMonitor") 91 | i = 0 92 | if not scan_only: 93 | run_ingestion() 94 | rename_samples() 95 | rules = index_rules() 96 | samples = index_samples() 97 | hash_list_buf = set() 98 | for rule in rules: 99 | try: 100 | yara_rule = load_rule("rules/" + rule) 101 | except yara.SyntaxError: # not sure why this happens, it is inconsistent; also, loading all rules at once fails 102 | continue 103 | for sample in samples: 104 | hash_object = hashlib.sha256(sample.encode()) 105 | hex_dig = hash_object.hexdigest() 106 | if hex_dig in samples_matched: 107 | #print(f"[+] Skipping {sample} as it has already been scanned") 108 | continue 109 | # Temporary buffer of samples scanned in this loop 110 | 
hash_list_buf.add(hex_dig) 111 | print(f"[+] Scanning {str(sample)} with {str(rule)}") 112 | yara_matches = yara_rule.match(sample) 113 | samples_scanned.add(hex_dig) 114 | if len(yara_matches) > 0: 115 | message = f"[+] {str(rule)} triggered on {str(sample)}" 116 | alert(message) 117 | samples_matched.add(hex_dig) 118 | # keep track of samples scanned for the lifetime of the program 119 | samples_scanned = samples_scanned | hash_list_buf 120 | if module is not None: 121 | if os.name == "nt": 122 | pythonbin = "venv/Scripts/python.exe" 123 | else: 124 | pythonbin = "venv/bin/python" 125 | if module == "asyncrat_extract_config": 126 | if os.name == 'nt': 127 | samples = index_samples() 128 | for sample in samples: 129 | if sample not in asyncrat_scanned_samples: 130 | print(f"[+] Calling {module} on {sample}") 131 | try: 132 | p = subprocess.Popen([pythonbin, 'modules/asyncrat_extract_config/asyncrat_extract_config.py', f"../../{sample}" ]) 133 | asyncrat_scanned_samples.add(sample) 134 | except Exception as e: 135 | print(e) 136 | elif os.name != 'nt': 137 | print("[!] asyncrat_extract_config module not supported on this OS; requires a Windows OS") 138 | if auto_delete_all: 139 | samples_matched = set() 140 | remove_samples(samples_matched) 141 | #sleep(60) 142 | 143 | main() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | yara-python 2 | requests 3 | pyzipper 4 | discord_webhook 5 | tqdm 6 | 7 | # modules 8 | ## asyncrat extract config 9 | backports.pbkdf2 10 | pythonnet 11 | pycryptodome -------------------------------------------------------------------------------- /sources/malwareBazaar.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import json 4 | from time import sleep 5 | import pyzipper 6 | from tqdm import tqdm 7 | import urllib3 8 | 9 | mb_api_url = "https://mb-api.abuse.ch/api/v1/?" 10 | files_downloaded = set() 11 | 12 | def downloadsample(sha256): 13 | path = "/../samples/MalwareBazaar" 14 | script_path = str(os.path.dirname(os.path.realpath(__file__))) 15 | path = script_path + path 16 | if not os.path.exists(path): 17 | os.makedirs(path) 18 | request_data = { 19 | "query": "get_file", 20 | "sha256_hash": str(sha256).lower() 21 | } 22 | try: 23 | download_request = requests.post(url=mb_api_url, data=request_data) 24 | except: 25 | return 26 | filepath = path + "/" + str(sha256) + "_pw_infected.zip" 27 | with open(filepath, "wb") as f: 28 | f.write(download_request.content) 29 | try: 30 | with pyzipper.AESZipFile(filepath, 'r', compression=pyzipper.ZIP_DEFLATED, encryption=pyzipper.WZ_AES) as extracted_zip: 31 | extracted_zip.extractall("samples/MalwareBazaar/", pwd=str.encode("infected")) 32 | except: # Failed once 33 | pass 34 | # remove zip file as it should not be scanned 35 | os.remove(filepath) 36 | 37 | def feedcheck(): 38 | request_payload = { 39 | "query": "get_recent", 40 | "selector": "time" 41 | } 42 | # if download fails 43 | try: 44 | request = requests.post(url=mb_api_url, data=request_payload) 45 | if request.status_code != 200: 46 | feedcheck() 47 | response_json = json.loads(request.text) 48 | if "data" in response_json: 49 | pass 50 | except urllib3.exceptions.ReadTimeoutError: 51 | print("[!] 
MalwareBazaar Request Timed Out; sleeping 60 seconds") 52 | sleep(60) 53 | except: 54 | feedcheck() 55 | 56 | hashes = set() 57 | try: 58 | for file in response_json["data"]: 59 | filehash = str(file["sha256_hash"]) 60 | hashes.add(filehash) 61 | except: 62 | feedcheck() 63 | for filehash in hashes: 64 | if filehash not in files_downloaded: 65 | print(f"[+] Downloading {str(filehash)}") 66 | downloadsample(filehash) 67 | files_downloaded.add(filehash) 68 | sleep(10) 69 | 70 | def main(): 71 | feedcheck() 72 | 73 | main() --------------------------------------------------------------------------------
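
The README's extensibility claim maps to a simple contract in the code: each script in `sources/` exposes a `main()` that downloads samples into its own sub-folder of `samples/`, and `run_ingestion()` in `monitor/main.py` imports and calls it (the `sources` directory is already on `sys.path`). A minimal sketch of a hypothetical new source, assuming a made-up module name `exampleFeed` with a placeholder feed URL and JSON fields (none of this exists in the repo):

```python
# sources/exampleFeed.py -- hypothetical source following the same contract as
# sources/malwareBazaar.py: expose main() and drop samples into samples/<Source>/
import os
import requests

FEED_URL = "https://example.com/recent-samples.json"  # placeholder feed URL

def download_sample(url, sha256):
    out_dir = "samples/ExampleFeed"
    os.makedirs(out_dir, exist_ok=True)
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        return
    # monitor/main.py later renames everything under samples/ to *.infected and scans it
    with open(os.path.join(out_dir, sha256), "wb") as f:
        f.write(response.content)

def main():
    try:
        listing = requests.get(FEED_URL, timeout=30).json()
    except (requests.RequestException, ValueError):
        return
    for entry in listing:
        download_sample(entry["url"], entry["sha256"])
```

Wiring it in would then be a two-line change to `run_ingestion()` in `monitor/main.py`:

```python
def run_ingestion():
    import malwareBazaar
    import exampleFeed  # hypothetical source sketched above
    malwareBazaar.main()
    exampleFeed.main()
```

Modules follow a similar folder convention (`modules/<name>/<name>.py`), but note that `monitor/main.py` currently hard-codes the handling for `asyncrat_extract_config` and launches it as a subprocess with the sample path as its only argument, so a new module would also need its own branch there.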