├── README.md ├── shellcode_to_exe ├── README.md └── shellcode_to_exe.py └── pe_extract ├── README.md └── pe_extract.py /README.md: -------------------------------------------------------------------------------- 1 | # malware_analysis_tools 2 | 3 | This repository contains tools helpful for malware analysis. 4 | -------------------------------------------------------------------------------- /shellcode_to_exe/README.md: -------------------------------------------------------------------------------- 1 | # shellcode_to_exe 2 | 3 | Tool to create a Windows executable (x86/64) from a shellcode file. 4 | -------------------------------------------------------------------------------- /pe_extract/README.md: -------------------------------------------------------------------------------- 1 | # pe_extract 2 | 3 | Tool that extracts EXE, DLL, SYS and unknown files from any given byte blob, memory dump or similar data structure. 4 | 5 | The idea for this script is based on UsAr's PEExtract tool: 6 | http://web.archive.org/web/20101126155525/http://usar.pp.ru/download/ 7 | 8 | It solves a few shortcomings of the original program like: 9 | - Multiple file scan support (e.g. for automatically created memory dumps) 10 | - Skip likely incomplete page sized PEs (for automatically created memory dumps) 11 | - Extraction support for signed PE files 12 | - Proper extraction of PE files where the last section's SizeOfRawData value doesn't make the overall file size 13 | - Support for XORed PE files 14 | -------------------------------------------------------------------------------- /shellcode_to_exe/shellcode_to_exe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Dominik Reichel (2022) 3 | 4 | shellcode_to_exe - Create a Windows executable (x86/64) from a shellcode file 5 | """ 6 | 7 | __version__ = 0.2 8 | 9 | import os 10 | import argparse 11 | 12 | from enum import Enum, IntEnum 13 | from typing import Optional 14 | 15 | HEADER_32 = '4D5A00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' \ 16 | '00000000000000000000040000000504500004C010100000000000000000000000000E0000F010B010000DEADC0DE000000' \ 17 | '000000000000100000001000000000000000004000001000000002000004000000000000000400000000000000FACEFEED0' \ 18 | '002000000000000020000000000100000000100000010000010000000000000100000000000000000000000000000000000' \ 19 | '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' \ 20 | '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' \ 21 | '0000000000000000000000000000002E7368656C6C0000DEADBEEF00100000CAFEBABE00020000000000000000000000000' \ 22 | '000200000E00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' \ 23 | '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' \ 24 | '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' \ 25 | '0000000000000000000000000000000000' 26 | 27 | HEADER_64 = '4D5A00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' \ 28 | '000000000000000000000400000005045000064860100000000000000000000000000F0002F000B020000DEADC0DE000000' \ 29 | '000000000000100000001000000000400000000000001000000002000005000200000000000500020000000000FACEFEED0' \ 30 | '002000000000000020000000000100000000000000001000000000000001000000000000010000000000000000000001000' \ 31 | '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' \ 32 | '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' \ 33 | '000000000000000000000000000000000000000000000000000000000000002E7368656C6C0000DEADBEEF00100000CAFEB' \ 34 | 'ABE00020000000000000000000000000000200000E000000000000000000000000000000000000000000000000000000000' \ 35 | '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' \ 36 | '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' \ 37 | '0000000000000000000000000000000000' 38 | 39 | 40 | class HeaderFields(IntEnum): 41 | VIRTUALSIZE = 16 42 | RAWSIZE = 512 43 | SIZEOFIMAGE = 4096 44 | 45 | 46 | class HeaderFieldMarkers(Enum): 47 | VIRTUALSIZE = b'\xDE\xAD\xBE\xEF' 48 | RAWSIZE = b'\xCA\xFE\xBA\xBE' 49 | SIZEOFCODE = b'\xDE\xAD\xC0\xDE' 50 | SIZEOFIMAGE = b'\xFA\xCE\xFE\xED' 51 | 52 | 53 | class ShellcodeToExe: 54 | def __init__(self, bitness: str, shellcode_path: str, exe_path: str): 55 | self.bitness = bitness 56 | self.shellcode_path = shellcode_path 57 | self.exe_path = exe_path 58 | if bitness == '32': 59 | self.exe_header = bytes.fromhex(HEADER_32) 60 | elif bitness == '64': 61 | self.exe_header = bytes.fromhex(HEADER_64) 62 | 63 | @staticmethod 64 | def _calculate_field_size(field_name: str, size: int) -> int: 65 | result = size 66 | 67 | if size % HeaderFields[field_name.upper()].value != 0: 68 | result = size + (HeaderFields[field_name.upper()].value - (size % HeaderFields[field_name.upper()].value)) 69 | 70 | return result 71 | 72 | def _update_header_field(self, field_name: str, size: int) -> bool: 73 | try: 74 | size_hex = size.to_bytes(4, byteorder='little') 75 | self.exe_header = self.exe_header.replace(HeaderFieldMarkers[field_name.upper()].value, size_hex) 76 | return True 77 | except Exception as e: 78 | print(f'[-] Could not update EXE header template field "{field_name}" - {e}.') 79 | return False 80 | 81 | def _get_shellcode(self) -> Optional[bytes]: 82 | if not os.path.exists(self.shellcode_path): 83 | print(f'[-] Shellcode file "{self.shellcode_path}" does not exist.') 84 | return 85 | 86 | with open(self.shellcode_path, 'rb') as f: 87 | return f.read() 88 | 89 | def _create_exe(self, file_bytes: bytes) -> None: 90 | if self.exe_path == 'shellcode.exe': 91 | self.exe_path = f'{os.path.join(os.path.dirname(self.shellcode_path), self.exe_path)}' 92 | 93 | with open(self.exe_path, 'wb') as f: 94 | f.write(file_bytes) 95 | print(f'[+] Created {self.bitness}-bit executable: {self.exe_path}') 96 | 97 | def run(self) -> None: 98 | shellcode = self._get_shellcode() 99 | 100 | if shellcode: 101 | shellcode_len = len(shellcode) 102 | 103 | virtual_size = self._calculate_field_size('VirtualSize', shellcode_len) 104 | raw_size = self._calculate_field_size('RawSize', shellcode_len) 105 | 106 | if shellcode_len != raw_size: 107 | # Fill shellcode section with 0 bytes according to alignment 108 | shellcode += bytes(raw_size - shellcode_len) 109 | 110 | if self._update_header_field('VirtualSize', virtual_size) and \ 111 | self._update_header_field('RawSize', raw_size) and \ 112 | self._update_header_field('SizeOfCode', raw_size) and \ 113 | self._update_header_field('SizeOfImage', 114 | self._calculate_field_size('SizeOfImage', virtual_size + 0x1000)): 115 | self._create_exe(self.exe_header + shellcode) 116 | 117 | 118 | def main(): 119 | parser = argparse.ArgumentParser(prog='shellcode_to_exe.py', 120 | description='Create a Windows executable (x86/64) from a shellcode file') 121 | parser.add_argument('-b', '--bitness', dest='bitness', type=str, required=True, help='"32" or "64" (bitness)') 122 | parser.add_argument('-s', '--shellcode', dest='shellcode', type=str, required=True, help='Shellcode file path') 123 | parser.add_argument('-e', '--executable', dest='executable', type=str, default='shellcode.exe', 124 | help='Executable output path') 125 | args = parser.parse_args() 126 | 127 | s2e = ShellcodeToExe(args.bitness, args.shellcode, args.executable) 128 | s2e.run() 129 | 130 | 131 | if __name__ == '__main__': 132 | main() 133 | -------------------------------------------------------------------------------- /pe_extract/pe_extract.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Dominik Reichel (2022-2024) 3 | 4 | pe_extract - Extract Windows PE files (EXE, DLL, SYS, unknown) from any given PE, byte blob, memory dump or similar data structure. 5 | """ 6 | 7 | __version__ = 0.2 8 | 9 | import os 10 | import re 11 | import pefile 12 | import argparse 13 | 14 | from dataclasses import dataclass 15 | from enum import IntEnum 16 | from typing import List, Optional 17 | from pefile import PE 18 | 19 | 20 | @dataclass 21 | class MZItem: 22 | offset: int 23 | xor_key: Optional[int] = None 24 | 25 | 26 | @dataclass 27 | class Report: 28 | exe: int = 1 29 | exe_xored: int = 1 30 | exe_signed: int = 1 31 | dll: int = 1 32 | dll_xored: int = 1 33 | dll_signed: int = 1 34 | sys: int = 1 35 | sys_xored: int = 1 36 | sys_signed: int = 1 37 | unknown: int = 1 38 | unknown_xored: int = 1 39 | unknown_signed: int = 1 40 | 41 | def increase_value(self, field_name: str) -> None: 42 | curr_val = getattr(self, field_name) 43 | setattr(self, field_name, curr_val + 1) 44 | 45 | def get_value(self, field_name: str) -> int: 46 | return getattr(self, field_name) 47 | 48 | 49 | class InputType(IntEnum): 50 | File = 1 51 | Folder = 2 52 | Unknown = 3 53 | 54 | 55 | class FileType(IntEnum): 56 | Exe = 1 57 | Dll = 2 58 | Sys = 3 59 | Unknown = 4 60 | 61 | 62 | class PEExtract: 63 | PE_MAGIC = b'\x4D\x5A' 64 | PE_SIGNATURE = b'\x50\x45\x00\x00' 65 | 66 | def __init__(self, input_path: str, extract_all: bool, skip_xored: bool, extract_overlays: bool, verbose: bool): 67 | self.input_path = input_path 68 | self.extract_all = extract_all 69 | self.skip_xored = skip_xored 70 | self.extract_overlays = extract_overlays 71 | self.verbose = verbose 72 | self.report = Report() 73 | 74 | def __verbose_print(self, message: str): 75 | if self.verbose: 76 | print(message) 77 | 78 | def _get_input_type(self) -> InputType: 79 | result = InputType.Unknown 80 | 81 | if os.path.isfile(self.input_path): 82 | print('[*] Input path is a file.') 83 | self.__verbose_print('---') 84 | result = InputType.File 85 | elif os.path.isdir(self.input_path): 86 | print('[*] Input path is a folder.') 87 | self.__verbose_print('---') 88 | result = InputType.Folder 89 | else: 90 | print('[-] Unknown input. Please provide a valid file/folder path.') 91 | 92 | return result 93 | 94 | def get_file_content(self, file_path: str) -> bytes: 95 | with open(file_path, 'rb') as f: 96 | result = f.read() 97 | 98 | if not result: 99 | self.__verbose_print('\t[-] File is empty.') 100 | 101 | return result 102 | 103 | @staticmethod 104 | def _is_pe(file_data: bytes) -> bool: 105 | result = False 106 | 107 | if file_data.startswith(PEExtract.PE_MAGIC): 108 | pe_signature_index_bytes = file_data[0x3c:0x3c + 4] 109 | pe_signature_index = int.from_bytes(pe_signature_index_bytes, byteorder='little') 110 | pe_signature_bytes = file_data[pe_signature_index:pe_signature_index + 4] 111 | if pe_signature_bytes == PEExtract.PE_SIGNATURE: 112 | result = True 113 | 114 | return result 115 | 116 | @staticmethod 117 | def _search_mz_bytes(byte_sequence: bytes, byte_array: bytes) -> List: 118 | return [x.start() for x in re.finditer(re.escape(byte_sequence), byte_array)] 119 | 120 | def _get_mz_indexes(self, byte_array: bytes) -> List[MZItem]: 121 | result = [] 122 | 123 | if not self.skip_xored: 124 | for i in range(1, 255): 125 | mz_bytes_xored = bytes(x ^ i for x in PEExtract.PE_MAGIC) 126 | mz_offsets_xored = self._search_mz_bytes(mz_bytes_xored, byte_array) 127 | 128 | for mz_offset_xored in mz_offsets_xored: 129 | result.append(MZItem(mz_offset_xored, i)) 130 | 131 | mz_offsets = self._search_mz_bytes(PEExtract.PE_MAGIC, byte_array) 132 | for mz_offset in mz_offsets: 133 | result.append(MZItem(mz_offset)) 134 | 135 | if any(x.xor_key for x in result): 136 | self.__verbose_print('\t[*] XORed magic "MZ" byte(s) found.') 137 | 138 | if any(x.xor_key is None for x in result): 139 | self.__verbose_print('\t[*] Magic "MZ" byte(s) found.') 140 | 141 | if not result: 142 | self.__verbose_print('\t[*] No magic "MZ" byte(s) found, file likely does not contain PEs.') 143 | 144 | self.__verbose_print('\t-') 145 | 146 | return result 147 | 148 | def _get_pe_signature_index(self, byte_array: bytes, mz_item: MZItem) -> int: 149 | pe_signature_index_bytes = byte_array[mz_item.offset + 0x3c:mz_item.offset + 0x3c + 4] 150 | 151 | if mz_item.xor_key: 152 | pe_signature_index_bytes = bytes(x ^ mz_item.xor_key for x in pe_signature_index_bytes) 153 | 154 | result = int.from_bytes(pe_signature_index_bytes, byteorder='little') 155 | 156 | if not result: 157 | self.__verbose_print('\t[*] End of file reached.') 158 | 159 | return result 160 | 161 | def _check_pe_signature(self, signature: bytes, xor_key: int) -> bool: 162 | result = False 163 | 164 | if xor_key and signature == bytes(x ^ xor_key for x in PEExtract.PE_SIGNATURE): 165 | self.__verbose_print('\t[*] XORed "PE" signature found.') 166 | result = True 167 | elif signature == PEExtract.PE_SIGNATURE: 168 | self.__verbose_print('\t[*] "PE" signature found.') 169 | result = True 170 | 171 | return result 172 | 173 | def get_file_type(self, pe: PE) -> FileType: 174 | result = FileType.Unknown 175 | 176 | if pe.is_exe(): 177 | self.__verbose_print('\t[+] Found EXE file.') 178 | result = FileType.Exe 179 | elif pe.is_dll(): 180 | self.__verbose_print('\t[+] Found DLL file.') 181 | result = FileType.Dll 182 | elif pe.is_driver(): 183 | self.__verbose_print('\t[+] Found SYS file.') 184 | result = FileType.Sys 185 | else: 186 | self.__verbose_print('\t[+] Found unknown file.') 187 | 188 | return result 189 | 190 | def get_file_bitness(self, pe: PE) -> str: 191 | result = '' 192 | 193 | if (pe.OPTIONAL_HEADER.Magic & pefile.OPTIONAL_HEADER_MAGIC_PE) == pefile.OPTIONAL_HEADER_MAGIC_PE: 194 | self.__verbose_print('\t[*] File is for 32-bit architecture.') 195 | result = '32' 196 | elif (pe.OPTIONAL_HEADER.Magic & pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS) == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS: 197 | self.__verbose_print('\t[*] File is for 64-bit architecture.') 198 | result = '64' 199 | 200 | return result 201 | 202 | def is_page_sized_pe(self, file_size: int) -> bool: 203 | result = True 204 | 205 | if file_size == 4096: 206 | self.__verbose_print('\t[*] Extracted PE file has memory page size of 4,096 bytes and is likely incomplete.') 207 | result = False 208 | 209 | return result 210 | 211 | def is_signed_pe(self, pe_file: PE) -> bool: 212 | result = False 213 | 214 | if pe_file.OPTIONAL_HEADER.DATA_DIRECTORY[ 215 | pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_SECURITY"]].VirtualAddress and \ 216 | pe_file.OPTIONAL_HEADER.DATA_DIRECTORY[pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_SECURITY"]].Size: 217 | self.__verbose_print('\t[*] File is signed.') 218 | result = True 219 | 220 | return result 221 | 222 | @staticmethod 223 | def get_pe_with_signature(pe_file: PE) -> bytes: 224 | signature_size = pe_file.OPTIONAL_HEADER.DATA_DIRECTORY[ 225 | pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_SECURITY"]].Size 226 | overlay_data_offset = pe_file.get_overlay_data_start_offset() 227 | 228 | return pe_file.__data__[:overlay_data_offset + signature_size] 229 | 230 | def __summary_report(self) -> None: 231 | print('---') 232 | print('Found and extracted:') 233 | print(f'\tEXE files - {self.report.exe - 1} ' 234 | f'({self.report.exe_xored - 1} XORed; {self.report.exe_signed - 1} signed)') 235 | print(f'\tDLL files - {self.report.dll - 1} ' 236 | f'({self.report.dll_xored - 1} XORed; {self.report.dll_signed - 1} signed)') 237 | print(f'\tSYS files - {self.report.sys - 1} ' 238 | f'({self.report.sys_xored - 1} XORed; {self.report.sys_signed - 1} signed)') 239 | print(f'\tUnknown files - {self.report.unknown - 1} ' 240 | f'({self.report.unknown_xored - 1} XORed; {self.report.unknown_signed - 1} signed)') 241 | 242 | def extract_from_file(self, file_path: str) -> None: 243 | file_content = self.get_file_content(file_path) 244 | if not file_content: 245 | return 246 | 247 | # Check if input file is a PE itself to prevent self-extraction 248 | if self._is_pe(file_content): 249 | file_content = file_content[1:] 250 | 251 | self.__verbose_print(f'Extracting files from: {file_path}') 252 | # Search for magic 'MZ' bytes 253 | mz_items = self._get_mz_indexes(file_content) 254 | 255 | for mz_item in mz_items: 256 | self.__verbose_print(f'\t[+] Processing \'MZ\' magic bytes indicator at {hex(mz_item.offset)}') 257 | # Get index of 'PE' signature 258 | pe_signature_index = self._get_pe_signature_index(file_content, mz_item) 259 | 260 | # Check if 'PE' signature exists at index and if true extract PE file 261 | if self._check_pe_signature( 262 | file_content[mz_item.offset + pe_signature_index:mz_item.offset + pe_signature_index + 4], 263 | mz_item.xor_key): 264 | # Decrypt file bytes when XORed PE was found 265 | if mz_item.xor_key: 266 | pe_bytes = bytes(x ^ mz_item.xor_key for x in file_content[mz_item.offset:]) 267 | else: 268 | pe_bytes = file_content[mz_item.offset:] 269 | 270 | # Create pefile instance from file bytes 271 | pe_file = pefile.PE(data=pe_bytes) 272 | 273 | # Check if file has memory page size when --extract-all argument wasn't chosen 274 | if not self.extract_all and not self.is_page_sized_pe(len(pe_file.__data__)): 275 | continue 276 | 277 | # Create folder for extracted PE files 278 | if not os.path.exists(f'{file_path}_files'): 279 | os.mkdir(f'{file_path}_files') 280 | 281 | # Get PE file type and if it was XORed 282 | pe_file_type = self.get_file_type(pe_file).name.lower() 283 | pe_file_type = f'{pe_file_type}_xored' if mz_item.xor_key else pe_file_type 284 | 285 | # Get PE file bitness 286 | pe_file_bitness = self.get_file_bitness(pe_file) 287 | bitness_string = f'_x{pe_file_bitness}' if pe_file_bitness else '' 288 | 289 | # Create file write path for extracted PE 290 | pe_file_path = os.path.join(f'{file_path}_files', 291 | f'{pe_file_type}{bitness_string}_{self.report.get_value(pe_file_type)}.bin') 292 | 293 | # Increase file type pointer for file name and report 294 | self.report.increase_value(pe_file_type) 295 | 296 | # Increase also normal file type counter if XORed PE was found 297 | if mz_item.xor_key: 298 | self.report.increase_value(pe_file_type + '_xored') 299 | 300 | # Write extracted PE file to disk 301 | if self.extract_overlays: 302 | pe_file.write(pe_file_path) 303 | pe_file.close() 304 | else: 305 | if self.is_signed_pe(pe_file): 306 | self.report.increase_value(pe_file_type + '_signed') 307 | pe_file_bytes = self.get_pe_with_signature(pe_file) 308 | else: 309 | pe_file_bytes = pe_file.trim() 310 | with open(pe_file_path, 'wb') as f: 311 | f.write(pe_file_bytes) 312 | 313 | self.__verbose_print(f'\t[+] Extracted PE written to: {pe_file_path}') 314 | self.__verbose_print('\t-') 315 | 316 | def extract_from_folder(self) -> None: 317 | for root, _, files in os.walk(self.input_path): 318 | print(f'[*] Found {len(files)} files in "{self.input_path}".') 319 | for file in files: 320 | self.extract_from_file(os.path.join(root, file)) 321 | break 322 | 323 | def extract(self) -> None: 324 | print(f'Input: "{self.input_path}"') 325 | input_type = self._get_input_type() 326 | 327 | if input_type == InputType.File: 328 | self.extract_from_file(self.input_path) 329 | elif input_type == InputType.Folder: 330 | self.extract_from_folder() 331 | 332 | self.__summary_report() 333 | 334 | 335 | def main(): 336 | parser = argparse.ArgumentParser(description='Extract EXE, DLL, SYS and unknown PE(s) from byte blob(s).') 337 | parser.add_argument('input', type=str, help='Input file/folder (absolute path).') 338 | parser.add_argument('-s', '--skip-xor-extraction', dest='skip_xored', action='store_true', 339 | help='Skip extraction of simple XORed PE files.') 340 | parser.add_argument('-o', '--extract-pe-overlays', dest='extract_overlays', action='store_true', 341 | help='Extract also possible PE overlay data (unreliable).') 342 | parser.add_argument('-a', '--extract-all', dest='extract_all', action='store_true', 343 | help='Extract also (likely) incomplete PEs from memory page size dumps.') 344 | parser.add_argument('-v', '--verbose-output', dest='verbose', action='store_true', 345 | help='Show detailed output.') 346 | args = parser.parse_args() 347 | 348 | pe_extract = PEExtract(args.input, args.extract_all, args.skip_xored, args.extract_overlays, args.verbose) 349 | pe_extract.extract() 350 | 351 | 352 | if __name__ == "__main__": 353 | main() 354 | --------------------------------------------------------------------------------