├── README.md ├── LICENSE ├── .gitignore └── detect_stack_strings.py /README.md: -------------------------------------------------------------------------------- 1 | # Ghidra Stack-strings 2 | 3 | A script to detect stack-strings written by the selected x86/x64 instructions, emulated using [Unicorn](https://www.unicorn-engine.org/). 4 | 5 | The script is written in Python 3, so it needs [Ghidrathon](https://github.com/mandiant/Ghidrathon). 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Giovanni Lagorio 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
# Detect stack-strings written by the selected instructions, emulated using Unicorn.
# The script is written in Python 3, so it needs Ghidrathon.
# @author zxgio
# @category Emulation
# @runtime pyghidra

import re
from collections import namedtuple
from itertools import chain
from unicorn import (
    Uc,
    UcError,
    UC_ARCH_X86,
    UC_MODE_32,
    UC_MODE_64,
    UC_PROT_READ,
    UC_PROT_WRITE,
    UC_PROT_EXEC,
    UC_HOOK_MEM_WRITE,
    UC_HOOK_CODE
)
from unicorn.x86_const import (
    UC_X86_REG_ESP,
    UC_X86_REG_EIP,
    UC_X86_REG_EBP,
    UC_X86_REG_RSP,
    UC_X86_REG_RIP,
    UC_X86_REG_RBP,
)

# functions ascii_strings and unicode_strings are taken from:
# https://gist.github.com/jedimasterbot/39ef35bc4324e4b4338a210298526cd0

ASCII_BYTE = rb" !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t"
String = namedtuple("String", ["s", "offset"])

# Granularity used when rounding the code mapping to whole pages.
PAGE_SIZE = 4096
# Preferred base address and size of the emulated stack region.
DEFAULT_STACK_ADDR = 1024 * 1024
STACK_SIZE = 1024 * 1024


def ascii_strings(buf, n=4):
    """Yield a String for each run of at least *n* bytes of *buf* drawn from ASCII_BYTE.

    Each String carries the decoded text and the offset of the run in *buf*.
    """
    ascii_re = re.compile(rb"([%s]{%d,})" % (ASCII_BYTE, n))
    for match in ascii_re.finditer(buf):
        yield String(match.group().decode("ascii"), match.start())


def unicode_strings(buf, n=4):
    """Yield a String for each run of at least *n* (ASCII_BYTE byte, NUL byte) pairs in *buf*.

    Each String carries the decoded text and the offset of the run in *buf*.
    """
    uni_re = re.compile(rb"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n))
    for match in uni_re.finditer(buf):
        # The pattern only matches "ASCII byte followed by NUL" pairs, i.e.
        # little-endian UTF-16 code units, so the byte order is stated
        # explicitly instead of depending on the host, and decoding cannot fail.
        yield String(match.group().decode("utf-16-le"), match.start())


def all_strings(buf, n=4):
    """Return a list with the results of ascii_strings followed by those of unicode_strings."""
    return list(chain(ascii_strings(buf, n), unicode_strings(buf, n)))


def _pick_stack_base(code_addr, code_size):
    """Return a base address for the stack region that does not overlap the code mapping.

    DEFAULT_STACK_ADDR is used unless [code_addr, code_addr + code_size)
    intersects it (e.g. a program loaded at image base 0x100000); in that case
    the stack is placed immediately after the code mapping. Both arguments are
    expected to be multiples of PAGE_SIZE, so the result stays page-aligned.
    """
    code_end = code_addr + code_size
    default_end = DEFAULT_STACK_ADDR + STACK_SIZE
    if code_end <= DEFAULT_STACK_ADDR or default_end <= code_addr:
        return DEFAULT_STACK_ADDR
    return code_end


def emulate(selection):
    """Emulate the selected x86/x64 instructions and report strings found on the stack.

    The bytes from the start of *selection* up to the end of the instruction
    containing its last address are copied into a Unicorn emulator and run.
    The emulated stack region is then scanned for ASCII and UTF-16LE strings
    of at least 3 characters. Each string is printed; when the instruction
    that wrote its first byte is known, the string is also set as a
    pre-comment on that instruction.

    Parameters:
        selection: the Ghidra selection to emulate (the script passes
            currentSelection).

    Returns None. Prints a message and returns early when the program is not
    32- or 64-bit x86, or when the selection does not end inside an instruction.
    """
    language = currentProgram.getLanguage()
    if language.getProcessor().toString() != "x86":
        print("Sorry, unsupported architecture.")
        return
    bits = language.getLanguageDescription().getSize()
    if bits not in (32, 64):
        # Any other size would otherwise be emulated as 64-bit code.
        print("Sorry, unsupported architecture.")
        return
    if bits == 32:
        mode, reg_sp, reg_bp, reg_ip = UC_MODE_32, UC_X86_REG_ESP, UC_X86_REG_EBP, UC_X86_REG_EIP
    else:
        mode, reg_sp, reg_bp, reg_ip = UC_MODE_64, UC_X86_REG_RSP, UC_X86_REG_RBP, UC_X86_REG_RIP

    min_addr = selection.getMinAddress().getOffset()
    # Extend the range to the end of the last (possibly partially) selected instruction.
    last_instruction = getInstructionContaining(selection.getMaxAddress())
    if last_instruction is None:
        print("The selection does not end inside an instruction.")
        return
    max_addr = last_instruction.getMaxAddress().getOffset()
    last_instruction_addr = last_instruction.getMinAddress().getOffset()
    print(f"Emulating from 0x{min_addr:x} to 0x{last_instruction_addr:x} (code range 0x{min_addr:x}-0x{max_addr:x})")
    # Masking with 0xFF maps each value returned by getBytes into 0..255, as bytes() requires.
    code = bytes(b & 0xFF for b in getBytes(toAddr(min_addr), max_addr - min_addr + 1))

    # Map whole pages: round the start down and the end up to PAGE_SIZE.
    code_addr = min_addr & ~(PAGE_SIZE - 1)
    code_size = (max_addr - code_addr + PAGE_SIZE) & ~(PAGE_SIZE - 1)
    stack_addr = _pick_stack_base(code_addr, code_size)

    emu = Uc(UC_ARCH_X86, mode)
    emu.mem_map(code_addr, code_size, UC_PROT_EXEC)
    emu.mem_write(min_addr, code)
    emu.mem_map(stack_addr, STACK_SIZE, UC_PROT_READ | UC_PROT_WRITE)
    # Start in the middle of the region so accesses above and below the frame stay mapped.
    emu.reg_write(reg_sp, stack_addr + STACK_SIZE // 2 - 8192)
    emu.reg_write(reg_bp, stack_addr + STACK_SIZE // 2)

    # Maps every written byte address to the program counter read during that write.
    all_writes = {}

    def hook_mem_write(emu, access, address, size, value, user_data):
        # NOTE(review): confirm that the program counter read inside a memory
        # hook is the address of the writing instruction for the Unicorn
        # version in use.
        writer = emu.reg_read(reg_ip)
        # Record each byte so a string starting inside a wider write is still
        # attributed, and so the latest writer of a byte wins.
        for byte_addr in range(address, address + size):
            all_writes[byte_addr] = writer

    emu.hook_add(UC_HOOK_MEM_WRITE, hook_mem_write)
    try:
        emu.emu_start(min_addr, last_instruction_addr + last_instruction.getLength())
    except UcError as e:
        # Keep going: strings written before the failure are still on the stack.
        print(f"Emulation stopped early: {e}")

    n_found = 0
    for s, offset in all_strings(emu.mem_read(stack_addr, STACK_SIZE), 3):
        n_found += 1
        inst_addr = all_writes.get(offset + stack_addr)
        if inst_addr is not None:
            print(f"'{s}' written by instruction at 0x{inst_addr:x}")
            setPreComment(toAddr(inst_addr), s)
        else:
            print(f"'{s}' written by instruction ???")
    print(f'{n_found} string(s) found.')


selection = currentSelection
if selection is not None:
    emulate(selection)
else:
    print("Please select the instructions to emulate, before running this script.")