├── .gitignore ├── LICENSE ├── README.md └── detect_stack_strings.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Giovanni Lagorio 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# Detect stack-strings written by the selected instructions, emulated using Unicorn.
# The script is written in Python 3, so it needs Ghidrathon.
# @author zxgio
# @category Emulation
# @keybinding
# @menupath
# @toolbar

import re
from collections import namedtuple
from itertools import chain
from unicorn import (
    Uc,
    UcError,
    UC_ARCH_X86,
    UC_MODE_32,
    UC_MODE_64,
    UC_PROT_READ,
    UC_PROT_WRITE,
    UC_PROT_EXEC,
    UC_HOOK_MEM_WRITE,
    UC_HOOK_CODE
)
from unicorn.x86_const import (
    UC_X86_REG_ESP,
    UC_X86_REG_EIP,
    UC_X86_REG_EBP,
    UC_X86_REG_RSP,
    UC_X86_REG_RIP,
    UC_X86_REG_RBP,
)

# functions ascii_strings and unicode_strings are taken from:
# https://gist.github.com/jedimasterbot/39ef35bc4324e4b4338a210298526cd0

ASCII_BYTE = rb" !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t"
String = namedtuple("String", ["s", "offset"])

PAGE_SIZE = 4096
DEFAULT_STACK_ADDR = 1024 * 1024
STACK_SIZE = 1024 * 1024


def ascii_strings(buf, n=4):
    """Yield a String(s, offset) for every run of at least n printable ASCII bytes in buf."""
    ascii_re = re.compile(rb"([%s]{%d,})" % (ASCII_BYTE, n))
    for match in ascii_re.finditer(buf):
        yield String(match.group().decode("ascii"), match.start())


def unicode_strings(buf, n=4):
    """Yield a String(s, offset) for every run of at least n UTF-16LE printable characters in buf.

    A character is matched as one printable ASCII byte followed by a NUL byte.
    """
    uni_re = re.compile(rb"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n))
    for match in uni_re.finditer(buf):
        try:
            # The pattern only matches the little-endian layout, so decode it
            # explicitly as such: plain "utf-16" picks the byte order from a
            # BOM or, failing that, from the host running the script.
            yield String(match.group().decode("utf-16-le"), match.start())
        except UnicodeDecodeError:
            pass


def all_strings(buf, n=4):
    """Return the list of ASCII strings followed by the UTF-16LE strings found in buf."""
    return list(chain(ascii_strings(buf, n), unicode_strings(buf, n)))


def emulate():
    """Emulate the selected instructions and report the strings they wrote to the stack.

    The bytes from the start of the selection to the end of the instruction
    containing the selection's last address are copied into a fresh Unicorn
    instance together with a zero-filled stack. After the run, the stack is
    scanned for strings of at least 3 characters; each one is printed and, when
    the instruction that wrote its first byte is known, set as a pre-comment
    on that instruction.
    """
    language = currentProgram().getLanguage()
    if language.getProcessor().toString() != "x86":
        print("Sorry, unsupported architecture.")
        return
    bits = language.getLanguageDescription().getSize()
    if bits not in (32, 64):
        # Anything that is not 32-bit used to be emulated as 64-bit code.
        print("Sorry, unsupported architecture.")
        return
    if bits == 32:
        mode, sp_reg, bp_reg, ip_reg = UC_MODE_32, UC_X86_REG_ESP, UC_X86_REG_EBP, UC_X86_REG_EIP
    else:
        mode, sp_reg, bp_reg, ip_reg = UC_MODE_64, UC_X86_REG_RSP, UC_X86_REG_RBP, UC_X86_REG_RIP
    emu = Uc(UC_ARCH_X86, mode)

    selection = currentSelection()
    min_addr = selection.getMinAddress().getOffset()
    last_instruction = getInstructionContaining(selection.getMaxAddress())
    if last_instruction is None:
        print("The selection must end inside an instruction.")
        return
    max_addr = last_instruction.getMaxAddress().getOffset()
    last_instruction_addr = last_instruction.getMinAddress().getOffset()
    print(f"Emulating from 0x{min_addr:x} to 0x{last_instruction_addr:x} (code range 0x{min_addr:x}-0x{max_addr:x})")
    # getBytes yields signed Java bytes; mask them into the 0..255 range.
    code = bytes(b & 0xFF for b in getBytes(toAddr(min_addr), max_addr - min_addr + 1))

    # Unicorn mappings must be page-aligned: round the start down and the
    # (inclusive) end up to a page boundary.
    code_addr = min_addr & ~(PAGE_SIZE - 1)
    code_size = (max_addr - code_addr + PAGE_SIZE) & ~(PAGE_SIZE - 1)
    stack_addr = DEFAULT_STACK_ADDR
    if stack_addr < code_addr + code_size and code_addr < stack_addr + STACK_SIZE:
        # The default stack would overlap the code (e.g. an image based at
        # 0x100000) and the second mem_map would fail; put it right after.
        stack_addr = code_addr + code_size

    # Map the code readable as well as executable so instructions that load
    # data from the selected range can run.
    emu.mem_map(code_addr, code_size, UC_PROT_READ | UC_PROT_EXEC)
    emu.mem_write(min_addr, code)
    emu.mem_map(stack_addr, STACK_SIZE, UC_PROT_READ | UC_PROT_WRITE)
    # Start in the middle of the stack region so accesses on both sides of
    # the frame stay inside mapped memory.
    frame_base = stack_addr + STACK_SIZE // 2
    emu.reg_write(sp_reg, frame_base - 8192)
    emu.reg_write(bp_reg, frame_base)

    # Maps every written byte address to the instruction pointer read at the
    # time of its most recent write.
    writers = {}

    def hook_mem_write(emu, access, address, size, value, user_data):
        writer = emu.reg_read(ip_reg)
        # Record each byte, not just the first, so a string that starts in
        # the middle of a multi-byte store can still be attributed.
        for written in range(address, address + size):
            writers[written] = writer

    emu.hook_add(UC_HOOK_MEM_WRITE, hook_mem_write)
    try:
        emu.emu_start(min_addr, last_instruction_addr + last_instruction.getLength())
    except UcError as err:
        # Keep going: whatever was written before the fault is still useful.
        print(f"Emulation stopped early: {err}")

    found = all_strings(bytes(emu.mem_read(stack_addr, STACK_SIZE)), 3)
    for s, offset in found:
        inst_addr = writers.get(offset + stack_addr)
        if inst_addr is not None:
            print(f"'{s}' written by instruction at 0x{inst_addr:x}")
            setPreComment(toAddr(inst_addr), s)
        else:
            print(f"'{s}' written by instruction ???")
    print(f'{len(found)} string(s) found.')


if currentSelection() is not None:
    emulate()
else:
    print("Please select the instructions to emulate, before running this script.")