├── requirements.txt ├── gostringsr2 ├── __init__.py ├── cli.py └── gostringsr2.py ├── setup.cfg ├── CHANGELOG.md ├── LICENSE ├── setup.py └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | r2pipe 2 | Click 3 | -------------------------------------------------------------------------------- /gostringsr2/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Top-level package for gostringsr2.""" 4 | 5 | __author__ = """Jonathan Wrightsell""" 6 | __email__ = "jonathan.wrightsell@carvesystems.com" 7 | __version__ = "1.1.2" 8 | 9 | from .gostringsr2 import * 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 1.1.2 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version="{current_version}" 8 | replace = version="{new_version}" 9 | 10 | [bumpversion:file:gostringsr2/__init__.py] 11 | search = __version__ = "{current_version}" 12 | replace = __version__ = "{new_version}" 13 | 14 | [bumpversion:file:CHANGELOG.md] 15 | search = ## next 16 | replace = ## {new_version} 17 | 18 | [bdist_wheel] 19 | universal = 1 20 | 21 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 1.1.2 4 | 5 | - Validate file is elf/pe/mach0 or throw an error 6 | - Rearrange logic so that non-ARM/x86 architectures may work 7 | - Add some docs to GoStringsR2 8 | - Open r2pipe with `-2` flag to kill stderr 9 | 10 | ## 1.1.1 11 | 12 | - Add `-s` option to generate an r2 script that can be loaded into radare2 afterwards 13 | 14 | ## 1.1.0 15 | 16 | - Use `p8` instead of `pr` to get raw data (#1) 17 | - Fix bug in is_a_string_ref() (#1) 18 | - Use quietr2pipe to kill r2's stderr during run() 19 | - Some refactoring, debug output, optimizing cross-ref boundaries for r2 20 | 21 | ## 1.0.0 22 | 23 | - Initial release 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019, Jonathan Wrightsell 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup, find_packages 4 | 5 | with open("README.md") as readme_file: 6 | readme = readme_file.read() 7 | 8 | with open("CHANGELOG.md") as history_file: 9 | history = history_file.read() 10 | 11 | requirements = ["Click>=6.0", "r2pipe"] 12 | 13 | setup_requirements = ["pytest-runner"] 14 | 15 | test_requirements = ["pytest"] 16 | 17 | setup( 18 | author="Jonathan Wrightsell", 19 | author_email="jonathan.wrightsell@carvesystems.com", 20 | classifiers=[ 21 | "Development Status :: 2 - Pre-Alpha", 22 | "Intended Audience :: Developers", 23 | "License :: OSI Approved :: MIT License", 24 | "Natural Language :: English", 25 | "Programming Language :: Python :: 2", 26 | "Programming Language :: Python :: 2.7", 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3.4", 29 | "Programming Language :: Python :: 3.5", 30 | "Programming Language :: Python :: 3.6", 31 | "Programming Language :: Python :: 3.7", 32 | ], 33 | description="gostringsr2 extracts strings from a Go binary using radare2", 34 | entry_points={"console_scripts": ["gostringsr2=gostringsr2.cli:main"]}, 35 | install_requires=requirements, 36 | license="MIT license", 37 | long_description=readme + "\n\n" + history, 38 | include_package_data=True, 39 | keywords="gostringsr2", 40 | name="gostringsr2", 41 | packages=find_packages(include=["gostringsr2"]), 42 | setup_requires=setup_requirements, 43 | test_suite="tests", 44 | tests_require=test_requirements, 45 | url="https://github.com/carvesystems/gostringsr2", 46 | version="1.1.2", 47 | zip_safe=False, 48 | ) 49 | -------------------------------------------------------------------------------- /gostringsr2/cli.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import click 3 | import re 4 | from os import path 5 | from . import GoStringsR2, GoStringsR2Error 6 | 7 | 8 | def printe(*args, **kwargs): 9 | print(*args, file=sys.stderr, flush=True, **kwargs) 10 | 11 | 12 | @click.command() 13 | @click.argument("file") 14 | @click.option( 15 | "-n", "length", is_flag=False, default=4, help="minimum length, default=4" 16 | ) 17 | @click.option("-v", "verbose", is_flag=True, help="verbose") 18 | @click.option("-u", "utf8", is_flag=True, help="utf8 encoding instead of ascii") 19 | @click.option( 20 | "-s", "r2script", help="save output as r2 script; load in r2 with: . [script-file]" 21 | ) 22 | def main(file, length, verbose, utf8, r2script): 23 | 24 | if not path.isfile(file): 25 | printe("invalid file {}".format(file)) 26 | return 1 27 | 28 | g = GoStringsR2(file, verbose) 29 | 30 | try: 31 | g.load() 32 | 33 | encoding = "ascii" 34 | if utf8: 35 | encoding = "utf8" 36 | 37 | go_strings = g.get_strings(length, encoding) 38 | # array of [address, decoded length, string, byte length, list of code refs] 39 | 40 | if r2script is not None: 41 | r2scriptfile = open(r2script, "w") 42 | r2scriptfile.writelines( 43 | [ 44 | "fs strings\n", 45 | "e asm.comments = false\n", # The big Go string gets in the way 46 | "e asm.usercomments = true\n", 47 | ] 48 | ) 49 | else: 50 | r2scriptfile = None 51 | 52 | for go_string in go_strings: 53 | s_addr, s_len, s_val, s_binlen, s_refs = go_string 54 | 55 | # get rid of "binary" chars before printing, otherwise pipes to grep are unhappy 56 | s_val = re.sub("[\x00\x08]", "", s_val) 57 | 58 | if verbose: 59 | print("0x{:x} : [{}] : {}".format(s_addr, s_len, s_val)) 60 | else: 61 | print(s_val) 62 | 63 | if r2scriptfile is not None: 64 | r2scriptfile.writelines(g.get_r2_script_for_string(go_string, encoding)) 65 | 66 | if r2scriptfile is not None: 67 | printe( 68 | "+ r2 script written to {}. Load in r2 with '. [scriptfile]'".format( 69 | r2script 70 | ) 71 | ) 72 | r2scriptfile.close() 73 | 74 | g.kill() 75 | 76 | return 0 77 | 78 | except GoStringsR2Error as err: 79 | printe("gostringsr2 error: {}".format(err)) 80 | 81 | 82 | if __name__ == "__main__": 83 | sys.exit(main()) # pragma: no cover 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gostringsr2 2 | 3 | gostringsr2 extracts strings from a Go binary using radare2. 4 | 5 | Tested with radare2 3.7.0, Python 3.7, r2pipe 1.4.1, on OS X and Linux. 6 | 7 | Tested on Go binaries with architectures: x86 (32 and 64 bit), ARM (32 and 64 bit). 8 | 9 | Tested on Go binaries with file formats: ELF (Linux), Mach-O (OS X), PE (Windows). 10 | 11 | v1.0.0 code walkthrough - https://carvesystems.com/news/reverse-engineering-go-binaries-using-radare-2-and-python/ 12 | 13 | ## Installation 14 | 15 | 1. Install radare2. 16 | 1. Install gostringsr2 into your Python3 (virtual) environment 17 | 18 | From Github: 19 | 20 | ``` 21 | pip install git+https://github.com/carvesystems/gostringsr2 22 | ``` 23 | 24 | or 25 | 26 | Locally: 27 | 28 | ``` 29 | git clone https://github.com/carvesystems/gostringsr2 30 | pip install -e gostringsr2 31 | ``` 32 | 33 | ## Usage 34 | 35 | ``` 36 | Usage: gostringsr2 [OPTIONS] FILE 37 | 38 | Options: 39 | -n INTEGER minimum length, default=4 40 | -v verbose 41 | -u utf8 encoding instead of ascii 42 | -s TEXT save output as r2 script; load in r2 with: . [script-file] 43 | --help Show this message and exit. 44 | ``` 45 | 46 | ## Example 47 | 48 | Sample Go file: 49 | 50 | ``` 51 | $ cat < helloworld.go 52 | package main 53 | 54 | func main() { 55 | print("hello world, how are you today?\n") 56 | } 57 | SOURCE 58 | 59 | $ go build helloworld.go 60 | 61 | $ ./helloworld 62 | hello world, how are you today? 63 | ``` 64 | 65 | ### Basic output: 66 | 67 | Find ASCII strings of at least length 8: 68 | 69 | ``` 70 | $ gostringsr2 -n 8 helloworld|grep -B5 -A5 hello 71 | bad write barrier buffer bounds 72 | call from within the Go runtime 73 | casgstatus: bad incoming values 74 | checkmark found unmarked object 75 | entersyscallblock inconsistent 76 | hello world, how are you today? 77 | inserting span already in treap 78 | internal error - misuse of itab 79 | non in-use span in unswept list 80 | pacer: sweep done at heap size 81 | resetspinning: not a spinning m 82 | ``` 83 | 84 | ### Verbose output: 85 | 86 | Shows debug messages and each string's virtual address and (decoded) length. 87 | 88 | ``` 89 | $ gostringsr2 -v -n 8 helloworld|grep -B5 -A5 hello 90 | Loading file into r2: helloworld 91 | file: helloworld 92 | size: 1083 KB 93 | executable: mach0 94 | language: c 95 | architecture: 64-bit x86 96 | os: macos 97 | stripped: False 98 | 99 | Locating string table... 100 | String table at 0x106cf20 thru 0x10713a2 101 | Retrieving cross references... 102 | Limited cross-ref check from 0x1001000 to 0x104ead0 103 | Locating string references... 104 | Retrieved 774 references to the string table 105 | Found strings: 631 106 | 0x106f9c3 : [31] : bad write barrier buffer bounds 107 | 0x106f9e2 : [31] : call from within the Go runtime 108 | 0x106fa01 : [31] : casgstatus: bad incoming values 109 | 0x106fa20 : [31] : checkmark found unmarked object 110 | 0x106fa3f : [31] : entersyscallblock inconsistent 111 | 0x106fa5e : [31] : hello world, how are you today? 112 | 0x106fa7d : [31] : inserting span already in treap 113 | 0x106fa9c : [31] : internal error - misuse of itab 114 | 0x106fabb : [31] : non in-use span in unswept list 115 | 0x106fada : [31] : pacer: sweep done at heap size 116 | 0x106faf9 : [31] : resetspinning: not a spinning m 117 | ``` 118 | 119 | 120 | ### r2 script output 121 | 122 | Writes an r2 script that creates: 123 | 124 | 1. A string reference ("axs") to the string at each code locations 125 | 1. A comment ("CCu") at each code reference, `([string length]) "[first 50 characters of the string]"` 126 | 1. A flag in the strings flag space starting with `str.go.[first 20 chars of the string]` 127 | 128 | ``` 129 | $ gostringsr2 -s helloworld.r2 -v -n 8 -helloworld|grep hello 130 | Loading file into r2: helloworld 131 | file: helloworld 132 | size: 1083 KB 133 | executable: mach0 134 | language: c 135 | architecture: 64-bit x86 136 | os: macos 137 | stripped: False 138 | 139 | Locating string table... 140 | String table at 0x106cf40 thru 0x1071403 141 | Retrieving cross references... 142 | Limited cross-ref check from 0x1001000 to 0x104eaf0 143 | Locating string references... 144 | Retrieved 775 references to the string table 145 | Found strings: 632 146 | + r2 script written to hello.r2. Load in r2 with '. [scriptfile]' 147 | 0x106fbf7 : [32] : hello world, how are you today? 148 | 149 | 150 | $ r2 helloworld 151 | -- It's not you, it's me. 152 | [0x0104a4d0]> . hello.r2 153 | [0x0104a4d0]> axt 0x106fbf7 154 | (nofunc); (32) "hello world, how are you today?//" 0x104ea42 [STRING] lea rax, str.go.hello_world__how_are 155 | [0x0104a4d0]> pd 6 @0x104ea42 156 | 0x0104ea42 488d05ae1102. lea rax, str.go.hello_world__how_are ; (32) "hello world, how are you today?//" 157 | 0x0104ea49 48890424 mov qword [rsp], rax 158 | 0x0104ea4d 48c744240820. mov qword [rsp + 8], 0x20 159 | 0x0104ea56 e87557fdff call sym.runtime.printstring 160 | 0x0104ea5b e8c04efdff call sym.runtime.printunlock 161 | 0x0104ea60 e83b4efdff call sym.runtime.printlock 162 | [0x0104a4d0]> 163 | ``` 164 | -------------------------------------------------------------------------------- /gostringsr2/gostringsr2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import json 5 | import binascii 6 | import re 7 | import base64 8 | 9 | import r2pipe 10 | 11 | 12 | class GoStringsR2Error(RuntimeError): 13 | pass 14 | 15 | 16 | class GoStringsR2: 17 | 18 | SUPPORTED_ARCHS = ["arm", "x86"] 19 | SUPPORTED_BINTYPES = ["elf", "pe", "mach0"] 20 | 21 | def __init__(self, _file, _logging=False): 22 | """ 23 | Initialize GoStringsR2 with a path to a Binary 24 | 25 | _file is the path to a file. 26 | If _logging is True, status messages will be output to standard error. 27 | """ 28 | 29 | self.file = _file 30 | self.logging = _logging 31 | self.loaded = False 32 | self.r2 = None 33 | 34 | def kill(self): 35 | """ 36 | Closes the r2pipe session 37 | """ 38 | 39 | if self.loaded: 40 | self.r2.quit() 41 | self.r2 = None 42 | self.loaded = False 43 | 44 | def runjson(self, cmd): 45 | """ 46 | Executes an r2 command that returns a JSON dictionary. 47 | """ 48 | 49 | return self.r2.cmdj(cmd) 50 | 51 | def run(self, cmd): 52 | """ 53 | Executes an r2 command 54 | """ 55 | 56 | return self.r2.cmd(cmd) 57 | 58 | def load(self): 59 | """ 60 | Opens the r2pipe session. 61 | 62 | GoStringsR2Error may be thrown if there is an error loading. 63 | """ 64 | 65 | self.log("Loading file into r2: {}".format(self.file)) 66 | self.r2 = r2pipe.open(self.file) 67 | self.data = {} 68 | self.data["info"] = self.runjson("ij") 69 | if "bin" not in self.data["info"]: 70 | raise GoStringsR2Error("r2 could not parse the binary") 71 | 72 | self.arch = self.data["info"]["bin"]["arch"] 73 | self.bintype = self.data["info"]["bin"]["bintype"] 74 | self.bits = self.data["info"]["bin"]["bits"] 75 | self.binos = self.data["info"]["bin"]["os"] 76 | 77 | if self.bintype not in ["elf", "mach0", "pe"]: 78 | raise GoStringsR2Error( 79 | "bintype {} not supported by gostringsr2. Supported: {}".format( 80 | self.bintype, GoStringsR2.SUPPORTED_BINTYPES 81 | ) 82 | ) 83 | if self.arch not in ["arm", "x86"]: 84 | self.log("warning: arch {} may not fully work".format(self.arch)) 85 | 86 | self.data["symbols"] = self.runjson("isj") 87 | self.data["sections"] = self.runjson("iSj") 88 | 89 | self.loaded = True 90 | 91 | self.log(self.file_info()) 92 | 93 | def file_info(self): 94 | """ 95 | Returns a descriptive string of the loaded binary. 96 | """ 97 | 98 | if self.loaded: 99 | return ( 100 | "file: {}\n" 101 | "size: {} KB\n" 102 | "executable: {}\n" 103 | "language: {}\n" 104 | "architecture: {}-bit {}\n" 105 | "os: {}\n" 106 | "stripped: {}\n".format( 107 | self.data["info"]["core"]["file"], 108 | self.data["info"]["core"]["size"] // 1024, 109 | self.data["info"]["bin"]["bintype"], 110 | self.data["info"]["bin"]["lang"], 111 | self.data["info"]["bin"]["bits"], 112 | self.data["info"]["bin"]["arch"], 113 | self.data["info"]["bin"]["os"], 114 | self.data["info"]["bin"]["stripped"], 115 | ) 116 | ) 117 | 118 | return "file: " 119 | 120 | def get_string_table_symbols(self, rdata): 121 | """ 122 | Returns a dictionary with the raw data from the string table, as found by referencing Go symbols in the provided rdata dictionary. 123 | """ 124 | 125 | g_str = self.find_symbol("go.string.*") 126 | g_func = self.find_symbol("go.func.*") 127 | if g_str is not None and g_func is not None: 128 | g_str["tabsize"] = g_func["vaddr"] - g_str["vaddr"] 129 | startaddr = g_str["vaddr"] - rdata["vaddr"] 130 | endaddr = startaddr + g_str["tabsize"] 131 | g_str["table"] = rdata["data"][startaddr:endaddr] 132 | return g_str 133 | 134 | return None 135 | 136 | def get_rodata_section(self): 137 | """ 138 | Returns the first read-only data section in the binary. 139 | """ 140 | 141 | if self.bintype == "elf": 142 | sname = ".rodata" 143 | elif self.bintype == "mach0": 144 | sname = ".__TEXT.__rodata" 145 | elif self.bintype == "pe": 146 | sname = ".rdata" 147 | return self.get_section_data(sname) 148 | 149 | def get_code_section(self): 150 | """ 151 | Returns the first text/code section in the binary. 152 | """ 153 | 154 | if self.bintype in ["elf", "pe"]: 155 | return self.get_section_info(".text") 156 | elif self.bintype == "mach0": 157 | return self.get_section_info(".__TEXT.__text") 158 | return None 159 | 160 | def get_string_table_search(self, rdata): 161 | """ 162 | Returns a dictionary with the raw data from the string table, as found via searching in the provided rdata dictionary. 163 | """ 164 | 165 | self.log("Searching for string table") 166 | if rdata is not None: 167 | str_start, str_size = self._find_longest_string(rdata["data"]) 168 | 169 | if str_size > 0: 170 | g_str = {"vaddr": rdata["vaddr"] + str_start, "tabsize": str_size} 171 | startaddr = g_str["vaddr"] - rdata["vaddr"] 172 | endaddr = startaddr + g_str["tabsize"] 173 | g_str["table"] = rdata["data"][startaddr:endaddr] 174 | 175 | return g_str 176 | 177 | return None 178 | 179 | def _find_longest_string(self, bindata): 180 | off = 0 181 | this_off = 0 182 | longest_off = 0 183 | longest_size = 0 184 | 185 | binlength = len(bindata) 186 | while off < binlength: 187 | b = bindata[off : off + 2] 188 | # Basically, terminate a "string" if 2 null bytes are seen. Seems to work for the most part. 189 | if b == b"\x00\x00": 190 | this_size = off - this_off 191 | if this_size > 0: 192 | if this_size > longest_size: 193 | longest_off = this_off 194 | longest_size = this_size 195 | this_off = off + 2 196 | else: 197 | this_size = off - this_off 198 | if this_size > 0: 199 | if this_size > longest_size: 200 | longest_off = this_off 201 | longest_size = this_size 202 | off += 2 203 | 204 | if (off - this_off) > longest_size: 205 | longest_off = this_off 206 | longest_size = off - this_off 207 | 208 | if longest_size > 0: 209 | return (longest_off, longest_size) 210 | 211 | return (None, 0) 212 | 213 | def get_string_table(self): 214 | """ 215 | Returns the string table either via symbols or via searching. 216 | """ 217 | 218 | rodata = self.get_rodata_section() 219 | stab_sym = self.get_string_table_symbols(rodata) 220 | stab_sym = ( 221 | stab_sym if stab_sym is not None else self.get_string_table_search(rodata) 222 | ) 223 | 224 | if stab_sym is None: 225 | return None 226 | else: 227 | strtab_start = stab_sym["vaddr"] 228 | strtab_end = strtab_start + stab_sym["tabsize"] 229 | self.log( 230 | "String table at 0x{:x} thru 0x{:x}".format(strtab_start, strtab_end) 231 | ) 232 | strtab = { 233 | "startaddr": strtab_start, 234 | "endaddr": strtab_end, 235 | "data": stab_sym["table"], 236 | } 237 | return strtab 238 | 239 | def find_symbol(self, symbol_name): 240 | """ 241 | Returns a symbol in the binary as a dictionary, as retrieved with r2. 242 | """ 243 | 244 | for sym in self.data["symbols"]: 245 | if sym.get("name", "") == symbol_name: 246 | return sym 247 | return None 248 | 249 | def get_cross_refs(self): 250 | """ 251 | Performs the cross-references search and returns results in r2 quiet/human-readable format. 252 | """ 253 | 254 | xrefs = None 255 | 256 | # Only check .text; other executable sections may get searched otherwise 257 | # If more than one .text section exists, changeme 258 | code_section = self.get_code_section() 259 | if code_section is not None: 260 | c_start = code_section["vaddr"] 261 | c_end = c_start + code_section["size"] 262 | self.log( 263 | "Limited cross-ref check from 0x{:x} to 0x{:x}".format(c_start, c_end) 264 | ) 265 | self.run("e search.from=0x{:x}".format(c_start)) 266 | self.run("e search.to=0x{:x}".format(c_end)) 267 | 268 | cross_ref_cmd = "/ra" 269 | # Use ESIL analysis for non-x86 architectures 270 | if self.arch != "x86": 271 | cross_ref_cmd = "aae" 272 | # send stderr from r2 to /dev/null to hide r2's address progress 273 | self.run("{} 2>/dev/null".format(cross_ref_cmd)) 274 | xrefs = self.run("axq") 275 | return xrefs 276 | 277 | def get_section_info(self, section_name): 278 | """ 279 | Returns the section info of section_name, as retrieved with r2. 280 | """ 281 | 282 | for secobj in self.data["sections"]: 283 | if secobj["name"].endswith(section_name): 284 | return secobj 285 | return None 286 | 287 | def get_section_data(self, section_name): 288 | """ 289 | Returns a dictionary containing the raw binary data of the requested section. 290 | """ 291 | 292 | secobj = self.get_section_info(section_name) 293 | if secobj is not None: 294 | s_base = secobj["vaddr"] 295 | s_size = secobj["vsize"] 296 | rdsize = 4096 297 | i = 0 298 | sdata = b"" 299 | while s_size > 0: 300 | c = "p8 {} @0x{:x}".format(min(rdsize, s_size), s_base + i * rdsize) 301 | sdat = self.run(c).strip() 302 | sdata += binascii.unhexlify(sdat) 303 | i += 1 304 | s_size -= rdsize 305 | 306 | return {"name": section_name, "vaddr": s_base, "data": sdata} 307 | return None 308 | 309 | def find_strings(self, minlength, encoding, refs, tablebase, tabledata): 310 | """ 311 | Processes cross-references and returns a list of string objects. 312 | 313 | Each string object in the returned list is a list specifying address, decoded length, decoded string value, string size in bytes, list of code references to this string. 314 | """ 315 | 316 | # refs.keys() = dest address, refs.values() = list of source addresses 317 | refs_addrs = sorted(refs.keys(), reverse=True) 318 | 319 | all_strings = [] 320 | for r in refs_addrs: 321 | # r = virtual addr of a string 322 | # subtract vaddr of section to get offset into 323 | r_offset = r - tablebase 324 | if len(all_strings) > 0: 325 | last_ref = all_strings[len(all_strings) - 1][0] - tablebase 326 | r_end_offset = last_ref 327 | else: 328 | r_end_offset = len(tabledata) 329 | 330 | r_str = tabledata[r_offset:r_end_offset].decode(encoding, errors="ignore") 331 | decoded_len = len(r_str) 332 | all_strings.append( 333 | [ 334 | tablebase + r_offset, 335 | decoded_len, 336 | r_str, 337 | r_end_offset - r_offset, 338 | refs[r], 339 | ] 340 | ) 341 | 342 | # filter all_strings by length requirement, then reverse order 343 | # since all_strings started at the end 344 | return list(reversed([s for s in all_strings if s[1] >= minlength])) 345 | 346 | def _is_a_string_ref( 347 | self, src_addr, dst_addr, strtab_addr, strtab_endaddr, code_section 348 | ): 349 | if dst_addr >= strtab_addr and dst_addr < strtab_endaddr: 350 | if code_section is None: 351 | return True 352 | else: 353 | return src_addr >= code_section["vaddr"] and src_addr < ( 354 | code_section["vaddr"] + code_section["size"] 355 | ) 356 | 357 | return False 358 | 359 | def process_xrefs(self, xrefs, strtab_start, strtab_end): 360 | """ 361 | Filters cross-references to only references to the string table. 362 | 363 | xrefs is data returned by r2 from "axq" (quiet/human-readable format), strtab_start/end specify the addresses of the string table. 364 | """ 365 | 366 | str_refs = {} 367 | 368 | code_section = self.get_code_section() 369 | 370 | # 0x01640839 -> 0x016408a9 CALL 371 | for line in xrefs.split("\n"): 372 | lparts = line.split(" ") 373 | # 0 = src, 1= arrow, 2 = dst, 3=empty, 4=type 374 | if len(lparts) == 5: 375 | r_src = int(lparts[0], 16) 376 | r_dst = int(lparts[2], 16) 377 | if self._is_a_string_ref( 378 | r_src, r_dst, strtab_start, strtab_end, code_section 379 | ): 380 | if r_dst in str_refs.keys(): 381 | str_refs[r_dst].append(r_src) 382 | else: 383 | str_refs[r_dst] = [r_src] 384 | 385 | return str_refs 386 | 387 | def log(self, log_msg, *args, **kwargs): 388 | if self.logging: 389 | print("\033[92m" + log_msg + "\033[0m", *args, **kwargs, file=sys.stderr) 390 | 391 | def get_strings(self, minlength, encoding="ascii"): 392 | """ 393 | Perform the string search, returning string objects. 394 | 395 | minlength specifies the minimum length of a string that will be returned. 396 | encoding specifies how bytes should be decoded, with "ascii" being the default. 397 | """ 398 | 399 | ret_strings = [] 400 | 401 | self.log("Locating string table...") 402 | strtab = self.get_string_table() 403 | if strtab is None: 404 | raise GoStringsR2Error("couldn't find the Go string table in the binary") 405 | 406 | self.log("Retrieving cross references...") 407 | xrefs = self.get_cross_refs() 408 | if xrefs is None: 409 | raise GoStringsR2Error("r2 returned no cross-references") 410 | 411 | self.log("Locating string references...") 412 | str_refs = self.process_xrefs(xrefs, strtab["startaddr"], strtab["endaddr"]) 413 | 414 | self.log("Retrieved {} references to the string table".format(len(str_refs))) 415 | if len(str_refs): 416 | ret_strings = self.find_strings( 417 | minlength, encoding, str_refs, strtab["startaddr"], strtab["data"] 418 | ) 419 | 420 | self.log("Found strings: {}".format(len(ret_strings))) 421 | return ret_strings 422 | 423 | def get_r2_script_for_string(self, go_string, encoding): 424 | """ 425 | Returns a list of r2 commands that create comments at code locations where strings are referenced. 426 | """ 427 | 428 | s_addr, s_decoded_len, s_value, s_bin_len, s_refs = go_string 429 | script = [] 430 | 431 | # sanitized flag name, up to 20 alpha numeric chars 432 | san_str = re.sub("[^a-zA-Z0-9]", "_", s_value)[:20] 433 | 434 | # comment will be up to 50 chars; replace newlines with double slash // 435 | trunc_str = '({}) "{}{}"'.format( 436 | s_decoded_len, 437 | re.sub("[\n]", "//", s_value[:50]), 438 | "..." if s_decoded_len > 50 else "", 439 | ) 440 | # ...but base64 encoded because r2 doesn't like ; chars 441 | trunc_str = base64.b64encode(trunc_str.encode(encoding)).decode("ascii") 442 | 443 | # create flag for the string value, use bin length for r2 444 | script.append("f str.go.{} {} @0x{:x}\n".format(san_str, s_bin_len, s_addr)) 445 | # create string reference and comment at each reference containing a snippet of the string 446 | for r in s_refs: 447 | script.append("axs 0x{:x} @0x{:x}\n".format(s_addr, r)) 448 | script.append("CCu base64:{} @0x{:x}\n".format(trunc_str, r)) 449 | 450 | return script 451 | --------------------------------------------------------------------------------