├── README.md ├── LICENSE.md ├── symbolyzer.py └── run_bin.c /README.md: -------------------------------------------------------------------------------- 1 | # Running Executables on macOS from Memory 2 | 3 | This is the code repo for the accompanying blog post. 4 | 5 | ## symbolyzer.py 6 | 7 | symbolyzer.py is used for generating unique offset / int pairs for a list of symbols, to use for matching in place of a hash algorithm. It can be run in the following manner to do so for dyld: 8 | 9 | > $ nm /usr/lib/dyld | cut -d" " -f3 | sort | uniq | python symbolyzer.py 10 | 11 | The output is in the following format: 12 | 13 | > _sysctlbyname[4] = 0x626c7463 14 | 15 | This means that if you read a 4-byte little-endian int from the start of the string table entry + 4 and get 0x626c7463, you have found the entry for _sysctlbyname. 16 | 17 | ## run_bin.c 18 | 19 | run_bin.c is the proof-of-concept code for running a binary from memory. It can be run as follows: 20 | 21 | > $ gcc run_bin.c && ./a.out /bin/ls 22 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Cylance Inc. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, 4 | are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation and/or 11 | other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software without 15 | specific prior written permission. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 21 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 24 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /symbolyzer.py: -------------------------------------------------------------------------------- 1 | ###################################################################################### 2 | # # 3 | # Author: Stephanie Archibald # 4 | # Copyright (c) 2017 Cylance Inc. All rights reserved. # 5 | # # 6 | # Redistribution and use in source and binary forms, with or without modification, # 7 | # are permitted provided that the following conditions are met: # 8 | # # 9 | # 1. Redistributions of source code must retain the above copyright notice, this # 10 | # list of conditions and the following disclaimer. # 11 | # # 12 | # 2. Redistributions in binary form must reproduce the above copyright notice, # 13 | # this list of conditions and the following disclaimer in the documentation and/or # 14 | # other materials provided with the distribution. # 15 | # # 16 | # 3. Neither the name of the copyright holder nor the names of its contributors # 17 | # may be used to endorse or promote products derived from this software without # 18 | # specific prior written permission. 
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
######################################################################################

import sys


def calc_int(string, offset):
    """
    Return a hex string representation of the integer found by referencing offset bytes into string.

    The four characters string[offset] .. string[offset + 3] are read as a
    little-endian 32-bit value (the first character is the least significant
    byte) and returned as lowercase hex digits without a "0x" prefix.

    Raises IndexError if fewer than four characters are available at offset,
    or TypeError if an element is not a single character. In both cases the
    offending offset and string are written to stderr before re-raising.
    """
    try:
        return "".join("%02x" % ord(string[offset + i]) for i in reversed(range(4)))
    except (IndexError, TypeError):
        # Diagnostics go to stderr so they never mix with the pairs on stdout.
        sys.stderr.write("%s %s\n" % (offset, string))
        raise


class results(object):
    """
    Class to generate the offset / int pairs for a list of symbols.

    After construction:
      symbols    -- the list that was passed in
      results    -- one ``result`` per symbol for which a usable pair exists
      cant_match -- the symbols for which no usable pair exists
    """
    def __init__(self, symbols):
        self.symbols = symbols
        self.results = []
        self.cant_match = []
        self.generate()

    def generate(self):
        """
        Create the offset / int pairs for all the symbols.

        A symbol gets a pair only if some offset yields a 4-byte value that no
        other symbol has at the same offset; otherwise it goes to cant_match.
        """
        for symbol in self.symbols:
            offset = self._pairwise_offset(symbol)
            if offset != -1:
                # The pairwise maximum is only a lower bound: a window that
                # separates symbol from one neighbour can still collide with
                # another, so confirm (or advance to) a globally unique window.
                offset = self._first_unique_offset(symbol, offset)

            if offset == -1:
                self.cant_match.append(symbol)
            else:
                self.results.append(result(symbol, offset))

    def _pairwise_offset(self, symbol):
        """
        Return the largest first-difference offset of symbol against every
        other symbol, or -1 if any single comparison reports no usable offset.
        """
        offset = 0
        for other in self.symbols:
            if other == symbol:
                continue
            k = self.find_unique_offset(symbol, other)
            if k == -1:
                return -1
            offset = max(offset, k)
        return offset

    def _first_unique_offset(self, symbol, start):
        """
        Return the smallest offset >= start whose 4-byte window in symbol
        differs from the window at that offset in every other symbol, or -1.

        Other symbols too short to supply four bytes at an offset are not
        compared there (their slice is shorter than four characters).
        """
        padded = symbol + "\0"
        others = [other + "\0" for other in self.symbols if other != symbol]
        for offset in range(start, len(padded) - 3):
            window = padded[offset:offset + 4]
            if not any(other[offset:offset + 4] == window for other in others):
                return offset
        return -1

    def find_unique_offset(self, symbol1, symbol2):
        """
        Finds a unique offset between two symbols.

        Both symbols are NUL-terminated, then compared 4 bytes at a time.
        Returns the first offset at which the windows differ. If every window
        both symbols can supply is equal, returns the first offset past the
        shorter symbol's last window when symbol1 is the longer one, else -1.
        Also returns -1 when either symbol is shorter than two characters.
        """
        symbol1 += "\0"
        symbol2 += "\0"

        looplen = min(len(symbol1), len(symbol2)) - 3
        if looplen < 0:
            return -1

        for i in range(looplen):
            if symbol1[i:i + 4] != symbol2[i:i + 4]:
                return i

        # No differing window in the shared region.
        if len(symbol1) > len(symbol2):
            return looplen
        return -1


class result(object):
    """
    Helper class to store and print the offset / int pairs for a given symbol.
    """
    def __init__(self, name, offset):
        self.name = name
        self.set_offset(offset)

    def set_offset(self, offset):
        """Store offset and recompute the hex int found there in the NUL-terminated name."""
        self.offset = offset
        self.int = calc_int(self.name + "\0", offset)

    def __str__(self):
        return "%s[%i] = 0x%s" % (self.name, self.offset, self.int)


if __name__ == "__main__":
    try:
        # Skip lines that are empty after stripping: an empty symbol would
        # make every pairwise comparison fail and push all symbols into
        # cant_match.
        names = [name for name in (line.strip() for line in sys.stdin) if name]
        r = results(names)

        # Single-argument print() calls behave the same on Python 2 and 3.
        for p in r.results:
            print(p)

        if r.cant_match:
            print("\nDIDN'T WORK (%i):" % len(r.cant_match))
            for n in r.cant_match:
                print(n)
    except KeyboardInterrupt:
        pass

# The archive listing continues below; the text that shared this physical line
# with the end of symbolyzer.py is kept here as comments so nothing is lost.
# --------------------------------------------------------------------------------
# /run_bin.c:
# --------------------------------------------------------------------------------
# /*************************************************************************************
#  * Author: Stephanie Archibald
#  * Copyright (c) 2017 Cylance Inc. All rights reserved.
* 4 | * * 5 | * Redistribution and use in source and binary forms, with or without modification, * 6 | * are permitted provided that the following conditions are met: * 7 | * * 8 | * 1. Redistributions of source code must retain the above copyright notice, this * 9 | * list of conditions and the following disclaimer. * 10 | * * 11 | * 2. Redistributions in binary form must reproduce the above copyright notice, * 12 | * this list of conditions and the following disclaimer in the documentation and/or * 13 | * other materials provided with the distribution. * 14 | * * 15 | * 3. Neither the name of the copyright holder nor the names of its contributors * 16 | * may be used to endorse or promote products derived from this software without * 17 | * specific prior written permission. * 18 | * * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * 20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * 21 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * 22 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR * 23 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 24 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * 25 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * 26 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * 28 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* 29 | * * 30 | *************************************************************************************/ 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | #include 43 | #include 44 | #include 45 | 46 | #define EXECUTABLE_BASE_ADDR 0x100000000 47 | #define DYLD_BASE 0x00007fff5fc00000 48 | 49 | int IS_SIERRA = -1; 50 | 51 | int is_sierra(void) { 52 | // returns 1 if running on Sierra, 0 otherwise 53 | // this works because /bin/rcp was removed in Sierra 54 | if(IS_SIERRA == -1) { 55 | struct stat statbuf; 56 | IS_SIERRA = (stat("/bin/rcp", &statbuf) != 0); 57 | } 58 | return IS_SIERRA; 59 | } 60 | 61 | int find_macho(unsigned long addr, unsigned long *base, unsigned int increment, unsigned int dereference) { 62 | unsigned long ptr; 63 | 64 | // find a Mach-O header by searching from address. 65 | *base = 0; 66 | 67 | while(1) { 68 | ptr = addr; 69 | if(dereference) ptr = *(unsigned long *)ptr; 70 | chmod((char *)ptr, 0777); 71 | if(errno == 2 /*ENOENT*/ && 72 | ((int *)ptr)[0] == 0xfeedfacf /*MH_MAGIC_64*/) { 73 | *base = ptr; 74 | return 0; 75 | } 76 | 77 | addr += increment; 78 | } 79 | return 1; 80 | } 81 | 82 | int find_epc(unsigned long base, struct entry_point_command **entry) { 83 | // find the entry point command by searching through base's load commands 84 | 85 | struct mach_header_64 *mh; 86 | struct load_command *lc; 87 | 88 | unsigned long text = 0; 89 | 90 | *entry = NULL; 91 | 92 | mh = (struct mach_header_64 *)base; 93 | lc = (struct load_command *)(base + sizeof(struct mach_header_64)); 94 | for(int i=0; incmds; i++) { 95 | if(lc->cmd == LC_MAIN) { //0x80000028 96 | *entry = (struct entry_point_command *)lc; 97 | return 0; 98 | } 99 | 100 | lc = (struct load_command *)((unsigned long)lc + lc->cmdsize); 101 | } 102 | 103 | return 1; 104 | } 105 | 106 | unsigned long resolve_symbol(unsigned long base, unsigned int offset, unsigned int match) { 107 | // Parse the 
symbols in the Mach-O image at base and return the address of the one 108 | // matched by the offset / int pair (offset, match) 109 | struct load_command *lc; 110 | struct segment_command_64 *sc, *linkedit, *text; 111 | struct symtab_command *symtab; 112 | struct nlist_64 *nl; 113 | 114 | char *strtab; 115 | 116 | symtab = 0; 117 | linkedit = 0; 118 | text = 0; 119 | 120 | lc = (struct load_command *)(base + sizeof(struct mach_header_64)); 121 | for(int i=0; i<((struct mach_header_64 *)base)->ncmds; i++) { 122 | if(lc->cmd == 0x2/*LC_SYMTAB*/) { 123 | symtab = (struct symtab_command *)lc; 124 | } else if(lc->cmd == 0x19/*LC_SEGMENT_64*/) { 125 | sc = (struct segment_command_64 *)lc; 126 | switch(*((unsigned int *)&((struct segment_command_64 *)lc)->segname[2])) { //skip __ 127 | case 0x4b4e494c: //LINK 128 | linkedit = sc; 129 | break; 130 | case 0x54584554: //TEXT 131 | text = sc; 132 | break; 133 | } 134 | } 135 | lc = (struct load_command *)((unsigned long)lc + lc->cmdsize); 136 | } 137 | 138 | if(!linkedit || !symtab || !text) return -1; 139 | 140 | unsigned long file_slide = linkedit->vmaddr - text->vmaddr - linkedit->fileoff; 141 | strtab = (char *)(base + file_slide + symtab->stroff); 142 | 143 | nl = (struct nlist_64 *)(base + file_slide + symtab->symoff); 144 | for(int i=0; insyms; i++) { 145 | char *name = strtab + nl[i].n_un.n_strx; 146 | if(*(unsigned int *)&name[offset] == match) { 147 | if(is_sierra()) { 148 | return base + nl[i].n_value; 149 | } else { 150 | return base - DYLD_BASE + nl[i].n_value; 151 | } 152 | } 153 | } 154 | 155 | return -1; 156 | } 157 | 158 | int load_from_disk(char *filename, char **buf, unsigned int *size) { 159 | /* 160 | What, you say? this isn't running from memory! You're loading from disk!! 161 | 162 | Put down the pitchforks, please. Yes, this reads a binary from disk...into 163 | memory. The code is then executed from memory. This here is a POC; in 164 | real life you would probably want to read into buf from a socket. 
165 | */ 166 | int fd; 167 | struct stat s; 168 | 169 | if((fd = open(filename, O_RDONLY)) == -1) return 1; 170 | if(fstat(fd, &s)) return 1; 171 | 172 | *size = s.st_size; 173 | 174 | if((*buf = mmap(NULL, (*size) * sizeof(char), PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANON, -1, 0)) == MAP_FAILED) return 1; 175 | if(read(fd, *buf, *size * sizeof(char)) != *size) { 176 | free(*buf); 177 | *buf = NULL; 178 | return 1; 179 | } 180 | 181 | close(fd); 182 | 183 | return 0; 184 | } 185 | 186 | int load_and_exec(char *filename, unsigned long dyld) { 187 | // Load the binary specified by filename using dyld 188 | char *binbuf = NULL; 189 | unsigned int size; 190 | unsigned long addr; 191 | 192 | NSObjectFileImageReturnCode(*create_file_image_from_memory)(const void *, size_t, NSObjectFileImage *) = NULL; 193 | NSModule (*link_module)(NSObjectFileImage, const char *, unsigned long) = NULL; 194 | 195 | //resolve symbols for NSCreateFileImageFromMemory & NSLinkModule 196 | addr = resolve_symbol(dyld, 25, 0x4d6d6f72); 197 | if(addr == -1) { 198 | fprintf(stderr, "Could not resolve symbol: _sym[25] == 0x4d6d6f72.\n"); 199 | goto err; 200 | } 201 | create_file_image_from_memory = (NSObjectFileImageReturnCode (*)(const void *, size_t, NSObjectFileImage *)) addr; 202 | 203 | addr = resolve_symbol(dyld, 4, 0x4d6b6e69); 204 | if(addr == -1) { 205 | fprintf(stderr, "Could not resolve symbol: _sym[4] == 0x4d6b6e69.\n"); 206 | goto err; 207 | } 208 | link_module = (NSModule (*)(NSObjectFileImage, const char *, unsigned long)) addr; 209 | 210 | // load filename into a buf in memory 211 | if(load_from_disk(filename, &binbuf, &size)) goto err; 212 | 213 | // change the filetype to a bundle 214 | int type = ((int *)binbuf)[3]; 215 | if(type != 0x8) ((int *)binbuf)[3] = 0x8; //change to mh_bundle type 216 | 217 | // create file image 218 | NSObjectFileImage fi; 219 | if(create_file_image_from_memory(binbuf, size, &fi) != 1) { 220 | fprintf(stderr, "Could not create image.\n"); 
221 | goto err; 222 | } 223 | 224 | // link image 225 | NSModule nm = link_module(fi, "mytest", NSLINKMODULE_OPTION_PRIVATE | 226 | NSLINKMODULE_OPTION_BINDNOW); 227 | if(!nm) { 228 | fprintf(stderr, "Could not link image.\n"); 229 | goto err; 230 | } 231 | 232 | // find entry point and call it 233 | if(type == 0x2) { //mh_execute 234 | unsigned long execute_base; 235 | struct entry_point_command *epc; 236 | 237 | if(find_macho((unsigned long)nm, &execute_base, sizeof(int), 1)) { 238 | fprintf(stderr, "Could not find execute_base.\n"); 239 | goto err; 240 | } 241 | 242 | if(find_epc(execute_base, &epc)) { 243 | fprintf(stderr, "Could not find ec.\n"); 244 | goto err; 245 | } 246 | 247 | int(*main)(int, char**, char**, char**) = (int(*)(int, char**, char**, char**))(execute_base + epc->entryoff); 248 | char *argv[]={"test", NULL}; 249 | int argc = 1; 250 | char *env[] = {NULL}; 251 | char *apple[] = {NULL}; 252 | return main(argc, argv, env, apple); 253 | } 254 | err: 255 | if(binbuf) free(binbuf); 256 | return 1; 257 | } 258 | 259 | int main(int ac, char **av) { 260 | 261 | if(ac != 2) { 262 | fprintf(stderr, "usage: %s \n", av[0]); 263 | exit(1); 264 | } 265 | 266 | unsigned long binary, dyld; 267 | 268 | // find dyld based on os version 269 | if(is_sierra()) { 270 | if(find_macho(EXECUTABLE_BASE_ADDR, &binary, 0x1000, 0)) return 1; 271 | if(find_macho(binary + 0x1000, &dyld, 0x1000, 0)) return 1; 272 | } else { 273 | if(find_macho(DYLD_BASE, &dyld, 0x1000, 0)) return 1; 274 | } 275 | 276 | // load and execute the specified binary 277 | return load_and_exec(av[1], dyld); 278 | } 279 | --------------------------------------------------------------------------------