├── README.md ├── afl-llvm.diff ├── blocklist.txt ├── build.sh ├── build_afl_llvm.sh ├── build_all_qemu.sh ├── build_qemu_first.sh ├── build_qemu_with_instr.sh ├── cfg.py ├── config.py ├── did_digfuzz_contribute.py ├── digfuzz.py ├── dump_diff.sh ├── dump_digfuzz.c ├── instr_interface.py ├── llvm_instr.py ├── preach.h ├── preach_util.h ├── preachfuzz.py ├── qemu_instr.py ├── qemu_qsym_harness.c ├── qemuafl_bb_aggr_shm.diff ├── qemuafl_bb_stdout.diff ├── qsym.diff ├── qsym_ce.py ├── run_afl.py ├── symcc_ce.py ├── targets ├── bzip2 │ ├── build.sh │ └── fuzz.c ├── json-c │ └── build.sh └── wasm3 │ └── build.sh ├── test.c ├── test.sh └── utils.py /README.md: -------------------------------------------------------------------------------- 1 | # DigFuzz 2 | *Unofficial* implementation of DigFuzz from the paper "Send Hardest Problems My Way: Probabilistic Path Prioritization for Hybrid Fuzzing" 3 | 4 | 5 | 6 | ### Setup Executor 7 | The executor is the part of DigFuzz that collects coverage and basic-block (bb) hit count data. Currently, we support QEMU or LLVM. 8 | 9 | **QEMU**: 10 | 11 | QEMU is instrumented to dump bb hit counts to a shared memory region that DigFuzz can read. 12 | 13 | ``` bash 14 | ./build_all_qemu.sh 15 | ``` 16 | 17 | **LLVM**: 18 | 19 | The `trace-pc-guard` feature of LLVM is used to provide bb hit count information. Although this is faster than using QEMU, translating addresses in the LLVM-instrumented binary to those in the uninstrumented binary (concolic execution tools have difficulty with instrumented binaries) may not be accurate. 20 | 21 | *TODO* 22 | 23 | ### Setup Concolic Execution Tools 24 | The concolic execution tool is the part of DigFuzz that conducts directed solving. Fundamentally, it is a function that takes in a binary and a list of code locations (representing a path), then provides an input that makes the binary go through this path. Currently, we support QSYM or SymCC. 
25 | 26 | **QSYM**: 27 | 28 | Given that QSYM is built on an old version of LLVM, you can use a remote instance running Ubuntu 14.04 and set up QSYM there. 29 | 30 | Run the following commands on the remote host: 31 | ``` bash 32 | git clone https://github.com/sslab-gatech/qsym.git && cd qsym 33 | vim qsym.diff # copy content from qsym.diff in this repo to remote 34 | git apply qsym.diff 35 | ./setup.sh 36 | pip install . 37 | ``` 38 | 39 | Then, edit `config.py` to set the credentials for accessing that instance and the installation location of QSYM. 40 | 41 | **SymCC**: 42 | 43 | *TODO* 44 | 45 | 46 | ### Start Fuzzing (QEMU + QSYM) 47 | Compile your target with `LLVMFuzzerTestOneInput` exported and `-no-pie` enabled, which should yield a .o file 48 | ```bash 49 | CFLAGS="-no-pie" CXXFLAGS="-no-pie" make fuzzer 50 | ``` 51 | Edit `config.py` to set the path of the .o file 52 | ```bash 53 | vim config.py 54 | ``` 55 | 56 | If your system has not been configured for AFL yet, run 57 | ```bash 58 | sudo ./AFLplusplus/afl-system-config 59 | ``` 60 | 61 | In one terminal (AFL side) 62 | ```bash 63 | python3 run_afl.py 64 | ``` 65 | In another terminal (QSYM side) 66 | ```bash 67 | python3 digfuzz.py 68 | ``` 69 | 70 | 71 | ### QEMU-based Instrumentation Workflow 72 | QEMU side: 73 | 1. Dump BB hit counts to shared memory 74 | 75 | Our side: 76 | 1. Re-execute corpus inputs to build the execution tree. (Not concolic execution, just concrete execution with instrumentation) 77 | 2. Read shared memory and add BB visit counts to the execution tree 78 | 3. Do a DFS on the execution tree to assign probabilities for each edge 79 | 4. Identify missed branches in all corpus traces and build a priority queue for these traces with priority as 1-probability 80 | 5. QSYM performs concolic execution on the missed branch to identify an input 81 | 6. Add the input to the AFL corpus 82 | 83 | 84 | ### LLVM-based Instrumentation Workflow 85 | AFL side: 86 | 1. Compile the binary with two harnesses, one for AFL, another for angr. 87 | 2. 
Instrument AFL binary via Clang to provide block information. 88 | 89 | Our side: 90 | 1. Re-execute corpus inputs to build execution tree. (Not concolic execution, just concrete execution with instrumentation) 91 | 2. For every trace AFL binary gives, add visit count to the execution tree. 92 | 3. Do a DFS on execution to assign probabilities for each edge 93 | 4. Identify missed branch in all corpus traces and build a priority queue for these traces with priority as 1-probability 94 | 5. QSYM do concolic execution on the missed branch to identify input 95 | 6. Add input to AFL corpus 96 | -------------------------------------------------------------------------------- /afl-llvm.diff: -------------------------------------------------------------------------------- 1 | diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c 2 | index 2089ce7..847922d 100644 3 | --- a/instrumentation/afl-compiler-rt.o.c 4 | +++ b/instrumentation/afl-compiler-rt.o.c 5 | @@ -1238,41 +1238,46 @@ __attribute__((constructor(0))) void __afl_auto_first(void) { 6 | 7 | The first function (__sanitizer_cov_trace_pc_guard) is called back on every 8 | edge (as opposed to every basic block). */ 9 | - 10 | +#define instr_code 11 | +#include "preach.h" 12 | +//#include "../preach_util.h" 13 | void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { 14 | + if (__afl_debug) { 15 | + fprintf(stderr, 16 | + "preach2\n"); 17 | + } 18 | + pc_trace_guard__preach(guard); 19 | 20 | // For stability analysis, if you want to know to which function unstable 21 | // edge IDs belong - uncomment, recompile+install llvm_mode, recompile 22 | // the target. libunwind and libbacktrace are better solutions. 23 | // Set AFL_DEBUG_CHILD=1 and run afl-fuzz with 2>file to capture 24 | // the backtrace output 25 | - /* 26 | - uint32_t unstable[] = { ... 
unstable edge IDs }; 27 | - uint32_t idx; 28 | - char bt[1024]; 29 | - for (idx = 0; i < sizeof(unstable)/sizeof(uint32_t); i++) { 30 | - 31 | - if (unstable[idx] == __afl_area_ptr[*guard]) { 32 | - 33 | - int bt_size = backtrace(bt, 256); 34 | - if (bt_size > 0) { 35 | - 36 | - char **bt_syms = backtrace_symbols(bt, bt_size); 37 | - if (bt_syms) { 38 | - 39 | - fprintf(stderr, "DEBUG: edge=%u caller=%s\n", unstable[idx], 40 | - bt_syms[0]); 41 | - free(bt_syms); 42 | - 43 | - } 44 | +// uint32_t unstable[] = { ... unstable edge IDs }; 45 | +// uint32_t idx; 46 | +// char bt[1024]; 47 | +// for (idx = 0; i < sizeof(unstable)/sizeof(uint32_t); i++) { 48 | +// 49 | +// if (unstable[idx] == __afl_area_ptr[*guard]) { 50 | +// 51 | +// int bt_size = backtrace(bt, 256); 52 | +// if (bt_size > 0) { 53 | +// 54 | +// char **bt_syms = backtrace_symbols(bt, bt_size); 55 | +// if (bt_syms) { 56 | +// 57 | +// fprintf(stderr, "DEBUG: edge=%u caller=%s\n", unstable[idx], 58 | +// bt_syms[0]); 59 | +// free(bt_syms); 60 | +// 61 | +// } 62 | +// 63 | +// } 64 | +// 65 | +// } 66 | +// 67 | +// } 68 | 69 | - } 70 | - 71 | - } 72 | - 73 | - } 74 | - 75 | - */ 76 | 77 | #if (LLVM_VERSION_MAJOR < 9) 78 | 79 | @@ -1292,7 +1297,7 @@ void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { 80 | still touch the bitmap, but in a fairly harmless way. 
*/ 81 | 82 | void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { 83 | - 84 | +1; 85 | u32 inst_ratio = 100; 86 | char *x; 87 | 88 | @@ -1309,7 +1314,7 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { 89 | } 90 | 91 | if (start == stop || *start) return; 92 | - 93 | + pc_trace_guard_init__preach(); 94 | x = getenv("AFL_INST_RATIO"); 95 | if (x) inst_ratio = (u32)atoi(x); 96 | 97 | @@ -1638,69 +1643,114 @@ void __cmplog_ins_hook16(uint128_t arg1, uint128_t arg2, uint8_t attr) { 98 | 99 | #endif 100 | 101 | +//void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2) { 102 | +// trace_cmp__preach(arg1, arg2); 103 | +// __cmplog_ins_hook1(arg1, arg2, 0); 104 | +// 105 | +//} 106 | +// 107 | +//void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2) { 108 | +// trace_cmp__preach(arg1, arg2); 109 | +// 110 | +// __cmplog_ins_hook1(arg1, arg2, 0); 111 | +// 112 | +//} 113 | +// 114 | +//void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2) { 115 | +// trace_cmp__preach(arg1, arg2); 116 | +// 117 | +// __cmplog_ins_hook2(arg1, arg2, 0); 118 | +// 119 | +//} 120 | +// 121 | +//void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2) { 122 | +// trace_cmp__preach(arg1, arg2); 123 | +// 124 | +// __cmplog_ins_hook2(arg1, arg2, 0); 125 | +// 126 | +//} 127 | +// 128 | +//void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) { 129 | +// trace_cmp__preach(arg1, arg2); 130 | +// 131 | +// __cmplog_ins_hook4(arg1, arg2, 0); 132 | +// 133 | +//} 134 | +// 135 | +//void __sanitizer_cov_trace_cost_cmp4(uint32_t arg1, uint32_t arg2) { 136 | +// trace_cmp__preach(arg1, arg2); 137 | +// 138 | +// __cmplog_ins_hook4(arg1, arg2, 0); 139 | +// 140 | +//} 141 | +// 142 | +//void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2) { 143 | +// trace_cmp__preach(arg1, arg2); 144 | +// 145 | +// __cmplog_ins_hook8(arg1, arg2, 0); 146 | +// 147 | +//} 148 | +// 149 | +//void 
__sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2) { 150 | +// trace_cmp__preach(arg1, arg2); 151 | +// 152 | +// __cmplog_ins_hook8(arg1, arg2, 0); 153 | +// 154 | +//} 155 | +// 156 | +//#ifdef WORD_SIZE_64 157 | +//void __sanitizer_cov_trace_cmp16(uint128_t arg1, uint128_t arg2) { 158 | +// trace_cmp__preach(arg1, arg2); 159 | +// 160 | +// __cmplog_ins_hook16(arg1, arg2, 0); 161 | +// 162 | +//} 163 | +// 164 | +//void __sanitizer_cov_trace_const_cmp16(uint128_t arg1, uint128_t arg2) { 165 | +// trace_cmp__preach(arg1, arg2); 166 | +// 167 | +// __cmplog_ins_hook16(arg1, arg2, 0); 168 | +// 169 | +//} 170 | + 171 | +//#endif 172 | void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2) { 173 | - 174 | - __cmplog_ins_hook1(arg1, arg2, 0); 175 | - 176 | -} 177 | - 178 | -void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2) { 179 | - 180 | - __cmplog_ins_hook1(arg1, arg2, 0); 181 | - 182 | + trace_cmp__preach(); 183 | } 184 | 185 | void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2) { 186 | - 187 | - __cmplog_ins_hook2(arg1, arg2, 0); 188 | - 189 | -} 190 | - 191 | -void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2) { 192 | - 193 | - __cmplog_ins_hook2(arg1, arg2, 0); 194 | + trace_cmp__preach(); 195 | 196 | } 197 | 198 | void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) { 199 | - 200 | - __cmplog_ins_hook4(arg1, arg2, 0); 201 | - 202 | -} 203 | - 204 | -void __sanitizer_cov_trace_cost_cmp4(uint32_t arg1, uint32_t arg2) { 205 | - 206 | - __cmplog_ins_hook4(arg1, arg2, 0); 207 | + trace_cmp__preach(); 208 | 209 | } 210 | 211 | void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2) { 212 | - 213 | - __cmplog_ins_hook8(arg1, arg2, 0); 214 | - 215 | + trace_cmp__preach(); 216 | } 217 | 218 | -void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2) { 219 | +void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2){ 220 | + trace_cmp__preach(); 221 | 222 | - 
__cmplog_ins_hook8(arg1, arg2, 0); 223 | 224 | } 225 | +void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2){ 226 | + trace_cmp__preach(); 227 | 228 | -#ifdef WORD_SIZE_64 229 | -void __sanitizer_cov_trace_cmp16(uint128_t arg1, uint128_t arg2) { 230 | - 231 | - __cmplog_ins_hook16(arg1, arg2, 0); 232 | 233 | } 234 | +void __sanitizer_cov_trace_const_cmp4(uint32_t arg1, uint32_t arg2){ 235 | + trace_cmp__preach(); 236 | 237 | -void __sanitizer_cov_trace_const_cmp16(uint128_t arg1, uint128_t arg2) { 238 | - 239 | - __cmplog_ins_hook16(arg1, arg2, 0); 240 | 241 | } 242 | +void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2){ 243 | + trace_cmp__preach(); 244 | 245 | -#endif 246 | - 247 | +} 248 | void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) { 249 | 250 | if (unlikely(!__afl_cmp_map)) return; 251 | diff --git a/instrumentation/preach.h b/instrumentation/preach.h 252 | index f4909d6..a43f218 100644 253 | --- a/instrumentation/preach.h 254 | +++ b/instrumentation/preach.h 255 | @@ -4,5 +4,74 @@ 256 | 257 | #ifndef AFL_BUILD_PREACH_H 258 | #define AFL_BUILD_PREACH_H 259 | +#include 260 | +#include 261 | +#include 262 | + 263 | + 264 | + 265 | +struct DCP3_node { 266 | + void* addr; 267 | + uint16_t left; 268 | + uint16_t right; 269 | + bool is_compare; 270 | + long long visit_time; 271 | + uint32_t traversal_depth; 272 | + bool is_exit_node; 273 | +}; 274 | + 275 | +struct DCP3_node* DCP3[100000]; 276 | + 277 | +uint16_t last_visited_dcp3_node = UINT16_MAX; 278 | + 279 | + 280 | +void pc_trace_guard_init__preach(){ 281 | + 282 | +} 283 | + 284 | +void pc_trace_guard__preach(uint32_t* guard){ 285 | + assert(*guard < 100000); 286 | + printf("guard"); 287 | + if (DCP3[*guard] == NULL){ 288 | + DCP3[*guard] = malloc(sizeof(struct DCP3_node)); 289 | + DCP3[*guard]->addr = __builtin_return_address(0); 290 | + DCP3[*guard]->is_compare = false; 291 | + DCP3[*guard]->left = UINT16_MAX; 292 | + DCP3[*guard]->right = UINT16_MAX; 293 | 
+ DCP3[*guard]->visit_time = 1; 294 | + } else { 295 | + if (last_visited_dcp3_node != UINT16_MAX){ 296 | + struct DCP3_node* last_node = DCP3[last_visited_dcp3_node]; 297 | + if (last_node->left != *guard && last_node->right != *guard){ 298 | + if (last_node->left == UINT16_MAX) 299 | + last_node->left = *guard; 300 | + else if (last_node->right == UINT16_MAX) 301 | + last_node->right = *guard; 302 | + else 303 | + assert(0); // ??? getting three children? 304 | + } 305 | + } 306 | + DCP3[*guard]->visit_time++; 307 | + } 308 | + last_visited_dcp3_node = *guard; 309 | +} 310 | + 311 | + 312 | + 313 | +void trace_cmp__preach(){ 314 | + printf("cmp3"); 315 | + if (last_visited_dcp3_node != UINT16_MAX) 316 | + DCP3[last_visited_dcp3_node]->is_compare = true; 317 | +} 318 | + 319 | +void traversal_helper(){ 320 | + 321 | +} 322 | + 323 | +void calculate_prob_all_path(){ 324 | + 325 | +} 326 | 327 | #endif // AFL_BUILD_PREACH_H 328 | + 329 | + 330 | diff --git a/preach_util.h b/preach_util.h 331 | index bd24e5b..2013b12 100644 332 | --- a/preach_util.h 333 | +++ b/preach_util.h 334 | @@ -4,5 +4,28 @@ 335 | 336 | #ifndef AFL_BUILD_PREACH_UTIL_H 337 | #define AFL_BUILD_PREACH_UTIL_H 338 | +#include 339 | +struct DCP3_node { 340 | + void* addr; 341 | + uint16_t left; 342 | + uint16_t right; 343 | + bool is_compare; 344 | + long long visit_time; 345 | + uint32_t traversal_depth; 346 | + bool is_exit_node; 347 | +}; 348 | 349 | + 350 | +extern struct DCP3_node* DCP3[100000]; 351 | +extern uint16_t last_visited_dcp3_node; 352 | +void start__preach(){ 353 | + 354 | +} 355 | + 356 | + 357 | +void commit_instr__preach(){ 358 | + if (last_visited_dcp3_node != UINT16_MAX){ 359 | + DCP3[last_visited_dcp3_node]->is_exit_node = true; 360 | + } 361 | +} 362 | #endif // AFL_BUILD_PREACH_UTIL_H 363 | diff --git a/src/afl-cc.c b/src/afl-cc.c 364 | index 486f746..d6580e5 100644 365 | --- a/src/afl-cc.c 366 | +++ b/src/afl-cc.c 367 | @@ -587,7 +587,7 @@ static void edit_params(u32 argc, 
char **argv, char **envp) { 368 | 369 | #if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0) 370 | #if defined __ANDROID__ || ANDROID 371 | - cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard"; 372 | + cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard,trace-cmp,no-prune"; 373 | instrument_mode = INSTRUMENT_LLVMNATIVE; 374 | #else 375 | if (have_instr_list) { 376 | @@ -597,7 +597,7 @@ static void edit_params(u32 argc, char **argv, char **envp) { 377 | "Using unoptimized trace-pc-guard, due usage of " 378 | "-fsanitize-coverage-allow/denylist, you can use " 379 | "AFL_LLVM_ALLOWLIST/AFL_LLMV_DENYLIST instead.\n"); 380 | - cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard"; 381 | + cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard,trace-cmp,no-prune"; 382 | instrument_mode = INSTRUMENT_LLVMNATIVE; 383 | 384 | } else { 385 | @@ -617,7 +617,7 @@ static void edit_params(u32 argc, char **argv, char **envp) { 386 | SAYF( 387 | "Using unoptimized trace-pc-guard, upgrade to llvm 10.0.1+ for " 388 | "enhanced version.\n"); 389 | - cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard"; 390 | + cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard,trace-cmp,no-prune"; 391 | instrument_mode = INSTRUMENT_LLVMNATIVE; 392 | #else 393 | FATAL("pcguard instrumentation requires llvm 4.0.1+"); 394 | @@ -627,7 +627,7 @@ static void edit_params(u32 argc, char **argv, char **envp) { 395 | } else if (instrument_mode == INSTRUMENT_LLVMNATIVE) { 396 | 397 | #if LLVM_MAJOR >= 4 398 | - cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard"; 399 | + cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard,trace-cmp,no-prune"; 400 | #else 401 | FATAL("pcguard instrumentation requires llvm 4.0.1+"); 402 | #endif 403 | diff --git a/utils/aflpp_driver/aflpp_driver.c b/utils/aflpp_driver/aflpp_driver.c 404 | index c094c42..f5897f9 100644 405 | --- a/utils/aflpp_driver/aflpp_driver.c 406 | +++ 
b/utils/aflpp_driver/aflpp_driver.c 407 | @@ -171,6 +171,8 @@ size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) { 408 | } 409 | 410 | // Execute any files provided as parameters. 411 | +#include "../../preach_util.h" 412 | + 413 | static int ExecuteFilesOnyByOne(int argc, char **argv) { 414 | 415 | unsigned char *buf = (unsigned char *)malloc(MAX_FILE); 416 | @@ -189,6 +191,7 @@ static int ExecuteFilesOnyByOne(int argc, char **argv) { 417 | 418 | printf("Reading %zu bytes from %s\n", length, argv[i]); 419 | LLVMFuzzerTestOneInput(buf, length); 420 | + commit_instr__preach(); 421 | printf("Execution successful.\n"); 422 | 423 | } 424 | @@ -229,6 +232,7 @@ int main(int argc, char **argv) { 425 | sleep(1); 426 | 427 | } 428 | + start__preach(); 429 | 430 | output_file = stderr; 431 | maybe_duplicate_stderr(); 432 | @@ -299,11 +303,10 @@ int main(int argc, char **argv) { 433 | 434 | num_runs++; 435 | LLVMFuzzerTestOneInput(__afl_fuzz_ptr, *__afl_fuzz_len); 436 | - 437 | + commit_instr__preach(); 438 | } 439 | 440 | } 441 | - 442 | printf("%s: successfully executed %d input(s)\n", argv[0], num_runs); 443 | 444 | } 445 | -------------------------------------------------------------------------------- /blocklist.txt: -------------------------------------------------------------------------------- 1 | fun:main 2 | fun:trace_cmp__preach -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shouc/digfuzz/c57b68870122a91429afa55ed3f1a6493e7fdf95/build.sh -------------------------------------------------------------------------------- /build_afl_llvm.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | # Copyright 2019 Google Inc. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | ################################################################################ 17 | 18 | set -e 19 | CC=${CC:-clang} 20 | CXX=${CXX:-clang++} 21 | cd afl-build 22 | #make clean 23 | 24 | make source-only -j10 25 | ar ru FuzzingEngine.a afl-compiler-rt.o utils/aflpp_driver/aflpp_driver.o 26 | 27 | cp -f FuzzingEngine.a afl-fuzz afl-showmap ../ 28 | echo "Success: link fuzz target against FuzzingEngine.a!" 29 | -------------------------------------------------------------------------------- /build_all_qemu.sh: -------------------------------------------------------------------------------- 1 | # build AFL++ 2 | git clone https://github.com/AFLplusplus/AFLplusplus.git 3 | cd AFLplusplus 4 | make all 5 | cd qemu_mode 6 | ./build_qemu_support.sh 7 | cd ../.. 8 | 9 | # headers 10 | mkdir afl_shared 11 | cp AFLplusplus/include/*.h afl_shared/ 12 | 13 | git clone https://github.com/AFLplusplus/qemuafl.git 14 | 15 | # build aggr qemu 16 | cp -R qemuafl ./qemuafl_aggr 17 | cd qemuafl_aggr 18 | cp ../qemuafl_bb_aggr_shm.diff . 19 | git apply ./qemuafl_bb_aggr_shm.diff 20 | cd .. 21 | ./build_qemu_first.sh qemuafl_aggr 22 | mv afl-qemu-trace AFLplusplus/ 23 | 24 | # build qemu stdout 25 | cp -R qemuafl ./qemuafl_stdout 26 | cd qemuafl_stdout 27 | cp ../qemuafl_bb_stdout.diff . 28 | git apply ./qemuafl_bb_stdout.diff 29 | cd .. 
30 | ./build_qemu_first.sh qemuafl_stdout 31 | mv afl-qemu-trace qemu_stdout 32 | 33 | # build qemu dumper 34 | gcc dump_digfuzz.c -o dumper -lrt 35 | -------------------------------------------------------------------------------- /build_qemu_first.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # american fuzzy lop++ - QEMU build script 4 | # -------------------------------------- 5 | # 6 | # Originally written by Andrew Griffiths and 7 | # Michal Zalewski 8 | # 9 | # TCG instrumentation and block chaining support by Andrea Biondo 10 | # 11 | # 12 | # QEMU 5+ port, TCG thread-safety, CompareCoverage and NeverZero 13 | # counters by Andrea Fioraldi 14 | # 15 | # Copyright 2015, 2016, 2017 Google Inc. All rights reserved. 16 | # Copyright 2019-2020 AFLplusplus Project. All rights reserved. 17 | # 18 | # Licensed under the Apache License, Version 2.0 (the "License"); 19 | # you may not use this file except in compliance with the License. 20 | # You may obtain a copy of the License at: 21 | # 22 | # https://www.apache.org/licenses/LICENSE-2.0 23 | # 24 | # This script downloads, patches, and builds a version of QEMU with 25 | # minor tweaks to allow non-instrumented binaries to be run under 26 | # afl-fuzz. 27 | # 28 | # The modifications reside in patches/*. The standalone QEMU binary 29 | # will be written to ../afl-qemu-trace. 30 | # 31 | 32 | 33 | cd $1 || exit 1 34 | 35 | echo "[*] Making sure imported headers matches" 36 | cp "../afl_shared/config.h" "./qemuafl/imported/" || exit 1 37 | cp "../afl_shared/cmplog.h" "./qemuafl/imported/" || exit 1 38 | cp "../afl_shared/snapshot-inl.h" "./qemuafl/imported/" || exit 1 39 | cp "../afl_shared/types.h" "./qemuafl/imported/" || exit 1 40 | 41 | if [ -n "$HOST" ]; then 42 | echo "[+] Configuring host architecture to $HOST..." 43 | CROSS_PREFIX=$HOST- 44 | else 45 | CROSS_PREFIX= 46 | fi 47 | 48 | echo "[*] Configuring QEMU for $CPU_TARGET..." 
49 | 50 | ORIG_CPU_TARGET="$CPU_TARGET" 51 | 52 | if [ "$ORIG_CPU_TARGET" = "" ]; then 53 | CPU_TARGET="`uname -m`" 54 | test "$CPU_TARGET" = "i686" && CPU_TARGET="i386" 55 | test "$CPU_TARGET" = "arm64v8" && CPU_TARGET="aarch64" 56 | case "$CPU_TARGET" in 57 | *arm*) 58 | CPU_TARGET="arm" 59 | ;; 60 | esac 61 | fi 62 | 63 | echo "Building for CPU target $CPU_TARGET" 64 | 65 | # --enable-pie seems to give a couple of exec's a second performance 66 | # improvement, much to my surprise. Not sure how universal this is.. 67 | QEMU_CONF_FLAGS=" \ 68 | --audio-drv-list= \ 69 | --disable-blobs \ 70 | --disable-bochs \ 71 | --disable-brlapi \ 72 | --disable-bsd-user \ 73 | --disable-bzip2 \ 74 | --disable-cap-ng \ 75 | --disable-cloop \ 76 | --disable-curl \ 77 | --disable-curses \ 78 | --disable-dmg \ 79 | --disable-fdt \ 80 | --disable-gcrypt \ 81 | --disable-glusterfs \ 82 | --disable-gnutls \ 83 | --disable-gtk \ 84 | --disable-guest-agent \ 85 | --disable-iconv \ 86 | --disable-libiscsi \ 87 | --disable-libnfs \ 88 | --disable-libssh \ 89 | --disable-libusb \ 90 | --disable-linux-aio \ 91 | --disable-live-block-migration \ 92 | --disable-lzo \ 93 | --disable-nettle \ 94 | --disable-numa \ 95 | --disable-opengl \ 96 | --disable-parallels \ 97 | --disable-plugins \ 98 | --disable-qcow1 \ 99 | --disable-qed \ 100 | --disable-rbd \ 101 | --disable-rdma \ 102 | --disable-replication \ 103 | --disable-sdl \ 104 | --disable-seccomp \ 105 | --disable-sheepdog \ 106 | --disable-smartcard \ 107 | --disable-snappy \ 108 | --disable-spice \ 109 | --disable-system \ 110 | --disable-tools \ 111 | --disable-tpm \ 112 | --disable-usb-redir \ 113 | --disable-vde \ 114 | --disable-vdi \ 115 | --disable-vhost-crypto \ 116 | --disable-vhost-kernel \ 117 | --disable-vhost-net \ 118 | --disable-vhost-scsi \ 119 | --disable-vhost-user \ 120 | --disable-vhost-vdpa \ 121 | --disable-vhost-vsock \ 122 | --disable-virglrenderer \ 123 | --disable-virtfs \ 124 | --disable-vnc \ 125 | 
--disable-vnc-jpeg \ 126 | --disable-vnc-png \ 127 | --disable-vnc-sasl \ 128 | --disable-vte \ 129 | --disable-vvfat \ 130 | --disable-xen \ 131 | --disable-xen-pci-passthrough \ 132 | --disable-xfsctl \ 133 | --target-list="${CPU_TARGET}-linux-user" \ 134 | --without-default-devices \ 135 | " 136 | 137 | if [ -n "${CROSS_PREFIX}" ]; then 138 | 139 | QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS --cross-prefix=$CROSS_PREFIX" 140 | 141 | fi 142 | 143 | if [ "$STATIC" = "1" ]; then 144 | 145 | echo Building STATIC binary 146 | 147 | # static PIE causes https://github.com/AFLplusplus/AFLplusplus/issues/892 148 | QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \ 149 | --static --disable-pie \ 150 | --extra-cflags=-DAFL_QEMU_STATIC_BUILD=1 \ 151 | " 152 | 153 | else 154 | 155 | QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} --enable-pie " 156 | 157 | fi 158 | 159 | if [ "$DEBUG" = "1" ]; then 160 | 161 | echo Building DEBUG binary 162 | 163 | # --enable-gcov might go here but incurs a mesonbuild error on meson 164 | # versions prior to 0.56: 165 | # https://github.com/qemu/meson/commit/903d5dd8a7dc1d6f8bef79e66d6ebc07c 166 | QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \ 167 | --disable-strip \ 168 | --enable-debug \ 169 | --enable-debug-info \ 170 | --enable-debug-mutex \ 171 | --enable-debug-stack-usage \ 172 | --enable-debug-tcg \ 173 | --enable-qom-cast-debug \ 174 | --enable-werror \ 175 | " 176 | 177 | else 178 | 179 | QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \ 180 | --disable-debug-info \ 181 | --disable-debug-mutex \ 182 | --disable-debug-tcg \ 183 | --disable-qom-cast-debug \ 184 | --disable-stack-protector \ 185 | --disable-werror \ 186 | " 187 | 188 | fi 189 | 190 | if [ "$PROFILING" = "1" ]; then 191 | 192 | echo Building PROFILED binary 193 | 194 | QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \ 195 | --enable-gprof \ 196 | --enable-profiler \ 197 | " 198 | 199 | fi 200 | 201 | # shellcheck disable=SC2086 202 | ./configure $QEMU_CONF_FLAGS || exit 1 203 | 204 | echo "[+] Configuration complete." 
205 | 206 | echo "[*] Attempting to build QEMU (fingers crossed!)..." 207 | 208 | make -j `nproc` || exit 1 209 | 210 | echo "[+] Build process successful!" 211 | 212 | echo "[*] Copying binary..." 213 | 214 | cp -f "build/${CPU_TARGET}-linux-user/qemu-${CPU_TARGET}" "../afl-qemu-trace" || exit 1 215 | 216 | cd .. 217 | ls -l ../afl-qemu-trace || exit 1 218 | 219 | echo "[+] Successfully created '../afl-qemu-trace'." 220 | -------------------------------------------------------------------------------- /build_qemu_with_instr.sh: -------------------------------------------------------------------------------- 1 | cd $1 2 | 3 | echo "[*] Configuring QEMU for $CPU_TARGET..." 4 | 5 | ORIG_CPU_TARGET="$CPU_TARGET" 6 | 7 | if [ "$ORIG_CPU_TARGET" = "" ]; then 8 | CPU_TARGET="`uname -m`" 9 | test "$CPU_TARGET" = "i686" && CPU_TARGET="i386" 10 | test "$CPU_TARGET" = "arm64v8" && CPU_TARGET="aarch64" 11 | case "$CPU_TARGET" in 12 | *arm*) 13 | CPU_TARGET="arm" 14 | ;; 15 | esac 16 | fi 17 | echo "[+] Configuration complete." 18 | 19 | echo "[*] Attempting to build QEMU (fingers crossed!)..." 20 | make clean 21 | make -j `nproc` || exit 1 22 | 23 | echo "[+] Build process successful!" 24 | 25 | echo "[*] Copying binary..." 
26 | cp -f "build/${CPU_TARGET}-linux-user/qemu-${CPU_TARGET}" "../afl-qemu-trace" || exit 1 27 | 28 | cd ../ -------------------------------------------------------------------------------- /cfg.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from functools import reduce 3 | 4 | import angr 5 | import claripy 6 | 7 | 8 | 9 | def get_result_solver(s, e, **kwargs): 10 | s.reload_solver() 11 | cast_vals = [s._cast_to(e, v, bytes) for v in s._eval(e, 1, **kwargs)] 12 | return cast_vals 13 | 14 | 15 | def main(exe, arg): 16 | prog = Program(exe) 17 | prog.set_input(arg) 18 | res = prog.run() 19 | print(res.solver.constraints) 20 | i = 0 21 | prog.pop_added_cons(res) 22 | print("constraints: %d" % len(res.solver.constraints)) 23 | print(res.solver.constraints) 24 | print(get_result_solver(res.solver, prog.arg1)) 25 | print('done') 26 | 27 | 28 | 29 | if __name__ == '__main__': 30 | # assert len(sys.argv) >= 3 31 | main("test.angr", "fuckfff") 32 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pwn 4 | 5 | # Target to fuzz 6 | OBJ_PATH = "test.c.o" 7 | 8 | # AFL slave count 9 | AFL_NUM_SLAVE = 1 10 | 11 | # NO NEED TO CHANGE 12 | LOCAL_UNINSTRUMENTED_EXEC_PATH = "./harness" 13 | REMOTE_UNINSTRUMENTED_EXEC_PATH = "/tmp/harness" 14 | 15 | QEMU_BIN = "./qemu_stdout" 16 | 17 | DUMPER_PATH = "./dumper" 18 | SHM_KEY = f"bz.shm" 19 | 20 | QEMU_TIMEOUT = 30 21 | QSYM_TIMEOUT = 30 22 | 23 | # NO NEED TO CHANGE IF USING run_afl.py 24 | AFL_FUZZ_PATH = "./AFLplusplus/afl-fuzz" 25 | AFL_IN_PATH = "./in" 26 | AFL_OUT_PATH = "./out" 27 | AFL_SLAVE_NAME = "s" 28 | AFL_MASTER_NAME = "m" 29 | AFL_CORPUS_PATH = f"{AFL_OUT_PATH}/{AFL_MASTER_NAME}/queue" # the directory of the afl master corpus 30 | 31 | # QSYM Remote setup 32 | USE_SSH = True 33 | if not USE_SSH: 34 | 
os.system(f"mkdir /tmp/digfuzz && cp {LOCAL_UNINSTRUMENTED_EXEC_PATH} /tmp/digfuzz/harness") 35 | PIN_SH = "/workdir/qsym/third_party/pin-2.14-71313-gcc.4.4.7-linux/pin.sh" # the location of qsym script remote 36 | QSYM_OBJECT_PATH = "/workdir/qsym/qsym/pintool/obj-intel64/libqsym.so" # the location of qsym pin obj remote 37 | QSYM_IMAGE_NAME = "qsym" 38 | QSYM_CMD = ["docker", "-v", "/tmp/digfuzz:/tmp/digfuzz", QSYM_IMAGE_NAME] 39 | 40 | else: 41 | QSYM_HOST = '18.237.37.59' 42 | QSYM_UN = 'ubuntu' 43 | QSYM_KEYFILE = "./seem-priv-key.PEM" 44 | QSYM_SSH_CONN = pwn.ssh(host=QSYM_HOST, user=QSYM_UN, keyfile=QSYM_KEYFILE) 45 | PIN_SH = "/home/ubuntu/qsym/third_party/pin-2.14-71313-gcc.4.4.7-linux/pin.sh" # the location of qsym script remote 46 | QSYM_OBJECT_PATH = "/home/ubuntu/qsym/qsym/pintool/obj-intel64/libqsym.so" # the location of qsym pin obj remote 47 | 48 | 49 | -------------------------------------------------------------------------------- /did_digfuzz_contribute.py: -------------------------------------------------------------------------------- 1 | import os 2 | import config 3 | import qemu_instr 4 | 5 | _executor = qemu_instr.STDINExecutorQEMU(config.QEMU_BIN, config.LOCAL_UNINSTRUMENTED_EXEC_PATH) 6 | qemu = qemu_instr.QEMUInstr(_executor, config.DUMPER_PATH, shm_key=config.SHM_KEY) 7 | tests = list(filter(lambda x: "10000" not in x and not x.startswith("."), os.listdir("out/m/queue"))) 8 | qemu.build_execution_tree(["out/m/queue/" + x for x in tests]) 9 | 10 | tests = list(filter(lambda x: "10000" in x and not x.startswith("."), os.listdir("out/m/queue"))) 11 | 12 | for filename in ["out/m/queue/" + x for x in tests]: 13 | with open(filename, "rb") as fp: 14 | corpus_content = fp.read() 15 | qemu.executor.execute_test_case(corpus_content) 16 | try: 17 | trace = qemu.executor.dump_trace() 18 | except EOFError as e: 19 | print(f"[Crash] Found crash {filename} with error {e}, skipping") 20 | qemu.executor.restart_bin() 21 | continue 22 | for i in trace: 
"""DigFuzz driver loop.

Repeatedly folds new AFL queue entries into the QEMU execution tree, picks one
of the least-probable missed branches, asks QSYM for inputs that flip it, and
writes every produced input back into the AFL queue directory.
"""
import os
import random
import sys
import time

import config
import qemu_instr
import qsym_ce
import utils
import pwn

# pwn.log.setLevel("silent")


# not tested, but seems to work...
def add_input_to_afl_queue(content, idx="000000"):
    """Write ``content`` into the AFL queue directory as a new test case.

    :param content: raw bytes of the generated input; falsy content is ignored.
    :param idx: id of the queue entry the input was derived from (``src:`` tag).
    """
    global added_counter
    if not content:
        return
    # %06d instead of %6d: the id field is zero-padded like the default
    # idx="000000"; %6d would pad with spaces for counters below 100000.
    path = "%s/id:%06d,src:%s" % (config.AFL_CORPUS_PATH, added_counter, idx)
    with open(path, "wb+") as fp:
        fp.write(content)
    added_counter += 1


# Generated inputs are numbered from 100000 upwards.
added_counter = int(1e5)
utils.copy_file_to_qsym_host(config.LOCAL_UNINSTRUMENTED_EXEC_PATH, config.REMOTE_UNINSTRUMENTED_EXEC_PATH)
utils.qsym_host_provide_permission(config.REMOTE_UNINSTRUMENTED_EXEC_PATH)

_executor = qemu_instr.STDINExecutorQEMU(config.QEMU_BIN, config.LOCAL_UNINSTRUMENTED_EXEC_PATH)
qemu = qemu_instr.QEMUInstr(_executor, config.DUMPER_PATH, shm_key=config.SHM_KEY)
qsym = qsym_ce.QSYMConcolicExecutor(config.REMOTE_UNINSTRUMENTED_EXEC_PATH)

# Queue file names that were already handed to the execution tree.
known_testcase = set()


def get_new_testcase_filenames():
    """Return paths of queue entries not seen before (dot-files are skipped)."""
    result = []
    for entry in os.listdir(config.AFL_CORPUS_PATH):
        if entry in known_testcase or entry.startswith("."):
            continue
        known_testcase.add(entry)
        result.append(f"{config.AFL_CORPUS_PATH}/{entry}")
    return result


def grab_id_from_afl_tc_name(name):
    """Extract the ``id:`` field from an AFL queue file name or path.

    Only the final path component is inspected so that an ``id:`` substring in
    a directory name cannot be picked up by mistake.
    """
    return os.path.basename(name).split("id:")[1].split(",")[0]


while True:
    qemu.build_execution_tree(get_new_testcase_filenames())
    # qemu.dump_execution_tree()
    paths = qemu.get_sorted_missed_path()
    if len(paths) == 0:
        print("Let's wait for AFL")
        time.sleep(5)
        continue
    solving_path = random.choice(paths)
    print(f"Solving for path {solving_path} with prob {solving_path['prob']}")
    # Close the seed file right away; this loop runs indefinitely.
    with open(solving_path["fn"], "rb") as seed_fp:
        testcase_content = seed_fp.read()
    source_id = grab_id_from_afl_tc_name(solving_path["fn"])
    for solution in qsym.flip_it(testcase_content, solving_path["flip"],
                                 nth=solving_path["nth"],
                                 qemu_instr_obj=qemu,
                                 testcase_fn=solving_path["fn"]):
        add_input_to_afl_queue(solution, idx=source_id)
    print("Round done")
import abc
import json


# Exec Tree
class Node:
    """One basic block in the execution tree."""

    def __init__(self):
        self.children = set()  # addresses of successor blocks
        self.children_prob = []  # filled by Instrumentation.assign_prob()
        self.max_encounter_child = {}  # child addr -> set of edge hit counts
        self.addr = 0
        self.is_comp = False  # only nodes flagged here are considered for flipping
        self.visit_count = 1
        self.addr_range = None  # (start, end) once known
        self.led_by = ""  # test case file that last reached this node

    @staticmethod
    def to_addr(node):
        """Return ``node.addr``, or 0 when ``node`` is falsy."""
        if node:
            return node.addr
        return 0

    def __hash__(self):
        return self.addr

    def __str__(self):
        # addr_range stays None until an instrumentation backend fills it in;
        # formatting must not crash on such nodes.
        if self.addr_range is None:
            addr_range = "unknown"
        else:
            addr_range = f"{hex(self.addr_range[0])} - {hex(self.addr_range[1])}"
        return f"comp: {self.is_comp}; " \
               f"vc: {self.visit_count}; " \
               f"children: {self.children}; " \
               f"prob: {self.children_prob}; " \
               f"led_by: {self.led_by}; " \
               f"addr_range: {addr_range}"


class UnknownNode(Node):
    """Marker subclass of Node; adds no behaviour."""


SCALE = 1


class Instrumentation(abc.ABC):
    """Shared execution-tree bookkeeping for the instrumentation backends.

    Subclasses fill ``execution_tree`` and ``corpus_traces`` in
    ``build_execution_tree``; this class assigns branch probabilities and
    ranks the missed branches.
    """

    def __init__(self, executor):
        self.executor = executor
        self.execution_tree = {}  # addr -> Node
        self.corpus_traces = {}
        self.dfs_visited_nodes = set()
        self.unsolvable = set()
        self.solved = set()
        self.basic_block = {}  # BB start => size
        self.__get_basic_block_size()

    def __get_basic_block_size(self):
        """Record the size of every basic block angr's CFGFast recovers."""
        # Imported here because this is the only place angr is needed, so the
        # tree logic can be loaded and exercised without it.
        import angr

        p = angr.Project(self.executor.uninstrumented_path, load_options={'auto_load_libs': False})
        cfg = p.analyses.CFGFast()
        for key in cfg.kb.functions:
            for bb in cfg.kb.functions[key].blocks:
                self.basic_block[bb.addr] = bb.size

    def build_execution_tree(self, new_testcase_filenames: [str]):
        """Hook for subclasses; the base implementation does nothing."""
        pass

    def dump_execution_tree(self):
        """Print the execution tree as JSON keyed by hex address."""
        print(json.dumps({hex(x): str(self.execution_tree[x]) for x in self.execution_tree}, sort_keys=True, indent=4))

    def assign_prob(self):
        """Recompute ``children_prob`` for every node from child visit counts.

        The list is rebuilt from scratch on each call. Appending to the old
        list, as was done before, let entries from earlier rounds pile up, so
        ``__get_prob`` kept returning first-round values.
        """
        for current_node in self.execution_tree.values():
            child_counts = [self.execution_tree[child_addr].visit_count
                            for child_addr in current_node.children]
            # prevent div by 0, todo: this causes left + right != 1
            sum_of_children = 1 + sum(child_counts)

            probs = [count * SCALE / sum_of_children for count in child_counts]
            # Fewer than two known children: pad with a small placeholder for
            # the branch that has not been seen yet.
            while len(probs) < 2:
                probs.append(3 * SCALE / sum_of_children)

            # Non-comparison blocks and blocks with too few samples get a
            # neutral weight.
            if not current_node.is_comp or sum_of_children < 30:
                probs = [1.0 * SCALE for _ in probs]
            current_node.children_prob = probs

    def __get_prob(self, parent, child):
        """Return the probability assigned to the edge ``parent -> child``."""
        parent_node = self.execution_tree[parent]
        child_node_addr = self.execution_tree[child].addr
        for k, _child_addr in enumerate(parent_node.children):
            if _child_addr == child_node_addr:
                return parent_node.children_prob[k]
        print(f"[Exec] {parent} {child} not in execution tree")
        # Raised explicitly so the check also fires under ``python -O``.
        raise AssertionError(f"{parent} {child} not in execution tree")

    def __is_branch_missed(self, parent_addr, child_addr, nth=0):
        """Tell whether the other side of a comparison block is still unexplored."""
        hit_count = nth + 1
        parent_node = self.execution_tree[parent_addr]
        # .get(): an edge with no recorded hit counts counts as missed instead
        # of raising KeyError.
        seen_hit_counts = parent_node.max_encounter_child.get(child_addr, ())
        return (
            len(parent_node.children) < 2
            or hit_count not in seen_hit_counts
        ) and parent_node.is_comp

    def __should_i_solve(self, testcase_fn, flip_pcs, nth=0):
        """Return False for branches already recorded as solved or unsolvable."""
        key = (testcase_fn, flip_pcs[0], flip_pcs[1], nth)
        return key not in self.unsolvable and key not in self.solved

    def add_unsolvable_path(self, testcase_fn, flip_pcs, nth=0):
        """Remember that this branch could not be solved."""
        self.unsolvable.add((testcase_fn, flip_pcs[0], flip_pcs[1], nth))

    def add_solved_path(self, testcase_fn, flip_pcs, nth=0):
        """Remember that this branch has been solved."""
        self.solved.add((testcase_fn, flip_pcs[0], flip_pcs[1], nth))

    def get_sorted_missed_path(self, num=10):
        """Return up to ``num`` missed branches, lowest path probability first.

        Each entry is a dict with ``flip`` (address range of the block to
        flip), ``prob``, ``fn`` (test case file) and ``nth`` (zero-based
        occurrence of the block within the trace).
        """
        missed_paths = []
        for filename, trace in self.corpus_traces.items():
            hit_counts = {}
            prob = 1
            for k in range(len(trace) - 1):
                node = trace[k]
                next_node = trace[k + 1]

                hit_counts[node] = hit_counts.get(node, 0) + 1
                nth = hit_counts[node] - 1
                # todo: find out why nth
                if nth < 2 \
                        and self.__is_branch_missed(node.addr, next_node.addr, nth=nth) \
                        and self.__should_i_solve(filename, node.addr_range, nth=nth):
                    missed_paths.append({
                        "flip": node.addr_range,
                        "prob": prob * node.children_prob[-1],
                        "fn": filename,
                        "nth": nth
                    })
                # Always extend the cumulative probability. Skipping this step
                # for filtered branches made later branches on the same trace
                # look more likely than they are.
                prob *= self.__get_prob(node.addr, next_node.addr)
        return sorted(missed_paths, key=lambda x: x["prob"])[:num]
def get_addr(self, file_loc):
    """Resolve a ``file:line:...`` location to ``[start, end]`` addresses via gdb.

    Successful lookups are memoised in ``self.cmp_table``. When gdb's
    ``info line`` reply lacks an address range, the reply is printed and
    ``None`` is returned without caching.
    """
    try:
        return self.cmp_table[file_loc]
    except KeyError:
        pass

    self.run_gdb()
    # Everything before the last ':' is what gets passed to gdb.  # todo: fix
    real_file_loc = file_loc.rpartition(b':')[0]
    gdb_reply = self.execute_gdb_cmd(b"info line " + real_file_loc)  # todo: fix

    if b"starts at address" not in gdb_reply or b"and ends at" not in gdb_reply:
        print(gdb_reply)
        print(f"[GDB] gdb thinks we give a bad file_loc {real_file_loc}")
        return None

    # Splitting on a one-group pattern yields [before, captured, after] when
    # the pattern matches exactly once.
    start_parts = self.EXTRACT_START.split(gdb_reply)
    end_parts = self.EXTRACT_END.split(gdb_reply)
    assert len(start_parts) == 3 and len(end_parts) == 3, "GDB gives something weird"

    addr_range = [int(b'0x' + start_parts[1], 16), int(b'0x' + end_parts[1], 16)]
    self.cmp_table[file_loc] = addr_range
    return addr_range
class LLVMInstr(instr_interface.Instrumentation):
    """Execution-tree builder for the LLVM-instrumented harness.

    Test cases are replayed through the instrumented binary to collect block
    traces; visit counts come from trace files found in ``trace_directory``.

    NOTE(review): ``corpus_traces`` holds raw ``[addr, is_cmp, file_loc]``
    records here, while ``get_sorted_missed_path`` in the base class reads
    ``.addr`` from trace entries -- confirm before relying on that method with
    this backend.
    """

    def __init__(self, executor, trace_directory="/tmp/digfuzz"):
        super().__init__(executor)
        self.trace_directory = trace_directory
        self.corpus_traces = {}
        self.cmp_table = {}
        self.visited_trace = set()
        self.gdb = GDBExecutor(executor.uninstrumented_path)

    def __add_to_execution_tree(self, trace, file_name):
        """Merge one trace of ``[addr, is_cmp, file_loc]`` records into the tree."""
        last_node = None
        for record in trace:
            addr = record[0]
            is_cmp = record[1]
            file_loc = record[2] if is_cmp else None

            current_node = self.execution_tree.get(addr)
            if current_node is None:
                current_node = instr_interface.Node()
                current_node.addr = addr
                current_node.is_comp = is_cmp
                if file_loc:
                    addr_range = self.gdb.get_addr(file_loc)
                    # Kept for any reader of the old attribute name.
                    current_node.angr_addr_range = addr_range
                    # addr_range is the attribute Node defines and the base
                    # class reads; leave it None when gdb has no answer.
                    if addr_range is not None:
                        current_node.addr_range = addr_range
                self.execution_tree[addr] = current_node

            # Node keeps successors in the ``children`` set of addresses; it
            # has no left/right attributes.
            if last_node is not None and addr not in last_node.children:
                if len(last_node.children) < 2:
                    last_node.children.add(addr)
                else:
                    print("[Exec Tree] More than 2 children for a node :(")
            current_node.led_by = file_name
            last_node = current_node

    def __build_execution_tree(self, new_testcase_filenames):
        """Replay each new test case and fold its trace into the tree."""
        for filename in new_testcase_filenames:
            # Test the file name, not the path: "./queue/id:..." is a valid
            # test case even though the path starts with a dot.
            if os.path.basename(filename).startswith("."):
                continue
            with open(filename, "rb") as fp:
                corpus_content = fp.read()
            self.executor.run_bin()
            self.executor.execute_test_case(corpus_content)
            trace = []
            while True:
                try:
                    content, should_continue = self.executor.read_and_determine_done_reading()
                except EOFError:
                    print(f"[Crash] Found crash {filename}, skipping")
                    self.executor.restart_bin()
                    break
                if not should_continue:
                    break
                if not content:
                    continue
                if content[0] == 99:  # b'c'
                    # cmp: annotates the block recorded just before it
                    if trace:
                        trace[-1][1] = True
                        trace[-1][2] = content.split(b',')[-1]
                else:
                    trace.append([int(content, 16), False, b'1'])
            self.corpus_traces[filename] = trace
            self.__add_to_execution_tree(trace, filename)

    def __increment_tree_visit_count(self):
        """Add one visit per address listed in each not-yet-read trace file."""
        current_check_trace_files = set(os.listdir(self.trace_directory)).difference(self.visited_trace)
        for trace_file in current_check_trace_files:
            self.visited_trace.add(trace_file)
            with open(f"{self.trace_directory}/{trace_file}") as fp:
                content = fp.readlines()
            for line in content:
                if line == "EOF\n" or "0x" not in line:
                    continue
                try:
                    addr = int(line.strip(), 16)
                except ValueError as e:
                    # Skip the line; previously the unparsed string fell
                    # through to the lookup below.
                    print(e, line)
                    continue
                if addr not in self.execution_tree:
                    print("[Fuzzer] Fuzzer found a new path but did not add it to corpus")
                    continue
                self.execution_tree[addr].visit_count += 1

    def build_execution_tree(self, new_testcase_filenames):
        """Update the tree with new test cases and visit counts, then return it."""
        self.__build_execution_tree(new_testcase_filenames)
        self.__increment_tree_visit_count()
        # Same final step as QEMUInstr.build_execution_tree.
        self.assign_prob()
        return self.execution_tree
3 | // 4 | 5 | #ifndef AFL_BUILD_PREACH_UTIL_H 6 | #define AFL_BUILD_PREACH_UTIL_H 7 | 8 | #endif // AFL_BUILD_PREACH_UTIL_H 9 | -------------------------------------------------------------------------------- /preachfuzz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import angr 4 | import claripy 5 | import time 6 | import os 7 | 8 | import config 9 | from llvm_instr import GDBExecutor 10 | branch_instrs = ["ja","jae","jb","jbe","jc","je","jecxz","jg","jge","jl","jle","jna","jnae","jnb","jnbe","jnc","jne","jng", 11 | "jnge","jnl","jnle","jno","jnp","jns","jnz","jo","jp","jpe","jpo","js","jz"] 12 | cmp_instrs = ["cmp", "test","FUCOM","FUCOMP","FUCOMPP","FUCOMI","FUCOMIP","FTST","FXAM","FCOM","FCOMP","FCOMPP","FICOM","FICOMP","FCOMI","FCOMIP"] 13 | 14 | floc = f"/tmp/objdump-log-{time.time()}" 15 | os.system(f"objdump -d {config.LOCAL_UNINSTRUMENTED_EXEC_PATH} > {floc}") 16 | 17 | mapping = {} 18 | 19 | gdb = GDBExecutor(config.LOCAL_UNINSTRUMENTED_EXEC_PATH) 20 | hardest_path = [f"test.c:{x}:-1".encode("ascii") for x in [13, 19, 23, 26, 27, 30, 31, 34, 35, 38, 39, 42, 43, 46, 47]] 21 | 22 | solve_for_addr = [] 23 | 24 | for i in hardest_path: 25 | addrs = gdb.get_addr(i) 26 | solve_for_addr.append(addrs[0]) 27 | solve_for_addr.append(addrs[1]) 28 | 29 | print(solve_for_addr) 30 | # for line in open(floc).read().split("\n"): 31 | # line_arr = line.split("\t") 32 | # if len(line_arr) > 2 and ("nop" in line_arr[-1]): 33 | # continue 34 | # if record_next or driver_part: 35 | # if len(line_arr) > 1: 36 | # pc = int("0x" + line_arr[0].split(":")[0].replace(" ", ""), 16) 37 | # self.__non_comp_bb.add(pc) 38 | # record_next = False 39 | # 40 | # # todo: parse instead of direct match 41 | # if len(line_arr) > 2 and ("call" in line_arr[-1] or "jmp" in line_arr[-1]): 42 | # record_next = True 43 | # if len(line_arr) == 1: 44 | # record_next = True 45 | # driver_part = False 46 | # if 
len(line_arr) == 1 and ("
class STDINExecutorQEMU:
    """Runs the uninstrumented binary under QEMU and feeds test cases on stdin."""

    def __init__(self, qemu_bin, uninstrumented_path):
        self.instance = None
        self.qemu_bin = qemu_bin
        self.uninstrumented_path = uninstrumented_path

    def run_bin(self):
        """Start the process if none has been started yet."""
        if self.instance is None:
            self.restart_bin()

    def restart_bin(self):
        """Start a fresh QEMU process, closing the previous one if there is one."""
        # Close the old tube first: restart_bin() is called after every crash,
        # and each dropped process object would otherwise keep its pipes open.
        if self.instance is not None:
            self.instance.close()
        self.instance = pwn.process([self.qemu_bin, self.uninstrumented_path])

    def execute_test_case(self, corpus_content):
        """Send one test case, followed by a newline, to the process."""
        self.instance.sendline(corpus_content)

    # convert output to a list of PCs
    def dump_trace(self):
        """Read output up to ``EXECDONE`` and lazily yield the logged PCs.

        Lines starting with ``digfuzz`` are parsed as ``int(line[8:])``; all
        other lines are ignored.
        """
        out = self.instance.recvuntil(b'EXECDONE', timeout=config.QEMU_TIMEOUT)
        return (int(line[8:]) for line in out.split(b"\n") if line.startswith(b"digfuzz"))
45 | def __grab_non_comp_bb(self): 46 | floc = f"/tmp/objdump-log-{time.time()}" 47 | os.system(f"objdump -d {self.executor.uninstrumented_path} > {floc}") 48 | driver_part = False 49 | for line in open(floc).read().split("\n"): 50 | line_arr = line.split("\t") 51 | if (len(line_arr) > 2 and ("call" in line_arr[-1] or 52 | "jmp" in line_arr[-1] or 53 | "leave" in line_arr[-1] or 54 | "ret" in line_arr[-1]))\ 55 | or driver_part: 56 | if len(line_arr) > 1: 57 | pc = int("0x" + line_arr[0].split(":")[0].replace(" ", ""), 16) 58 | self.__non_comp_bb.add(pc) 59 | # todo: parse instead of direct match 60 | if len(line_arr) == 1: 61 | driver_part = False 62 | if len(line_arr) == 1 and ("
def __add_to_execution_tree(self, trace, file_name):
    """Fold one PC trace into the execution tree and record it for ``file_name``.

    The first pass creates missing nodes, links each block to its predecessor
    and counts how often every (previous, current) edge occurs. The second
    pass stores each edge's total count in the predecessor's
    ``max_encounter_child`` entry for that child.
    """
    recorded_nodes = self.corpus_traces.setdefault(file_name, [])
    trace = list(trace)

    edge_hits = {}
    prev_addr = 0
    for addr in trace:
        edge = (prev_addr, addr)
        edge_hits[edge] = edge_hits.get(edge, 0) + 1

        # init node
        if addr not in self.execution_tree:
            node = instr_interface.Node()
            self.execution_tree[addr] = node
            node.addr = addr
            block_end = addr + self.basic_block[addr]
            node.addr_range = (addr, block_end)
            # is_comp is set only when no address of the block is in the
            # non-comparison set.
            if self.__non_comp_bb.isdisjoint(range(addr, block_end)):
                node.is_comp = True

        # update children
        node = self.execution_tree[addr]
        if prev_addr != 0:
            self.execution_tree[prev_addr].children.add(addr)
        node.led_by = file_name
        recorded_nodes.append(node)
        prev_addr = addr

    # setup edge hitcount
    prev_addr = None
    for addr in trace:
        if prev_addr:
            parent = self.execution_tree[prev_addr]
            parent.max_encounter_child.setdefault(addr, set()).add(edge_hits[(prev_addr, addr)])
        prev_addr = addr
def __add_qemu_bb_dumper_out_to_tree(self, content):
    """Apply the dumper's ``pc,count`` lines to the execution tree.

    For each well-formed line whose ``pc`` is in the tree, ``visit_count`` is
    set to ``count``. Lines that are not ``pc,count`` are reported and skipped
    so that one stray line in the output cannot abort the update.

    :param content: raw bytes captured from the dumper process.
    """
    for line in content.split(b"\n"):
        if not line:
            continue
        fields = line.split(b",")
        if len(fields) < 2:
            print(f"[Dumper] Ignoring malformed line {line!r}")
            continue
        try:
            pc, counter = int(fields[0]), int(fields[1])
        except ValueError:
            print(f"[Dumper] Ignoring malformed line {line!r}")
            continue
        node = self.execution_tree.get(pc)
        if node is not None:
            node.visit_count = counter
-------------------------------------------------------------------------------- /qemu_qsym_harness.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | // libFuzzer interface is thin, so we don't include any libFuzzer headers. 7 | int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); 8 | __attribute__((weak)) int LLVMFuzzerInitialize(int *argc, char ***argv); 9 | 10 | #define kMaxAflInputSize (1 * 1024 * 1024) 11 | static uint8_t AflInputBuf[kMaxAflInputSize]; 12 | 13 | int main(int argc, char **argv) { 14 | if (LLVMFuzzerInitialize) LLVMFuzzerInitialize(&argc, &argv); 15 | if (argc > 1) { // for afl 16 | size_t l = read(0, AflInputBuf, kMaxAflInputSize); 17 | LLVMFuzzerTestOneInput(AflInputBuf, l); 18 | } else 19 | while (1) { // for qsym 20 | size_t l = read(0, AflInputBuf, kMaxAflInputSize); 21 | LLVMFuzzerTestOneInput(AflInputBuf, l); 22 | printf("EXECDONE\n"); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /qemuafl_bb_aggr_shm.diff: -------------------------------------------------------------------------------- 1 | diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c 2 | index bedf41b51d..43b0b57e8e 100644 3 | --- a/accel/tcg/cpu-exec.c 4 | +++ b/accel/tcg/cpu-exec.c 5 | @@ -53,7 +53,19 @@ 6 | #ifndef AFL_QEMU_STATIC_BUILD 7 | #include 8 | #endif 9 | - 10 | +#include 11 | +#include 12 | +#include 13 | +#include 14 | +#include 15 | +#include 16 | +#include 17 | +#include 18 | +#include 19 | +#include 20 | +#include 21 | +#include 22 | +#include 23 | /*************************** 24 | * VARIOUS AUXILIARY STUFF * 25 | ***************************/ 26 | @@ -305,7 +317,39 @@ static void afl_map_shm_fuzz(void) { 27 | 28 | } 29 | 30 | + 31 | +struct digfuzz_bucket 32 | +{ 33 | + unsigned long int PC; 34 | + uint32_t count; 35 | +}; 36 | + 37 | +typedef struct digfuzz_bucket digfuzz_bucket_t; 38 | + 39 | +#define 
HASHMAP_SIZE 1000000 40 | + 41 | + 42 | +#define SHM_SIZE (HASHMAP_SIZE*sizeof(digfuzz_bucket_t)) 43 | + 44 | +digfuzz_bucket_t* digfuzz_hitcounts; 45 | + 46 | void afl_setup(void) { 47 | + // digfuzz instrumentation 48 | + 49 | + // create hashmap 50 | + char* shm_key_df = getenv("DIGFUZZ_SHM"); 51 | + printf("digfuzz init"); 52 | + if (shm_key_df){ 53 | + int fd = shm_open(shm_key_df, O_RDWR | O_CREAT, S_IREAD | S_IWRITE); 54 | + if (fd <= -1) { 55 | + fprintf(stderr, "[DigFuzz] Failed to create shared memory region\n"); 56 | + return -1; 57 | + } 58 | + ftruncate(fd, SHM_SIZE); 59 | + digfuzz_hitcounts = mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 60 | + close(fd); 61 | + } 62 | + // done 63 | 64 | char *id_str = getenv(SHM_ENV_VAR), *inst_r = getenv("AFL_INST_RATIO"); 65 | 66 | diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h 67 | index 754d54a9a7..229540a4a3 100644 68 | --- a/accel/tcg/tcg-runtime.h 69 | +++ b/accel/tcg/tcg-runtime.h 70 | @@ -355,3 +355,5 @@ DEF_HELPER_FLAGS_2(qasan_store4, TCG_CALL_NO_RWG, void, env, tl) 71 | DEF_HELPER_FLAGS_2(qasan_store8, TCG_CALL_NO_RWG, void, env, tl) 72 | DEF_HELPER_FLAGS_1(qasan_shadow_stack_push, TCG_CALL_NO_RWG, void, tl) 73 | DEF_HELPER_FLAGS_1(qasan_shadow_stack_pop, TCG_CALL_NO_RWG, void, tl) 74 | + 75 | +DEF_HELPER_FLAGS_1(digfuzz_cnt, TCG_CALL_NO_RWG, void, tl) 76 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c 77 | index 2fa6b0a851..871a519390 100644 78 | --- a/accel/tcg/translate-all.c 79 | +++ b/accel/tcg/translate-all.c 80 | @@ -71,8 +71,28 @@ 81 | 82 | __thread int cur_block_is_good; 83 | 84 | -void HELPER(afl_maybe_log)(target_ulong cur_loc) { 85 | +#define HASHMAP_HASH_INIT 2166136261u 86 | +#define HASHMAP_SIZE 1000000 87 | + 88 | +struct digfuzz_bucket 89 | +{ 90 | + unsigned long int PC; 91 | + uint32_t count; 92 | +}; 93 | + 94 | +typedef struct digfuzz_bucket digfuzz_bucket_t; 95 | + 96 | 97 | +extern digfuzz_bucket_t* digfuzz_hitcounts; 98 | + 99 
| +void HELPER(digfuzz_cnt)(target_ulong cur_loc) { 100 | + // printf("%d\n", cur_loc); 101 | + uint32_t cur_loc_hash = cur_loc % HASHMAP_SIZE; 102 | + digfuzz_hitcounts[cur_loc_hash].PC = cur_loc; 103 | + digfuzz_hitcounts[cur_loc_hash].count++; 104 | +} 105 | + 106 | +void HELPER(afl_maybe_log)(target_ulong cur_loc) { 107 | register uintptr_t afl_idx = cur_loc ^ afl_prev_loc; 108 | 109 | INC_AFL_AREA(afl_idx); 110 | @@ -100,7 +120,8 @@ static void afl_gen_trace(target_ulong cur_loc) { 111 | 112 | if (!cur_block_is_good) 113 | return; 114 | - 115 | + if (digfuzz_hitcounts) 116 | + gen_helper_digfuzz_cnt(tcg_const_tl(cur_loc)); 117 | /* Looks like QEMU always maps to fixed locations, so ASLR is not a 118 | concern. Phew. But instruction addresses may be aligned. Let's mangle 119 | the value to get something quasi-uniform. */ 120 | -------------------------------------------------------------------------------- /qemuafl_bb_stdout.diff: -------------------------------------------------------------------------------- 1 | diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h 2 | index 754d54a9a7..229540a4a3 100644 3 | --- a/accel/tcg/tcg-runtime.h 4 | +++ b/accel/tcg/tcg-runtime.h 5 | @@ -355,3 +355,5 @@ DEF_HELPER_FLAGS_2(qasan_store4, TCG_CALL_NO_RWG, void, env, tl) 6 | DEF_HELPER_FLAGS_2(qasan_store8, TCG_CALL_NO_RWG, void, env, tl) 7 | DEF_HELPER_FLAGS_1(qasan_shadow_stack_push, TCG_CALL_NO_RWG, void, tl) 8 | DEF_HELPER_FLAGS_1(qasan_shadow_stack_pop, TCG_CALL_NO_RWG, void, tl) 9 | + 10 | +DEF_HELPER_FLAGS_1(digfuzz_cnt, TCG_CALL_NO_RWG, void, tl) 11 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c 12 | index 2fa6b0a851..189e8f211f 100644 13 | --- a/accel/tcg/translate-all.c 14 | +++ b/accel/tcg/translate-all.c 15 | @@ -71,6 +71,10 @@ 16 | 17 | __thread int cur_block_is_good; 18 | 19 | +void HELPER(digfuzz_cnt)(target_ulong cur_loc) { 20 | + printf("digfuzz@%lu\n", cur_loc); 21 | +} 22 | + 23 | void HELPER(afl_maybe_log)(target_ulong 
cur_loc) { 24 | 25 | register uintptr_t afl_idx = cur_loc ^ afl_prev_loc; 26 | @@ -101,6 +105,8 @@ static void afl_gen_trace(target_ulong cur_loc) { 27 | if (!cur_block_is_good) 28 | return; 29 | 30 | + gen_helper_digfuzz_cnt(tcg_const_tl(cur_loc)); 31 | + 32 | /* Looks like QEMU always maps to fixed locations, so ASLR is not a 33 | concern. Phew. But instruction addresses may be aligned. Let's mangle 34 | the value to get something quasi-uniform. */ 35 | -------------------------------------------------------------------------------- /qsym.diff: -------------------------------------------------------------------------------- 1 | diff --git a/Dockerfile b/Dockerfile 2 | index 3c99259..c9425af 100644 3 | --- a/Dockerfile 4 | +++ b/Dockerfile 5 | @@ -10,3 +10,6 @@ COPY . /workdir/qsym 6 | 7 | RUN ./setup.sh 8 | RUN pip install . 9 | +RUN cd /workdir/qsym/qsym/pintool && make -j${nproc} 10 | +RUN mkdir /tmp/in 11 | +RUN mkdir /tmp/out 12 | \ No newline at end of file 13 | diff --git a/qsym/pintool/solver.cpp b/qsym/pintool/solver.cpp 14 | index 147334f..b50c5a1 100644 15 | --- a/qsym/pintool/solver.cpp 16 | +++ b/qsym/pintool/solver.cpp 17 | @@ -127,9 +127,9 @@ void Solver::add(z3::expr expr) { 18 | z3::check_result Solver::check() { 19 | uint64_t before = getTimeStamp(); 20 | z3::check_result res; 21 | - LOG_STAT( 22 | - "SMT: { \"solving_time\": " + decstr(solving_time_) + ", " 23 | - + "\"total_time\": " + decstr(before - start_time_) + " }\n"); 24 | + // LOG_STAT( 25 | + // "SMT: { \"solving_time\": " + decstr(solving_time_) + ", " 26 | + // + "\"total_time\": " + decstr(before - start_time_) + " }\n"); 27 | // LOG_DEBUG("Constraints: " + solver_.to_smt2() + "\n"); 28 | try { 29 | res = solver_.check(); 30 | @@ -146,15 +146,14 @@ z3::check_result Solver::check() { 31 | return res; 32 | } 33 | 34 | +void Solver::fake_check(){ 35 | + // printf("BB\n%lu%sBBEND\n", last_pc_, solver_.to_smt2().c_str()); 36 | +} 37 | + 38 | + 39 | bool Solver::checkAndSave(const 
std::string& postfix) { 40 | - if (check() == z3::sat) { 41 | - saveValues(postfix); 42 | - return true; 43 | - } 44 | - else { 45 | - LOG_DEBUG("unsat\n"); 46 | - return false; 47 | - } 48 | + fake_check(); 49 | + return true; 50 | } 51 | 52 | void Solver::addJcc(ExprRef e, bool taken, ADDRINT pc) { 53 | @@ -174,49 +173,41 @@ void Solver::addJcc(ExprRef e, bool taken, ADDRINT pc) { 54 | 55 | // check duplication before really solving something, 56 | // some can be handled by range based constraint solving 57 | - bool is_interesting; 58 | - if (pc == 0) { 59 | - // If addJcc() is called by special case, then rely on last_interested_ 60 | - is_interesting = last_interested_; 61 | - } 62 | - else 63 | - is_interesting = isInterestingJcc(e, taken, pc); 64 | 65 | - if (is_interesting) 66 | - negatePath(e, taken); 67 | - addConstraint(e, taken, is_interesting); 68 | + negatePath(e, taken); 69 | + addConstraint(e, taken, false); 70 | } 71 | 72 | void Solver::addAddr(ExprRef e, ADDRINT addr) { 73 | - llvm::APInt v(e->bits(), addr); 74 | - addAddr(e, v); 75 | +// llvm::APInt v(e->bits(), addr); 76 | +// addAddr(e, v); 77 | } 78 | 79 | void Solver::addAddr(ExprRef e, llvm::APInt addr) { 80 | - if (e->isConcrete()) 81 | - return; 82 | - 83 | - if (last_interested_) { 84 | - reset(); 85 | - // TODO: add optimize in z3 86 | - syncConstraints(e); 87 | - if (check() != z3::sat) 88 | - return; 89 | - z3::expr &z3_expr = e->toZ3Expr(); 90 | - 91 | - // TODO: add unbound case 92 | - z3::expr min_expr = getMinValue(z3_expr); 93 | - z3::expr max_expr = getMaxValue(z3_expr); 94 | - solveOne(z3_expr == min_expr); 95 | - solveOne(z3_expr == max_expr); 96 | - } 97 | - 98 | - addValue(e, addr); 99 | +// if (e->isConcrete()) 100 | +// return; 101 | +// 102 | +// if (last_interested_) { 103 | +// reset(); 104 | +// // TODO: add optimize in z3 105 | +// syncConstraints(e); 106 | +// if (check() != z3::sat) 107 | +// return; 108 | +// z3::expr &z3_expr = e->toZ3Expr(); 109 | +// 110 | +// // 
TODO: add unbound case 111 | +// z3::expr min_expr = getMinValue(z3_expr); 112 | +// z3::expr max_expr = getMaxValue(z3_expr); 113 | +// solveOne(z3_expr == min_expr); 114 | +// solveOne(z3_expr == max_expr); 115 | +// } 116 | +// 117 | +// addValue(e, addr); 118 | } 119 | 120 | void Solver::addValue(ExprRef e, ADDRINT val) { 121 | - llvm::APInt v(e->bits(), val); 122 | - addValue(e, v); 123 | +// llvm::APInt v(e->bits(), val); 124 | +// addValue(e, v); 125 | } 126 | 127 | void Solver::addValue(ExprRef e, llvm::APInt val) { 128 | @@ -234,31 +225,31 @@ void Solver::addValue(ExprRef e, llvm::APInt val) { 129 | } 130 | 131 | void Solver::solveAll(ExprRef e, llvm::APInt val) { 132 | - if (last_interested_) { 133 | - std::string postfix = ""; 134 | - ExprRef expr_val = g_expr_builder->createConstant(val, e->bits()); 135 | - ExprRef expr_concrete = g_expr_builder->createBinaryExpr(Equal, e, expr_val); 136 | - 137 | - reset(); 138 | - syncConstraints(e); 139 | - addToSolver(expr_concrete, false); 140 | - 141 | - if (check() != z3::sat) { 142 | - // Optimistic solving 143 | - reset(); 144 | - addToSolver(expr_concrete, false); 145 | - postfix = "optimistic"; 146 | - } 147 | - 148 | - z3::expr z3_expr = e->toZ3Expr(); 149 | - while(true) { 150 | - if (!checkAndSave(postfix)) 151 | - break; 152 | - z3::expr value = getPossibleValue(z3_expr); 153 | - add(value != z3_expr); 154 | - } 155 | - } 156 | - addValue(e, val); 157 | +// if (last_interested_) { 158 | +// std::string postfix = ""; 159 | +// ExprRef expr_val = g_expr_builder->createConstant(val, e->bits()); 160 | +// ExprRef expr_concrete = g_expr_builder->createBinaryExpr(Equal, e, expr_val); 161 | +// 162 | +// reset(); 163 | +// syncConstraints(e); 164 | +// addToSolver(expr_concrete, false); 165 | +// 166 | +// if (check() != z3::sat) { 167 | +// // Optimistic solving 168 | +// reset(); 169 | +// addToSolver(expr_concrete, false); 170 | +// postfix = "optimistic"; 171 | +// } 172 | +// 173 | +// z3::expr z3_expr = 
e->toZ3Expr(); 174 | +// while(true) { 175 | +// if (!checkAndSave(postfix)) 176 | +// break; 177 | +// z3::expr value = getPossibleValue(z3_expr); 178 | +// add(value != z3_expr); 179 | +// } 180 | +// } 181 | +// addValue(e, val); 182 | } 183 | 184 | UINT8 Solver::getInput(ADDRINT index) { 185 | @@ -293,58 +284,41 @@ void Solver::readInput() { 186 | inputs_.push_back((UINT8)ch); 187 | } 188 | 189 | -std::vector Solver::getConcreteValues() { 190 | - // TODO: change from real input 191 | - z3::model m = solver_.get_model(); 192 | - unsigned num_constants = m.num_consts(); 193 | - std::vector values = inputs_; 194 | - for (unsigned i = 0; i < num_constants; i++) { 195 | - z3::func_decl decl = m.get_const_decl(i); 196 | - z3::expr e = m.get_const_interp(decl); 197 | - z3::symbol name = decl.name(); 198 | - 199 | - if (name.kind() == Z3_INT_SYMBOL) { 200 | - int value = e.get_numeral_int(); 201 | - values[name.to_int()] = (UINT8)value; 202 | - } 203 | - } 204 | - return values; 205 | -} 206 | 207 | void Solver::saveValues(const std::string& postfix) { 208 | - std::vector values = getConcreteValues(); 209 | - 210 | - // If no output directory is specified, then just print it out 211 | - if (out_dir_.empty()) { 212 | - printValues(values); 213 | - return; 214 | - } 215 | - 216 | - std::string fname = out_dir_+ "/" + toString6digit(num_generated_); 217 | - // Add postfix to record where it is genereated 218 | - if (!postfix.empty()) 219 | - fname = fname + "-" + postfix; 220 | - ofstream of(fname, std::ofstream::out | std::ofstream::binary); 221 | - LOG_INFO("New testcase: " + fname + "\n"); 222 | - if (of.fail()) 223 | - LOG_FATAL("Unable to open a file to write results\n"); 224 | - 225 | - // TODO: batch write 226 | - for (unsigned i = 0; i < values.size(); i++) { 227 | - char val = values[i]; 228 | - of.write(&val, sizeof(val)); 229 | - } 230 | - 231 | - of.close(); 232 | - num_generated_++; 233 | +// std::vector values = getConcreteValues(); 234 | +// 235 | +// // If 
no output directory is specified, then just print it out 236 | +// if (out_dir_.empty()) { 237 | +// printValues(values); 238 | +// return; 239 | +// } 240 | +// 241 | +// std::string fname = out_dir_+ "/" + toString6digit(num_generated_); 242 | +// // Add postfix to record where it is genereated 243 | +// if (!postfix.empty()) 244 | +// fname = fname + "-" + postfix; 245 | +// ofstream of(fname, std::ofstream::out | std::ofstream::binary); 246 | +// LOG_INFO("New testcase: " + fname + "\n"); 247 | +// if (of.fail()) 248 | +// LOG_FATAL("Unable to open a file to write results\n"); 249 | +// 250 | +// // TODO: batch write 251 | +// for (unsigned i = 0; i < values.size(); i++) { 252 | +// char val = values[i]; 253 | +// of.write(&val, sizeof(val)); 254 | +// } 255 | +// 256 | +// of.close(); 257 | +// num_generated_++; 258 | } 259 | 260 | void Solver::printValues(const std::vector& values) { 261 | - fprintf(stderr, "[INFO] Values: "); 262 | - for (unsigned i = 0; i < values.size(); i++) { 263 | - fprintf(stderr, "\\x%02X", values[i]); 264 | - } 265 | - fprintf(stderr, "\n"); 266 | +// fprintf(stderr, "[INFO] Values: "); 267 | +// for (unsigned i = 0; i < values.size(); i++) { 268 | +// fprintf(stderr, "\\x%02X", values[i]); 269 | +// } 270 | +// fprintf(stderr, "\n"); 271 | } 272 | 273 | z3::expr Solver::getPossibleValue(z3::expr& z3_expr) { 274 | @@ -352,40 +326,19 @@ z3::expr Solver::getPossibleValue(z3::expr& z3_expr) { 275 | return m.eval(z3_expr); 276 | } 277 | 278 | -z3::expr Solver::getMinValue(z3::expr& z3_expr) { 279 | - push(); 280 | - z3::expr value(context_); 281 | - while (true) { 282 | - if (checkAndSave()) { 283 | - value = getPossibleValue(z3_expr); 284 | - solver_.add(z3::ult(z3_expr, value)); 285 | - } 286 | - else 287 | - break; 288 | - } 289 | - pop(); 290 | - return value; 291 | -} 292 | 293 | -z3::expr Solver::getMaxValue(z3::expr& z3_expr) { 294 | - push(); 295 | - z3::expr value(context_); 296 | - while (true) { 297 | - if (checkAndSave()) { 
298 | - value = getPossibleValue(z3_expr); 299 | - solver_.add(z3::ugt(z3_expr, value)); 300 | - } 301 | - else 302 | - break; 303 | - } 304 | - pop(); 305 | - return value; 306 | +void Solver::addToSolver(ExprRef e, bool taken) { 307 | + if (!taken) 308 | + e = g_expr_builder->createLNot(e); 309 | + add(e->toZ3Expr()); 310 | } 311 | 312 | -void Solver::addToSolver(ExprRef e, bool taken) { 313 | - e->simplify(); 314 | +void Solver::addToSolverDump(ExprRef e, bool taken) { 315 | if (!taken) 316 | e = g_expr_builder->createLNot(e); 317 | + z3::solver s(context_); 318 | + s.add(e->toZ3Expr()); 319 | + std::cout << "FLIPME" << last_pc_ << "\n" << s.to_smt2() << "FLIPMEEND\n"; 320 | add(e->toZ3Expr()); 321 | } 322 | 323 | @@ -518,21 +471,25 @@ bool Solver::isInterestingJcc(ExprRef rel_expr, bool taken, ADDRINT pc) { 324 | void Solver::negatePath(ExprRef e, bool taken) { 325 | reset(); 326 | syncConstraints(e); 327 | - addToSolver(e, !taken); 328 | - bool sat = checkAndSave(); 329 | - if (!sat) { 330 | - reset(); 331 | - // optimistic solving 332 | - addToSolver(e, !taken); 333 | - checkAndSave("optimistic"); 334 | - } 335 | + addToSolverDump(e, taken); 336 | + 337 | + 338 | + 339 | + 340 | + fake_check(); 341 | +// if (!sat) { 342 | +// reset(); 343 | +// // optimistic solving 344 | +// addToSolver(e, !taken); 345 | +// checkAndSave("optimistic"); 346 | +// } 347 | } 348 | 349 | void Solver::solveOne(z3::expr z3_expr) { 350 | - push(); 351 | - add(z3_expr); 352 | - checkAndSave(); 353 | - pop(); 354 | +// push(); 355 | +// add(z3_expr); 356 | +// checkAndSave(); 357 | +// pop(); 358 | } 359 | 360 | void Solver::checkFeasible() { 361 | diff --git a/qsym/pintool/solver.h b/qsym/pintool/solver.h 362 | index e37f091..a7dbcbf 100644 363 | --- a/qsym/pintool/solver.h 364 | +++ b/qsym/pintool/solver.h 365 | @@ -65,15 +65,13 @@ protected: 366 | void checkOutDir(); 367 | void readInput(); 368 | 369 | - std::vector getConcreteValues(); 370 | void saveValues(const std::string& 
# hacky smt parsing
def remove_assert(string):
    """Return the body of the first ``(assert ...)`` form in *string*.

    *string* is SMT-LIB text as ``bytes``.  Everything before the first
    ``(assert`` and everything from the last ``)`` onwards is discarded, so
    ``b'(assert (= a b))'`` becomes ``b'(= a b)'``.

    Unlike a byte-by-byte scan for the next ``(``, this also accepts an
    atomic body such as ``b'(assert true)'``.

    Raises:
        ValueError: if there is no ``(assert`` form, it is not closed, or
            its body is empty.
    """
    marker = b'(assert'
    start = string.find(marker)
    if start < 0:
        raise ValueError("no (assert ...) form found in SMT-LIB input")
    body_start = start + len(marker)
    # The assert form is closed by the last ')' of the text.
    end = string.rfind(b')')
    if end < body_start:
        raise ValueError("unterminated (assert ...) form in SMT-LIB input")
    body = string[body_start:end].lstrip()
    if not body:
        raise ValueError("empty (assert ...) form in SMT-LIB input")
    return body


def negate_smt2(string):
    """Return ``(assert (not <body>))`` for the assert form found in *string*.

    Input and output are ``bytes``; the body is spliced in directly instead
    of being decoded to text and re-encoded.
    """
    return b'(assert (not ' + remove_assert(string) + b'))'


# Matches the symbol name of declarations such as
# "(declare-fun k!12 () (_ BitVec 8))".
_BV_DECL_RE = re.compile(r"(k![0-9]+) \(\)")


def get_bv_value(smt):
    """Return the sorted, de-duplicated numeric suffixes of ``k!N`` symbols declared in *smt*.

    *smt* is SMT-LIB text as ``str``.

    Raises:
        AssertionError: if no ``k!N ()`` declaration is present.  It is raised
            explicitly (same exception type as before) so the check is not
            stripped when Python runs with ``-O``.
    """
    names = _BV_DECL_RE.findall(smt)
    if not names:
        raise AssertionError("Can't find declare-fun")
    # Every match starts with the two-character prefix "k!".
    return sorted({int(name[2:]) for name in names})
class QSYMConcolicExecutor:
    """Run QSYM on a test case and produce inputs that flip a chosen branch.

    The patched QSYM prints, for every symbolic branch, a record of the form
    ``FLIPME<pc>`` / SMT-LIB text / ``FLIPMEEND`` on stdout.  This class
    launches QSYM, parses those records into per-pc constraints, negates the
    constraint of the wanted branch and hands the result to ``solve_smt``.
    """

    def __init__(self, uninstrumented_executable):
        """Remember the target binary and start a QSYM process for it."""
        self.uninstrumented_executable = uninstrumented_executable
        self.cmp_constraint = {}
        self.execution_tree = None
        self.qsym_instance = None
        # Set up all plain state before the side-effecting launch.
        self.__cache = {}
        self.__run_qsym()

    def __set_cache(self, testcase_fn, bvs, cmp_constraints):
        """Store the parsed QSYM output for *testcase_fn*."""
        self.__cache[testcase_fn] = (bvs, cmp_constraints)

    def __get_cache(self, testcase_fn, handler, *args):
        """Return the cached entry for *testcase_fn*, else ``handler(*args)``."""
        if testcase_fn in self.__cache:
            return self.__cache[testcase_fn]
        return handler(*args)

    def update_exec_tree(self, tree):
        """Replace the execution tree reference held by this executor."""
        self.execution_tree = tree

    def __run_qsym(self):
        """Start QSYM on the target and block until its libc image line is seen."""
        pin_args = [config.PIN_SH, '-ifeellucky', '-t', config.QSYM_OBJECT_PATH]
        if config.USE_SSH:
            config.QSYM_SSH_CONN.process(["mkdir", "in"])
            config.QSYM_SSH_CONN.process(["mkdir", "out"])
            cmd = pin_args + ['-i', 'in', '-o', 'out', '--',
                              self.uninstrumented_executable]
        else:
            # Use the instance attribute here: the bare name
            # `uninstrumented_executable` only exists as a module global when
            # this file is executed as a script, so importing the module and
            # taking this branch raised NameError.
            # NOTE(review): this branch still launches through
            # config.QSYM_SSH_CONN -- confirm that is intended when USE_SSH
            # is false.
            cmd = config.QSYM_CMD + pin_args + ['-i', '/tmp/in', '-o', '/tmp/out', '--',
                                                self.uninstrumented_executable]
        self.qsym_instance = config.QSYM_SSH_CONN.process(cmd)
        self.qsym_instance.recvuntil(b"[INFO] IMG: /lib/x86_64-linux-gnu/libc.so.6")
        print("[QSYM] Ready")

    def __get_result(self, corpus_content):
        """Feed *corpus_content* to a fresh QSYM process and return its raw output.

        Returns ``b''`` (after restarting QSYM) when the process hits EOF.
        """
        try:
            # todo: dont restart qsym
            self.qsym_instance.kill()
            self.__run_qsym()

            self.qsym_instance.sendline(corpus_content)
            start_time = time.time()
            # bytes delimiter, consistent with the other recvuntil call on
            # this object.
            result = self.qsym_instance.recvuntil(b"EXECDONE", timeout=config.QSYM_TIMEOUT)
            end_time = time.time()
            print(f"[QSYM] Spent {end_time - start_time}s dumping constraints")
        except EOFError:
            print(f"[QSYM] Crashed, ignoring content {corpus_content}")
            self.__run_qsym()
            return b''
        return result

    @staticmethod
    def __parse_output_flipme(lines):
        """Parse ``FLIPME<pc>`` ... ``FLIPMEEND`` records from a list of bytes lines.

        Returns ``(bvs, cmp_constraint)``: *bvs* is the set of
        ``(declare-fun`` lines seen inside records; *cmp_constraint* maps each
        pc to the record's remaining lines up to ``(check-sat)``, joined with
        newlines.  A later record for the same pc overwrites the earlier one.
        """
        recording = False
        current_pc = -1
        current_constraint = []
        cmp_constraint = {}
        bvs = set()
        for line in lines:
            if line.startswith(b'FLIPMEEND'):
                recording = False
                continue
            if line.startswith(b'FLIPME'):
                current_pc = int(line[6:])
                # Drop leftovers of a record that never reached (check-sat)
                # so they cannot leak into this record's constraint.
                current_constraint = []
                recording = True
                continue
            if not recording:
                continue
            if b'(declare-fun' in line:
                bvs.add(line)
                continue
            if b'(check-sat)' in line:
                cmp_constraint[current_pc] = b'\n'.join(current_constraint)
                current_constraint = []
                continue
            current_constraint.append(line)
        return bvs, cmp_constraint

    def __parse_output(self, lines: bytes):
        """Split raw QSYM output into lines and parse its FLIPME records."""
        # bvs, path_constraints = self.__parse_output_path(lines)
        return self.__parse_output_flipme(lines.split(b"\n"))

    # get a list of [cmp constraints] that has pc in pc_wanted_range
    @staticmethod
    def __find_last_cmp_pc(cmp_constraints: dict, pc_wanted_range, nth=0):
        """Return ``[pc]`` for the *nth* (0-based) key strictly inside the range.

        Keys are visited in dict iteration order; the range bounds are
        exclusive.  Returns ``[]`` when fewer than ``nth + 1`` keys qualify.
        """
        for pc in cmp_constraints:
            if pc_wanted_range[0] < pc < pc_wanted_range[1]:
                if nth == 0:
                    return [pc]
                nth -= 1
        return []

    # find a path node to stop => find a cmp cons => flip cmp cons & concat
    def __get_constraint(self, flip_pc_range, bvs, cmp_constraints, nth=0):
        """Yield SMT-LIB text that keeps lower-pc constraints and negates the target one."""
        cmp_cons_pcs = self.__find_last_cmp_pc(cmp_constraints, flip_pc_range, nth=nth)
        if not cmp_cons_pcs:
            print("[QSYM] Trying to flip constant branch")
        for pc in cmp_cons_pcs:
            # The preserved "path" is every recorded constraint whose pc is
            # numerically below the target pc.
            path = b"\n".join([cmp_constraints[_pc] for _pc in cmp_constraints if _pc < pc])
            yield to_smt2(bvs, path + b'\n' + negate_smt2(cmp_constraints[pc]))

    # conduct concolic execution and flip constraints in flip_pc_range while preserving others
    def flip_it(self, testcase_content, flip_pc_range, nth=0, qemu_instr_obj=None, testcase_fn=""):
        """Yield solved inputs that flip the *nth* branch inside *flip_pc_range*.

        QSYM output is cached per *testcase_fn*.  Calls without a file name
        are never cached: the default ``""`` would otherwise act as one shared
        key and make unrelated test cases reuse each other's constraints.

        When *qemu_instr_obj* and *testcase_fn* are both given, the path is
        reported via ``add_solved_path`` up front and via
        ``add_unsolvable_path`` if no solution was produced.
        """
        if qemu_instr_obj and testcase_fn:
            qemu_instr_obj.add_solved_path(testcase_fn, flip_pc_range, nth=nth)
        cached = self.__cache.get(testcase_fn) if testcase_fn else None
        if cached is None:
            result = self.__get_result(testcase_content)
            bvs, cmp_constraint = self.__parse_output(result)
            if testcase_fn:
                self.__set_cache(testcase_fn, bvs, cmp_constraint)
        else:
            bvs, cmp_constraint = cached
        has_solution = False

        for to_be_solved in self.__get_constraint(flip_pc_range, bvs, cmp_constraint, nth=nth):
            if not to_be_solved:
                print("[Solver] Conc exec gives nothing")
                continue
            solution = solve_smt(to_be_solved, testcase_content)
            if not solution:
                continue
            print("[QSYM] SAT")
            has_solution = True
            yield solution
        if not has_solution and qemu_instr_obj and testcase_fn:
            qemu_instr_obj.add_unsolvable_path(testcase_fn, flip_pc_range, nth=nth)
def run_async_cmd(cmd):
    """Run shell command *cmd* in a daemon child process and track it.

    The command line is echoed, executed through ``os.system`` in a
    ``multiprocessing.Process``, and the process object is appended to the
    module-level ``processes`` list so the script can join it later.
    """
    print(cmd)
    # Hand os.system to Process directly instead of wrapping it in a lambda
    # with a dummy argument: a lambda cannot be pickled, which the
    # non-fork start methods of multiprocessing require of the target.
    p = multiprocessing.Process(target=os.system, args=(cmd,))
    p.daemon = True
    p.start()
    processes.append(p)
-------------------------------------------------------------------------------- /targets/bzip2/build.sh: -------------------------------------------------------------------------------- 1 | git clone https://github.com/libigl/libigl.git target 2 | pushd target 3 | mkdir build 4 | pushd build 5 | cmake -DLIBIGL_WITH_OPENGL=OFF \ 6 | -DLIBIGL_WITH_OPENGL_GLFW=OFF \ 7 | -DLIBIGL_WITH_OPENGL_GLFW_IMGUI=OFF \ 8 | -DLIBIGL_WITH_COMISO=OFF \ 9 | -DLIBIGL_WITH_EMBREE=OFF \ 10 | -DLIBIGL_WITH_PNG=OFF \ 11 | -DLIBIGL_WITH_TETGEN=OFF \ 12 | -DLIBIGL_WITH_TRIANGLE=OFF \ 13 | -DLIBIGL_WITH_PREDICATES=OFF \ 14 | -DLIBIGL_WITH_XML=OFF \ 15 | -DLIBIGL_BUILD_TESTS=OFF \ 16 | .. 17 | CXXFLAGS=-no-pie make -j$(nproc) 18 | popd 19 | 20 | g++ -no-pie make -DIGL_STATIC_LIBRARY \ 21 | -I./include \ 22 | -isystem ./include \ 23 | -isystem ./external/eigen \ 24 | -c ../fuzz.cc -o fuzzer.o 25 | 26 | g++ -no-pie fuzzer.o ./build/libigl.a ../../../driver.o -o harness 27 | -------------------------------------------------------------------------------- /targets/bzip2/fuzz.c: -------------------------------------------------------------------------------- 1 | #include "bzlib.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | extern int BZ2_bzBuffToBuffDecompress(char* dest, 8 | unsigned int* destLen, 9 | char* source, 10 | unsigned int sourceLen, 11 | int small, 12 | int verbosity); 13 | 14 | int 15 | LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) 16 | { 17 | int r, small; 18 | unsigned int nZ, nOut; 19 | 20 | // See: https://github.com/google/bzip2-rpc/blob/master/unzcrash.c#L39 21 | nOut = size*2; 22 | char *outbuf = malloc(nOut); 23 | small = size % 2; 24 | r = BZ2_bzBuffToBuffDecompress(outbuf, &nOut, (char *)data, size, 25 | small, /*verbosity=*/0); 26 | 27 | if (r != BZ_OK) { 28 | #ifdef __DEBUG__ 29 | fprintf(stdout, "Decompression error: %d\n", r); 30 | #endif 31 | } 32 | free(outbuf); 33 | return 0; 34 | } 
/*
 * Sample fuzzing target: walks the input byte by byte against the literal
 * "abcdefgx" through a ladder of nested branches and hits assert(0) only
 * when all eight bytes match.
 *
 * s1: input buffer; s: its length.
 * Returns -1 when fewer than 8 bytes are supplied, 0 otherwise.
 */
int target(const char* s1, size_t s) {
    /* All eight positions may be read below, so require 8 bytes up front. */
    if (s < 8) return -1;
    int i; /* declared but never used */

    const char* s2 = "abcdefgx";
    /* Each level first requires a non-NUL input byte, then an exact match
     * with the next expected character, before descending one level. */
    if(*s1) {
        if (*s1 == *s2) {
            s1++;
            s2++;
            if(*s1) {
                if (*s1 == *s2) {
                    s1++;
                    s2++;
                    if(*s1) {
                        if (*s1 == *s2) {
                            s1++;
                            s2++;
                            if(*s1) {
                                if (*s1 == *s2) {
                                    s1++;
                                    s2++;
                                    if(*s1) {
                                        if (*s1 == *s2) {
                                            s1++;
                                            s2++;
                                            if(*s1) {
                                                if (*s1 == *s2) {
                                                    s1++;
                                                    s2++;
                                                    if(*s1) {
                                                        if (*s1 == *s2) {
                                                            s1++;
                                                            s2++;
                                                            if(*s1) {
                                                                if (*s1 == *s2) {
                                                                    /* Eighth byte matched: the deliberate crash site. */
                                                                    assert(0);
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    return 0;
}
def _connect_qsym_host():
    """Return an SSH client connected to the QSYM host named in ``config``.

    The caller owns the returned client and must close it.
    """
    ssh_client = paramiko.SSHClient()
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh_client.connect(hostname=config.QSYM_HOST, username=config.QSYM_UN, key_filename=config.QSYM_KEYFILE)
    except BaseException:
        # Do not leak the client when the connection attempt fails.
        ssh_client.close()
        raise
    return ssh_client


def copy_file_to_qsym_host(local_path, remote_path):
    """Upload *local_path* to *remote_path* on the QSYM host over SFTP."""
    ssh_client = _connect_qsym_host()
    try:
        ftp_client = ssh_client.open_sftp()
        try:
            ftp_client.put(local_path, remote_path)
        finally:
            ftp_client.close()
    finally:
        # The SSH connection itself was previously left open.
        ssh_client.close()


def qsym_host_provide_permission(remote_path):
    """Run ``chmod +x`` on *remote_path* on the QSYM host."""
    ssh_client = _connect_qsym_host()
    try:
        print(f"chmod +x {remote_path}")
        _, stdout, _ = ssh_client.exec_command(f"chmod +x {remote_path}")
        # exec_command returns as soon as the command is started; wait for it
        # to exit so closing the connection cannot cut it short.
        stdout.channel.recv_exit_status()
    finally:
        ssh_client.close()


def setup():
    """Compile ``qemu_qsym_harness.c`` into ``driver.o`` with gcc."""
    os.system("gcc -c -g qemu_qsym_harness.c -no-pie -o driver.o")


def compile_harness(obj_loc):
    """Link *obj_loc* with ``driver.o`` into ``config.LOCAL_UNINSTRUMENTED_EXEC_PATH``."""
    os.system(f"gcc {obj_loc} driver.o -no-pie -g -o {config.LOCAL_UNINSTRUMENTED_EXEC_PATH}")