├── tools ├── program_generator │ ├── target │ │ ├── debug │ │ │ ├── .cargo-lock │ │ │ ├── .fingerprint │ │ │ │ ├── rand_core-e25943b7452e6ba6 │ │ │ │ │ ├── dep-lib-rand_core │ │ │ │ │ ├── lib-rand_core │ │ │ │ │ ├── invoked.timestamp │ │ │ │ │ └── lib-rand_core.json │ │ │ │ ├── program_generator-97a5f69bcae5c740 │ │ │ │ │ ├── lib-program_generator │ │ │ │ │ ├── invoked.timestamp │ │ │ │ │ ├── dep-lib-program_generator │ │ │ │ │ ├── lib-program_generator.json │ │ │ │ │ └── output-lib-program_generator │ │ │ │ ├── program_generator-9f3b4da5251921fa │ │ │ │ │ ├── bin-program_generator │ │ │ │ │ ├── invoked.timestamp │ │ │ │ │ └── bin-program_generator.json │ │ │ │ ├── rand_xoshiro-826023e1e10f2594 │ │ │ │ │ ├── dep-lib-rand_xoshiro │ │ │ │ │ ├── lib-rand_xoshiro │ │ │ │ │ ├── invoked.timestamp │ │ │ │ │ └── lib-rand_xoshiro.json │ │ │ │ ├── program_generator-55bed8c75a40676d │ │ │ │ │ ├── test-bin-program_generator │ │ │ │ │ ├── invoked.timestamp │ │ │ │ │ └── test-bin-program_generator.json │ │ │ │ └── program_generator-71b575e69d616725 │ │ │ │ │ ├── test-lib-program_generator │ │ │ │ │ ├── invoked.timestamp │ │ │ │ │ ├── dep-test-lib-program_generator │ │ │ │ │ ├── test-lib-program_generator.json │ │ │ │ │ └── output-test-lib-program_generator │ │ │ ├── incremental │ │ │ │ ├── program_generator-1szwh0nemnj33 │ │ │ │ │ ├── s-gbtrymypnm-1krtpfm.lock │ │ │ │ │ ├── s-gbtrynbds6-1471w9c.lock │ │ │ │ │ ├── s-gbtryo2kyy-1rhu27j.lock │ │ │ │ │ ├── s-gbtrymypnm-1krtpfm-working │ │ │ │ │ │ └── dep-graph.part.bin │ │ │ │ │ ├── s-gbtrynbds6-1471w9c-working │ │ │ │ │ │ └── dep-graph.part.bin │ │ │ │ │ └── s-gbtryo2kyy-1rhu27j-working │ │ │ │ │ │ └── dep-graph.part.bin │ │ │ │ └── program_generator-3ph2xy5u2qtvt │ │ │ │ │ ├── s-gbtrymyg1k-hbm5ig.lock │ │ │ │ │ ├── s-gbtrynbgb3-y2hf5i.lock │ │ │ │ │ ├── s-gbtryo1cy8-7215i9.lock │ │ │ │ │ ├── s-gbtrymyg1k-hbm5ig-working │ │ │ │ │ └── dep-graph.part.bin │ │ │ │ │ ├── s-gbtrynbgb3-y2hf5i-working │ │ │ │ │ └── dep-graph.part.bin │ │ 
│ │ │ └── s-gbtryo1cy8-7215i9-working │ │ │ │ │ └── dep-graph.part.bin │ │ │ └── deps │ │ │ │ ├── librand_core-e25943b7452e6ba6.rmeta │ │ │ │ ├── librand_xoshiro-826023e1e10f2594.rmeta │ │ │ │ ├── program_generator-71b575e69d616725.d │ │ │ │ ├── program_generator-97a5f69bcae5c740.d │ │ │ │ ├── rand_core-e25943b7452e6ba6.d │ │ │ │ └── rand_xoshiro-826023e1e10f2594.d │ │ ├── CACHEDIR.TAG │ │ └── .rustc_info.json │ ├── src │ │ ├── main.rs │ │ ├── rng.rs │ │ ├── compile.rs │ │ └── lib.rs │ ├── Cargo.toml │ ├── Cargo.lock │ └── README.md ├── trace_full_regs.sh └── scraper.py ├── test_cases ├── aa ├── simple_test ├── generated_program ├── simple_test.c └── b.c ├── resources ├── graph.png ├── domtree.png ├── afl_test.png ├── printout.png ├── sfuzz_test.png ├── domtree.dot ├── ssa.dot └── notes.md ├── Cargo.toml ├── LICENSE.md ├── README.md ├── docs ├── benchmarking.md ├── memory_management.md ├── fuzzing.md └── code_gen.md ├── src ├── syscalls.rs ├── pretty_printing.rs ├── main.rs ├── config.rs ├── mutator.rs └── irgraph.rs └── fuzzing.md /tools/program_generator/target/debug/.cargo-lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_cases/aa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/test_cases/aa -------------------------------------------------------------------------------- /resources/graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/resources/graph.png -------------------------------------------------------------------------------- /resources/domtree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/resources/domtree.png 
-------------------------------------------------------------------------------- /resources/afl_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/resources/afl_test.png -------------------------------------------------------------------------------- /resources/printout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/resources/printout.png -------------------------------------------------------------------------------- /resources/sfuzz_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/resources/sfuzz_test.png -------------------------------------------------------------------------------- /test_cases/simple_test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/test_cases/simple_test -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/rand_core-e25943b7452e6ba6/dep-lib-rand_core: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_cases/generated_program: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/test_cases/generated_program -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-97a5f69bcae5c740/lib-program_generator: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/tools/program_generator/target/debug/.fingerprint/program_generator-9f3b4da5251921fa/bin-program_generator: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/rand_core-e25943b7452e6ba6/lib-rand_core: -------------------------------------------------------------------------------- 1 | e156a19efd59b159 -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/rand_xoshiro-826023e1e10f2594/dep-lib-rand_xoshiro: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrymypnm-1krtpfm.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrynbds6-1471w9c.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtryo2kyy-1rhu27j.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrymyg1k-hbm5ig.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrynbgb3-y2hf5i.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtryo1cy8-7215i9.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-55bed8c75a40676d/test-bin-program_generator: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-71b575e69d616725/test-lib-program_generator: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/rand_xoshiro-826023e1e10f2594/lib-rand_xoshiro: -------------------------------------------------------------------------------- 1 | b86706463e38296f -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/rand_core-e25943b7452e6ba6/invoked.timestamp: -------------------------------------------------------------------------------- 1 | This file has an mtime of when this was started. -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/rand_xoshiro-826023e1e10f2594/invoked.timestamp: -------------------------------------------------------------------------------- 1 | This file has an mtime of when this was started. 
-------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-55bed8c75a40676d/invoked.timestamp: -------------------------------------------------------------------------------- 1 | This file has an mtime of when this was started. -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-71b575e69d616725/invoked.timestamp: -------------------------------------------------------------------------------- 1 | This file has an mtime of when this was started. -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-97a5f69bcae5c740/invoked.timestamp: -------------------------------------------------------------------------------- 1 | This file has an mtime of when this was started. -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-9f3b4da5251921fa/invoked.timestamp: -------------------------------------------------------------------------------- 1 | This file has an mtime of when this was started. 
-------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-97a5f69bcae5c740/dep-lib-program_generator: -------------------------------------------------------------------------------- 1 |  2 | src/lib.rs 3 | src/rng.rssrc/compile.rs -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-71b575e69d616725/dep-test-lib-program_generator: -------------------------------------------------------------------------------- 1 |  2 | src/lib.rs 3 | src/rng.rssrc/compile.rs -------------------------------------------------------------------------------- /tools/program_generator/target/debug/deps/librand_core-e25943b7452e6ba6.rmeta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/deps/librand_core-e25943b7452e6ba6.rmeta -------------------------------------------------------------------------------- /tools/program_generator/target/debug/deps/librand_xoshiro-826023e1e10f2594.rmeta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/deps/librand_xoshiro-826023e1e10f2594.rmeta -------------------------------------------------------------------------------- /tools/program_generator/src/main.rs: -------------------------------------------------------------------------------- 1 | use program_generator::{ 2 | compile::compile, 3 | Program, 4 | }; 5 | 6 | fn main() { 7 | let program = Program::create_program(); 8 | compile(program); 9 | } 10 | -------------------------------------------------------------------------------- /tools/program_generator/target/CACHEDIR.TAG: -------------------------------------------------------------------------------- 1 | Signature: 
8a477f597d28d172789f06886806bc55 2 | # This file is a cache directory tag created by cargo. 3 | # For information about cache directory tags see https://bford.info/cachedir/ 4 | -------------------------------------------------------------------------------- /tools/program_generator/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "program_generator" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | rand_xoshiro = "0.6.0" 10 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrymypnm-1krtpfm-working/dep-graph.part.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrymypnm-1krtpfm-working/dep-graph.part.bin -------------------------------------------------------------------------------- /tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrynbds6-1471w9c-working/dep-graph.part.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrynbds6-1471w9c-working/dep-graph.part.bin -------------------------------------------------------------------------------- /tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtryo2kyy-1rhu27j-working/dep-graph.part.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtryo2kyy-1rhu27j-working/dep-graph.part.bin -------------------------------------------------------------------------------- /tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrymyg1k-hbm5ig-working/dep-graph.part.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrymyg1k-hbm5ig-working/dep-graph.part.bin -------------------------------------------------------------------------------- /tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrynbgb3-y2hf5i-working/dep-graph.part.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrynbgb3-y2hf5i-working/dep-graph.part.bin -------------------------------------------------------------------------------- /tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtryo1cy8-7215i9-working/dep-graph.part.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtryo1cy8-7215i9-working/dep-graph.part.bin -------------------------------------------------------------------------------- /resources/domtree.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | 0 [ label = "Label @ 0x1000\l " ] 3 | 1 [ label = "Label @ 0x100C\l " ] 4 | 2 [ label = "Label @ 0x1018\l " ] 5 | 3 [ label = "Label @ 0x1028\l " ] 6 | 4 [ label = "Label @ 0x1034\l " ] 7 | 
0 -> 1 [ ] 8 | 0 -> 3 [ ] 9 | 0 -> 2 [ ] 10 | 2 -> 4 [ ] 11 | } 12 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/rand_core-e25943b7452e6ba6/lib-rand_core.json: -------------------------------------------------------------------------------- 1 | {"rustc":15069735523673833430,"features":"[]","target":14554634924080965175,"profile":3735503092003429423,"path":15235452744257207087,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/rand_core-e25943b7452e6ba6/dep-lib-rand_core"}}],"rustflags":[],"metadata":3275543247315060703,"config":2202906307356721367,"compile_kind":0} -------------------------------------------------------------------------------- /tools/program_generator/target/debug/deps/program_generator-71b575e69d616725.d: -------------------------------------------------------------------------------- 1 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/program_generator-71b575e69d616725.rmeta: src/lib.rs src/rng.rs src/compile.rs 2 | 3 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/program_generator-71b575e69d616725.d: src/lib.rs src/rng.rs src/compile.rs 4 | 5 | src/lib.rs: 6 | src/rng.rs: 7 | src/compile.rs: 8 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/deps/program_generator-97a5f69bcae5c740.d: -------------------------------------------------------------------------------- 1 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/program_generator-97a5f69bcae5c740.rmeta: src/lib.rs src/rng.rs src/compile.rs 2 | 3 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/program_generator-97a5f69bcae5c740.d: src/lib.rs src/rng.rs src/compile.rs 4 | 5 | src/lib.rs: 6 | src/rng.rs: 7 | src/compile.rs: 8 | -------------------------------------------------------------------------------- 
/tools/program_generator/target/debug/.fingerprint/rand_xoshiro-826023e1e10f2594/lib-rand_xoshiro.json: -------------------------------------------------------------------------------- 1 | {"rustc":15069735523673833430,"features":"[]","target":12700624167688026310,"profile":3735503092003429423,"path":4654909132326164140,"deps":[[1974880700686797828,"rand_core",false,6463045886076344033]],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/rand_xoshiro-826023e1e10f2594/dep-lib-rand_xoshiro"}}],"rustflags":[],"metadata":10630710798610827097,"config":2202906307356721367,"compile_kind":0} -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-97a5f69bcae5c740/lib-program_generator.json: -------------------------------------------------------------------------------- 1 | {"rustc":15069735523673833430,"features":"[]","target":5404270602498279417,"profile":7309141686862299243,"path":17523903030608720598,"deps":[[15191189118421006103,"rand_xoshiro",false,8009995252369090488]],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/program_generator-97a5f69bcae5c740/dep-lib-program_generator"}}],"rustflags":[],"metadata":7797948686568424061,"config":2202906307356721367,"compile_kind":0} -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-71b575e69d616725/test-lib-program_generator.json: -------------------------------------------------------------------------------- 1 | 
{"rustc":15069735523673833430,"features":"[]","target":5404270602498279417,"profile":1021633075455700787,"path":17523903030608720598,"deps":[[15191189118421006103,"rand_xoshiro",false,8009995252369090488]],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/program_generator-71b575e69d616725/dep-test-lib-program_generator"}}],"rustflags":[],"metadata":7797948686568424061,"config":2202906307356721367,"compile_kind":0} -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-9f3b4da5251921fa/bin-program_generator.json: -------------------------------------------------------------------------------- 1 | {"rustc":15069735523673833430,"features":"[]","target":6365363593584776832,"profile":7309141686862299243,"path":1684066648322511884,"deps":[[11263871373454206889,"program_generator",false,11722842124356941761],[15191189118421006103,"rand_xoshiro",false,8009995252369090488]],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/program_generator-9f3b4da5251921fa/dep-bin-program_generator"}}],"rustflags":[],"metadata":7797948686568424061,"config":2202906307356721367,"compile_kind":0} -------------------------------------------------------------------------------- /tools/program_generator/target/debug/.fingerprint/program_generator-55bed8c75a40676d/test-bin-program_generator.json: -------------------------------------------------------------------------------- 1 | {"rustc":15069735523673833430,"features":"[]","target":6365363593584776832,"profile":1021633075455700787,"path":1684066648322511884,"deps":[[11263871373454206889,"program_generator",false,11722842124356941761],[15191189118421006103,"rand_xoshiro",false,8009995252369090488]],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/program_generator-55bed8c75a40676d/dep-test-bin-program_generator"}}],"rustflags":[],"metadata":7797948686568424061,"config":2202906307356721367,"compile_kind":0} 
-------------------------------------------------------------------------------- /test_cases/simple_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) { 5 | char buf[100]; 6 | int fd = open(argv[1], O_RDONLY); 7 | 8 | read(fd, buf, 100); 9 | 10 | if (buf[0] == 0x41) { 11 | if (buf[1] == 0x42) { 12 | if (buf[2] == 0x43) { 13 | if (buf[3] == 0x44) { 14 | if (buf[4] == 0x45) { 15 | if (buf[5] == 0x46) { 16 | *(unsigned long*)0x4141414141414141 = 0; 17 | } 18 | } 19 | } 20 | } 21 | } 22 | } 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /test_cases/b.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) { 5 | char buf[100]; 6 | int fd = open(argv[1], O_RDONLY); 7 | 8 | read(fd, buf, 100); 9 | 10 | int v = buf[8] * 123 / 17; 11 | 12 | if (buf[0] == v) { 13 | if (buf[1] == 0x42) { 14 | if (buf[2] == 0x43) { 15 | if (buf[3] == 0x44) { 16 | if (buf[4] == 0x45) { 17 | if (buf[5] == 0x46) { 18 | *(unsigned long*)0x4141414141414141 = 0; 19 | } 20 | } 21 | } 22 | } 23 | } 24 | } 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /tools/program_generator/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "program_generator" 7 | version = "0.1.0" 8 | dependencies = [ 9 | "rand_xoshiro", 10 | ] 11 | 12 | [[package]] 13 | name = "rand_core" 14 | version = "0.6.3" 15 | source = "registry+https://github.com/rust-lang/crates.io-index" 16 | checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" 17 | 18 | [[package]] 19 | name = "rand_xoshiro" 20 | version = "0.6.0" 21 | source = "registry+https://github.com/rust-lang/crates.io-index" 22 | checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" 23 | dependencies = [ 24 | "rand_core", 25 | ] 26 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sfuzz" 3 | version = "0.2.0" 4 | edition = "2021" 5 | 6 | [profile.release] 7 | lto = true 8 | codegen-units = 1 9 | panic = 'abort' 10 | 11 | [dependencies] 12 | elfparser = { git = "https://github.com/seal9055/local_crates/", branch = "main" } 13 | my_libs = { git = "https://github.com/seal9055/local_crates/", branch = "main" } 14 | clap = { version = "3.1.18", features = ["derive"] } 15 | rustc-hash = "1.1.0" 16 | num-traits = "0.2.14" 17 | byteorder = "1.4.3" 18 | fasthash = "0.4.0" 19 | num-format = "0.4.0" 20 | parking_lot = "0.12.0" 21 | rand_xoshiro = "0.6.0" 22 | console = "0.15.0" 23 | parse_int = "0.6.0" 24 | reqwest = { version = "*", features = ["json"] } 25 | serde = { version = "*", features = ["derive"] } 26 | 27 | [dependencies.iced-x86] 28 | version = "1.15.0" 29 | features = ["code_asm"] 30 | -------------------------------------------------------------------------------- /resources/ssa.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | 0 [ label = " Label @ 0x1000\l\l0x001000 A0(1) = 0x14\l0x001004 A1(1) = 0xA\l0x001008 if A0(1) == A1(1) (0x100C, 0x1028)\l\l " ] 3 | 1 [ 
label = "\l Label @ 0x100C\l\l0x00100C A2(1) = A0(1) + A1(1)\l0x001010 A3(1) = 0x1\l0x001014 Jmp 0x1018\l\l " ] 4 | 2 [ label = "\l Label @ 0x1018\l\l0x000000 A3(2) = φ(A3(1), A3(3))\l0x000000 A2(2) = φ(A2(1), A2(3))\l0x001018 Z1(1) = 0x5\l0x000000 A4(1) = A2(2) + Z1(1)\l0x00101C Z1(2) = 0x1\l0x000000 A5(1) = A4(1) + Z1(2)\l0x001020 Z1(3) = 0x0\l0x000000 A6(1) = A3(2) + Z1(3)\l0x001024 Jmp 0x1034\l\l " ] 5 | 3 [ label = "\l Label @ 0x1028\l\l0x001028 A2(3) = A0(1) - A1(1)\l0x00102C A3(3) = 0x2\l0x001030 Jmp 0x1018\l\l " ] 6 | 4 [ label = "\l Label @ 0x1034\l\l0x001034 Ret\l\l " ] 7 | 0 -> 1 [ ] 8 | 0 -> 3 [ ] 9 | 1 -> 2 [ ] 10 | 2 -> 4 [ ] 11 | 3 -> 2 [ ] 12 | } 13 | -------------------------------------------------------------------------------- /tools/program_generator/README.md: -------------------------------------------------------------------------------- 1 | # Program Generator 2 | 3 | SFUZZ currently only supports RISC-V and I had some trouble compiling many actual programs to RISC-V so that I could test SFUZZ. Due to this, I decided to write this program generator so that I could randomly generate different test cases of varying complexity. This testing method is far from perfect, but it was a fun little side project while working on SFUZZ, and I think it is still decent for quickly testing basic fuzzer capabilities. 4 | 5 | The generated program reads in a specified number of bytes from an input file into a buffer which is then passed into various different comparisons. These eventually lead to a crash if enough checks are passed. The complexity of the generated program can easily be modified through a configuration variable and enables the generation of programs ranging from ~100 to several million lines of code depending on the user's testing preferences. The generated comparison depth is also handled based on this complexity. 
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 seal9055 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tools/trace_full_regs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/zsh 2 | 3 | ''' 4 | Script used to trace the register state of a program running under qemu and format it so that I can 5 | just run `diff` on the traces generated by qemu and my fuzzer to find bugs in my JIT implementation 6 | ''' 7 | 8 | # Requires target program and the pc to stop tracing at 9 | if [ "$#" -ne 2 ]; then 10 | echo "Usage: ./trace_full_regs.sh " 11 | exit 12 | fi 13 | 14 | # gdb-script that runs `info reg` on every instruction until $last_pc 15 | echo "set pagination off" >> script 16 | echo "set logging file gdb.output" >> script 17 | echo "set logging on" >> script 18 | echo "" >> script 19 | echo "target remote :1234" >> script 20 | echo "" >> script 21 | echo "while(\$pc != $2)" >> script 22 | echo " info reg" >> script 23 | echo " si" >> script 24 | echo "end" >> script 25 | echo "" >> script 26 | echo "set logging off" >> script 27 | echo "quit" >> script 28 | 29 | # Run qemu with gdb 30 | qemu-riscv64 -g 1234 ./$1 & 31 | gdb-multiarch ./$1 --command=script 32 | 33 | # Format output 34 | cat gdb.output | grep -v 'in' | tr -s ' ' | cut -d ' ' -f1,2 | cut -d$'\t' -f1 > trace 35 | sed -i '/pc/ s/$/\n/' trace 36 | 37 | rm gdb.output 38 | rm script 39 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/deps/rand_core-e25943b7452e6ba6.d: -------------------------------------------------------------------------------- 1 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/rand_core-e25943b7452e6ba6.rmeta: /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/lib.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/block.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/error.rs 
/home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/impls.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/le.rs 2 | 3 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/rand_core-e25943b7452e6ba6.d: /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/lib.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/block.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/error.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/impls.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/le.rs 4 | 5 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/lib.rs: 6 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/block.rs: 7 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/error.rs: 8 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/impls.rs: 9 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/le.rs: 10 | -------------------------------------------------------------------------------- /tools/program_generator/target/.rustc_info.json: -------------------------------------------------------------------------------- 1 | {"rustc_fingerprint":15743540453928543069,"outputs":{"4614504638168534921":{"success":true,"status":"","code":0,"stdout":"rustc 1.67.0-nightly (11ebe6512 2022-11-01)\nbinary: rustc\ncommit-hash: 11ebe6512b4c77633c59f8dcdd421df3b79d1a9f\ncommit-date: 2022-11-01\nhost: x86_64-unknown-linux-gnu\nrelease: 1.67.0-nightly\nLLVM version: 
15.0.4\n","stderr":""},"10376369925670944939":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/seal/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"llvm14-builtins-abi\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_has_atomic_equal_alignment=\"16\"\ntarget_has_atomic_equal_alignment=\"32\"\ntarget_has_atomic_equal_alignment=\"64\"\ntarget_has_atomic_equal_alignment=\"8\"\ntarget_has_atomic_equal_alignment=\"ptr\"\ntarget_has_atomic_load_store=\"16\"\ntarget_has_atomic_load_store=\"32\"\ntarget_has_atomic_load_store=\"64\"\ntarget_has_atomic_load_store=\"8\"\ntarget_has_atomic_load_store=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_thread_local\ntarget_vendor=\"unknown\"\nunix\n","stderr":""},"15697416045686424142":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n","stderr":""}},"successes":{}} -------------------------------------------------------------------------------- /tools/program_generator/src/rng.rs: -------------------------------------------------------------------------------- 1 | //! This is used to expose an api for an rng-object that can safely be used in a global variable. 2 | //! This required locks, which makes it quite slow. 
3 | 4 | use std::sync::Mutex; 5 | 6 | use rand_xoshiro::rand_core::RngCore; 7 | use rand_xoshiro::Xoroshiro64Star; 8 | use rand_xoshiro::rand_core::SeedableRng; 9 | 10 | /// Used to seed randomness based on cpu timestamp 11 | fn rdtsc() -> u64 { 12 | unsafe { std::arch::x86_64::_rdtsc() } 13 | } 14 | 15 | /// Randomness exposing api that can be used in a global and uses a faster rand implementation than 16 | /// the standard Rand crate 17 | pub struct Rng { 18 | rng: Mutex, 19 | } 20 | 21 | impl Rng { 22 | /// Create a new Rand object 23 | pub fn new() -> Self { 24 | Self { 25 | rng: Mutex::new(Xoroshiro64Star::seed_from_u64(rdtsc())) 26 | } 27 | } 28 | 29 | /// Return a random number 30 | pub fn gen(&self) -> usize { 31 | self.rng.lock().unwrap().next_u64() as usize 32 | } 33 | 34 | /// Return a random byte-string, ascii-range 1-0xff (inclusive) 35 | pub fn next_string(&self, max_length: usize, min: usize, max: usize) -> Vec { 36 | // Create a random byte-string 37 | let rand_bytes = (1..max_length).map(|_| { 38 | self.gen_range(min, max) as u8 39 | }).collect::>(); 40 | 41 | rand_bytes 42 | } 43 | 44 | /// Return 2 random 32-bit unsigned integers 45 | pub fn get2_rand(&self) -> (usize, usize) { 46 | let tmp = self.rng.lock().unwrap().next_u64(); 47 | ((tmp & 0xffffffff) as usize, (tmp >> 32) as usize) 48 | } 49 | 50 | /// Return a random number up to `max` 51 | pub fn next_num(&self, max: usize) -> usize { 52 | if max == 0 { 53 | return 0; 54 | } 55 | self.rng.lock().unwrap().next_u64() as usize % max 56 | } 57 | 58 | /// Return a random value in the range min..max, inclusive of min and exclusive of max 59 | pub fn gen_range(&self, min: usize, max: usize) -> usize { 60 | if max == min { 61 | return min; 62 | } 63 | 64 | (self.rng.lock().unwrap().next_u64() as usize % (max - min)) + min 65 | } 66 | } 67 | -------------------------------------------------------------------------------- 
/tools/program_generator/target/debug/.fingerprint/program_generator-97a5f69bcae5c740/output-lib-program_generator: -------------------------------------------------------------------------------- 1 | {"message":"`#![feature]` may not be used on the stable release channel","code":{"code":"E0554","explanation":"Feature attributes are only allowed on the nightly release channel. Stable or\nbeta compilers will not comply.\n\nErroneous code example:\n\n```ignore (depends on release channel)\n#![feature(lang_items)] // error: `#![feature]` may not be used on the\n // stable release channel\n```\n\nIf you need the feature, make sure to use a nightly release of the compiler\n(but be warned that the feature may be removed or altered in the future).\n"},"level":"error","spans":[{"file_name":"src/lib.rs","byte_start":11,"byte_end":24,"line_start":1,"line_end":1,"column_start":12,"column_end":25,"is_primary":true,"text":[{"text":"#![feature(variant_count)]","highlight_start":12,"highlight_end":25}],"label":null,"suggested_replacement":null,"suggestion_applicability":null,"expansion":null}],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror[E0554]\u001b[0m\u001b[0m\u001b[1m: `#![feature]` may not be used on the stable release channel\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m--> \u001b[0m\u001b[0msrc/lib.rs:1:12\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m\u001b[1m\u001b[38;5;12m1\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m#![feature(variant_count)]\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;9m^^^^^^^^^^^^^\u001b[0m\n\n"} 2 | {"message":"`#![feature]` may not be used on the stable release channel","code":{"code":"E0554","explanation":"Feature attributes are only allowed on the nightly release channel. 
Stable or\nbeta compilers will not comply.\n\nErroneous code example:\n\n```ignore (depends on release channel)\n#![feature(lang_items)] // error: `#![feature]` may not be used on the\n // stable release channel\n```\n\nIf you need the feature, make sure to use a nightly release of the compiler\n(but be warned that the feature may be removed or altered in the future).\n"},"level":"error","spans":[{"file_name":"src/lib.rs","byte_start":38,"byte_end":47,"line_start":2,"line_end":2,"column_start":12,"column_end":21,"is_primary":true,"text":[{"text":"#![feature(once_cell)]","highlight_start":12,"highlight_end":21}],"label":null,"suggested_replacement":null,"suggestion_applicability":null,"expansion":null}],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror[E0554]\u001b[0m\u001b[0m\u001b[1m: `#![feature]` may not be used on the stable release channel\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m--> \u001b[0m\u001b[0msrc/lib.rs:2:12\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m\u001b[1m\u001b[38;5;12m2\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m#![feature(once_cell)]\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;9m^^^^^^^^^\u001b[0m\n\n"} 3 | {"message":"aborting due to 2 previous errors","code":null,"level":"error","spans":[],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror\u001b[0m\u001b[0m\u001b[1m: aborting due to 2 previous errors\u001b[0m\n\n"} 4 | {"message":"For more information about this error, try `rustc --explain E0554`.","code":null,"level":"failure-note","spans":[],"children":[],"rendered":"\u001b[0m\u001b[1mFor more information about this error, try `rustc --explain E0554`.\u001b[0m\n"} 5 | -------------------------------------------------------------------------------- 
/tools/program_generator/target/debug/.fingerprint/program_generator-71b575e69d616725/output-test-lib-program_generator: -------------------------------------------------------------------------------- 1 | {"message":"`#![feature]` may not be used on the stable release channel","code":{"code":"E0554","explanation":"Feature attributes are only allowed on the nightly release channel. Stable or\nbeta compilers will not comply.\n\nErroneous code example:\n\n```ignore (depends on release channel)\n#![feature(lang_items)] // error: `#![feature]` may not be used on the\n // stable release channel\n```\n\nIf you need the feature, make sure to use a nightly release of the compiler\n(but be warned that the feature may be removed or altered in the future).\n"},"level":"error","spans":[{"file_name":"src/lib.rs","byte_start":11,"byte_end":24,"line_start":1,"line_end":1,"column_start":12,"column_end":25,"is_primary":true,"text":[{"text":"#![feature(variant_count)]","highlight_start":12,"highlight_end":25}],"label":null,"suggested_replacement":null,"suggestion_applicability":null,"expansion":null}],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror[E0554]\u001b[0m\u001b[0m\u001b[1m: `#![feature]` may not be used on the stable release channel\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m--> \u001b[0m\u001b[0msrc/lib.rs:1:12\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m\u001b[1m\u001b[38;5;12m1\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m#![feature(variant_count)]\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;9m^^^^^^^^^^^^^\u001b[0m\n\n"} 2 | {"message":"`#![feature]` may not be used on the stable release channel","code":{"code":"E0554","explanation":"Feature attributes are only allowed on the nightly release channel. 
Stable or\nbeta compilers will not comply.\n\nErroneous code example:\n\n```ignore (depends on release channel)\n#![feature(lang_items)] // error: `#![feature]` may not be used on the\n // stable release channel\n```\n\nIf you need the feature, make sure to use a nightly release of the compiler\n(but be warned that the feature may be removed or altered in the future).\n"},"level":"error","spans":[{"file_name":"src/lib.rs","byte_start":38,"byte_end":47,"line_start":2,"line_end":2,"column_start":12,"column_end":21,"is_primary":true,"text":[{"text":"#![feature(once_cell)]","highlight_start":12,"highlight_end":21}],"label":null,"suggested_replacement":null,"suggestion_applicability":null,"expansion":null}],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror[E0554]\u001b[0m\u001b[0m\u001b[1m: `#![feature]` may not be used on the stable release channel\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m--> \u001b[0m\u001b[0msrc/lib.rs:2:12\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m\u001b[1m\u001b[38;5;12m2\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m#![feature(once_cell)]\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;9m^^^^^^^^^\u001b[0m\n\n"} 3 | {"message":"aborting due to 2 previous errors","code":null,"level":"error","spans":[],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror\u001b[0m\u001b[0m\u001b[1m: aborting due to 2 previous errors\u001b[0m\n\n"} 4 | {"message":"For more information about this error, try `rustc --explain E0554`.","code":null,"level":"failure-note","spans":[],"children":[],"rendered":"\u001b[0m\u001b[1mFor more information about this error, try `rustc --explain E0554`.\u001b[0m\n"} 5 | -------------------------------------------------------------------------------- /tools/program_generator/target/debug/deps/rand_xoshiro-826023e1e10f2594.d: 
-------------------------------------------------------------------------------- 1 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/rand_xoshiro-826023e1e10f2594.rmeta: /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/lib.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/common.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/splitmix64.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64star.rs 2 | 3 | 
/home/seal/github/sfuzz/tools/program_generator/target/debug/deps/rand_xoshiro-826023e1e10f2594.d: /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/lib.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/common.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/splitmix64.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64star.rs 4 | 5 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/lib.rs: 6 | 
/home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/common.rs: 7 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/splitmix64.rs: 8 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128starstar.rs: 9 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plusplus.rs: 10 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plus.rs: 11 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256starstar.rs: 12 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plusplus.rs: 13 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plus.rs: 14 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512starstar.rs: 15 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plusplus.rs: 16 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plus.rs: 17 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plus.rs: 18 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plusplus.rs: 19 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128starstar.rs: 20 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64starstar.rs: 21 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64star.rs: 22 | -------------------------------------------------------------------------------- /tools/program_generator/src/compile.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | Program, Type, Block, Operation 3 | }; 4 | 5 | /// 
Prints out the generated program to stdout 6 | const DEBUG_PRINT: bool = false; 7 | 8 | /// Can be set to false to not actually write cases to disk & compile while debugging 9 | const WRITE_TO_DISK: bool = true; 10 | 11 | /// Compiler used to compile the c-code once generated 12 | const COMPILER: &str = "gcc"; 13 | 14 | /// Contains information that the compiler functions require while generating the c code from the 15 | /// intermediate representation 16 | #[derive(Default, Debug)] 17 | pub struct Compiler { 18 | /// Scope depth, used to handle indentation 19 | cur_depth: usize, 20 | 21 | /// The actual c code that will be compiled 22 | code: String, 23 | 24 | /// The program being generated in an intermediate representation 25 | program: Program, 26 | } 27 | 28 | impl Compiler { 29 | /// Create a new compiler object 30 | pub fn new(program: Program) -> Self { 31 | Self { 32 | cur_depth: 0, 33 | code: String::new(), 34 | program, 35 | } 36 | } 37 | 38 | /// Debug print for the c-code 39 | pub fn print_code(&self) { 40 | println!("Generated the following code:"); 41 | println!("+-------------------------------------------------+"); 42 | println!("{}", self.code); 43 | println!("+-------------------------------------------------+"); 44 | } 45 | 46 | /// Insert indentation into the code based on the current depth 47 | fn insert_indent(&mut self) { 48 | for _ in 0..self.cur_depth { 49 | self.code.push_str(" "); 50 | } 51 | } 52 | 53 | /// Setup headers and begin actual program translation 54 | pub fn translate_program(&mut self) { 55 | self.code.push_str("#include \n"); 56 | self.code.push_str("#include \n"); 57 | self.code.push_str("#include \n\n"); 58 | 59 | for func in &self.program.function_list { 60 | let name = &func.name; 61 | if name != "main" { 62 | self.code.push_str(&format!("void {}(unsigned char *buf);\n", name)); 63 | } 64 | } 65 | self.code.push('\n'); 66 | 67 | // Traverse function list in reverse and translate each to C 68 | for i in 
0..self.program.function_list.len() { 69 | self.translate_function_header(i); 70 | self.translate_function_body(i); 71 | } 72 | } 73 | 74 | /// Translate the header of a function to c 75 | fn translate_function_header(&mut self, index: usize) { 76 | let mut first = true; 77 | let func = self.program.function_list[index].clone(); 78 | self.code.push_str(&format!("{} {}(", func.typ, func.name)); 79 | for arg in func.arguments { 80 | if first { 81 | first = false; 82 | } else { 83 | self.code.push_str(", "); 84 | } 85 | if arg.0 == Type::Argv { 86 | self.code.push_str(&format!("char **{}", arg.1)); 87 | } else { 88 | self.code.push_str(&format!("{} {}", arg.0, arg.1)); 89 | } 90 | 91 | } 92 | self.code.push_str(")\n"); 93 | } 94 | 95 | /// Emit an operation to the c-code 96 | fn emit_operation(&mut self, operation: &Operation) { 97 | self.insert_indent(); 98 | match operation { 99 | Operation::If(_, b) => { 100 | self.code.push_str(&format!("{}\n", operation)); 101 | self.translate_block(b); 102 | }, 103 | _ => self.code.push_str(&format!("{};\n", operation)), 104 | }; 105 | } 106 | 107 | /// Translate an entire block to c-code while taking care of handling proper scoping and 108 | /// indentation 109 | fn translate_block(&mut self, block: &Block) { 110 | self.insert_indent(); 111 | self.code.push_str("{\n"); 112 | self.cur_depth += 1; 113 | 114 | for operation in &block.stmt_list { 115 | self.emit_operation(operation); 116 | } 117 | 118 | self.cur_depth -= 1; 119 | self.insert_indent(); 120 | self.code.push_str("}\n\n"); 121 | } 122 | 123 | /// Translate the body of a function 124 | fn translate_function_body(&mut self, index: usize) { 125 | let body = self.program.function_list[index].body.clone(); 126 | self.translate_block(&body); 127 | } 128 | } 129 | 130 | /// Compile the previously generated program to an elf binary 131 | pub fn compile(program: Program) { 132 | if DEBUG_PRINT { 133 | println!("Received the following program: \n{:#?}", program); 134 | } 135 | 
136 | let mut compiler = Compiler::new(program); 137 | compiler.translate_program(); 138 | 139 | if DEBUG_PRINT { 140 | compiler.print_code(); 141 | } 142 | 143 | if WRITE_TO_DISK { 144 | // Write the program to disk 145 | std::fs::write("generated_program.c", &compiler.code) 146 | .expect("Failed to write generated program to disk"); 147 | 148 | // Compile the generated program 149 | assert!(std::process::Command::new(COMPILER) 150 | .arg("generated_program.c") 151 | .arg("-o") 152 | .arg("generated_program") 153 | .arg("-g") 154 | .status() 155 | .unwrap() 156 | .success()); 157 | } 158 | 159 | println!("[+] Done"); 160 | } 161 | -------------------------------------------------------------------------------- /resources/notes.md: -------------------------------------------------------------------------------- 1 | #### These are just some random notes I'm taking while thinking about how I want to develop certain parts. 2 | 3 | 4 | ## Mutations 5 | 6 | > https://lcamtuf.blogspot.com/2014/08/binary-fuzzing-strategies-what-works.html 7 | > https://www.usenix.org/system/files/sec19-lyu.pdf 8 | > https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf [21 , 28, 29 , 37 , 54] 9 | 10 | 1. bitflips (1, 2, 4) 11 | 2. Byte flips (1, 2, 4) 12 | 3. add/sub integers (+-35), byte, word, dword, qword (signed & unsigned) 13 | 4. Insert common breaking points (-1, MAXINT, etc) 14 | 5. Increase/decrease size 15 | 6. Splice test cases together 16 | x. Havoc 17 | 18 | Setup dictionary 19 | > https://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html 20 | 21 | ## Seed Scheduling 22 | 23 | Analysis: 24 | 1. Graph centrality 25 | 2. Mutation history to determine when to stop focusing on "hard" edges 26 | 27 | Dynamic: 28 | 1. Execution time 29 | 2. Less bytes 30 | 3. Coverage 31 | 4. How often the seed has been chosen 32 | 5. Number of inputs with same cov 33 | 6. Generated test cases based on this input with same cov 34 | 35 | - 36 | 1. 
Decrease whenever no new cov is found 37 | 38 | ## Coverage eval 39 | 40 | > [Cov-sensitivity] https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf 41 | - Tracking call stack seems kinda sick. n=2-gram cov seems nice too 42 | - Assign different coverage metrics to different threads and sync corpus 43 | - Assign different coverage metrics to different threads and don't sync 44 | 45 | > [Cerberos] https://dl.acm.org/doi/pdf/10.1145/3338906.3338975 46 | - Complexity score calculated for each function that can be correlated to inputs via their 47 | coverage trace 48 | 49 | - Idea is to assign rank using the 5 metrics below, and to then queue up all seeds based on 50 | their weight. Uses Pareto frontier 51 | 52 | - exec time, number of covs, unique edges, file size, complexity score 53 | 54 | > [Directed fuzzing] https://dl.acm.org/doi/pdf/10.1145/3133956.3134020 55 | - Analyze program callgraph/cfg to direct the fuzzer to specific target points in the program 56 | 57 | > https://www.ndss-symposium.org/wp-content/uploads/2020/02/24422-paper.pdf 58 | - Don't treat all coverage equally, label security-relevant edges and assign weights based 59 | on their path to vulnerable functions (e.g. 
memcpy or a lot of memory operations) 60 | 61 | > https://arxiv.org/pdf/2203.12064.pdf 62 | - Graph centrality analysis 63 | 64 | > https://mboehme.github.io/paper/CCS16.pdf 65 | - AFLFast 66 | - More energy to low frequency paths 67 | - Model seed scheduling as Markov chains 68 | 69 | > https://www.usenix.org/system/files/woot20-paper-fioraldi.pdf 70 | > https://dl.acm.org/doi/pdf/10.1145/3133956.3134073 71 | > https://www.ndss-symposium.org/wp-content/uploads/2017/09/ndss2017_10-2_Rawat_paper.pdf 72 | 73 | Things to potentially maintain for each input to determine energy: 74 | - Size 75 | - Execution time 76 | - Which cov-units it hits & how rare each of them is (potentially complexity score for each 77 | cov-unit as well) 78 | - 79 | 80 | Cull corpus: 81 | - Track which cov points an input hits to potentially dedup/remove obsolete cases 82 | 83 | - Periodically cull entries that are superseded by other entries (maybe check how often the 84 | entry was hit too) 85 | 86 | Add case to inputs: 87 | If a case produces the same cov, but a block is executed a different number of times than in 88 | previous cases, it is regarded as interesting 89 | 90 | Timeout: 91 | - Timeout: 5x initial calibrated exec speed 92 | 93 | Scheduling: 94 | - Whenever a seed is chosen, execute it n times instead of just once 95 | - Alternatively just go through all seeds sequentially and execute each seed n times 96 | 97 | Calculate score: 98 | Initial: 99 | - Base it on size & exec-time 100 | 101 | Dynamic: 102 | 1. 103 | - weight = number of cov points an input hits 104 | - weight += 25% / unique/new cov 105 | - weight += bonus (based on shorter exec/shorter input) 106 | - avg_weight = average of all input weights 107 | - Assign each input weight based on how far above/below the avg they are 108 | 2. 
109 | ```rs 110 | fn calculate_energy(input: Input) { 111 | 112 | } 113 | 114 | fn fuzz_loop() { 115 | loop { 116 | id = get_next_seed(); 117 | p = calculate_energy(corpus.inputs[id]); 118 | 119 | for i in 0..p { 120 | mutate(corpus.inputs[i]); 121 | fuzz(corpus.inputs[i]); 122 | } 123 | } 124 | } 125 | ``` 126 | 127 | 128 | ## Crash deduping 129 | 1. AFL 130 | - Crash trace includes a tuple not seen before 131 | - Crash trace is missing a tuple seen before 132 | 133 | 2. stuff 134 | - Don't thread-share the crash_map, and instead work by sending crashing inputs to the main 135 | thread to handle 136 | 137 | 138 | ## Other 139 | > https://arxiv.org/abs/2009.06124 - Scaling 140 | 141 | > Coverage-tracking 142 | - priority queue, increase priority of input whenever it finds new coverage 143 | ```rs 144 | let hash = calculate_hash(from, to); 145 | hash &= bitmap_size; 146 | let idx = hash / 64; 147 | let bit = 1 << (hash % 64); 148 | if (state->cov_bitmap[idx] & bit) == 0 { 149 | state->cov_bitmap[idx] |= bit; 150 | state->exit_reason = 7; 151 | state->cov_from = from; 152 | state->cov_to = to; 153 | state->reenter_pc = pc+4; 154 | return 155 | } 156 | ``` 157 | -------------------------------------------------------------------------------- /tools/scraper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import requests 3 | import urllib 4 | from requests_html import HTML 5 | from requests_html import HTMLSession 6 | import os 7 | import sys 8 | import random 9 | import string 10 | import subprocess 11 | from subprocess import DEVNULL 12 | import signal 13 | import time 14 | from datetime import timedelta 15 | import threading 16 | 17 | ''' 18 | Web scrapper that makes use of google's search engine to collect files of a specific filetype. Let 19 | it run as long as you wish, and ctrl-c to stop execution. 
A signal handler will then remove all 20 | incorrectly downloaded files and dedup the collection to create a unique set of files given a 21 | specific type. 22 | 23 | Just modify FILE_TYPE to specify what type of file you want. Only filetypes that are indexable by 24 | googles `filetype=XXX` are supported. 25 | ''' 26 | 27 | # File type to collect 28 | FILE_TYPE = "pdf" 29 | 30 | # Delay between google-requests, can help reduce throttling (in seconds). 0-30 seconds seem to be 31 | # best in most cases, the optimal delay varies 32 | REQUEST_DELAY = 0 33 | 34 | # Number of urls containing pdf files for which download attempts were made 35 | num_downloads = 0 36 | 37 | # Number of downloads that were attempted but failed 38 | failed_downloads = 0 39 | 40 | # Time that has elapsed since program start 41 | elapsed = 0 42 | 43 | # Get the source code of a requested page 44 | def get_source(url): 45 | session = HTMLSession() 46 | response = session.get(url) 47 | return response 48 | 49 | # Return google search results for a query 50 | def scrape_google(query): 51 | query = urllib.parse.quote_plus(query) 52 | response = get_source("https://www.google.co.uk/search?q=" + query) 53 | 54 | links = list(response.html.absolute_links) 55 | google_domains = ('https://www.google.', 56 | 'https://google.', 57 | 'https://webcache.googleusercontent.', 58 | 'http://webcache.googleusercontent.', 59 | 'https://policies.google.', 60 | 'https://support.google.', 61 | 'https://maps.google.') 62 | 63 | # Remove all irrelevant links 64 | for url in links[:]: 65 | if url.startswith(google_domains): 66 | links.remove(url) 67 | return links 68 | 69 | # Download requested url 70 | def download_file(file_name, url): 71 | response = requests.get(url) 72 | open(file_name, "wb").write(response.content) 73 | 74 | # Remove all files that were mistakenly downloaded and don't have the correct type 75 | def remove_duds(): 76 | ret = 0 77 | for file_name in os.listdir("seeds"): 78 | output = 
str(subprocess.check_output(f"file seeds/{file_name}", shell=True, stderr=DEVNULL)) 79 | # If the file we downloaded has the incorrect file-type, remove 80 | if FILE_TYPE not in output and FILE_TYPE.upper() not in output: 81 | os.remove(f"seeds/{file_name}") 82 | ret += 1 83 | return ret 84 | 85 | # Remove all duplicates in the downloaded files 86 | def dedup(): 87 | ret = 0 88 | unique = [] 89 | for file_name in os.listdir("seeds"): 90 | if os.path.isfile(file_name): 91 | filehash = md5.md5(file(file_name).read()).hexdigest() 92 | if filehash not in unique: 93 | unique.append(filehash) 94 | else: 95 | os.remove(file_name) 96 | ret += 1 97 | return ret 98 | 99 | # Print out the overall results 100 | def print_results(duds_removed, deduped): 101 | global elapsed 102 | global num_downloads 103 | global failed_downloads 104 | 105 | num_files = len([name for name in os.listdir('seeds')]) 106 | print("\n\n\n+===================================================+") 107 | print(f"Runtime: {str(timedelta(seconds=elapsed))}") 108 | print(f"Total initial download attempts: {num_downloads}") 109 | print(f"Failed downloads: {failed_downloads}") 110 | print(f"Incorrect file-types removed: {duds_removed}") 111 | print(f"Duplicate files removed: {deduped}") 112 | print(f"A total of {num_files} unique files now exist in the `seeds` directory") 113 | print("+===================================================+\n\n") 114 | 115 | # Signal handler to print out invoke some filtering functions on the seed collections and print out 116 | # results 117 | def signal_handler(sig, frame): 118 | duds_removed = remove_duds() 119 | deduped = dedup() 120 | 121 | print_results(duds_removed, deduped) 122 | os.kill(os.getpid(), signal.SIGQUIT) 123 | 124 | # Print out how much time has passed every second 125 | def timer(): 126 | global elapsed 127 | while True: 128 | elapsed += 1 129 | sys.stdout.write("\r") 130 | sys.stdout.write("Runtime: " + str(timedelta(seconds=elapsed))) 131 | time.sleep(1) 132 
| 133 | def main(): 134 | global num_downloads 135 | global failed_downloads 136 | global elapsed 137 | 138 | # Start timer thread 139 | threading.Thread(target=timer).start() 140 | 141 | # Create seed directory if it doesnt already exist 142 | os.makedirs("seeds", exist_ok=True) 143 | 144 | while True: 145 | # Collect a list of urls that contain pdf files 146 | rand_search = ''.join(random.choice(string.ascii_lowercase) for i in range(10)) 147 | query = f"filetype:{FILE_TYPE} {rand_search}" 148 | try: 149 | urls = scrape_google(query) 150 | # Download all previously found files 151 | num_downloads += len(urls) 152 | for url in urls: 153 | rand_name = ''.join(random.choice(string.ascii_lowercase) for i in range(10)) 154 | try: 155 | download_file(f"seeds/{rand_name}", url) 156 | except: 157 | failed_downloads += 1 158 | except: 159 | pass 160 | 161 | time.sleep(REQUEST_DELAY) 162 | 163 | if __name__ == "__main__": 164 | signal.signal(signal.SIGINT, signal_handler) 165 | print("Hit CTRL-C to stop execution at any time") 166 | main() 167 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SFUZZ 2 | Start date: Dec, 2021 3 | 4 | This is a coverage-guided, emulation based greybox fuzzer that makes use of a custom Just-In-Time compiler to achieve near-native performance. It works by lifting RISC-V elf binaries to an intermediate representation before JIT compiling them to x86 during execution. During JIT compilation the code is instrumented to enable fuzzing-improvements such as coverage tracking, asan, cmpcov, or snapshot-based fuzzing. 5 | 6 |
7 | 8 | #### Features 9 | - Multi-threaded, supporting an arbitrary amount of threads and scaling almost linearly 10 | - Custom JIT compiler for high performance and more importantly customizability that is harder 11 | to achieve with other solutions such as qemu 12 | - Custom memory management unit to once again allow high customization and highly beneficial features 13 | such as byte-level permission checks and dirty-bit based emulator resets. Additionally hooks to allow for 14 | safe usage of heap-routines are implemented. 15 | - Virtualized file management to allow easy in memory fuzzing 16 | - Snapshot based fuzzing, so a target's memory/register state can be snapshotted during execution to 17 | base all future fuzz cases off of this baseline 18 | - Edge-level coverage tracking, and coverage guided fuzzing based on this feedback 19 | - Various mutators, crash deduplication, and a simple seed scheduling algorithm 20 | 21 |
22 |

23 | 24 | #### Description 25 | 26 | The objective of this project is to highlight the benefits of using an emulated environment for 27 | fuzzing. Many previous fuzzers based on emulation exist, but they all almost exclusively use the qemu 28 | emulation engine for the underlying emulation. While this engine does have a fairly mature 29 | just-in-time compiler and generates very good code, it is not designed for fuzzing. During fuzzing, 30 | we intend to run the same process thousands of times per second. This makes room for specialized 31 | optimizations that qemu does not make strong use of such as reusing the same memory space for each 32 | process run and only resetting a limited amount of memory via dirty bit mechanics. 33 | 34 | In many ways, this is more of a proof-of-concept that I wanted to work on to learn about compiler internals, and have an emulation-based playground to play around with various fuzzing techniques such as different coverage metrics, seed schedulers, and snapshot-based fuzzing. With more JIT optimizations and most importantly, extensions to include more popular architectures such as mips or arm this could however certainly be used to efficiently fuzz closed source code that cannot simply be instrumented through recompilation. 35 | 36 | Given the testing I have done so far, sfuzz has significantly less overhead than many other popular fuzzers, which results in very fast performance, especially for small fuzz cases. 
37 | 38 | More details on the features/choices made for this fuzzer are listed in the accompanying blogpost (https://seal9055.com/blog/fuzzing/sfuzz) and the documentation files listed below: 39 | - Memory Management - [memory_management.md](https://github.com/seal9055/sfuzz/tree/main/docs/memory_management.md) 40 | - Code Generation - [code_gen.md](https://github.com/seal9055/sfuzz/tree/main/docs/code_gen.md) 41 | - Fuzzer implementation/features - [fuzzing.md](https://github.com/seal9055/sfuzz/tree/main/docs/fuzzing.md) 42 | - Some simple tests - [benchmarking.md](https://github.com/seal9055/sfuzz/tree/main/docs/benchmarking.md) 43 | 44 | #### Usage 45 | 46 | This entire fuzzer is written in Rust, so after cloning the repository, just run `cargo build --release` to compile. 47 | 48 | Since the fuzzer currently only supports RISC-V, the target needs to be compiled to RISC-V using the below toolchain (or a similar one). Alternatively, if you already have a RISC-V binary, that will work perfectly fine too. 49 | 50 | Once this is set up, just create input/output directories, add some initial seed files to the input directory and start up the fuzzer. 51 | 52 | `./sfuzz -i in -o out -- ./test_cases/simple_test @@` 53 | 54 | Additional flags can be passed in via command-line options to specify the number of threads, enable snapshot fuzzing, add a dictionary to the mutator, etc. The additional options can be listed by running sfuzz with the `-h` flag. 55 | 56 | If you wish to test the fuzzer against some targets of varying complexity, the program_generator at `tools/program_generator` can be used to automatically generate programs of varying complexity. Note that you will require a RISC-V toolchain to then compile the target. 57 | 58 | #### Riscv toolchain to compile binaries for the fuzzer 59 | 60 | This sets up a toolchain to compile riscv binaries that can be loaded/used by this project.
61 | ``` 62 | Riscv compiler/tooling: 63 | sudo apt-get install autoconf automake autotools-dev curl python3 libmpc-dev libmpfr-dev \ 64 | libgmp-dev gawk build-essential bison flex texinfo gperf libtool patchutils bc zlib1g-dev \ 65 | libexpat-dev 66 | git clone https://github.com/riscv/riscv-gnu-toolchain && cd riscv-gnu-toolchain 67 | ./configure --prefix=/opt/riscv --with-arch=rv64i 68 | sudo make 69 | 70 | Debugger: 71 | gdb-multiarch 72 | ``` 73 | 74 | #### TODO 75 | 76 | This list represents a set of features that I plan on implementing in the future. 77 | 78 | - [X] Working Memory management unit 79 | - [X] JIT Compiler 80 | - [X] Virtualized files for in-memory fuzzing 81 | - [X] Byte level permission checks + hooked/safe allocators 82 | - [X] Track edge level coverage 83 | - [X] Persistent mode to fuzz in small loops around target functions 84 | - [X] Crash deduping / unique crashes 85 | - [X] Update mutators to include more options 86 | - [X] Seed Scheduling 87 | - [X] CmpCov to get past magic values and checksums 88 | - [X] Add some tooling around the fuzzer 89 | - [ ] Proper benchmarking 90 | - [ ] Implement RISC-V M & A extensions, so that the JIT can use glibc instead of newlib 91 | - [ ] Replace assembler to improve compilation speed 92 | - [ ] Support more architectures (eg. mips, arm) 93 | - [ ] JIT optimizations, and another attempt at register allocation 94 | 95 | #### References 96 | 97 | * All papers listed in [fuzzing.md](https://github.com/seal9055/sfuzz/blob/main/docs/fuzzing.md) 98 | * Emulation based fuzzing - Brandon Falk [GamozoLabs](https://gamozolabs.github.io/) 99 | * Intel Software Developer Manuals 100 | * RISCV User ISA specification 101 | * Rv8: a high performance RISC-V to x86 binary translator - Michael Clark & Bruce Hoult 102 | * Engineering a Compiler - Keith D. Cooper & Linda Torczon 103 | * Cranelift [https://cfallin.org/blog/] - Chris Fallin 104 | * Generating Low-Overhead Dynamic Binary Translators - Mathias Payer & Thomas R.
Gross 105 | * Efficiently Computing Static Single Assignment Form and the Control Dependence Graph - Cytron et al 106 | * Computing Liveness Sets for SSA-Form Programs - Brandner et al 107 | * Linear Scan Register Allocation on SSA Form - Christian Wimmer & Michael Franz 108 | * http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf 109 | * AddressSanitizer: A Fast Address Sanity Checker 110 | https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/37752.pdf 111 | -------------------------------------------------------------------------------- /docs/benchmarking.md: -------------------------------------------------------------------------------- 1 | # Benchmarks & Testing 2 | 3 | #####  Since the fuzzer is not at a point where it can run proper benchmarks (eg. [fuzzbench](https://google.github.io/fuzzbench/)), this pretty much consists solely of sample programs I wrote and other programs I chose to include. Note that this means that my conclusions may be biased. 4 |
5 | 6 | #### Performance against a very Simple Target 7 | 8 | **Experiment-Setup** 9 | This initial test just compares the fuzzer's performance when used on a very simple test binary. This test showcases the low overhead of the fuzzer when it comes to resetting memory and running many very short cases. Arguably this is not a very important test-case since no real program will be this simple, but I found it interesting nonetheless. The program basically just has the fuzzer jump through some small if-comparison's before segfaulting, thus giving the fuzzer a crash. Any coverage guided fuzzer should be able to trivially find the crash within seconds. The corpus consists of a 100-byte file generated from `/dev/urandom`. 10 | 11 | For this test-case, all of my fuzzer's features are enabled, including coverage tracking, byte-level permissions, allocator-hooks, in-memory fuzzing, cmpcov, and snapshot based fuzzing. I set the snapshot right after the call to `open`. 12 | 13 | I will be comparing my fuzzer's performance to AFL++ while testing both qemu-emulation and compile-time instrumentation for AFL. Since this is a state-less target, I will also instrument the below code for AFL's persistent-mode fuzzing and in-memory input generation as described in AFL's [docs](https://github.com/AFLplusplus/AFLplusplus/blob/stable/instrumentation/README.persistent_mode.md). 
14 | ```c 15 | int main(int argc, char **argv) { 16 | char buf[100]; 17 | int fd = open(argv[1], O_RDONLY); 18 | 19 | read(fd, buf, 100); 20 | 21 | if (buf[0] == 0x41) { 22 | if (buf[1] == 0x42) { 23 | if (buf[2] == 0x43) { 24 | if (buf[3] == 0x44) { 25 | if (buf[4] == 0x45) { 26 | if (buf[5] == 0x46) { 27 | *(unsigned long*)0x4141414141414141 = 0; 28 | } 29 | } 30 | } 31 | } 32 | } 33 | } 34 | return 0; 35 | } 36 | ``` 37 | 38 | **Results** 39 | | Setup | Result | 40 | | --- | --- | 41 | | SFUZZ - Snapshot | 1.1 million fuzz cases per second | 42 | | SFUZZ - No-Snapshot | 650,000 fuzz cases per second | 43 | | AFL++ - QEMU | 3,500 fuzz cases per second | 44 | | AFL++ - Source-Instr. | 3,500 fuzz cases per second | 45 | | AFL++ - Source-Instr. + Persistent/in-memory | 33,000 fuzz cases per second | 46 |
47 | 48 | SFUZZ finds the crash within the first second of running and executes about 1.1 million fuzz cases per second. Disabling snapshot based fuzzing and starting each test case at the `_start` function still finds the crash immediately, but performance drops to 650,000 per second. This massive gap is because this is a very small program for which the initialization routines make up the majority of the code, so being able to skip these is very beneficial. 49 | 50 | I tested AFL++ in 2 modes, qemu and source code instrumented. Starting with qemu-mode and a -O3 compiled binary (without any snapshot/persistent fuzzing mechanisms enabled), AFL requires 2 and a half minutes to find the crash and runs at about 3500 fuzz cases per second. Taking the non-snapshot version of my fuzzer, this is a \~185x speedup. 51 | 52 | With source based instrumentation using the afl-clang-fast compiler with the flags shown below, AFL finds the crash in 3 minutes, and runs at 3500 fuzz cases per second once again. I would have expected this to run a lot faster than the qemu-based approach, but I believe that the setup should be correct. Things start to look a little different with persistent-mode/in memory fuzzing enabled. AFL++ is now able to generate 33,000 fuzz cases per second and also finds the crash in the first second. 53 | 54 | ``` 55 | AFL_USE_ASAN=1 LLVM_CONFIG=llvm-config-11 ~/AFLplusplus/afl-clang-fast ../test_cases/simple_test.c -o simple_test_afl -O3 56 | 57 | ~/AFLplusplus/afl-fuzz (-D) -i in -o out -- ./simple_test_afl @@ 58 | ``` 59 | 60 | I believe that the emulated version of AFL using qemu is the fairer comparison since my fuzzer does not require source code for its instrumentation and fully emulates. I include both metrics though since my fuzzer currently only supports RISC-V which is not a very popular architecture, and will thus generally require source code as well to recompile to RISC-V. 
61 | 62 | #### More Complex Randomly Generated Program 63 | 64 | **Experiment-Setup** 65 | In this case the target was automatically generated using the [program_generator](https://github.com/seal9055/sfuzz/tree/main/tools/program_generator) tool I wrote. It automatically generates a C program that takes its fuzz-input from a file and uses it to attempt to pass various `if`-checks. 66 | 67 | The program was generated with a complexity configuration of `9` and generated a 6200 loc target. The generated code is also saved [here](https://github.com/seal9055/sfuzz/blob/main/test_cases/generated_program.c). 68 | 69 | Apart from that, the remaining setup is exactly the same as for the first case, using a 500-byte randomly generated input-seed. For the below results, both AFL and SFUZZ were run for 15 minutes. 70 | 71 | **Results** 72 | | Setup | Result | 73 | | --- | --- | 74 | | SFUZZ - Snapshot | 142,000 fuzz cases per second, 114 unique crashes | 75 | | SFUZZ - No-Snapshot | 105,000 fuzz cases per second, 108 unique crashes | 76 | | AFL++ - QEMU | 2,400 fuzz cases per second, 85 unique crashes | 77 | | AFL++ - Source-Instr. | 2,600 fuzz cases per second, 78 unique crashes | 78 | | AFL++ - Source-Instr. + Persistent/in-memory | 30,000 fuzz cases per second, 107 unique crashes | 79 |
80 | Once again SFUZZ outperforms AFL by a pretty good margin. The performance gap is however much 81 | smaller in this case than in the previous one since more instructions are being executed for each 82 | fuzz-case resulting in clang's optimizations granting large benefits when compared to my naive JIT. 83 | 84 | Once again persistent mode adds immense performance benefits to AFL, which is reflected very heavily 85 | in its overall results. Default source code instrumentation once again seems to perform very similarly 86 | to qemu. I have no idea why this is the case or if I might be messing something up in my AFL setup, 87 | but these are the results I ended up recording. 88 | 89 | For this simple target SFUZZ with snapshotting enabled seems to have found most possible crashes by 90 | the 18 minute mark. After this point it did not find any more coverage. 91 | 92 | According to the final results, AFL seems to have kept up in terms of the unique crashes it was able 93 | to find, however, AFL++ determines a crash's uniqueness based on the path taken to reach the crash. 94 | Since there are multiple paths to reach each of the crashes inside the target that number is a 95 | little misleading. Nevertheless, AFL still found a decent amount of crashes when considering the 96 | performance deficit. I believe that this is in large part due to AFL's corpus management and seed 97 | prioritization algorithms. The ones currently in use for SFUZZ are super simple and don't perform 98 | any corpus minimization. 99 | 100 | The output screens at the end of the timeframe for both AFL and SFUZZ are listed below 101 | (snapshot/persistent mode were enabled for both SFUZZ and AFL for the below results). 102 | 103 | 104 |

105 |

106 | 107 | -------------------------------------------------------------------------------- /docs/memory_management.md: -------------------------------------------------------------------------------- 1 | # Memory Management 2 | 3 | #### Overview 4 | 5 | This component of the fuzzer is responsible for providing the memory space for the target. It provides each emulator thread an entirely separate mmu and makes sure that none of the target threads can access/corrupt the memory space of another thread. 6 | 7 | Each mmu consists of 2 contiguous blocks of memory (one for the actual memory, and another one for permissions), and an api that exposes various operations on this memory such as allocations, frees, reads, and writes. The exposed functions make use of the permissions-map to achieve byte-level permission checks (similar to ASAN) on each memory access, in addition to an allocator that performs properly checked allocations/frees. 8 | 9 | Most of the code pertaining to these features can be found in [mmu.rs](https://github.com/seal9055/sfuzz/blob/main/src/mmu.rs) More detailed descriptions of some of these features are provided below. 10 | 11 | #### Byte Level Permission Checks 12 | On most architectures, permissions are handled at the hardware level through page tables. This means that whenever an instruction tries to access a memory region, without possessing the correct permissions, an abort is generated which is then handled at the software level. Since these permissions are handled at the page table level, it prevents any incorrect access from crossing page boundaries. When it comes to exploitation, however, a couple of out-of-bounds bytes can oftentimes already be enough to compromise the security of an application, which this type of permission checking cannot handle. 13 | 14 | A tool commonly used while fuzzing is address sanitizer (also referred to as asan). 
When a binary is compiled using asan, it is instrumented at compile time with extra checks that make sure that every memory access has the correct access permissions. This tool however has a few very relevant issues. For one, it requires access to the binary's source code to recompile it with proper instrumentation. This makes it only useful when source code is available, which, especially when fuzzing embedded systems, is often not the case. Secondly, asan has a significant performance overhead. According to a study conducted by Google in 2012 (AddressSanitizer: A Fast Address Sanity Checker), it resulted in a 73% slowdown, which is quite a bit, especially when considering how reliant fuzzers are on their performance. This slowdown however was worth it due to the power of byte-level permission checks and led to 300 new bugs being discovered in the Chrome browser at the time. 15 | 16 | In this case, since the binary is being run in a custom JIT compiler, both of these drawbacks can be almost entirely mitigated. Not having source code available is not an issue at all anymore since all of the code is being generated based on the binary. As for the performance aspects, EXECUTE permissions are almost entirely free since they are checked once when a function is first compiled, and then assumed to be true for the rest of the program's execution. This would need some changes when dealing with JIT compilers that frequently change their executable memory mappings, but for 99% of use cases, it should suffice. As for load and store instructions (that require the READ and WRITE permissions), the checks consist of 5 assembly instructions (1 memory load, 1 conditional jmp, and 3 arithmetic instructions). While this results in some additional overhead when performing frequent memory accesses, it is nowhere near as expensive as address sanitizer.
17 | 18 | These permission bits mean that every out-of-bounds memory access (even if it is just a single byte) instantly results in a notification to the fuzzer which can then modify its corpus to focus on this bug and attempt to increase the out of bounds bug. This permission model also applies to library functions such as malloc & free. These are hooked to instead call custom malloc/free implementations that support this byte-level memory model. These hooked functions also include additional checks to completely destruct free'd memory so common heap bugs such as use after free's or double free's are instantly reported as well instead of leading to undefined behavior. 19 | 20 | #### Dirty-bit Memory Resets 21 | In the current implementation, each new address space is 64mb large (although this can easily be changed depending on the complexity of the target). This means that on each new fuzz case, this entire space needs to be reset to its initial state. Doing a massive 64mb memcpy() on each new fuzz case is very expensive and leads to completely unacceptable performance. Here we can borrow a concept that is common in the operating systems world: dirty bits. In operating systems, these are maintained at the page table level similar to the permissions. This bit is set whenever a write to memory occurs. This means that when copying memory between different cache levels, or just clearing memory, the page table can be traversed, and only pages with the dirty bit set need to have work done on them. 22 | 23 | The same principle applies to this fuzzer. When a fuzzer is run, only a very small percentage of this 64mb address space is actually overwritten. This means that by maintaining a dirty bit list, we can selectively choose which pages are reset while leaving most of the memory intact. The memory space, in this case, is not maintained in a page table so some of the implementation details differ, but the principle remains. 
24 | 25 | The implementation of memory resets in this project was heavily influenced by Brandon Falk's prior research into obtaining extremely fast memory resets and his implementation in his fuzz_with_emus project. Two arrays are maintained. Whenever memory is dirtied, the address is pushed to an initially empty array that contains a listing of all dirtied memory regions. Additionally, a dirty bitmap is maintained that is used to verify that only 1 address from each page (4096 bytes in this case) is pushed to this array to avoid duplicates. Populating this vector during execution is very simple and only requires 6 additional instructions during store operations. While resetting, the fuzzer can then just iterate through the previously populated vector and reset the address ranges that were pushed to the vector. 26 | 27 | #### Virtualized Files 28 | Many potential fuzz-targets read in their input from files stored on disk. This requires syscalls and disk access, which while fuzzing quickly gets extremely expensive. Instead, the fuzzer emulates all syscalls in user-space and stores files within the emulator as byte-arrays & a cursor into the current position within the file. This means that file operations now no longer require a context switch into the kernel or disk access and are instead quickly emulated resulting in massive performance increases. 29 | 30 | #### Glibc String Functions 31 | The standard Glibc implementation used on most Linux distributions makes use of specialized optimizations for string operations (eg. strlen/strcmp). These functions make sure that they are page-aligned when called and then read in 8 bytes at a time. This can easily go out of bounds (eg. when calling strlen on a 3-byte string), however since the access is page aligned the 8-byte access cannot trigger a page fault and thus does not lead to any security bugs. Since this fuzzer has byte-level permission checks though, this results in unnecessary crashes being recorded.
My solution was to write up custom "safe" implementations for some of these functions in assembly, dynamically recognize libc-string functions within the target, and compile in my own version instead of the default ones. This defeats the problem without adding any performance overhead. 32 | 33 | #### Future Work 34 | For RISC-V the current memory/permission model is totally sufficient, but if this fuzzer were to be 35 | used against x86_64 for example, issues would quickly come up. X86 uses a much larger memory space/area, 36 | so simply loading the entire space into memory is inpractical and will cause many cache-related 37 | performance slowdowns. In that light, I would like to eventually implement a page-table structure to only 38 | map in pages that are actually used to more easily support larger memory spaces. 39 | -------------------------------------------------------------------------------- /src/syscalls.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | mmu::Perms, 3 | emulator::{Emulator, Register, FileType::{self, STDOUT, STDERR, INVALID}, Fault}, 4 | config::FUZZ_INPUT, 5 | }; 6 | 7 | // Helper Structs for syscalls {{{ 8 | 9 | #[repr(C)] 10 | #[derive(Debug)] 11 | struct Stat { 12 | st_dev: u64, 13 | st_ino: u64, 14 | st_mode: u32, 15 | st_nlink: u32, 16 | st_uid: u32, 17 | st_gid: u32, 18 | st_rdev: u64, 19 | __pad1: u64, 20 | 21 | st_size: i64, 22 | st_blksize: i32, 23 | __pad2: i32, 24 | 25 | st_blocks: i64, 26 | 27 | st_atime: u64, 28 | st_atimensec: u64, 29 | st_mtime: u64, 30 | st_mtimensec: u64, 31 | st_ctime: u64, 32 | st_ctimensec: u64, 33 | 34 | __glibc_reserved: [i32; 2], 35 | } 36 | 37 | // }}} 38 | 39 | 40 | pub fn exit() -> Option { 41 | Some(Fault::Exit) 42 | } 43 | 44 | pub fn fstat(emu: &mut Emulator) -> Option { 45 | let fd = emu.get_reg(Register::A0) as usize; 46 | let statbuf = emu.get_reg(Register::A1); 47 | 48 | // Check if the FD is valid 49 | let file = emu.fd_list.get(fd); 
50 | if file.is_none() { 51 | // FD was not valid, return out with an error 52 | emu.set_reg(Register::A0, !0); 53 | return None; 54 | } 55 | 56 | // qemu output for the syscall + correct input lengths 57 | if file.unwrap().ftype == FileType::FUZZINPUT { 58 | let stat: Stat = Stat { 59 | st_dev: 0x803, 60 | st_ino: 0x81889, 61 | st_mode: 0x81a4, 62 | st_nlink: 0x1, 63 | st_uid: 0x3e8, 64 | st_gid: 0x3e8, 65 | st_rdev: 0x0, 66 | __pad1: 0, 67 | st_size: emu.fuzz_input.len() as i64, 68 | st_blksize: 0x1000, 69 | __pad2: 0, 70 | st_blocks: (emu.fuzz_input.len() as i64 + 511) / 512, 71 | st_atime: 0x5f0fe246, 72 | st_atimensec: 0, 73 | st_mtime: 0x5f0fe244, 74 | st_mtimensec: 0, 75 | st_ctime: 0x5f0fe244, 76 | st_ctimensec: 0, 77 | __glibc_reserved: [0, 0], 78 | }; 79 | 80 | // Cast the stat structure to raw bytes 81 | let stat = unsafe { 82 | core::slice::from_raw_parts( 83 | &stat as *const Stat as *const u8, 84 | core::mem::size_of_val(&stat)) 85 | }; 86 | 87 | // Write in the stat data 88 | emu.memory.write_mem(statbuf as usize, stat, stat.len()).unwrap(); 89 | emu.set_reg(Register::A0, 0); 90 | } else if file.unwrap().ftype != FileType::OTHER { 91 | emu.set_reg(Register::A0, !0); 92 | } else { 93 | unreachable!(); 94 | } 95 | 96 | None 97 | } 98 | 99 | pub fn lseek(emu: &mut Emulator) -> Option { 100 | let fd = emu.get_reg(Register::A0) as usize; 101 | let offset = emu.get_reg(Register::A1) as i64; 102 | let whence = emu.get_reg(Register::A2) as i32; 103 | 104 | if emu.fd_list.len() < fd || emu.fd_list[fd].ftype == FileType::INVALID { 105 | emu.set_reg(Register::A0, !0); 106 | return None; 107 | } 108 | 109 | if emu.fd_list[fd].ftype == FileType::FUZZINPUT { 110 | let cur = emu.fd_list[fd].cursor.unwrap(); 111 | 112 | let new_pos: i64 = match whence { 113 | 0 => offset, // SEEK_SET 114 | 1 => cur as i64 + offset, // SEEK_CUR 115 | 2 => (emu.fuzz_input.len() as i64) + offset, // SEEK_END 116 | _ => { 117 | emu.set_reg(Register::A0, !0); 118 | return None; 119 | } 
120 | }; 121 | 122 | let new_pos = core::cmp::max(0i64, new_pos); 123 | let new_pos = core::cmp::min(new_pos, emu.fuzz_input.len() as i64) as usize; 124 | 125 | emu.fd_list[fd].cursor = Some(new_pos); 126 | emu.set_reg(Register::A0, new_pos); 127 | } else { 128 | unreachable!(); 129 | } 130 | None 131 | } 132 | 133 | pub fn open(emu: &mut Emulator) -> Option { 134 | let filename = emu.get_reg(Register::A0) as usize; 135 | let _flags = emu.get_reg(Register::A1); 136 | let _mode = emu.get_reg(Register::A2); 137 | 138 | let mut buf: Vec = Vec::new(); 139 | let mut cur = 0; 140 | // get filename length 141 | loop { 142 | let c: u8 = emu.memory.read_at(filename + cur, Perms::READ).unwrap(); 143 | buf.push(c); 144 | if c == 0 { 145 | break; 146 | } 147 | cur += 1; 148 | } 149 | 150 | let fd = if buf == FUZZ_INPUT.get().unwrap().as_bytes() { 151 | emu.alloc_file(FileType::FUZZINPUT) 152 | } else { 153 | emu.alloc_file(FileType::OTHER) 154 | }; 155 | 156 | emu.set_reg(Register::A0, fd); 157 | None 158 | } 159 | 160 | pub fn read(emu: &mut Emulator) -> Option { 161 | let fd = emu.get_reg(Register::A0) as usize; 162 | let buf = emu.get_reg(Register::A1); 163 | let count = emu.get_reg(Register::A2); 164 | 165 | // If the file does not exist or has already been closed, return an error 166 | let file = emu.fd_list.get_mut(fd); 167 | if file.is_none() || file.unwrap().ftype == FileType::INVALID { 168 | emu.set_reg(Register::A0, !0); 169 | return None; 170 | } 171 | 172 | // Special case, reading in the fuzzinput 173 | if emu.fd_list[fd].ftype == FileType::FUZZINPUT { 174 | let offset = emu.fd_list[fd].cursor.unwrap(); 175 | let len = core::cmp::min(count, emu.fuzz_input.len()-offset); 176 | 177 | emu.memory.write_mem(buf, &emu.fuzz_input[offset..offset+len], len) 178 | .expect("Error occured while trying to read in fuzz-input"); 179 | 180 | emu.set_reg(Register::A0, len); 181 | emu.fd_list[fd].cursor = Some(offset + len); 182 | } else { 183 | // Read in a different file 184 | 
//unreachable!(); 185 | emu.set_reg(Register::A0, count); 186 | } 187 | 188 | None 189 | } 190 | 191 | pub fn write(emu: &mut Emulator) -> Option { 192 | let fd = emu.get_reg(Register::A0) as usize; 193 | let buf = emu.get_reg(Register::A1); 194 | let count = emu.get_reg(Register::A2); 195 | 196 | // If the file does not exist or has already been closed, return an error 197 | let file = emu.fd_list.get_mut(fd); 198 | if file.is_none() || file.as_ref().unwrap().ftype == FileType::INVALID { 199 | emu.set_reg(Register::A0, !0); 200 | return None; 201 | } 202 | 203 | // Set to true if you wish to see the actual stdout output of this syscall 204 | if false { 205 | let file = file.unwrap(); 206 | if file.ftype == STDOUT || file.ftype == STDERR { 207 | let mut read_data = vec![0u8; count]; 208 | emu.memory.read_into(buf, &mut read_data, count, Perms::READ).unwrap(); 209 | 210 | match std::str::from_utf8(&read_data) { 211 | Ok(v) => print!("{}", v), 212 | Err(_) => print!("{:?}", read_data), 213 | } 214 | } else { 215 | panic!("Write to unsupported file occured"); 216 | } 217 | } 218 | 219 | emu.set_reg(Register::A0, count); 220 | None 221 | } 222 | 223 | pub fn brk(emu: &mut Emulator) -> Option { 224 | let base = emu.get_reg(Register::A0); 225 | if base == 0 { 226 | emu.set_reg(Register::A0, 0); 227 | return None; 228 | } 229 | 230 | panic!("Not supporting brk"); 231 | } 232 | 233 | pub fn gettimeofday(emu: &mut Emulator) -> Option { 234 | emu.set_reg(Register::A0, 20); 235 | None 236 | } 237 | 238 | pub fn close(emu: &mut Emulator) -> Option { 239 | let fd = emu.get_reg(Register::A0) as usize; 240 | 241 | let file = emu.fd_list.get_mut(fd); 242 | 243 | if file.is_none() { 244 | emu.set_reg(Register::A0, !0); 245 | return None; 246 | } 247 | 248 | let file = file.unwrap(); 249 | 250 | file.ftype = INVALID; 251 | 252 | emu.set_reg(Register::A0, 0); 253 | None 254 | } 255 | -------------------------------------------------------------------------------- /docs/fuzzing.md: 
-------------------------------------------------------------------------------- 1 | # Fuzzing Capabilities 2 | 3 | #### Overview 4 | This will probably be the most interesting aspect for most people looking to use this fuzzer. Here 5 | I will describe the details of which features this fuzzer currently supports and their basic 6 | implementation details. 7 | 8 | #### Byte Level Permission Checks 9 | While this is an extremely important part of why this fuzzer is so effective, this capability was already covered in the [memory_management.md](https://github.com/seal9055/sfuzz/tree/main/docs/memory_management.md) section, so I will not repeat the information here. 10 | 11 | #### Coverage Tracking 12 | This fuzzer implements edge, block, and call-stack based coverage tracking. Coverage is currently being tracked in a very simple way. A bytemap is maintained to determine which edges/blocks have already been hit. At the beginning of each block, a fast hash is generated to index into the bytemap and check if the block/edge has already previously been hit. If it has, we just move on. If it is a new edge/block, however, the byte is set in the map, and the coverage counter is incremented to showcase that new coverage has been hit. For edge coverage, this hash consists of a quick xorshift hash, and for block-level coverage, the lower 24 bits of the address are just used. 13 | 14 | Callstack-based coverage tracking adds an additional field to the fuzzer. An evolving hash that is maintained throughout an entire input, and has new edges xor'd in. While this is far from perfect, it does allow the fuzzer to reason about what path has been taken to reach the current edge and track new coverage for new paths. 
15 | 16 | By default, the fuzzer uses edge coverage because call-stack coverage can quickly snowball out of control in some cases, but against some targets it may be worth considering, especially since some [papers](https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf) have rated it higher than basic edge coverage against many targets. 17 | 18 | #### Compare Coverage Tracking 19 | Coverage tracking already greatly improves fuzzers and allows them to reach much more complex code paths. Unfortunately, it does not, however, help fuzzers with multi-byte comparisons (e.g. `if (buf[3] == 0xdeadbeef)`) since statements such as these are handled in a single cmp instruction that isn't instrumented by basic coverage tracking. This is where CmpCov comes in. At runtime, branch-if-equal & branch-if-not-equal instructions are replaced with several separate single-byte comparisons. This results in a \~5-15% performance decrease (depending on the amount of cmp's within the target), but greatly improves the fuzzer's ability to find magic values without having to brute-force 2^32+ bytes since it can now instrument these comparisons with coverage tracking instructions. CmpCov is enabled by default. 20 | 21 | #### Coverage Guided Fuzzing 22 | This is done in pretty much the simplest way possible. Whenever a case finds new coverage, the case is added to the corpus and mutated off of for future fuzz cases. This includes both code coverage and compare coverage and makes the fuzzer much better at traversing targets. 23 | 24 | #### Persistent-mode/Snapshot Fuzzing 25 | This is mostly a performance optimization, but since it is very specific to fuzzing I figured this category probably suits it best. The main reason for this optimization is that the standard `fork() + execve()` routine used by basic fuzzers is slow and does not scale, thus making room for improved case reset techniques. 
26 | 27 | One initial improvement AFL++ uses is the forkserver optimization, where new processes are cloned from a copy-on-write master that is kept in the original state. This reduces a lot of the overhead, but still requires the expensive fork() syscall. A better alternative is to instrument the api with a custom-written, single-process loop, therefore removing all of the `execve()/fork()` overhead. AFL mostly automates this, but still requires the user to write a small harness to designate where this loop should be positioned. 28 | 29 | In the case of SFUZZ, since the fuzzer is running in an emulator, this becomes almost trivial. We can specify a specific address as the snapshot starting point, run the JIT up to that point, and take a snapshot of the entire register/memory state. All future fuzz-cases can now use this snapshot as their starting location instead of having to restart the process from the very beginning. This can be used to avoid a lot of setup that is disconnected from our fuzzing input and thus greatly speed up the fuzzing process. This becomes especially useful when dealing with larger targets, for which we can take a snapshot right before the interesting function, set an exit point right afterward, and then fuzz this function in a very tight/fast loop. 30 | 31 | This can oftentimes easily get at least a 30-50% speed improvement against simple targets, and even bigger speed improvements against larger targets where more code can be cut out of the snapshot, which makes it almost always worth it to go through the manual effort of choosing a good address to snapshot at. 32 | 33 | To enable snapshot-based fuzzing in SFUZZ, simply add the following flags with the address at which you wish to insert the snapshot `-s 0x1234`. 34 | 35 | #### Seed Scheduling 36 | Seed scheduling is implemented based on power schedules, with the inputs sitting in a queue that is iterated through. Before an input is executed, its energy is calculated. 
This determines how often an input will be executed (20000 to 150000 times based on its energy). The energy is kept within a reasonable range to make sure no cases are completely left out, and that a case executes often enough that the cost of this seed scheduling does not matter. This simply gives slight priority to favored cases. 37 | 38 | The energy of a case is determined based on the input size (in bytes), execution time (measured in instructions executed), how frequently the case has found new coverage, and how often this case has found a crash. Small sizes/execution times are favored, with new coverage providing additional bonus points. While crashes are good, a case may lie in a situation where it always results in the exact same crash, in which case its energy is slowly lowered. 39 | 40 | For the most part, I don't think this strategy matters too much (at least in a generic sense without considering the target), so I decided to only slightly favor "better" cases over others since especially at the start of a fuzzing campaign with an unfamiliar target, it is very hard to generalize which metrics are actually important. Slower inputs could end up finding many more new code paths than faster inputs and so on. 41 | 42 | #### Mutation Strategies 43 | The fuzzer currently has 8 different mutation strategies that are listed and described below. 44 | 45 | - ByteReplace - This strategy replaces 1-128 bytes in the input with random other bytes. Smaller corruptions are 46 | heavily favored over larger corruptions to avoid potentially destroying a good initial corpus. 47 | - Bitflip - This strategy flips 1-128 random bits in the target. Smaller corruptions are once again heavily favored. 48 | - MagicNum - This strategy maintains a small dictionary of hardcoded useful values. These are 1-8 byte values that lie on the boundaries of integer over/underflows, and can thus frequently find integer bugs. 
49 | - SimpleArithmetic - This strategy simply adds or subtracts a random value from 1-32 to 0-128 random bytes in 50 | the fuzzcase. This technique has proven to be very useful in the past and can often find integer bugs or corrupt 51 | length fields. 52 | - RemoveBlock - This strategy removes a random block from the input. It is more expensive than many 53 | of the other strategies. 54 | - DupBlock - This strategy duplicates a random block from the input. It is more expensive than many 55 | of the other strategies. 56 | - Resize - This strategy resizes the input. Decreasing the size simply truncates the input, while increasing the size 57 | adds random bytes to the end. 58 | - Dictionary - This is an optional feature that is enabled with the `-d` flag. It allows a dictionary file 59 | to be passed in, that the mutator will then use to randomly splice entries from the dictionary into the target. 60 | - Havoc - This strategy is invoked every 100 cases and simply combines multiple of the above listed strategies 61 | together for a single case. 62 | 63 | These mutation strategies are weighted. By default the cheaper/less destructive mutation strategies are favored (ByteReplace, Bitflip, MagicNum, SimpleArithmetic), while the more expensive/more destructive strategies are prioritized a lot less (RemoveBlock, DupBlock, Resize, Dictionary). 64 | 65 | #### Crashes 66 | 67 | Crashes are saved using a couple of different methods to differentiate between different crashes. The different crash causes are ReadFaults, WriteFaults, ExecFaults, OutOfBounds accesses, Timeouts, and various heap bugs. Timeouts occur when a fuzz case executes more instructions than the timeout allows. This is automatically calibrated using the initial seeds, but can also be manually overridden using the `-t` flag. 68 | 69 | Unique crashes are based on the type of crash and the address that the crash occurred at. Only unique crashes are saved off. 
-------------------------------------------------------------------------------- /src/pretty_printing.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | config::{COV_METHOD, NO_PERM_CHECKS, SNAPSHOT_ADDR, NUM_THREADS, DEBUG_PRINT, CMP_COV, 3 | RUN_CASES, SEND_REMOTE}, 4 | Statistics, Corpus, 5 | }; 6 | 7 | use core::fmt; 8 | use std::sync::Arc; 9 | use std::time::Duration; 10 | use std::collections::HashMap; 11 | 12 | use console::Term; 13 | use num_format::{Locale, ToFormattedString}; 14 | 15 | /// Different log-types that can be used to print out messages in different colors 16 | pub enum LogType { 17 | Neutral = 0, 18 | Success = 1, 19 | Failure = 2, 20 | } 21 | 22 | /// Color a string green 23 | pub struct Green(pub &'static str); 24 | impl fmt::Display for Green { 25 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 26 | write!(f, "\x1B[32m")?; 27 | write!(f, "{}", self.0)?; 28 | write!(f, "\x1B[0m")?; 29 | Ok(()) 30 | } 31 | } 32 | 33 | /// Color a string blue 34 | pub struct Blue(pub &'static str); 35 | impl fmt::Display for Blue { 36 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 37 | write!(f, "\x1B[34m")?; 38 | write!(f, "{}", self.0)?; 39 | write!(f, "\x1B[0m")?; 40 | Ok(()) 41 | } 42 | } 43 | 44 | /// Color a string red 45 | pub struct Red(pub &'static str); 46 | impl fmt::Display for Red { 47 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 48 | write!(f, "\x1B[31m")?; 49 | write!(f, "{}", self.0)?; 50 | write!(f, "\x1B[0m")?; 51 | Ok(()) 52 | } 53 | } 54 | 55 | /// Small wrapper to print out colored log messages 56 | pub fn log(color: LogType, msg: &str) { 57 | if *DEBUG_PRINT.get().unwrap() { 58 | match color { 59 | LogType::Neutral => { 60 | println!("{} {}", Blue("[-]"), msg); 61 | }, 62 | LogType::Success => { 63 | println!("{} {}", Green("[+]"), msg); 64 | }, 65 | LogType::Failure => { 66 | println!("{} {}", Red("[!]"), msg); 67 | }, 68 | } 69 | } 70 | } 71 | 72 | /// 
Print out statistics in a nicely formated static screen 73 | fn pretty_stats(term: &Term, stats: &Statistics, elapsed_time: f64, timeout: u64, corpus: 74 | &Arc, last_cov: f64) { 75 | 76 | term.clear_screen().unwrap(); 77 | term.move_cursor_to(0, 2).unwrap(); 78 | 79 | // Print out error message instead of standard output if the terminal size is too small to 80 | // properly display output 81 | let (x, y) = term.size(); 82 | if x < 25 || y < 95 { 83 | term.write_line(&format!("Increase terminal size to 25:95 (Cur: {}:{})", x, y)).unwrap(); 84 | term.flush().unwrap(); 85 | return; 86 | } 87 | 88 | term.write_line( 89 | &format!("{}", Green("\t\t[ SFUZZ - https://github.com/seal9055/sfuzz ]\n")) 90 | ).unwrap(); 91 | 92 | let duration = Duration::from_secs_f64(elapsed_time); 93 | let elapsed_sec = duration.as_secs() % 60; 94 | let elapsed_min = (duration.as_secs() / 60) % 60; 95 | let elapsed_hr = (duration.as_secs() / 60) / 60; 96 | 97 | // Progress information 98 | term.write_line( 99 | &format!("\t{}\n\t Run time: {:02}:{:02}:{:02}\n\t Total fuzz cases: {:12} \ 100 | \n\t Instrs execd [mil]: {:12}", 101 | Blue("Progression"), 102 | elapsed_hr, elapsed_min, elapsed_sec, 103 | stats.total_cases.to_formatted_string(&Locale::en), 104 | (stats.instr_count / 1_000_000).to_formatted_string(&Locale::en), 105 | ) 106 | ).unwrap(); 107 | 108 | // Results 109 | term.move_cursor_to(54, 4).unwrap(); 110 | term.write_line(&format!("{}", Blue("Overall Results"))).unwrap(); 111 | term.move_cursor_to(54, 5).unwrap(); 112 | term.write_line(&format!(" Unique Crashes: {}", stats.ucrashes)).unwrap(); 113 | term.move_cursor_to(54, 6).unwrap(); 114 | term.write_line(&format!(" Crashes: \t{}", stats.crashes.to_formatted_string(&Locale::en))) 115 | .unwrap(); 116 | term.move_cursor_to(54, 7).unwrap(); 117 | term.write_line(&format!(" Timeouts: \t{}", stats.timeouts.to_formatted_string(&Locale::en))) 118 | .unwrap(); 119 | 120 | // Performance numbers 121 | 
term.move_cursor_down(2).unwrap(); 122 | term.write_line( 123 | &format!("\t{}\n\t Fuzz cases per second: {:12}\n\t \ 124 | Instrs per second [mil]: {:12}", 125 | Blue("Performance measurements"), 126 | (stats.total_cases / elapsed_time as usize).to_formatted_string(&Locale::en), 127 | (stats.instr_count / 1_000_000 / elapsed_time as u64) 128 | .to_formatted_string(&Locale::en), 129 | ) 130 | ).unwrap(); 131 | 132 | let duration = Duration::from_secs_f64(elapsed_time - last_cov); 133 | let cov_sec = duration.as_secs() % 60; 134 | let cov_min = (duration.as_secs() / 60) % 60; 135 | let cov_hr = (duration.as_secs() / 60) / 60; 136 | 137 | // Coverage 138 | term.move_cursor_to(54, 10).unwrap(); 139 | term.write_line(&format!("{}", Blue("Coverage"))).unwrap(); 140 | term.move_cursor_to(54, 11).unwrap(); 141 | term.write_line(&format!(" Coverage: {}", stats.coverage)).unwrap(); 142 | term.move_cursor_to(54, 12).unwrap(); 143 | term.write_line(&format!(" CmpCov: {}", stats.cmpcov)).unwrap(); 144 | term.move_cursor_to(54, 13).unwrap(); 145 | term.write_line(&format!(" Time since last cov: {:02}:{:02}:{:02}", 146 | cov_hr, cov_min, cov_sec)).unwrap(); 147 | 148 | let run_cases = match RUN_CASES.get().unwrap() { 149 | Some(v) => format!("{}", v), 150 | None => "No Limit".to_string(), 151 | }; 152 | 153 | // Config information 154 | term.move_cursor_down(1).unwrap(); 155 | term.write_line( 156 | &format!("\t{}\n\t Num Threads: {}\n\t Coverage type: {:?}\n\t \ 157 | Snapshots enabled: {}\n\t ASAN: {}\n\t Timeout: {}\n\t CmpCov: {}\n\t Max runs: {}", 158 | Blue("Config"), 159 | NUM_THREADS.get().unwrap(), 160 | COV_METHOD.get().unwrap(), 161 | SNAPSHOT_ADDR.get().unwrap().is_some(), 162 | !NO_PERM_CHECKS.get().unwrap(), 163 | timeout.to_formatted_string(&Locale::en), 164 | CMP_COV.get().unwrap(), 165 | run_cases, 166 | )).unwrap(); 167 | 168 | // Corpus stats 169 | term.move_cursor_to(54, 15).unwrap(); 170 | term.write_line(&format!("{}", Blue("Corpus"))).unwrap(); 171 | 
term.move_cursor_to(54, 16).unwrap(); 172 | term.write_line(&format!(" Num Entries: {}", corpus.inputs.read().len())).unwrap(); 173 | term.move_cursor_to(54, 17).unwrap(); 174 | term.write_line(&format!(" Avg Instrs per case: {}", 175 | (stats.instr_count / stats.total_cases as u64) 176 | )).unwrap(); 177 | 178 | // Flush buffer and write to terminal 179 | term.flush().unwrap(); 180 | } 181 | 182 | /// Simple debug view of statistics 183 | fn basic_stats(stats: &Statistics, elapsed_time: f64) { 184 | println!( 185 | "[{:8.2}] fuzz cases: {:12} : fcps: {:8} : coverage: {:6} : crashes: {:8} \ 186 | \n\t instr_cnt: {:13} : ips: {:9} : ucrashes: {:6} : timeouts: {:8}", 187 | elapsed_time, 188 | stats.total_cases.to_formatted_string(&Locale::en), 189 | (stats.total_cases / elapsed_time as usize).to_formatted_string(&Locale::en), 190 | stats.coverage, 191 | stats.crashes, 192 | stats.instr_count.to_formatted_string(&Locale::en), 193 | (stats.instr_count / elapsed_time as u64).to_formatted_string(&Locale::en), 194 | stats.ucrashes, 195 | stats.timeouts 196 | ); 197 | } 198 | 199 | fn send_remote(ip: String, port: usize, stats: &Statistics, elapsed_time: f64) { 200 | let request_url = format!("http://{}:{}/stats", ip, port).to_string(); 201 | let client = reqwest::Client::new(); 202 | 203 | let mut map = HashMap::new(); 204 | map.insert("total_cases", stats.total_cases); 205 | map.insert("crashes", stats.crashes); 206 | map.insert("ucrashes", stats.ucrashes); 207 | map.insert("coverage", stats.coverage); 208 | map.insert("cmpcov", stats.cmpcov); 209 | map.insert("instr_count", stats.instr_count as usize); 210 | map.insert("timeouts", stats.timeouts as usize); 211 | map.insert("exec_time", elapsed_time as usize * 1_000); 212 | 213 | let _ = client.post(request_url).json(&map).send(); 214 | } 215 | 216 | /// Wrapper for actual stat-printing functions 217 | pub fn print_stats(term: &Term, stats: &Statistics, elapsed_time: f64, timeout: u64, 218 | corpus: &Arc, last_cov: f64) 
{ 219 | if *DEBUG_PRINT.get().unwrap() { 220 | basic_stats(stats, elapsed_time); 221 | } else { 222 | pretty_stats(term, stats, elapsed_time, timeout, corpus, last_cov); 223 | } 224 | 225 | if let Some(connection_info) = SEND_REMOTE.get().unwrap() { 226 | let mut iter = connection_info.split(":"); 227 | let ip = iter.next().expect("Given ip in incorrect format").to_string(); 228 | let port: usize = iter.next().expect("Given port in incorrect format").parse() 229 | .expect("Given port in incorrect format"); 230 | 231 | assert!(port < 65536, "Invalid port number"); 232 | send_remote(ip, port, stats, elapsed_time); 233 | } 234 | } 235 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(once_cell)] 2 | 3 | use sfuzz::{ 4 | mmu::Perms, 5 | emulator::{Emulator, Register, Fault, ExitType}, 6 | jit::{Jit, LibFuncs}, 7 | pretty_printing::{print_stats, log, LogType}, 8 | Input, Corpus, Statistics, error_exit, load_elf_segments, worker, snapshot, calibrate_seeds, 9 | config::{handle_cli, Cli, SNAPSHOT_ADDR, OVERRIDE_TIMEOUT, NUM_THREADS, MAX_GUEST_ADDR, 10 | RUN_CASES}, 11 | }; 12 | use std::thread; 13 | use std::sync::{Arc, Mutex}; 14 | use std::sync::mpsc::{self, Receiver, Sender}; 15 | use std::time::{Duration, Instant}; 16 | 17 | use byteorder::{LittleEndian, WriteBytesExt}; 18 | use rustc_hash::FxHashMap; 19 | use console::Term; 20 | use clap::Parser; 21 | 22 | /// Hook that makes use of sfuzz's mmu to perform a memory safe malloc operation 23 | fn malloc_hook(emu: &mut Emulator) -> Result<(), Fault> { 24 | let alloc_size = emu.get_reg(Register::A1); 25 | 26 | if let Some(addr) = emu.memory.allocate(alloc_size, Perms::READ | Perms::WRITE) { 27 | emu.set_reg(Register::A0, addr); 28 | emu.set_reg(Register::Pc, emu.get_reg(Register::Ra)); 29 | Ok(()) 30 | } else { 31 | Err(Fault::OOM) 32 | } 33 | } 34 | 35 | /// Hook that makes use of 
sfuzz's mmu to perform a memory safe calloc operation, pretty much same 36 | /// as malloc apart from how the size is calculated 37 | fn calloc_hook(emu: &mut Emulator) -> Result<(), Fault> { 38 | let nmemb = emu.get_reg(Register::A1); 39 | let size = emu.get_reg(Register::A2); 40 | let alloc_size = nmemb.checked_mul(size).ok_or(Fault::OOM)?; 41 | // The product is checked because both operands come from guest registers; overflow fails the allocation 42 | if let Some(addr) = emu.memory.allocate(alloc_size, Perms::READ | Perms::WRITE) { 43 | emu.set_reg(Register::A0, addr); 44 | emu.set_reg(Register::Pc, emu.get_reg(Register::Ra)); 45 | Ok(()) 46 | } else { 47 | Err(Fault::OOM) 48 | } 49 | } 50 | 51 | /// Hook that makes use of sfuzz's mmu to perform a memory safe free operation 52 | fn free_hook(emu: &mut Emulator) -> Result<(), Fault> { 53 | let ptr = emu.get_reg(Register::A1); 54 | 55 | emu.memory.free(ptr)?; 56 | emu.set_reg(Register::Pc, emu.get_reg(Register::Ra)); 57 | Ok(()) 58 | } 59 | 60 | /// Inserts various hooks into binary 61 | fn insert_hooks(sym_map: &FxHashMap, emu: &mut Emulator) { 62 | match sym_map.get("_free_r") { 63 | Some(v) => { 64 | log(LogType::Success, "_free_r hooked"); 65 | emu.hooks.insert(*v, free_hook); 66 | }, 67 | None => { 68 | log(LogType::Neutral, "_free_r does not exist in target so it could not be hooked"); 69 | } 70 | } 71 | 72 | match sym_map.get("_malloc_r") { 73 | Some(v) => { 74 | log(LogType::Success, "_malloc_r hooked"); 75 | emu.hooks.insert(*v, malloc_hook); 76 | }, 77 | None => { 78 | log(LogType::Neutral, "_malloc_r does not exist in target so it could not be hooked"); 79 | } 80 | } 81 | 82 | match sym_map.get("_calloc_r") { 83 | Some(v) => { 84 | log(LogType::Success, "_calloc_r hooked"); 85 | emu.hooks.insert(*v, calloc_hook); 86 | }, 87 | None => { 88 | log(LogType::Neutral, "_calloc_r does not exist in target so it could not be hooked"); 89 | } 90 | } 91 | 92 | // Hooks for strlen and strcmp are required because the default libc variants go out of bounds. 
93 | // This is not a security issue since the functions verify that everything is properly aligned, 94 | // but since this fuzzer notices byte level permission violations these are required. 95 | 96 | match sym_map.get("strlen") { 97 | Some(v) => { 98 | log(LogType::Success, "strlen replaced with safe implementation"); 99 | emu.custom_lib.insert(*v, LibFuncs::STRLEN); 100 | }, 101 | None => { 102 | log(LogType::Neutral, "strlen does not exist in target so it could not be hooked"); 103 | } 104 | } 105 | 106 | match sym_map.get("strcmp") { 107 | Some(v) => { 108 | log(LogType::Success, "strcmp replaced with safe implementation"); 109 | emu.custom_lib.insert(*v, LibFuncs::STRCMP); 110 | }, 111 | None => { 112 | log(LogType::Neutral, "strcmp does not exist in target so it could not be hooked"); 113 | } 114 | } 115 | } 116 | 117 | /// Setup the root emulator's segments and stack before cloning the emulator into multiple threads 118 | /// to run multiple emulators at the same time 119 | fn main() -> std::io::Result<()> { 120 | // Thead-shared jit backing 121 | let jit = Arc::new(Jit::new(16 * 1024 * 1024)); 122 | 123 | // Thread-shared mutex that is used to lock JIT-compilation 124 | let prevent_rc: Arc> = Arc::new(Mutex::new(0)); 125 | 126 | // Thread-shared structure that holds fuzz-inputs and coverage information 127 | let mut corpus: Corpus = Corpus::new(16*1024*1024); 128 | 129 | // Each thread gets its own forked emulator. The jit-cache is shared between them however 130 | let mut emu = Emulator::new(MAX_GUEST_ADDR, jit, prevent_rc); 131 | 132 | // Statistics structure. 
This is kept local to the main thread and updated via message passing 133 | // from the worker threads 134 | let mut stats = Statistics::default(); 135 | 136 | // Messaging objects used to transfer statistics between worker threads and main thread 137 | let (tx, rx): (Sender, Receiver) = mpsc::channel(); 138 | 139 | let term = Term::buffered_stdout(); 140 | term.clear_screen()?; 141 | 142 | // Parse commandline-args and set config variables based on them 143 | let mut args = Cli::parse(); 144 | handle_cli(&mut args); 145 | 146 | // Insert loadable segments into emulator address space and retrieve symbol table information 147 | let sym_map = load_elf_segments(&args.fuzzed_app[0], &mut emu).unwrap_or_else(||{ 148 | error_exit("Unrecoverable error while loading elf segments"); 149 | }); 150 | 151 | // Initialize corpus with files from input directory 152 | let mut w = corpus.inputs.write(); 153 | for filename in std::fs::read_dir(args.input_dir)? { 154 | let filename = filename?.path(); 155 | let data = std::fs::read(filename)?; 156 | 157 | // Add the corpus input to the corpus 158 | w.push(Input::new(data, None)); 159 | } 160 | if w.is_empty() { panic!("Please supply at least 1 initial seed"); } 161 | drop(w); 162 | 163 | // Setup Stack 164 | let stack = emu.allocate(1024 * 1024, Perms::READ | Perms::WRITE) 165 | .expect("Error allocating stack"); 166 | emu.set_reg(Register::Sp, (stack + (1024 * 1024)) - 8); 167 | 168 | // Setup arguments 169 | //let arguments = vec!["test_cases/harder_test\0".to_string(), "fuzz_input\0".to_string()]; 170 | let argv: Vec = args.fuzzed_app.iter().map(|e| { 171 | let addr = emu.allocate(64, Perms::READ | Perms::WRITE) 172 | .expect("Allocating an argument failed"); 173 | emu.memory.write_mem(addr, e.as_bytes(), e.len()).expect("Writing to argv[0] failed"); 174 | addr 175 | }).collect(); 176 | 177 | // Macro to push 64-bit integers onto the stack 178 | macro_rules! 
push { 179 | ($expr:expr) => { 180 | let sp = emu.get_reg(Register::Sp) - 8; 181 | let mut wtr = vec![]; 182 | wtr.write_u64::($expr as u64)?; 183 | emu.memory.write_mem(sp, &wtr, 8).unwrap(); 184 | emu.set_reg(Register::Sp, sp); 185 | } 186 | } 187 | 188 | // Setup argc, argv & envp 189 | push!(0u64); // Auxp 190 | push!(0u64); // Envp 191 | push!(0u64); // Null-terminate Argv 192 | for arg in argv.iter().rev() { 193 | push!(*arg); 194 | } 195 | push!(argv.len()); // Argc 196 | 197 | // Insert various hooks into binary 198 | insert_hooks(&sym_map, &mut emu); 199 | 200 | // Setup snapshot fuzzing at a point before the fuzz-input is read in 201 | if let Some(addr) = SNAPSHOT_ADDR.get().unwrap() { 202 | println!("Activated snapshot-based fuzzing"); 203 | 204 | // Insert snapshot fuzzer exit condition 205 | emu.exit_conds.insert(*addr, ExitType::Snapshot); 206 | 207 | // Snapshot the emulator 208 | snapshot(&mut emu, &corpus); 209 | } 210 | 211 | // Calibrate the emulator for the timeout. 212 | // Alternatively configs can be used to override automatically determined timeout 213 | emu.timeout = calibrate_seeds(&mut emu, &corpus); 214 | if let Some(v) = OVERRIDE_TIMEOUT.get().unwrap() { 215 | emu.timeout = *v; 216 | } 217 | 218 | // Reset coverage collected during initial callibration so it is in a default state once 219 | // fuzzing actually starts. 
This also removes the coverage generated while capturing the 220 | // initial snapshot 221 | corpus.reset_coverage(); 222 | 223 | let emu = Arc::new(emu); 224 | let corpus = Arc::new(corpus); 225 | 226 | // Spawn worker threads to do the actual fuzzing 227 | for thr_id in 0..*NUM_THREADS.get().unwrap() { 228 | let emu_cp = emu.fork(); 229 | let corpus = corpus.clone(); 230 | let tx = tx.clone(); 231 | 232 | thread::spawn(move || worker(thr_id, emu_cp, corpus, tx)); 233 | } 234 | 235 | // Continuous statistic tracking via message passing in main thread 236 | let start = Instant::now(); 237 | let mut last_time = Instant::now(); 238 | let mut last_cov_event: f64 = 0.0; 239 | 240 | // Sleep for short duration on startup before printing statistics, otherwise elapsed time might 241 | // be 0, leading to a crash while printing statistics 242 | thread::sleep(Duration::from_millis(1000)); 243 | 244 | // Update stats structure whenever a thread sends a new message 245 | for received in rx { 246 | let elapsed_time = start.elapsed().as_secs_f64(); 247 | 248 | 249 | // Check if we got new coverage 250 | if received.coverage != 0 || received.cmpcov != 0 { 251 | last_cov_event = elapsed_time; 252 | } 253 | 254 | stats.coverage += received.coverage; 255 | stats.cmpcov += received.cmpcov; 256 | stats.total_cases += received.total_cases; 257 | stats.crashes += received.crashes; 258 | stats.ucrashes += received.ucrashes; 259 | stats.instr_count += received.instr_count; 260 | stats.timeouts += received.timeouts; 261 | 262 | // Print out updated statistics at most once every 500 milliseconds 263 | if last_time.elapsed() >= Duration::from_millis(500) { 264 | print_stats(&term, &stats, elapsed_time, emu.timeout, &corpus, last_cov_event); 265 | last_time = Instant::now(); 266 | } 267 | 268 | if let Some(max_cases) = RUN_CASES.get().unwrap() { 269 | if stats.total_cases >= *max_cases { 270 | error_exit("Fuzzer reached specified maximum number of total cases"); 271 | 272 | } 273 | } 274 | } 275 | 276 | Ok(()) 277 
| }
278 |
--------------------------------------------------------------------------------
/src/config.rs:
--------------------------------------------------------------------------------
use crate::error_exit;

use std::path::Path;
use std::sync::OnceLock;

use clap::Parser;
use parse_int::parse;

// NOTE(review): this listing was recovered from a dump that stripped every `<...>` sequence.
// Generic parameters below were reconstructed from the `.set(..)` call sites in `handle_cli`;
// the ones that could not be proven from this file are marked individually.

/// Method used to track coverage, see [`CovMethod`] for the supported variants
pub static COV_METHOD: OnceLock<CovMethod> = OnceLock::new();

/// Address at which the fuzzer attempts to create a snapshot once reached
// NOTE(review): integer type assumed to be `usize` - confirm against the snapshot code
pub static SNAPSHOT_ADDR: OnceLock<Option<usize>> = OnceLock::new();

/// Number of cores to run the fuzzer with
pub static NUM_THREADS: OnceLock<usize> = OnceLock::new();

/// Path to directory to which fuzzer-outputs are saved
pub static OUTPUT_DIR: OnceLock<String> = OnceLock::new();

/// File that contains the user-supplied dictionary
pub static DICT_FILE: OnceLock<Option<String>> = OnceLock::new();

/// Input provided as argument to the target being fuzzed
pub static FUZZ_INPUT: OnceLock<String> = OnceLock::new();

/// Toggle-able permission checks. Should be left on, except for very special cases/debugging
pub static NO_PERM_CHECKS: OnceLock<bool> = OnceLock::new();

/// Additional information is printed out, alongside rolling statistics. Some parts of this only
/// work while running single-threaded
pub static DEBUG_PRINT: OnceLock<bool> = OnceLock::new();

/// In addition to the default printouts, the fuzzer will now also send the data to a remote server.
/// Implemented to interact with api of "https://github.com/rsalz47/cs326-final-gimel"
pub static SEND_REMOTE: OnceLock<Option<String>> = OnceLock::new();

/// Separates branch-if-equal comparisons into multiple separate compares that benefit from
/// coverage tracking so larger magic numbers can still be found through fuzzing
pub static CMP_COV: OnceLock<bool> = OnceLock::new();

/// Manually override the automatically calibrated timeout
// NOTE(review): integer type assumed to be `u64` - confirm against the calibration code
pub static OVERRIDE_TIMEOUT: OnceLock<Option<u64>> = OnceLock::new();

/// Collect a full register trace of program execution, for large programs, it can take several
/// hours to write out a single case, only enable when debugging the JIT. Only works when fuzzer is
/// being run single-threaded
pub static FULL_TRACE: OnceLock<bool> = OnceLock::new();

/// Amount of cases that will be run before the fuzzer automatically shuts down
// NOTE(review): integer type assumed to be `usize` - confirm against the main fuzz loop
pub static RUN_CASES: OnceLock<Option<usize>> = OnceLock::new();

/// Size of memory space allocated for each thread's virtual address space
pub const MAX_GUEST_ADDR: usize = 64 * 1024 * 1024;

#[derive(Eq, PartialEq, Copy, Clone, Debug)]
pub enum CovMethod {
    /// Don't track coverage
    None,

    /// Track block level coverage
    Block,

    /// Track edge level coverage
    Edge,

    /// Track edge level coverage alongside a call-stack hash to get more fine-grained coverage
    /// results
    CallStack,
}

/// Used by clap to parse command-line arguments
// NOTE(review): the usage string may originally have contained `<..>` placeholders between
// `[` and `]`, and wider indentation before `ex:`; both were lost in the dump - confirm
// against the repository before relying on the exact text.
#[derive(Debug, Parser)]
#[clap(author = "seal9055", version, about = "Coverage-guided emulation based fuzzer")]
#[clap(override_usage = "sfuzz [OPTION] -- /path/to/fuzzed_app [ ... ] (use `@@` to specify \
    position of fuzz-input in target-argv)\n\n ex: sfuzz -i in -o out -n 16 -- \
    ./test_cases/test @@")]
pub struct Cli {
    #[clap(short, value_name = "DIR", forbid_empty_values = true, display_order = 1)]
    /// - Input directory that should contain the initial seed files
    pub input_dir: String,

    #[clap(short, value_name = "DIR", forbid_empty_values = true, display_order = 2)]
    /// - Output directory that will be used to eg. save crashes
    pub output_dir: String,

    #[clap(short = 'V', takes_value = false)]
    /// - Print version information
    pub version: bool,

    #[clap(short = 'h', takes_value = false)]
    /// - Print help information
    pub help: bool,

    #[clap(default_value_t=1, short, help_heading = "CONFIG")]
    /// - The number of threads to run this fuzzer with
    pub num_threads: usize,

    #[clap(short = 'p', help_heading = "CONFIG", takes_value = false)]
    /// - Disables permission checking, highly discouraged since it will cause the fuzzer itself to
    /// segfault when the target crashes due to being run in an emulator
    pub no_perm_checks: bool,

    #[clap(short = 'C', help_heading = "CONFIG", takes_value = false)]
    /// - Disables CmpCov, results in slight performance increase, but makes it almost impossible
    /// for the fuzzer to get past large magic value comparisons
    pub no_cmp_cov: bool,

    #[clap(short = 'e', help_heading = "CONFIG")]
    /// - File extension for the fuzz test input file if the target requires it
    pub extension: Option<String>,

    #[clap(short = 'D', help_heading = "CONFIG", takes_value = false)]
    /// - Enable a rolling debug-print and information on which functions are lifted instead of the
    /// default print-window
    pub debug_print: bool,

    #[clap(short = 'k', help_heading = "CONFIG", takes_value = true)]
    /// - In addition to displaying the data on the screen, also send it to a remote api. Provide
    /// port and ip to send it to in format "127.0.0.1:9055". Implemented to interact with api of
    /// https://github.com/rsalz47/cs326-final-gimel
    pub send_remote: Option<String>,

    #[clap(short = 's', help_heading = "CONFIG")]
    /// - Take a snapshot of the target at specified address and launch future fuzz-cases off of this
    /// snapshot
    pub snapshot: Option<String>,

    #[clap(short = 't', help_heading = "CONFIG")]
    /// - Override the timeout that is otherwise dynamically set during calibration phase
    // NOTE(review): integer type assumed to be `u64` (must match `OVERRIDE_TIMEOUT`) - confirm
    pub override_timeout: Option<u64>,

    #[clap(short = 'r', help_heading = "CONFIG")]
    /// - Optionally set the amount of cases to be run before the fuzzer shuts down
    pub run_cases: Option<String>,

    #[clap(short = 'f', help_heading = "CONFIG", takes_value = false)]
    /// - Collect a full register trace of program execution. Only enable while debugging, majorly
    /// slows down performance. Only works when fuzzer is run single-threaded
    pub full_trace: bool,

    #[clap(short = 'd', value_name = "DICT", help_heading = "CONFIG", forbid_empty_values = true)]
    /// - Optionally supply a new-line separated list of inputs that will be mutated into the
    /// fuzz-inputs
    pub dictionary: Option<String>,

    #[clap(short = 'c', help_heading = "CONFIG", default_value = "edge")]
    /// - Coverage method, currently supports `edge`, `block`, and `call-stack` based coverage
    pub cov_method: String,

    #[clap(last = true)]
    /// The target to be fuzzed alongside its arguments
    pub fuzzed_app: Vec<String>,
}

/// Initialize configuration variables based on passed in commandline arguments, and verify that
/// the user properly setup their fuzz-case
///
/// Every global in this module is written exactly once here. As a side effect the `crashes`
/// directory is created inside the output directory, and the `@@` entry of `args.fuzzed_app` is
/// replaced with the name of the fuzz-input file.
///
/// Invalid user input terminates the process through `error_exit`.
///
/// # Panics
///
/// Panics if called more than once, since each `OnceLock` can only be set a single time.
pub fn handle_cli(args: &mut Cli) {
    NUM_THREADS.set(args.num_threads).unwrap();
    NO_PERM_CHECKS.set(args.no_perm_checks).unwrap();
    DEBUG_PRINT.set(args.debug_print).unwrap();
    SEND_REMOTE.set(args.send_remote.clone()).unwrap();
    OVERRIDE_TIMEOUT.set(args.override_timeout).unwrap();
    CMP_COV.set(!args.no_cmp_cov).unwrap();

    if args.fuzzed_app.is_empty() {
        error_exit("You need to specify the target to be fuzzed");
    }

    // Verify that the input and output directories are valid
    if !Path::new(&args.input_dir).is_dir() {
        error_exit("You need to specify a valid input directory");
    }
    if !Path::new(&args.output_dir).is_dir() {
        error_exit("You need to specify a valid output directory");
    }
    OUTPUT_DIR.set(args.output_dir.clone()).unwrap();

    if let Some(dict) = &args.dictionary {
        if !Path::new(dict).is_file() {
            error_exit("You need to specify a valid dictionary file");
        }
    }
    DICT_FILE.set(args.dictionary.clone()).unwrap();

    // Create the directory to save crashes to. Failing to do so is an environment problem
    // (permissions, read-only disk, ...), so report it instead of panicking
    let crash_dir = Path::new(&args.output_dir).join("crashes");
    if std::fs::create_dir_all(&crash_dir).is_err() {
        error_exit("Could not create the `crashes` directory inside of the output directory");
    }

    // Set the fuzz-input. If the user specified an extension, add that too. The name carries an
    // explicit trailing nul-byte
    let fuzz_input = match &args.extension {
        Some(ext) => format!("fuzz_input.{}\0", ext),
        None => "fuzz_input\0".to_string(),
    };
    FUZZ_INPUT.set(fuzz_input).unwrap();

    // Verify that the user supplied `@@` and use it to setup the fuzz-input's argv
    let index = args.fuzzed_app.iter().position(|e| e == "@@").unwrap_or_else(|| {
        error_exit("You need to specify how the fuzz-case input files should be passed in. This \
                   can be done using the `@@` flag as shown in the example under `Usage`.");
    });
    args.fuzzed_app[index] = FUZZ_INPUT.get().unwrap().to_string();

    // Set snapshot address if requested. The value comes straight from the command line, so a
    // malformed number is a user error rather than a bug
    let snapshot_addr = args.snapshot.as_deref().map(|raw| {
        parse::<usize>(raw)
            .unwrap_or_else(|_| error_exit("The snapshot address needs to be a valid number"))
    });
    SNAPSHOT_ADDR.set(snapshot_addr).unwrap();

    // Set max number of cases if requested
    let run_cases = args.run_cases.as_deref().map(|raw| {
        parse::<usize>(raw)
            .unwrap_or_else(|_| error_exit("The number of cases to run needs to be a valid number"))
    });
    RUN_CASES.set(run_cases).unwrap();

    // Set the coverage collection method
    match args.cov_method.as_str() {
        "edge" => {
            COV_METHOD.set(CovMethod::Edge).unwrap();
        },
        "block" => {
            COV_METHOD.set(CovMethod::Block).unwrap();
        },
        "call-stack" => {
            COV_METHOD.set(CovMethod::CallStack).unwrap();
        },
        _ => {
            error_exit("You're specified coverage method is not supported, please chose `edge`, \
                       `block`, or `call-stack`")
        },
    }

    // Trace mode
    if args.full_trace && args.num_threads != 1 {
        error_exit("Full Trace mode only works when running single-threaded");
    } else {
        FULL_TRACE.set(args.full_trace).unwrap();
    }

    // Debugging aid: flip the condition to dump the resolved configuration at startup
    if false {
        println!("cov_method: {:?}", COV_METHOD);
        println!("snapshot_addr: {:?}", SNAPSHOT_ADDR);
        println!("num_threads: {:?}", NUM_THREADS);
        println!("output_dir: {:?}", OUTPUT_DIR);
        println!("fuzz_input: {:?}", FUZZ_INPUT);
        println!("no_perm_checks: {:?}", NO_PERM_CHECKS);
        println!("debug_print: {:?}", DEBUG_PRINT);
        println!("send_remote: {:?}", SEND_REMOTE);
        println!("override_timeout: {:?}", OVERRIDE_TIMEOUT);
        println!("full_trace: {:?}", FULL_TRACE);
    }
}


--------------------------------------------------------------------------------
/src/mutator.rs:
--------------------------------------------------------------------------------
use crate::config::DICT_FILE;

use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;

use rand_xoshiro::rand_core::RngCore;
use rand_xoshiro::rand_core::SeedableRng;
use rand_xoshiro::Xoroshiro64Star;

/// When set, `Mutator::mutate` only performs cheap random byte replacements
const MUTATE_SIMPLE: bool = false;

/// Selection weight of every always-available strategy. The order matters: it determines the
/// layout of `Mutator::mutation_strats` and therefore which strategy a given random draw maps to
const BASE_STRATEGY_WEIGHTS: [(Mutation, usize); 7] = [
    (Mutation::ByteReplace, 1000),
    (Mutation::BitFlip, 1000),
    (Mutation::MagicNum, 200),
    (Mutation::SimpleArithmetic, 500),
    (Mutation::RemoveBlock, 30),
    (Mutation::DupBlock, 30),
    (Mutation::Resize, 10),
];

/// Selection weight of the dictionary strategy, only used when a dictionary was supplied
const DICTIONARY_WEIGHT: usize = 30;

/// Byte patterns that tend to trigger edge-cases (zero, all-ones, signed maxima, ...)
const MAGIC_NUMS: [&[u8]; 13] = [
    &[0x00],
    &[0x00; 2],
    &[0x00; 4],
    &[0x00; 8],
    &[0xff],
    &[0xff; 2],
    &[0xff; 4],
    &[0xff; 8],
    &[0x7f],
    &[0x7f, 0xff],
    &[0x7f, 0xff, 0xff, 0xff],
    &[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff],
    &[0x01, 0x00, 0x00],
];

#[derive(Copy, Clone, Debug)]
pub enum Mutation {
    ByteReplace,
    BitFlip,
    MagicNum,
    SimpleArithmetic,
    RemoveBlock,
    DupBlock,
    Resize,
    Dictionary,
}

#[derive(Debug, Clone)]
pub struct Mutator {
    /// Fast Rng
    rng: Xoroshiro64Star,

    /// Available mutation strategies. A strategy appears once per unit of weight, so picking a
    /// uniformly random element yields a weighted choice
    mutation_strats: Vec<Mutation>,

    /// Count-down to havoc mode
    havoc_counter: usize,

    /// Entries of the user-supplied dictionary, `None` if no usable dictionary exists
    dictionary: Option<Vec<String>>,
}

/// Open `file_name` and return an iterator over its lines
fn read_lines<P>(file_name: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    let file = File::open(file_name)?;
    Ok(io::BufReader::new(file).lines())
}

/// Load the new-line separated dictionary stored at `file_name`
///
/// Returns an empty vector if the file cannot be opened. Reading stops at the first line that
/// cannot be read (I/O error or invalid UTF-8) instead of panicking, and empty lines are skipped
/// since an empty entry could never change an input.
pub fn parse_dict(file_name: &str) -> Vec<String> {
    match read_lines(file_name) {
        Ok(lines) => lines
            .map_while(Result::ok)
            .filter(|entry| !entry.is_empty())
            .collect(),
        Err(_) => Vec::new(),
    }
}

impl Default for Mutator {
    /// Create a mutator with the default strategy weights and a fixed rng seed
    ///
    /// # Panics
    ///
    /// Panics if `DICT_FILE` has not been initialized yet.
    fn default() -> Self {
        // Weights are implemented by inserting a strategy multiple times. This creates a larger
        // array, but makes choosing a strategy a single random index operation
        let mut mutation_strats: Vec<Mutation> = Vec::new();
        for &(strategy, weight) in BASE_STRATEGY_WEIGHTS.iter() {
            mutation_strats.extend(std::iter::repeat(strategy).take(weight));
        }

        // If the user specified a dictionary to be used while fuzzing, parse it and add
        // dictionary replacements to the fuzz methods. A dictionary without a single usable
        // entry is treated like no dictionary at all
        let configured_dict = DICT_FILE
            .get()
            .expect("DICT_FILE must be initialized before a Mutator is created");
        let dictionary = match configured_dict {
            Some(path) => Some(parse_dict(path)).filter(|entries| !entries.is_empty()),
            None => None,
        };
        if dictionary.is_some() {
            mutation_strats.extend(std::iter::repeat(Mutation::Dictionary).take(DICTIONARY_WEIGHT));
        }

        Self {
            rng: Xoroshiro64Star::seed_from_u64(0),
            mutation_strats,
            havoc_counter: 0,
            dictionary,
        }
    }
}

impl Mutator {
    /// Return 2 random 32-bit unsigned integers
    fn get2_rand(&mut self) -> (usize, usize) {
        let tmp = self.rng.next_u64();
        ((tmp & 0xffffffff) as usize, (tmp >> 32) as usize)
    }

    /// Decide how many locations a corruption-style mutation touches
    ///
    /// 95% of the time the returned range has 0-30 elements, otherwise 0-63. A range rather than
    /// a count is returned because `simple_arithmetic` keys off the parity of the loop index.
    // NOTE(review): the comments this replaces claimed "64-128" locations for the large case,
    // the loop bounds however only ever produced `r2 % 64` iterations. Behavior is kept as-is
    fn corruption_rounds(r1: usize, r2: usize) -> std::ops::Range<usize> {
        if (r1 % 1000) < 950 {
            1..(r2 % 32)
        } else {
            64..(64 + (r2 % 64))
        }
    }

    /// Chose a set of random bytes and mutate them. Prefer small corruption over larger one's
    ///
    /// Fails on an empty input since there is no byte that could be replaced.
    fn byte_replace(&mut self, input: &mut [u8]) -> Result<(), ()> {
        let input_length = input.len();
        if input_length == 0 { return Err(()); }

        let (r1, r2) = self.get2_rand();
        for _ in Self::corruption_rounds(r1, r2) {
            let (r1, r2) = self.get2_rand();
            input[r1 % input_length] = r2 as u8;
        }
        Ok(())
    }

    /// Flip some random bits in the input
    ///
    /// Fails on an empty input since there is no bit that could be flipped.
    fn bit_flip(&mut self, input: &mut [u8]) -> Result<(), ()> {
        let input_length = input.len();
        if input_length == 0 { return Err(()); }

        let (r1, r2) = self.get2_rand();
        for _ in Self::corruption_rounds(r1, r2) {
            let (r1, r2) = self.get2_rand();
            let bit_idx = r1 % 8;
            input[r2 % input_length] ^= 1 << bit_idx;
        }
        Ok(())
    }

    /// Overwrite 1/2/3/4/8 bytes of the input with values that are likely to cause bugs (eg. 0
    /// or INT_MAX)
    ///
    /// Only the bytes covered by the chosen magic value are replaced, the input keeps its length.
    fn magic_nums(&mut self, input: &mut Vec<u8>) -> Result<(), ()> {
        // Just return if input is too small to operate on in a useful manner
        if input.len() < 32 { return Err(()); }

        let (r1, r2) = self.get2_rand();
        let start = r1 % (input.len() - 8);
        let magic = MAGIC_NUMS[r2 % MAGIC_NUMS.len()];

        // `start` is at least 9 bytes away from the end and no magic value is longer than 8
        // bytes, so the target range is always in-bounds
        input[start..start + magic.len()].copy_from_slice(magic);
        Ok(())
    }

    /// Add or subtract some bytes to attempt to cause an integer over/underflow
    ///
    /// Each touched byte is changed by a value 0-31; additions and subtractions alternate from
    /// one touched byte to the next. Fails on an empty input.
    fn simple_arithmetic(&mut self, input: &mut [u8]) -> Result<(), ()> {
        let input_length = input.len();
        if input_length == 0 { return Err(()); }

        let (r1, r2) = self.get2_rand();
        for i in Self::corruption_rounds(r1, r2) {
            let (r1, r2) = self.get2_rand();
            let target = &mut input[r1 % input_length];
            let delta = (r2 % 32) as u8;
            *target = if i & 1 == 0 {
                target.wrapping_add(delta)
            } else {
                target.wrapping_sub(delta)
            };
        }
        Ok(())
    }

    /// Remove part of the input
    fn remove_block(&mut self, input: &mut Vec<u8>) -> Result<(), ()> {
        let input_length = input.len();

        // Just return if input is already extremely small
        if input_length < 32 { return Err(()); }

        let (r1, r2) = self.get2_rand();

        let start = r1 % input_length;
        let end = start + core::cmp::min(input_length - start, r2 % 512);

        // Refuse to perform this mutation if input would end up too small
        if (input_length - (end - start)) < 32 { return Err(()); }

        input.drain(start..end);
        Ok(())
    }

    /// Take a random block out of the input and duplicate it into a different location of the
    /// input
    fn duplicate_block(&mut self, input: &mut Vec<u8>) -> Result<(), ()> {
        let input_length = input.len();

        // Just return if input is too small to operate on in a useful manner
        if input_length < 32 { return Err(()); }

        let (r1, r2) = self.get2_rand();

        // Calculate a random range within the input
        let start = r1 % input_length;
        let end = start + core::cmp::min(input_length - start, r2 % 128);

        // Chose random location to insert dup'd block into
        let idx = self.rng.next_u32() as usize % input_length;

        // Copy the block and insert it at `idx`: [(0..idx) + block + (idx..)]
        let block = input[start..end].to_vec();
        input.splice(idx..idx, block);
        Ok(())
    }

    /// Resize the input, either truncating it or appending random bytes to its end
    ///
    /// Truncation cuts the input down to a random length below 512 bytes and below half of its
    /// current size, and is refused if fewer than 32 bytes would remain.
    fn resize(&mut self, input: &mut Vec<u8>) -> Result<(), ()> {
        let input_length = input.len();
        let (r1, r2) = self.get2_rand();

        if r1 & 1 == 0 { // Truncate
            // Just return if input is too small to operate on in a useful manner
            if input_length < 32 { return Err(()); }
            let trunc_val = (r2 % (input_length / 2)) % 512;

            // Refuse to perform this mutation if input would end up too small
            if trunc_val < 32 { return Err(()); }

            input.truncate(trunc_val);
        } else { // Increase size
            let size = if input_length < 32 {
                32
            } else {
                (r2 % (input_length / 2)) % 512
            };

            // Random bytes are generated 8 at a time, so `size` is rounded down to a multiple
            // of 8. Native byte order matches reinterpreting the u64's in memory
            for _ in 0..(size / 8) {
                input.extend_from_slice(&self.rng.next_u64().to_ne_bytes());
            }
        }
        Ok(())
    }

    /// Replace some of the input bytes with a provided dictionary entry
    ///
    /// Fails if no dictionary entries exist or if the input is not longer than the chosen entry.
    fn dict_replace(&mut self, input: &mut Vec<u8>) -> Result<(), ()> {
        let dict = match self.dictionary.as_ref() {
            Some(dict) if !dict.is_empty() => dict,
            _ => return Err(()),
        };
        let entry = dict[self.rng.next_u32() as usize % dict.len()].as_bytes();

        if input.len() <= entry.len() { return Err(()); }
        let start = self.rng.next_u64() as usize % (input.len() - entry.len());
        input[start..start + entry.len()].copy_from_slice(entry);

        Ok(())
    }

    /// Chose a random mutation strategy
    fn chose_mut(&mut self) -> Mutation {
        let tmp_rand = self.rng.next_u32() as usize % self.mutation_strats.len();
        self.mutation_strats[tmp_rand]
    }

    /// Run a single mutation strategy against the input
    fn apply(&mut self, mutation: Mutation, input: &mut Vec<u8>) -> Result<(), ()> {
        match mutation {
            Mutation::ByteReplace => self.byte_replace(input),
            Mutation::BitFlip => self.bit_flip(input),
            Mutation::MagicNum => self.magic_nums(input),
            Mutation::SimpleArithmetic => self.simple_arithmetic(input),
            Mutation::RemoveBlock => self.remove_block(input),
            Mutation::DupBlock => self.duplicate_block(input),
            Mutation::Resize => self.resize(input),
            Mutation::Dictionary => self.dict_replace(input),
        }
    }

    /// Apply various implemented mutation strategies. Every 100 cases, use 'havoc-mode' which
    /// applies multiple strategies at the same time
    fn mutate_complex(&mut self, input: &mut Vec<u8>) {
        let mut muts = Vec::new();
        self.havoc_counter += 1;

        // Usually only perform 1 mutation, but if havoc is invoked, we queue up multiple
        // mutations onto the input in this fuzz-case
        // NOTE(review): the havoc range is `1..(rand % 8)`, so it queues 0-6 mutations and can
        // leave the input untouched - confirm whether at least one mutation was intended
        if self.havoc_counter == 100 {
            self.havoc_counter = 0;
            for _ in 1..(self.rng.next_u32() % 8) {
                muts.push(self.chose_mut());
            }
        } else {
            muts.push(self.chose_mut());
        }

        for mut mutation in muts {
            // If the chosen strategy failed, chose a different mutation and rerun the mutator
            // until one of them succeeds. For an empty input only the growing half of `Resize`
            // can succeed, so this may take a number of draws
            while self.apply(mutation, input).is_err() {
                mutation = self.chose_mut();
            }
        }
    }

    /// Perform extremely simple/fast mutations
    ///
    /// An empty input is left untouched.
    fn mutate_simple(&mut self, input: &mut [u8]) {
        let input_length = input.len();
        if input_length == 0 { return; }

        for _ in 0..(self.rng.next_u32() % 8) {
            let (r1, r2) = self.get2_rand();
            input[r1 % input_length] = r2 as u8;
        }
    }

    /// Start the mutation process of an input
    pub fn mutate(&mut self, input: &mut Vec<u8>) {
        if MUTATE_SIMPLE {
            self.mutate_simple(input);
        } else {
            self.mutate_complex(input);
        }
    }
}

-------------------------------------------------------------------------------- /docs/code_gen.md: -------------------------------------------------------------------------------- 1 | # Code Generation 2 | 3 | 4 | **Small Note** 5 | ``` 6 | This was by far the most time-consuming and difficult aspect of this entire project. I initially 7 | spent about 3 months trying to format this like a proper optimizing compiler might. This included 8 | lifting the code to an intermediate representation, transforming it to single-static-assignment form, 9 | performing register allocation and finally compiling it to x86_64 machine code. I implemented all of 10 | these, but in the end I decided to fall back to a simpler approach due to multiple reasons I outline 11 | below. I still believe that this approach is possible though and holds decent performance gains, so 12 | I will most likely reattempt this in the future. 13 | ``` 14 | 15 | #### Overview 16 | 17 | This emulator makes use of a custom just-in-time compiler for all of its execution. The code generation is a multi-step process that leads to a 20-50x performance increase over pure emulation. 18 | 19 | Once execution is started, each individual emulator thread has the ability to compile new code. Whenever the emulator runs into a function that we have not yet compiled it invokes a lock on the JIT code backend and attempts to compile the entire function into the JIT backend before resuming execution. This lock only stops other threads from adding new code to the JIT-backing during compilation without stopping them from using the JIT-backing. This means that one thread compiling new code has basically no impact on any of the other threads, making this lock mostly free while providing 1 uniform memory region that contains all of the compiled code for all threads. Once the compilation is completed, the mutex is unlocked and the addresses of the newly generated code are added to the JIT lookup table. 
At this point, the compiling thread can resume fuzzer execution and all other threads can access this newly compiled code via the translation table. 20 | 21 | Most of the code pertaining to code-generation can be found in [jit.rs](https://github.com/seal9055/sfuzz/blob/main/src/jit.rs), [irgraph.rs](https://github.com/seal9055/sfuzz/blob/main/src/irgraph.rs), and [emulator.rs](https://github.com/seal9055/sfuzz/blob/main/src/emulator.rs). More detailed descriptions of some of these processes are provided below. 22 | 23 | #### Lifting a Function to Custom IR 24 | The first step of actual code generation is to lift the entire function into an intermediate representation. The size of the function is determined during the initialization phase when first loading the target. This is done by parsing the elf metadata and setting up a hashmap mapping function start addresses to their sizes.
25 | 26 | The IR-lifting just iterates through the original instructions and creates an IR instruction based on the original instruction using a large switch statement. The below example shows what the intermediate representation may look like for a very minimal function that pretty much just performs a branch based on a comparison in the first block. 27 | ``` 28 | Label @ 0x1000 29 | Label @ 0x1000 30 | 0x001000 A0 = 0x14 31 | 0x001004 A1 = 0xA 32 | 0x001008 if A0 == A1 (0x100C, 0x1028) 33 | 34 | Label @ 0x100C 35 | 0x00100C A2 = A0 + A1 36 | 0x001010 A3 = 0x1 37 | 0x001014 Jmp 0x1018 38 | 39 | Label @ 0x1018 40 | 0x001024 Jmp 0x1034 41 | 42 | Label @ 0x1028 43 | 0x001028 A2 = A0 - A1 44 | 0x00102C A3 = 0x2 45 | 0x001030 Jmp 0x1018 46 | 47 | Label @ 0x1034 48 | 0x001034 Ret 49 | ``` 50 |

F1

51 | 52 | At this point, I attempted a couple of different approaches before settling on the current code generation procedure. My first approach was to first transform the above IR code into single static assignment form. This allows for stronger optimizations and is a very popular choice for modern compilers. Next, I used a linear scan register allocator to assign registers to the code and compile the final code. 53 | 54 | This approach resulted in multiple issues that led to me eventually abandoning it in favor of the current implementation. Some of the reasons as to why I changed my approach are listed below. 55 | 56 | 1. **Debugging** - Since this is meant to be a fuzzer, being able to properly debug crashes, or at least 57 | print out register states is important. After doing register allocation, determining which x86 register is allocated to each RISCV register at runtime to print out useful information is very difficult. 58 | 59 | 2. **Extendability** - When it comes to register allocation, a lot of the backend features (eg. A0-A7 for 60 | arguments, or syscall number in A7) are architecture-dependent. This makes it a lot harder to write the backend in a way that can be extended with new architectures by just adding a front end. 61 | 62 | 3. **Performance** - In theory, the ssa/regalloc approach will lead to better final code. In this case, 63 | however, since it's a binary translator, a lot of registers such as function arguments or stack pointers have to be hardcoded to x86 registers since we don't have important information such as the number of arguments when translating binary -> binary. This in addition to the meta-data required by the JIT (pointer to memory, permissions, JIT lookup table, register spill-stack, etc) led to most x86 registers being in use, leaving only 4 x86 registers available for the actual register allocation in my approach. 
This could obviously be greatly improved upon, but this would require a lot more time to achieve comparable results. 64 | 65 | 4. **Complexity** - This approach added a lot of extra complexity to the project which caused major 66 | issues and would have delayed the completion of this project by several months to debug all of these issues 67 | 68 | Nevertheless, I did implement both ssa-generation and register allocation before eventually abandoning it, and since it was a very large part of my time investment I decided to still keep notes on it. The implementation details are listed in the below 'Optimizing Compiler' section, and the final code for this approach can be viewed at commit 7d129ab847d171b66901f4c936dd2ad5c5a1b79a on the Github repository. 69 | 70 | #### Compiling to x86 Machine Code 71 | 72 | This phase pretty much just loops through all the previously lifted IR instructions and compiles them to x86 code. Whenever a syscall or a hooked function is encountered, appropriate instructions are generated to leave the JIT and handle the procedure. All registers are currently memory-mapped within the emulator. While this would have a very significant performance impact for normal programs, in the case of a fuzzer I can use the free'd registers up through this approach to point to other important frequently accessed fields such as dirty lists or instruction counters, so in the end, the performance overhead incurred by this is negligible. 73 | 74 | In addition to the previously mentioned actual code compilation, a lot of other very important steps are taken at this point. Mainly, the RISC-V to x86 translation table is populated, and instructions to instrument the code for fuzzing are inserted to enable snapshotting, coverage, hooks and proper permission checks. 75 |
76 | 77 | ## Optimizing Compiler 78 | 79 | #### Generate SSA-form for the IR 80 | 81 | The next step is to lift the previously generated code into single static assignment form. In this 82 | form each variable is assigned exactly once. This is where the second field of each register comes 83 | in. It is basically a counter for each register used to "create" a new register each time the 84 | register is redefined. This creates some problems if a join point after a branch needs to make use 85 | of a register that differs depending on which branch was taken (eg. in branch 1, `A1(1) = 5` is 86 | executed while in branch 2 `A1(2) = 10` is executed). In this case the succeeding block does not 87 | know which version of A1 to operate on. SSA form resolves this with a phi function: a pseudo-instruction placed at the join point that selects the version belonging to whichever predecessor was actually executed. Continuing with the above example, the phi function at 88 | the beginning of the join block would look like this: `A1(3) = Phi(A1(1), A1(2))`. The computer 89 | obviously does not have such an instruction, or ssa-form register usage, so it needs to eventually 90 | be deconstructed. Nevertheless, this ssa representation is very frequently used in compilers 91 | because it provides many advantages when attempting to run optimization passes on the code. 92 | 93 | The below graph showcases what this form would look like for the above program. Note how the second 94 | field of each register is now filled to make sure each register is only defined once, and that the 95 | final block in the function now has phi-functions at its beginning for each register that it may be 96 | required for.

97 | 98 |

Dominator Tree

100 |

F2

101 | 102 | In this project ssa form is generated using the techniques proposed in 103 | [Efficiently Computing Static Single Assignment Form and the Control Dependence 104 | Graph](https://www.cs.utexas.edu/~pingali/CS380C/2010/papers/ssaCytron.pdf) by Cytron et al. 105 | 106 | This algorithm makes use of dominance frontiers to compute a semipruned ssa representation that has 107 | fewer phi-functions than more naive implementations that may just place phi-functions in 108 | succeeding blocks for every register that survives block boundaries. 109 | 110 | In my implementation, the steps to generate this ssa form are divided up into 4 main phases. 111 | 112 | * Generate dominator tree 113 | 114 | In this phase, given a block b in the control flow graph, the set of blocks that strictly 115 | dominate b are given by (Dom(b)-b) where Dom(b) determines all blocks that must be traversed 116 | starting at the root of the cfg to get to block b. In this set the block that is closest 117 | to b is b's immediate dominator which is what we care to extract in this phase. This means 118 | that each cfg block exists in this form and that if a is the immediate dominator of b, 119 | an edge exists from a to b. 120 | 121 | The corresponding dominator tree for the above program is shown below. The first block 122 | dominates 123 | the 2 branching blocks as expected, but unlike in the cfg representation, here an edge exists 124 | from the first block to the join block because it is the earliest block that strictly dominates 125 | it. 126 | 127 |

Dominator Tree

129 |

F3

130 | 131 | * Find the dominance frontier 132 | 133 | The dominance frontier is used to determine which registers require phi-functions for a given 134 | block. It starts by identifying all join points j in the graph since these are the only blocks 135 | that may potentially require phi-functions. Next it loops through all of the cfg-predecessors 136 | of each block j until iDom(j) is found. During this traversal, block j is added to the 137 | dominance frontier set of each block encountered in this process with the exception of iDom(j). 138 | 139 | This leads to the following dominance frontier for the above program which tells us that 140 | block 1 141 | and 2 may need phi functions to be placed in block 2 (block 1 & 2 represent the 2 branches from 142 | the original CFG as indicated by the labels). 143 | ``` 144 | Label @ 0x1000 : {} 145 | Label @ 0x100c : {2} 146 | Label @ 0x1018 : {} 147 | Label @ 0x1028 : {2} 148 | Label @ 0x1034 : {} 149 | ``` 150 |

F4

151 | 152 | * Insert phi functions into the graph 153 | 154 | Now that we know where we want to place phi functions, they need to actually be placed for 155 | registers that require them. Since we have the dominance frontiers we can determine this fairly 156 | well without accidentally placing many unnecessary phi-functions. For every definition x in 157 | block b, a phi-function needs to be inserted at every node in the dominance frontier of 158 | b. Since 159 | the insertion of a phi-function alters the instruction state, it may force the insertion of 160 | additional phi-functions. This process needs to restart after every phi-function insertion. 161 | 162 | This results in 2 phi-functions being inserted at the start of block 2 as showcased in 163 | F2. 164 | 165 | * Rename all registers to their appropriate ssa form 166 | 167 | In this phase the ssa form is completed by finally renaming all registers to their ssa-form 168 | name. Each register R with multiple definitions will thus be renamed R(1), R(2), ... R(n). This 169 | is done by maintaining a count of the highest-count definition of a register that is 170 | incremented whenever a new version of the register is defined alongside a stack that has 171 | the most recently defined version of the register on top of it. 172 | 173 | The algorithm used here walks through the dominator tree and for each block it starts by 174 | renaming all phi-function definitions. Next it walks through each block in the 175 | program and rewrites the operands and declarations using the currently active ssa name for 176 | each register. For declarations, a newly generated ssa name must be created by incrementing its 177 | count variable and pushing it onto the register's stack. Finally the parameters of the phi 178 | functions of blocks succeeding the current block are renamed. 179 | 180 | Next it starts recursively calling the rename procedure on all children of the current 181 | block in the dominator tree. 
After this recursive call completes, all newly defined ssa 182 | registers are popped from each register's stack, thus resetting the register states back to 183 | the state prior to this block's renaming procedure. 184 | 185 | In the current state of the compiler, ssa representation does not yet serve much of a purpose 186 | (although it can lead to better register allocation) since no optimizations have been written. This 187 | form does however allow for powerful optimizations to be added in the future.

188 | 189 | #### Potential Optimizations 190 | 191 | Modern compiler backends employ many different optimizations to produce the best code possible. In 192 | this case, due to limited time I will stick to very simple optimizations that are fairly 193 | straightforward to implement while providing decent performance benefits such as eliminating all 194 | instructions that attempt to write to the Zero register (basically a nop), or some basic constant 195 | propagation to eliminate all temporary instructions that my IR added.

196 | 197 | #### Register Allocation 198 | 199 | The goal of this phase is to replace the previously set ssa instruction operands with standard 200 | X86\_64 registers. The main difficulty of this process is to correctly determine efficient register 201 | allocation strategies that result in the least amount of registers being spilled to memory. This 202 | phase is still very early in development, and I am not entirely sure how I want to implement 203 | it yet. 204 | 205 | * Instruction Numbering 206 | 207 | The first step is to number the instructions. This assigns a unique id to each instruction. The main 208 | thing to consider here is that instructions need to be ordered in order of execution. This means 209 | that every instruction A that is executed before instruction B needs to have a lower id. This can be 210 | accomplished using the previously generated dominator tree. 211 | 212 | * Register Live Intervals 213 | 214 | The goal of this phase is to determine how long each register is alive. For each used register it 215 | computes an interval from the point that the register is first defined to its last usage according 216 | to the previously marked id numbers during the instruction numbering phase. 217 | 218 | * Linear Scan Register Allocation 219 | 220 | This algorithm is pretty much the simplest way to do register allocation across block boundaries. 221 | Nevertheless it is the most popularly used register allocation algorithm in JIT compilers since it 222 | results in low compile time which is an important metric for JIT compilers. Additionally it only 223 | produces slightly worse code than much slower algorithms such as graph coloring approaches. 224 | 225 | The pseudo-code for this register allocation approach is listed below. We loop through all 226 | previously determined register liveness intervals and allocate an X86 register as long as 227 | free registers are available. 
If there is no free register available, the last used register is 228 | spilled to memory to obtain a free register. 229 | 230 | ```rs 231 | for (reg, interval) in live_intervals { // in order of increasing starting point 232 | // Start by expiring old intervals by removing all no longer in use registers from the active 233 | // mapping and adding it to the free registers instead. 234 | expire_old_intervals(); 235 | 236 | if free_regs.is_empty() { 237 | // Need to spill register to memory if there are no more free registers available 238 | // Spill the register with the farthest use 239 | spill_reg = active.pop(); 240 | 241 | // Use the now free'd register for the current register 242 | mapping.insert(reg, spill_reg); 243 | 244 | // Insert new range to active range 245 | active.insert(spill_reg, inter); 246 | } else { 247 | // Free register available, so just add it to the mapping 248 | preg = free_regs.pop(); 249 | active.insert(preg, inter); 250 | mapping.insert(reg, preg); 251 | } 252 | return mapping; 253 | } 254 | ``` 255 | 256 | #### Future Work 257 | As mentioned previously I would like to re-explore the optimizing compiler approach in the future. I believe it has a lot more potential than the more naive implementation, but it is not an immediate priority because there are more important improvements that I want to tackle first. 
258 | -------------------------------------------------------------------------------- /tools/program_generator/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(variant_count)] 2 | #![feature(once_cell)] 3 | 4 | pub mod rng; 5 | pub mod compile; 6 | 7 | use rng::Rng; 8 | 9 | use std::fmt; 10 | use std::lazy::SyncLazy; 11 | 12 | /// This program takes an input file via argv[1], this variable specifies the amount of bytes that 13 | /// are read in and available for use from the input, larger values should make finding the bugs a 14 | /// little harder 15 | const INPUT_SIZE: usize = 500; 16 | 17 | /// Maximum depth that scopes can go too before early returning. Without this blocks would 18 | /// recursively create new blocks until a stack overflow occurs. Recommended: 8-12 for approximately 19 | /// 2,000 - 200,000 lines of code. For larger complexity scores, the INPUT_SIZE should also be 20 | /// increased to reduce duplication 21 | const COMPLEXITY: usize = 9; 22 | 23 | /// Minimum depth of functions, prevents too shallow functions that just immediately crash on base 24 | /// case 25 | const MIN_DEPTH: usize = 1; 26 | 27 | /// Minimum and maximum sizes for buffer allocations in the program. 28 | const MIN_ALLOC_SIZE: usize = 0x20; 29 | const MAX_ALLOC_SIZE: usize = 0x100; 30 | 31 | /// Maximum length for strings that can be used in comparisons. 
This needs to be smaller than 32 | /// `INPUT_SIZE` 33 | const MAX_STRING_LEN: usize = 0x20; 34 | 35 | /// Index into the provided user input 36 | #[derive(Debug, Clone, Copy)] 37 | pub struct Index(usize); 38 | 39 | impl fmt::Display for Index { 40 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 41 | write!(f, "{}", self.0) 42 | } 43 | } 44 | 45 | /// Create an rng object on program startup 46 | pub static RNG: SyncLazy = SyncLazy::new(|| { 47 | Rng::new() 48 | }); 49 | 50 | /// Supported values 51 | #[derive(Debug, Clone)] 52 | pub enum Value { 53 | Number(usize), 54 | StringLiteral(String), 55 | Arr(Vec), 56 | } 57 | 58 | impl fmt::Display for Value { 59 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 60 | match self { 61 | Value::Number(v) => write!(f, "{}", v), 62 | Value::StringLiteral(v) => write!(f, "\"{}\"", v), 63 | _ => unreachable!(), 64 | } 65 | } 66 | } 67 | 68 | /// Supported types 69 | #[derive(Debug, Clone, Copy, Eq, PartialEq)] 70 | pub enum Type { 71 | Void, 72 | Number, 73 | Str, 74 | Argv, 75 | Buffer, 76 | } 77 | 78 | impl fmt::Display for Type { 79 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 80 | match self { 81 | Type::Void => write!(f, "void"), 82 | Type::Number => write!(f, "int"), 83 | Type::Str => write!(f, "unsigned char *"), 84 | Type::Buffer => write!(f, "unsigned char*"), 85 | _ => unreachable!(), 86 | } 87 | } 88 | } 89 | 90 | /// Expressions that are used in if-statements 91 | #[derive(Debug, Clone)] 92 | enum Expr { 93 | /// Index into input-array and 8-bit value 94 | ByteCmp(Index, u8), 95 | 96 | /// Index into input-array and variable containing an 8-bit Value 97 | VarByteCmp(Index, String), 98 | 99 | /// Index into input-array and 16-bit value 100 | WordCmp(Index, u16), 101 | 102 | /// Index into input-array and variable containing a 16-bit Value 103 | VarWordCmp(Index, String), 104 | 105 | /// Index into input-array and 32-bit value 106 | DWordCmp(Index, u32), 107 | 108 | /// Index into 
input-array and variable containing a 32-bit Value 109 | VarDWordCmp(Index, String), 110 | 111 | /// Index into input-array and 64-bit value to be used for comparison operation 112 | QWordCmp(Index, u64), 113 | 114 | /// Index into input-array and variable containing a 64-bit Value 115 | VarQWordCmp(Index, String), 116 | 117 | /// Index into input-array and a ByteString used for comparison operation 118 | StrCmp(Index, Value), 119 | 120 | /// Index into input-array and a ByteString used for comparison operation with a variable 121 | VarStrCmp(Index, String), 122 | } 123 | 124 | impl Expr { 125 | /// Return a random Expression 126 | fn get_rand_expr(vars: &Vec<(String, Type)>) -> Self { 127 | let num_entries = std::mem::variant_count::(); 128 | let rstr = std::str::from_utf8(&RNG.next_string(16, 0x61, 0x7b)).unwrap().to_string(); 129 | let rnum = RNG.gen(); 130 | 131 | let num_vars = vars.iter().filter(|e| e.1 == Type::Number) 132 | .map(|e| e.0.clone()).collect::>(); 133 | 134 | let str_vars = vars.iter().filter(|e| e.1 == Type::Str) 135 | .map(|e| e.0.clone()).collect::>(); 136 | 137 | loop { 138 | match RNG.next_num(num_entries) { 139 | 0 => { 140 | return Expr::ByteCmp(Index(RNG.next_num(INPUT_SIZE)), rnum as u8); 141 | }, 142 | 1 => { 143 | if num_vars.is_empty() { continue; } 144 | return Expr::VarByteCmp(Index(RNG.next_num(INPUT_SIZE)), 145 | num_vars[RNG.next_num(num_vars.len())].clone()); 146 | }, 147 | 2 => { 148 | return Expr::WordCmp(Index(RNG.next_num(INPUT_SIZE)), rnum as u16); 149 | }, 150 | 3 => { 151 | if num_vars.is_empty() { continue; } 152 | return Expr::VarWordCmp(Index(RNG.next_num(INPUT_SIZE)), 153 | num_vars[RNG.next_num(num_vars.len())].clone()); 154 | }, 155 | 4 => { 156 | return Expr::DWordCmp(Index(RNG.next_num(INPUT_SIZE)), rnum as u32); 157 | }, 158 | 5 => { 159 | if num_vars.is_empty() { continue; } 160 | return Expr::VarDWordCmp(Index(RNG.next_num(INPUT_SIZE)), 161 | num_vars[RNG.next_num(num_vars.len())].clone()); 162 | }, 163 | 6 => 
{ 164 | return Expr::QWordCmp(Index(RNG.next_num(INPUT_SIZE)), rnum as u64); 165 | }, 166 | 7 => { 167 | if num_vars.is_empty() { continue; } 168 | return Expr::VarQWordCmp(Index(RNG.next_num(INPUT_SIZE)), 169 | num_vars[RNG.next_num(num_vars.len())].clone()); 170 | }, 171 | 8 => { 172 | return Expr::StrCmp(Index(RNG.next_num(INPUT_SIZE-MAX_STRING_LEN)), 173 | Value::StringLiteral(rstr)); 174 | } 175 | 9 => { 176 | if str_vars.is_empty() { continue; } 177 | return Expr::VarStrCmp(Index(RNG.next_num(INPUT_SIZE-MAX_STRING_LEN)), 178 | str_vars[RNG.next_num(str_vars.len())].clone()); 179 | } 180 | _ => unreachable!(), 181 | }; 182 | } 183 | } 184 | } 185 | 186 | impl fmt::Display for Expr { 187 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 188 | let signs = ["=="]; 189 | let s = signs[RNG.next_num(signs.len())]; 190 | match self { 191 | Expr::VarByteCmp(a, b) | 192 | Expr::VarWordCmp(a, b) | 193 | Expr::VarDWordCmp(a, b) | 194 | Expr::VarQWordCmp(a, b) | 195 | Expr::VarStrCmp(a, b) => { 196 | write!(f, "buf[{}] == {}", a, b) 197 | }, 198 | Expr::ByteCmp(a, b) => write!(f, "buf[{}] {} {}", a, s, b), 199 | Expr::WordCmp(a, b) => write!(f, "*(unsigned short*)(buf + {}) {} {}", a, s, b), 200 | Expr::QWordCmp(a, b) => write!(f, "*(unsigned int*)(buf + {}) {} {}U", a, s, b), 201 | Expr::DWordCmp(a, b) => write!(f, "*(unsigned long*)(buf + {}) {} {}ULL", a, s, b), 202 | Expr::StrCmp(a, b) => write!(f, "!strcmp(&buf[{}], {})", a, b), 203 | } 204 | } 205 | } 206 | 207 | const NUM_SIMPLE_OPS: usize = 2; 208 | const NUM_COMPLEX_OPS: usize = 1; 209 | 210 | /// Operations that can occur in the code 211 | #[derive(Debug, Clone)] 212 | enum Operation { 213 | // Simple Operations 214 | // These are operations that occur at the start of a block, and solely exist to setup some 215 | // random local variables that can then later be used my some more complex operations 216 | 217 | /// Add input[.0] to .1 and assign it to a variable 218 | AddInts(Type, String, Index, usize), 
219 | 220 | /// Subtract input[.0] from .1 and assign it to a variable 221 | SubInts(Type, String, Index, usize), 222 | 223 | // Complex Operations 224 | // These are operations that occur at the start of a block, and solely exist to setup some 225 | // random local variables that can then later be used my some more complex operations 226 | 227 | /// If expression alongside a true-block 228 | If(Expr, Block), 229 | 230 | /// Used to call generated functions (name, type, args) 231 | CallFunc(String, Type, Vec), 232 | 233 | /// Insert a crash 234 | Crash, 235 | 236 | // All operations below this point should not be returned by the `get_rand_op()` function, and 237 | // are solely used for special cases such as program initialization or inserting crashes 238 | 239 | /// Used in `main` to allocate the input buffer based on argv 240 | AllocInputBuf, 241 | 242 | /// Used to check that argv was properly provided in main 243 | ArgvCheck, 244 | 245 | /// Used to open the file provided by argv in main 246 | OpenFile, 247 | 248 | /// Used to read in the fuzz-input from the provided file 249 | ReadFile, 250 | } 251 | 252 | impl fmt::Display for Operation { 253 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 254 | match self { 255 | Operation::AddInts(a, b, c, d) => write!(f, "{} {} = buf[{}] + {}", a, b, c, d), 256 | Operation::SubInts(a, b, c, d) => write!(f, "{} {} = buf[{}] - {}", a, b, c, d), 257 | Operation::If(a, _) => write!(f, "if ({}) ", a), 258 | Operation::AllocInputBuf => write!(f, "unsigned char *buf = malloc({})", INPUT_SIZE), 259 | Operation::ArgvCheck => write!(f, "if (argc != 2) return"), 260 | Operation::OpenFile => write!(f, "FILE *fd = fopen(argv[1], \"r\")"), 261 | Operation::ReadFile => write!(f, "fgets(buf, {}, fd)", INPUT_SIZE), 262 | Operation::Crash => write!(f, "*(unsigned long*)0x{:x} = 0", RNG.gen()), 263 | Operation::CallFunc(a, _, _) => write!(f, "{}(buf)", a), 264 | } 265 | } 266 | } 267 | 268 | impl Operation { 269 | /// Return a 
random simple operation 270 | fn get_simple_op() -> Self { 271 | let var_name = std::str::from_utf8(&RNG.next_string(16, 0x61, 0x7b)).unwrap().to_string(); 272 | 273 | match RNG.next_num(NUM_SIMPLE_OPS) { 274 | 0 => Operation::AddInts(Type::Number, var_name, 275 | Index(RNG.next_num(INPUT_SIZE)), RNG.gen_range(MIN_ALLOC_SIZE, MAX_ALLOC_SIZE)), 276 | 1 => Operation::SubInts(Type::Number, var_name, 277 | Index(RNG.next_num(INPUT_SIZE)), RNG.gen_range(MIN_ALLOC_SIZE, MAX_ALLOC_SIZE)), 278 | _ => unreachable!(), 279 | } 280 | } 281 | 282 | /// Return a random more complex operation 283 | fn get_complex_op(program: &mut Program, vars: &Vec<(String, Type)>, complexity: usize, 284 | depth: usize) -> Self { 285 | let _var_name = std::str::from_utf8(&RNG.next_string(16, 0x41, 0x7b)).unwrap().to_string(); 286 | 287 | loop { 288 | match RNG.next_num(NUM_COMPLEX_OPS) { 289 | 0 => { 290 | return Operation::If(Expr::get_rand_expr(vars), 291 | Block::init_new_block(program, complexity - 1, depth + 1)); 292 | }, 293 | _ => unreachable!(), 294 | } 295 | } 296 | } 297 | } 298 | 299 | /// Scoped block with allocated variables and a list of statements to be executed 300 | #[derive(Debug, Default, Clone)] 301 | pub struct Block { 302 | /// Statements contained in a block 303 | stmt_list: Vec, 304 | 305 | /// (Name, Type) 306 | variables: Vec<(String, Type)>, 307 | } 308 | 309 | impl Block { 310 | /// Create a new block initialized with random operations 311 | pub fn init_new_block(program: &mut Program, complexity: usize, depth: usize) -> Self { 312 | let mut block = Block::default(); 313 | 314 | // If the minimum depth has been reached, there's a chance that the block will be terminated 315 | // on a crash or by calling a different function 316 | if depth >= MIN_DEPTH { 317 | let num = RNG.gen_range(0, complexity); 318 | if num < 5 { 319 | if num < 2 { 320 | // Insert crash 321 | block.stmt_list.push(Operation::Crash); 322 | } else { 323 | // Insert function call 324 | let func = 
program.function_list.get(RNG.next_num(program.function_list.len())); 325 | 326 | // Insert a function-call unless 'main' was retrieved, or no functions exist 327 | // yet, in which case just insert a crash 328 | if let Some(f) = func { 329 | if f.name == "main" { 330 | block.stmt_list.push(Operation::Crash); 331 | } else { 332 | block.stmt_list.push( 333 | Operation::CallFunc( 334 | f.name.clone(), 335 | Type::Void, 336 | Vec::new(), 337 | )); 338 | } 339 | } else { 340 | block.stmt_list.push(Operation::Crash); 341 | } 342 | } 343 | 344 | return block; 345 | } 346 | } 347 | 348 | // Start by inserting some simple operations to setup some variables that can later be used 349 | for _ in 0..RNG.gen_range(2, 5) { 350 | let op = Operation::get_simple_op(); 351 | 352 | // If this operation produces a value, add it to this blocks variables 353 | match &op { 354 | Operation::AddInts(typ, name, ..) | 355 | Operation::SubInts(typ, name, ..) => { 356 | block.variables.push((name.clone(), *typ)); 357 | }, 358 | _ => {}, 359 | } 360 | block.stmt_list.push(op); 361 | } 362 | 363 | // Next insert some more complex operations 364 | for _ in 0..RNG.gen_range(5, 10) { 365 | let op = Operation::get_complex_op(program, &block.variables, complexity, depth); 366 | block.stmt_list.push(op); 367 | } 368 | block 369 | } 370 | 371 | /// Create the main block. 
This just handles initial setup and calls the functions that should 372 | /// be fuzzed 373 | pub fn init_main_block(functions: &[Function]) -> Self { 374 | let mut block = Block::default(); 375 | 376 | // Allocate a global buffer to hold argv and write fuzz-input it 377 | block.stmt_list.push(Operation::ArgvCheck); 378 | block.stmt_list.push(Operation::OpenFile); 379 | block.stmt_list.push(Operation::AllocInputBuf); 380 | block.stmt_list.push(Operation::ReadFile); 381 | 382 | // Create a call to all functions 383 | for func in functions { 384 | block.stmt_list.push(Operation::CallFunc( 385 | func.name.clone(), 386 | func.typ, 387 | func.arguments.iter().map(|e| e.0).collect(), 388 | )); 389 | } 390 | block 391 | } 392 | } 393 | 394 | /// Intermediate representation of functions 395 | #[derive(Debug, Clone)] 396 | pub struct Function { 397 | name: String, 398 | typ: Type, 399 | arguments: Vec<(Type, String)>, 400 | body: Block, 401 | } 402 | 403 | /// The actual program being modelled 404 | #[derive(Debug, Default, Clone)] 405 | pub struct Program { 406 | /// List of generated functions 407 | function_list: Vec, 408 | } 409 | 410 | impl Program { 411 | pub fn default() -> Self { 412 | Self { 413 | function_list: Vec::new(), 414 | } 415 | } 416 | 417 | /// Start creation of the program 418 | pub fn create_program() -> Program { 419 | let mut program = Program::default(); 420 | 421 | // Create random generated functions that can be called from main 422 | for i in 0..COMPLEXITY { 423 | let func_name = format!("func_{}", i+1); 424 | let func = Function { 425 | name: func_name.to_string(), 426 | typ: Type::Void, 427 | arguments: vec![(Type::Buffer, "buf".to_string())], 428 | body: Block::init_new_block(&mut program.clone(), COMPLEXITY, 0) 429 | }; 430 | program.function_list.push(func); 431 | } 432 | 433 | // Create main function 434 | program.create_main(); 435 | 436 | program 437 | } 438 | 439 | /// Create main function. 
It has a special case since it requires additional initialization 440 | /// routines 441 | fn create_main(&mut self) { 442 | self.function_list.push( 443 | Function { 444 | name: "main".to_string(), 445 | typ: Type::Void, 446 | arguments: vec![(Type::Number, "argc".to_string()), (Type::Argv, "argv".to_string())], 447 | body: Block::init_main_block(&self.function_list), 448 | }); 449 | } 450 | } 451 | 452 | -------------------------------------------------------------------------------- /fuzzing.md: -------------------------------------------------------------------------------- 1 | #### This document is not specific to this fuzzer, but rather just a list of resources I found helpful while developing this fuzzer alongside some simple notes on some important fuzzing related topics from these papers. 2 | 3 | ### Fuzzing Reading List 4 | 1. Basics https://www.fuzzingbook.org/ 5 | 2. AFL++ https://www.usenix.org/system/files/woot20-paper-fioraldi.pdf 6 | 3. Afl-dev https://lcamtuf.blogspot.com/ 7 | 4. Afl-dev https://lcamtuf.coredump.cx/afl/technical_details.txt 8 | 5. Afl-study https://www.s3.eurecom.fr/docs/fuzzing22_fioraldi_report.pdf 9 | 6. Cov-sensitivity https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf 10 | 7. Valued coverage https://www.ndss-symposium.org/wp-content/uploads/2020/02/24422-paper.pdf 11 | 8. CFG-Seed sched https://arxiv.org/pdf/2203.12064.pdf 12 | 9. Seed selection https://dl.acm.org/doi/pdf/10.1145/3460319.3464795 13 | 10. Directed fuzz https://dl.acm.org/doi/pdf/10.1145/3133956.3134020 14 | 11. Havoc https://shadowmydx.github.io/papers/icse22-main-1314.pdf 15 | 12. Feedback-muts https://link.springer.com/article/10.1007/s10664-020-09927-3 16 | 13. Snapshots/state https://arxiv.org/pdf/2202.03643.pdf 17 | 14. Snapshots/state https://github.com/fgsect/FitM/blob/main/fitm.pdf 18 | 15. Benchmarking https://github.com/google/fuzzbench/issues/654 19 | 16. 
Benchmarking https://hexgolems.com/2020/08/on-measuring-and-visualizing-fuzzer-performance/ 20 | 17. Crash-triaging https://www.usenix.org/system/files/sec20-blazytko.pdf 21 | 18. Hash-collisions https://chao.100871.net/papers/oakland18.pdf 22 | 19. Bigmap-covmap https://alifahmed.github.io/res/BigMap_DSN.pdf 23 | 20. Redqueen https://synthesis.to/papers/NDSS19-Redqueen.pdf 24 | 21. Nautilus https://wcventure.github.io/FuzzingPaper/Paper/NDSS19_Nautilus.pdf 25 | 22. Nyx https://www.usenix.org/system/files/sec21summer_schumilo.pdf 26 | 23. AFLFast https://mboehme.github.io/paper/CCS16.pdf 27 | 24. Baseband-emu https://arxiv.org/pdf/2005.07797.pdf 28 | 25. Cerberos https://dl.acm.org/doi/pdf/10.1145/3338906.3338975 29 | 26. Fuzzilli https://saelo.github.io/papers/thesis.pdf 30 | 27. Symbolic/Taint https://edmcman.github.io/papers/oakland10.pdf 31 | 28. OS-modif perf https://gts3.org/assets/papers/2017/xu:os-fuzz.pdf 32 | 33 | ### Corpus Management 34 | 35 | ##### Corpus Minimization 36 | > Some fuzzers such as afl trim their corpus' to discard long inputs that take the same path as 37 | shorter inputs. 
38 | 39 | > Pros 40 | - Cut down duplicate entries to not waste time on cases that don't provide more information 41 | - Smaller inputs are executed faster leading to higher performance 42 | > Cons 43 | - You potentially discard corpus entries that contained a valuable input 44 | - Reducing the size of inputs can greatly reduce the "state" the input has going into a 45 | specific block, thus leading to fewer bugs even if the same edges are covered 46 | 47 | > Specific techniques 48 | - Minset: compute weight by execution-time / file-size 49 | - Afl-cmin: Uses coverage information using tracked edge frequency counts 50 | - OptMin: Generates potentially optimal solutions unlike previous 2 approximations 51 | 52 | ##### Seed Selection 53 | > High quality initial seeds are very important because the originals can carry a lot of 54 | semantics that the fuzzer now no longer has to randomly generate or know. Any part that 55 | isn't covered by the corpus requires additional work on the side of the fuzzer to get there. 56 | This has a significant impact on expanding code coverage since a larger corpus already 57 | covers many more cases as its base 58 | 59 | ##### Seed Collection 60 | > Web crawler to collect input files 61 | > Seed-collections: 62 | - https://www.nist.gov/itl/ssd/software-quality-group/national-software-reference-library-nsrl 63 | - https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/ 64 | - https://lcamtuf.coredump.cx/afl/demo/ 65 | 66 | ### Coverage Tracking 67 | 68 | ##### Basic Block Coverage 69 | > Track coverage whenever a new basic block is hit 70 | 71 | ##### Edge Coverage 72 | (branch_src, branch_dst) 73 | > Generate tuples of the above form for each piece of code. 
If a new tuple is encountered, add 74 | the mutated input as a new corpus entry (A -> B, simplest hash would be A ^ B) 75 | > Generally better than block coverage since it provides more insight into program execution 76 | - Can trivially distinguish between the following 2 paths 77 | A -> B -> C -> D -> E (tuples: AB, BC, CD, DE) 78 | A -> B -> D -> C -> E (tuples: AB, BD, DC, CE) 79 | 80 | Example hash functions: 81 | - hash = (prev_block << 1) ^ cur_block 82 | - AFL Implementation: 83 | cur_location = <COMPILE_TIME_RANDOM>; 84 | shared_mem[cur_location ^ prev_location]++; 85 | prev_location = cur_location >> 1; 86 | 87 | ##### N-gram Edge Coverage 88 | > Track latest n edges taken. Tracking only the current edge offers little information about the 89 | actually taken path, while tracking an infinite amount of edges could result in path 90 | explosion. Common values for n are 2, 4 or 8 91 | 92 | ##### Path Coverage 93 | > Number of logical paths in the program that were taken during execution 94 | > Measures progress by computing a hash over the branches exercised by the input 95 | > Can be used to estimate how much more coverage will be gained/time with further fuzzing 96 | > Can potentially lead to path explosion if eg. a large loop is found 97 | 98 | ##### Collision Free Coverage 99 | > Generally accomplished by assigning a unique value to each edge during instrumentation, so a 100 | coverage bitmap can be efficiently accessed using this hardcoded value instead of computing 101 | a hash that risks collisions 102 | 103 | ##### BigMap 104 | > Common strategy to lower hash collisions is to increase the table size, this however results 105 | in lower cache locality and can greatly reduce perf. 
106 | > BigMap adds an additional level of indirection so randomly scattered coverage metrics are 107 | instead stored in a sequential bitmap to maintain the currently active region in caches 108 | 109 | ##### Data Coverage 110 | > Distinguish test cases from a data accessing perspective 111 | 112 | ##### Collection Methods 113 | > Code instrumentation to report coverage information 114 | > Intel PIN - jit compiles program as soon as it is loaded into memory while adding additional 115 | instructions to track coverage 116 | > Randomly request current location of fuzzer at certain time intervals to track which code is 117 | executed 118 | > Intel PT - Hardware branch tracer 119 | 120 | ### Seed Scheduling 121 | ##### Metrics: 122 | > Vulnerable paths 123 | - Weight of each branch is based upon vulnerable functions (eg. memcpy) it can reach and the 124 | amount of loads/stores given different paths 125 | > Number of edges reachable from a given seed 126 | > Mutation history can be used to determine when one should stop focusing on "hard" edges 127 | > Graph centrality analysis - approximate number of reachable edges from given seed and give 128 | weight depending on how "central" a seed is. 129 | 130 | ##### Coverage Guided / Power Schedules 131 | > Assign different weights to inputs in the corpus to "smartly" distribute fuzzing time 132 | - Execution time 133 | - Shorter 134 | - More frequent coverage increases 135 | 136 | ### Mutational Strategies 137 | ##### General Approach 138 | > Feedback loop approach 139 | - Measure what type of mutations result in new coverage and use them more frequently 140 | > Start with sequential deterministic mutations before moving on to randomness 141 | > Target specific mutations will generally outperform generic mutation strategies. 
This can be 142 | enhanced by developing a state-conscious fuzzer 143 | > Havoc: apply multiple randomly selected mutators simultaneously on some inputs 144 | 145 | ##### Individual Strategies 146 | > Walking bit flips - sequential ordered bitflips 147 | Pros: 148 | - Pretty good at finding low hanging fruit because it goes through entire input and gets 149 | a good bit of initial coverage 150 | Cons: 151 | - Expensive to keep up since each test requires 8 execve() calls per byte of the input 152 | file. Has diminishing returns, so only used for a short initial period. 153 | > Walking byte flips - sequential ordered byte-flips 154 | Pros/Cons: Much less expensive than bit flips, but also not very effective in the long run 155 | > Simple arithmetics - inc/dec integers in the input according to be/le and different sizes 156 | Pros: 157 | - Good to spot a lot of bugs pertaining to integer over/underflows or incorrect size 158 | checks 159 | Cons: 160 | - Relatively high costs (~20 execve calls per byte) 161 | > Known Integers - hardcoded set of integers that commonly trigger bugs (-1, 0, MAX_INT, etc) 162 | Pros/Cons: Very expensive, but can quickly find some common bugs before being disabled while 163 | going through the small hardcoded list of known values 164 | > Stacked Tweaks - non deterministic random mutations 165 | - bit flips 166 | - random incs/decs for 8/16/32/64 bit values 167 | - random single byte sets 168 | - block deletion 169 | - block duplication 170 | Pros: 171 | Extremely simple to implement 172 | Surprisingly very effective at generating new coverage 173 | > Changing size of input 174 | > Dictionary: Maintain a dictionary (either statically defined or dynamically created during 175 | runtime) of interesting strings, that can be added to the input at random positions. 
176 | > Splicing: Combine two different inputs at random positions 177 | 178 | ### Triaging Crashes 179 | ##### Crash Exploration 180 | > Used to more easily understand what exactly caused a crash 181 | > Entirely separate mode that takes a crashing input and looks for more inputs that cause the 182 | same crash by mutating this input. This process uses very similar methods as the main 183 | fuzzer. Eventually it will have generated a small corpus of inputs related to the bug that 184 | can be triaged together to better understand the bug 185 | > Once a lot of crashing inputs are gathered, statistical analysis can be performed on the 186 | crashing inputs to find common cases, and automatically extract a lot of possible crash 187 | reasons. 188 | 189 | ##### Deduping Crashes 190 | > Group "similar" crashes together to avoid looking at hundreds of similar crashes 191 | - With edge based coverage this can be done whenever a new tuple is found that hasn't been 192 | used to achieve this crash before, or if a tuple is missing 193 | 194 | ##### Debugging 195 | > The simplest, but also most manual labor intensive approach is to just load the crashing input 196 | into a debugger and to manually attempt to figure out the root cause. 197 | > This can be improved upon with modern timeless debuggers that provide reverse execution 198 | functionality. This can be used to traverse the program backwards starting at the crash 199 | location, which can often make bug triaging a lot more comfortable. 200 | 201 | ### Performance 202 | ##### Persistent Mode / Snapshot Fuzzing 203 | > Fuzz in a short loop around the specific target functions by saving the state right before the 204 | execution of the target, and then basing future fuzz cases off of this specific starting 205 | state instead of fully resetting the program on each run. 206 | > Can additionally implement mechanisms similar to copy-on-write/dirty bit memory resets to 207 | avoid having to reset large amounts of memory. 
This allows for much faster fuzzing. 208 | 209 | ##### In-memory Fuzzing 210 | > Many fuzz-targets read input from disk before starting to operate on the 211 | data. This leads to poor scaling due to the heavy io usage. Instead a fuzzer can just load 212 | the corpus into memory and directly pass it into the binary to avoid the disk performance 213 | overhead. 214 | 215 | ##### Scaling 216 | > When used in the real world, fuzzers are generally run on at least 50-100 cores. This means 217 | that not only does the fuzzer need good single-core performance, but it also has to scale 218 | well with a large number of cores. 219 | > If coverage information and corpus are meant to be shared between cores, they need to be 220 | implemented in ways that can be shared between the threads without incurring high costs. 221 | This means that certain techniques that track massive amounts of additional information to 222 | make improved decisions suddenly become unviable when attempting to scale because all of the 223 | information needs to be passed between cores. 224 | > Another common pitfall of scaling attempts is the kernel. If the main fuzzing loop contains 225 | frequent syscalls, the kernel starts taking up a good chunk of the time that should be spent 226 | fuzzing. This becomes increasingly relevant when running the fuzzer on a high number of 227 | threads, which can easily result in >40% of total execution time being wasted in random 228 | kernel locks. 
229 | 230 | ### Symbolic Execution in Fuzzing 231 | 232 | > Heavy symbolic analysis is still too slow for fuzzers; however, using approximations one can 233 | gain many of the benefits without the massive performance hit 234 | 235 | ##### CMP-Deconstruction 236 | > Extract values used in cmp instructions affecting control-flow and add these to a known 237 | dictionary that fuzz cases can use to achieve new control flow 238 | > Mostly useful when dealing with a lot of magic values that need to be bypassed to achieve 239 | more coverage 240 | > Can be done via input-to-state correspondence between inputs and current program state. Start 241 | by finding all cmp instructions in an initial run and hooking all of them to retrieve the 242 | arguments. Using the compare operand, values that are likely to pass the check can be 243 | calculated (eg. zero/sign-extend, equal, slightly larger/less than, etc). The input is 244 | colorized to create a lightweight approximation to taint tracking that can be used to track 245 | which part of the input finds itself in the cmp instruction. 246 | > Another approach is to transform multi-byte comparisons into multiple single byte comparisons, 247 | thus being able to leverage coverage guided fuzzing to bypass the check 248 | 249 | ##### Checksums 250 | > Checksums can be very challenging for fuzzers to handle since unlike magic-byte checks, they 251 | can change from one run to the other based on the provided input and greatly halt fuzzer 252 | progress. 253 | > One possible method is to statically identify checksum checks and patch them to a check that 254 | always returns true. 255 | 256 | ##### Concolic Execution 257 | > Track all conditionals during execution, and collect conditional constraints. These can be 258 | used to then produce inputs that take the non-traversed path. Still has a very real 259 | performance impact, but it does not suffer from state explosion and can thus be implemented 260 | in a scaling manner. 
261 | 262 | ##### Taint-based Fuzzing 263 | > Tracks input-flow throughout a target to learn which parts of the input have an effect on 264 | certain operations. Can be used to eg. find magic bytes or integer overflow vulnerabilities, 265 | but has mostly been replaced in fuzzers by techniques that accomplish similar goals without 266 | the massive performance overhead that proper taint-tracking results in. 267 | 268 | ### Benchmarking Fuzzers 269 | > When profiling new algorithms in fuzzers, algorithmic performance (eg. coverage/cases) is much 270 | more relevant than timed performance (eg. coverage/time) due to the high variances that can 271 | occur using random fuzz-inputs. Time-performance is the most important aspect for finished 272 | fuzzers, but while benchmarking fuzzers in development it is unreasonable since it would 273 | require prototypes to be highly optimized to compete. This assumes that the developer can 274 | make reasonable assumptions about the performance implications of the algorithm once 275 | optimized. 276 | > Minor variables at the start of the fuzzer run can have massive impact on the rest. Eg. high 277 | corruption can lead to initially high coverage with strongly diminishing returns once the high 278 | corruption hits required bytes for further progress. 279 | > When properly evaluating fuzzers, debugging/introspection ability is extremely important 280 | rather than just running benchmarks/reviewing coverage graphs 281 | > Log Scale vs Linear Scale 282 | - Linear scale describes where a fuzzer flatlines, but doesn't produce much data otherwise 283 | - Much more coverage at the beginning of fuzzer-runs than at the end so a linear scale 284 | results in a vertical increase at t=0 and an almost horizontal line for the rest of the 285 | run which provides almost no information. 
286 | - Log scales can make for easier interpretation of specific spikes during fuzzer runs 287 | > When benchmarking, don't focus on short fuzzer runs, but rather let the fuzzer run for eg. 24 288 | hours since some changes will have short-term benefits but long-term drawbacks 289 | > Scaling is extremely important for real world fuzzer metrics. If a fuzzer performs better on 290 | single core metrics but then completely falls off when scaled to 50-100 cores it becomes 291 | unusable for proper fuzzing campaigns. Another point would be that it should not just scale 292 | across cores but also across multiple servers, even though this is potentially harder to 293 | test. A lot of proposed high introspection fuzzing techniques suddenly fall apart when faced 294 | with scaling because all of this data needs to be shared between cores. 295 | 296 | ##### Metrics 297 | > # of bugs is basically worthless because it relates more to the number of hours spent using 298 | the fuzzer on bad targets instead of the actual fuzzer performance 299 | > Evaluating based on known bugs is useful if you are already familiar with the bugs and can 300 | thus determine if your fuzzer works as expected. 301 | > Coverage is probably the most popular metric to measure fuzzers. The proficiency of a fuzzer 302 | is often directly correlated with the amount of coverage it achieves. It might be misleading 303 | in certain cases such as grammar based fuzzers that only test a certain subset of an 304 | application. 305 | > Sampling based measurement to count how often individual blocks are hit by input. This 306 | provides information about how often blocks are reached, which is more valuable than 307 | single-hit coverage tracking. 308 | > State-aware coverage tracking: Measure which target states of a specific stateful target the 309 | fuzzer manages to hit. 310 | 311 | ### Grammar-based Fuzzing 312 | > Many applications that require highly structured inputs (eg. 
compilers) make fuzzing using 313 | mutational fuzzer implementations difficult. Grammar fuzzers in comparison generate input 314 | from scratch instead of modifying existing input. When fuzzing a JavaScript interpreter for 315 | example, a grammar based fuzzer would generate random but valid JavaScript code and use this 316 | as fuzz input. This greatly reduces the number of fuzz cases that would otherwise be 317 | immediately thrown out due to syntax errors with mutational engines. 318 | 319 | ### Misc 320 | 321 | ##### Crash Amplification 322 | > The goal of fuzzing is usually to find potentially exploitable bugs on a target. 323 | Unfortunately, fuzzers are generally only capable of finding these bugs if they actually cause 324 | a crash. The goal of crash amplification is to more easily crash the program if a bug 325 | occurs. 326 | 327 | > Compile-time instrumentation 328 | - ASAN: Address sanitization can be used to add runtime checks to the binary that track out 329 | of bounds accesses or heap bugs. Approximately 2x performance hit, but generally worth 330 | the extra cost. 331 | 332 | > Emulation 333 | - Byte level permission checks to catch off-by-one errors similar to ASAN 334 | - Hooking various functions such as malloc/free to instead replace them with safe 335 | alternatives that crash on any misuse 336 | 337 | -------------------------------------------------------------------------------- /src/irgraph.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | emulator::Register as PReg, 3 | irgraph::Val::{Reg, Imm, Imm64}, 4 | }; 5 | 6 | use std::fmt::{self, Formatter, UpperHex}; 7 | use num_traits::Signed; 8 | use rustc_hash::FxHashMap; 9 | 10 | /// Small helper type that is used to print out hex value eg. 
-0x20 instead of 0xffffffe0 11 | struct ReallySigned(T); 12 | impl UpperHex for ReallySigned { 13 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 14 | let prefix = if f.alternate() { "0x" } else { "" }; 15 | let bare_hex = format!("{:X}", self.0.abs()); 16 | f.pad_integral(self.0 >= T::zero(), prefix, &bare_hex) 17 | } 18 | } 19 | 20 | /// Value used to specify both inputs and outputs for intermediate representation 21 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 22 | pub enum Val { 23 | Reg(PReg), 24 | Imm(i32), 25 | Imm64(i64), 26 | } 27 | 28 | impl fmt::Display for Val { 29 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 30 | match self { 31 | Reg(v) => { 32 | write!(f, "{:?}", v) 33 | }, 34 | Imm(v) => { 35 | write!(f, "{}", v) 36 | }, 37 | Imm64(v) => { 38 | write!(f, "{}", v) 39 | }, 40 | } 41 | } 42 | } 43 | 44 | #[derive(Debug, Copy, Clone, Eq, PartialEq)] 45 | pub enum Operation { 46 | Undefined, 47 | Jmp(usize), 48 | JmpOff(i32), 49 | Branch(usize, usize), 50 | Syscall, 51 | Store, 52 | Load, 53 | Mov, 54 | Add, 55 | Sub, 56 | Mul, 57 | Div, 58 | And, 59 | Or, 60 | Xor, 61 | Shl, 62 | Shr, 63 | Sar, 64 | Slt, 65 | Nop, 66 | } 67 | 68 | impl Default for Operation { 69 | fn default() -> Self { Operation::Undefined } 70 | } 71 | 72 | /// These are used to give instructions extra information such as signed/unsigned or the type of 73 | /// comparison for branch instructions. 74 | #[derive(Debug, Clone, Copy)] 75 | pub struct Flag; 76 | #[allow(non_upper_case_globals)] 77 | impl Flag { 78 | pub const NoFlag: u16 = 0x0; 79 | pub const Signed: u16 = 0x1; 80 | pub const Unsigned: u16 = 0x2; 81 | pub const Equal: u16 = 0x4; 82 | pub const NEqual: u16 = 0x8; 83 | pub const Less: u16 = 0x10; 84 | pub const Greater: u16 = 0x20; 85 | pub const Byte: u16 = 0x40; 86 | pub const Word: u16 = 0x80; 87 | pub const DWord: u16 = 0x100; 88 | pub const QWord: u16 = 0x200; 89 | } 90 | 91 | /// The instructions used in the IR. 
Layed out in a way that is efficient memory wise and lets us 92 | /// easily determine if the instruction has input/output fields. 93 | #[derive(Debug, Clone, Default)] 94 | pub struct Instruction { 95 | pub op: Operation, 96 | pub i_reg: Vec, 97 | pub o_reg: Option, 98 | pub flags: u16, 99 | pub pc: Option, 100 | } 101 | 102 | impl Instruction { 103 | pub fn is_jump(&self) -> bool { 104 | matches!(self.op, Operation::Jmp(_) | Operation::Branch(..)) 105 | } 106 | } 107 | 108 | /// Pretty printing for the instructions 109 | impl fmt::Display for Instruction { 110 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 111 | match self.op { 112 | Operation::Jmp(x) => { 113 | write!(f, "{:#08X} Jmp {:#0x?}", self.pc.unwrap_or(0), x) 114 | }, 115 | Operation::JmpOff(x) => { 116 | write!(f, "{:#08X} Jmp ({:?} + {:#X})", self.pc.unwrap_or(0), self.i_reg[0], 117 | ReallySigned(x as i32)) 118 | }, 119 | Operation::Branch(x, y) => { 120 | match self.flags & 0b111100 { 121 | 0b000100 => { 122 | write!(f, "{:#08X} if {} == {} ({:#0X?}, {:#0X?})", self.pc.unwrap_or(0), 123 | self.i_reg[0], self.i_reg[1], y, x) 124 | }, 125 | 0b001000 => { 126 | write!(f, "{:#08X} if {} != {} ({:#0X?}, {:#0X?})", self.pc.unwrap_or(0), 127 | self.i_reg[0], self.i_reg[1], y, x) 128 | }, 129 | 0b010000 => { 130 | write!(f, "{:#08X} if {} < {} ({:#0X?}, {:#0X?})", self.pc.unwrap_or(0), 131 | self.i_reg[0], self.i_reg[1], y, x) 132 | }, 133 | 0b100000 => { 134 | write!(f, "{:#08X} if {} > {} ({:#0X?}, {:#0X?})", self.pc.unwrap_or(0), 135 | self.i_reg[0], self.i_reg[1], y, x) 136 | }, 137 | 0b100100 => { 138 | write!(f, "{:#08X} if {} >= {} ({:#0X?}, {:#0X?})", self.pc.unwrap_or(0), 139 | self.i_reg[0], self.i_reg[1], y, x) 140 | }, 141 | 0b010100 => { 142 | write!(f, "{:#08X} if {} <= {} ({:#0X?}, {:#0X?})", self.pc.unwrap_or(0), 143 | self.i_reg[0], self.i_reg[1], y, x) 144 | }, 145 | _ => { panic!("branch with flag: {}", self.flags & 0b111100); }, 146 | } 147 | }, 148 | Operation::Syscall => { 
149 | write!(f, "{:#08X} Syscall", self.pc.unwrap_or(0)) 150 | }, 151 | Operation::Store => { 152 | write!(f, "{:#08X} [{}+{}] = {}", self.pc.unwrap_or(0), self.i_reg[0], 153 | self.i_reg[2], self.i_reg[1]) 154 | }, 155 | Operation::Load => { 156 | write!(f, "{:#08X} {:?} = [{}+{}]", self.pc.unwrap_or(0), self.o_reg.unwrap(), 157 | self.i_reg[0], self.i_reg[1]) 158 | }, 159 | Operation::Add => { 160 | write!(f, "{:#08X} {:?} = {} + {}", self.pc.unwrap_or(0), self.o_reg.unwrap(), 161 | self.i_reg[0], self.i_reg[1]) 162 | }, 163 | Operation::Sub => { 164 | write!(f, "{:#08X} {:?} = {} - {}", self.pc.unwrap_or(0), self.o_reg.unwrap(), 165 | self.i_reg[0], self.i_reg[1]) 166 | }, 167 | Operation::And => { 168 | write!(f, "{:#08X} {:?} = {} & {}", self.pc.unwrap_or(0), self.o_reg.unwrap(), 169 | self.i_reg[0], self.i_reg[1]) 170 | }, 171 | Operation::Or => { 172 | write!(f, "{:#08X} {:?} = {} | {}", self.pc.unwrap_or(0), self.o_reg.unwrap(), 173 | self.i_reg[0], self.i_reg[1]) 174 | }, 175 | Operation::Xor => { 176 | write!(f, "{:#08X} {:?} = {} ^ {}", self.pc.unwrap_or(0), self.o_reg.unwrap(), 177 | self.i_reg[0], self.i_reg[1]) 178 | }, 179 | Operation::Shl => { 180 | write!(f, "{:#08X} {:?} = {} << {}", self.pc.unwrap_or(0), self.o_reg.unwrap(), 181 | self.i_reg[0], self.i_reg[1]) 182 | }, 183 | Operation::Shr => { 184 | write!(f, "{:#08X} {:?} = {} >> {}", self.pc.unwrap_or(0), self.o_reg.unwrap(), 185 | self.i_reg[0], self.i_reg[1]) 186 | }, 187 | Operation::Sar => { 188 | write!(f, "{:#08X} {:?} = {} >> {} [A]", self.pc.unwrap_or(0), self.o_reg.unwrap(), 189 | self.i_reg[0], self.i_reg[1]) 190 | }, 191 | Operation::Slt => { 192 | write!(f, "{:#08X} {:?} = {} < {} ? 
1 : 0", self.pc.unwrap_or(0), 193 | self.o_reg.unwrap(), self.i_reg[0], self.i_reg[1]) 194 | }, 195 | Operation::Mov => { 196 | write!(f, "{:#08X} {:?} = {}", self.pc.unwrap_or(0), 197 | self.o_reg.unwrap(), self.i_reg[0]) 198 | }, 199 | _ => { unreachable!() }, 200 | } 201 | } 202 | } 203 | 204 | /// Basic wrapper around instructions that keeps track of cur_pc. 205 | #[derive(Debug)] 206 | pub struct IRGraph { 207 | /// List of all instructions 208 | pub instrs: Vec, 209 | 210 | /// Labels indicating controlflow (instrs_index, pc) 211 | pub labels: FxHashMap, 212 | 213 | /// Since multiple IR instructions can be mapped to a single original instruction, this is used 214 | /// to only assign the pc to the first IR-instruction is generated for an original instruction. 215 | cur_pc: Option, 216 | } 217 | 218 | impl Default for IRGraph { 219 | fn default() -> Self { 220 | Self::new() 221 | } 222 | } 223 | 224 | impl IRGraph { 225 | pub fn new() -> Self { 226 | IRGraph { 227 | instrs: Vec::new(), 228 | labels: FxHashMap::default(), 229 | cur_pc: None, 230 | } 231 | } 232 | 233 | /// Initialize the cur_pc variable which is used to set the pc value in the IR instructions 234 | pub fn init_instr(&mut self, pc: usize) { 235 | self.cur_pc = Some(pc); 236 | } 237 | 238 | /// Insert a label into the irgraph using the current pc 239 | pub fn set_label(&mut self, pc: usize) { 240 | self.labels.insert(pc, self.instrs.len()); 241 | } 242 | 243 | /// r1 = imm 244 | pub fn movi32(&mut self, r1: PReg, imm: i32, flag: u16) -> PReg { 245 | self.instrs.push( Instruction { 246 | op: Operation::Mov, 247 | i_reg: vec![Imm(imm)], 248 | o_reg: Some(r1), 249 | flags: flag, 250 | pc: self.cur_pc, 251 | }); 252 | self.cur_pc = None; 253 | r1 254 | } 255 | 256 | /// r1 = imm 257 | pub fn movi64(&mut self, r1: PReg, imm: i64, flag: u16) -> PReg { 258 | self.instrs.push( Instruction { 259 | op: Operation::Mov, 260 | i_reg: vec![Imm64(imm)], 261 | o_reg: Some(r1), 262 | flags: flag, 263 | pc: 
self.cur_pc, 264 | }); 265 | self.cur_pc = None; 266 | r1 267 | } 268 | 269 | /// r1 = r2 270 | pub fn mov(&mut self, r1: PReg, r2: PReg, flag: u16) -> PReg { 271 | self.instrs.push( Instruction { 272 | op: Operation::Mov, 273 | i_reg: vec![Reg(r2)], 274 | o_reg: Some(r1), 275 | flags: flag, 276 | pc: self.cur_pc, 277 | }); 278 | self.cur_pc = None; 279 | r1 280 | } 281 | 282 | /// Jmp addr 283 | pub fn jmp(&mut self, addr: usize) { 284 | self.instrs.push( Instruction { 285 | op: Operation::Jmp(addr), 286 | i_reg: Vec::new(), 287 | o_reg: None, 288 | flags: Flag::NoFlag, 289 | pc: self.cur_pc, 290 | }); 291 | self.cur_pc = None; 292 | } 293 | 294 | /// Jmp (r1 + addr) 295 | pub fn jmp_offset(&mut self, r1: PReg, addr: i32) { 296 | self.instrs.push( Instruction { 297 | op: Operation::JmpOff(addr), 298 | i_reg: vec![Reg(r1)], 299 | o_reg: None, 300 | flags: Flag::NoFlag, 301 | pc: self.cur_pc, 302 | }); 303 | self.cur_pc = None; 304 | } 305 | 306 | /// Branch to either false_part or true_part, flags determine what kind of compare instruction 307 | /// is supposed to be inserted 308 | pub fn branch(&mut self, r2: PReg, r3: PReg, true_part: usize, false_part: usize, flags: u16) { 309 | self.instrs.push( Instruction { 310 | op: Operation::Branch(true_part, false_part), 311 | i_reg: vec![Reg(r2), Reg(r3)], 312 | o_reg: None, 313 | flags, 314 | pc: self.cur_pc, 315 | }); 316 | self.cur_pc = None; 317 | } 318 | 319 | /// r1 = [r2 + off] 320 | pub fn load(&mut self, r1: PReg, r2: PReg, off: i32, flags: u16) -> PReg { 321 | self.instrs.push( Instruction { 322 | op: Operation::Load, 323 | i_reg: vec![Reg(r2), Imm(off)], 324 | o_reg: Some(r1), 325 | flags, 326 | pc: self.cur_pc, 327 | }); 328 | self.cur_pc = None; 329 | r1 330 | } 331 | 332 | /// [r1 + off] = r2 333 | pub fn store(&mut self, r1: PReg, r2: PReg, off: i32, flags: u16) { 334 | self.instrs.push( Instruction { 335 | op: Operation::Store, 336 | i_reg: vec![Reg(r1), Reg(r2), Imm(off)], 337 | o_reg: None, 338 | flags, 
339 | pc: self.cur_pc, 340 | }); 341 | self.cur_pc = None; 342 | } 343 | 344 | /// Set res_reg if rs1_reg is less than imm_reg 345 | pub fn slt(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg { 346 | self.instrs.push( Instruction { 347 | op: Operation::Slt, 348 | i_reg: vec![Reg(r2), Reg(r3)], 349 | o_reg: Some(r1), 350 | flags, 351 | pc: self.cur_pc, 352 | }); 353 | self.cur_pc = None; 354 | r1 355 | } 356 | 357 | /// Set res_reg if rs1_reg is less than the immediate 358 | pub fn slti(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg { 359 | self.instrs.push( Instruction { 360 | op: Operation::Slt, 361 | i_reg: vec![Reg(r2), Imm(imm)], 362 | o_reg: Some(r1), 363 | flags, 364 | pc: self.cur_pc, 365 | }); 366 | self.cur_pc = None; 367 | r1 368 | } 369 | 370 | /// r1 = r2 + r3 371 | pub fn add(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg { 372 | self.instrs.push( Instruction { 373 | op: Operation::Add, 374 | i_reg: vec![Reg(r2), Reg(r3)], 375 | o_reg: Some(r1), 376 | flags, 377 | pc: self.cur_pc, 378 | }); 379 | self.cur_pc = None; 380 | r1 381 | } 382 | 383 | /// r1 = r2 * r3 384 | pub fn mul(&mut self, r1: PReg, r2: PReg, r3: PReg, flag: u16) -> PReg { 385 | self.instrs.push( Instruction { 386 | op: Operation::Mul, 387 | i_reg: vec![Reg(r2), Reg(r3)], 388 | o_reg: Some(r1), 389 | flags: flag, 390 | pc: self.cur_pc, 391 | }); 392 | self.cur_pc = None; 393 | r1 394 | } 395 | 396 | /// r1 = r2 / r3 397 | pub fn div(&mut self, r1: PReg, r2: PReg, r3: PReg, flag: u16) -> PReg { 398 | self.instrs.push( Instruction { 399 | op: Operation::Div, 400 | i_reg: vec![Reg(r2), Reg(r3)], 401 | o_reg: Some(r1), 402 | flags: flag, 403 | pc: self.cur_pc, 404 | }); 405 | self.cur_pc = None; 406 | r1 407 | } 408 | 409 | /// r1 = r2 + imm 410 | pub fn addi(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg { 411 | self.instrs.push( Instruction { 412 | op: Operation::Add, 413 | i_reg: vec![Reg(r2), Imm(imm)], 414 | o_reg: Some(r1), 415 | 
flags, 416 | pc: self.cur_pc, 417 | }); 418 | self.cur_pc = None; 419 | r1 420 | } 421 | 422 | /// r1 = r2 - r3 423 | pub fn sub(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg { 424 | self.instrs.push( Instruction { 425 | op: Operation::Sub, 426 | i_reg: vec![Reg(r2), Reg(r3)], 427 | o_reg: Some(r1), 428 | flags, 429 | pc: self.cur_pc, 430 | }); 431 | self.cur_pc = None; 432 | r1 433 | } 434 | 435 | /// r1 = r2 - imm 436 | pub fn subi(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg { 437 | self.instrs.push( Instruction { 438 | op: Operation::Sub, 439 | i_reg: vec![Reg(r2), Imm(imm)], 440 | o_reg: Some(r1), 441 | flags, 442 | pc: self.cur_pc, 443 | }); 444 | self.cur_pc = None; 445 | r1 446 | } 447 | 448 | /// r1 = r2 ^ r3 449 | pub fn xor(&mut self, r1: PReg, r2: PReg, r3: PReg) -> PReg { 450 | self.instrs.push( Instruction { 451 | op: Operation::Xor, 452 | i_reg: vec![Reg(r2), Reg(r3)], 453 | o_reg: Some(r1), 454 | flags: Flag::NoFlag, 455 | pc: self.cur_pc, 456 | }); 457 | self.cur_pc = None; 458 | r1 459 | } 460 | 461 | /// r1 = r2 ^ imm 462 | pub fn xori(&mut self, r1: PReg, r2: PReg, imm: i32) -> PReg { 463 | self.instrs.push( Instruction { 464 | op: Operation::Xor, 465 | i_reg: vec![Reg(r2), Imm(imm)], 466 | o_reg: Some(r1), 467 | flags: Flag::NoFlag, 468 | pc: self.cur_pc, 469 | }); 470 | self.cur_pc = None; 471 | r1 472 | } 473 | 474 | /// r1 = r2 | r3 475 | pub fn or(&mut self, r1: PReg, r2: PReg, r3: PReg) -> PReg { 476 | self.instrs.push( Instruction { 477 | op: Operation::Or, 478 | i_reg: vec![Reg(r2), Reg(r3)], 479 | o_reg: Some(r1), 480 | flags: Flag::NoFlag, 481 | pc: self.cur_pc, 482 | }); 483 | self.cur_pc = None; 484 | r1 485 | } 486 | 487 | /// r1 = r2 | imm 488 | pub fn ori(&mut self, r1: PReg, r2: PReg, imm: i32) -> PReg { 489 | self.instrs.push( Instruction { 490 | op: Operation::Or, 491 | i_reg: vec![Reg(r2), Imm(imm)], 492 | o_reg: Some(r1), 493 | flags: Flag::NoFlag, 494 | pc: self.cur_pc, 495 | }); 496 | 
self.cur_pc = None; 497 | r1 498 | } 499 | 500 | /// r1 = r2 & r3 501 | pub fn and(&mut self, r1: PReg, r2: PReg, r3: PReg) -> PReg { 502 | self.instrs.push( Instruction { 503 | op: Operation::And, 504 | i_reg: vec![Reg(r2), Reg(r3)], 505 | o_reg: Some(r1), 506 | flags: Flag::NoFlag, 507 | pc: self.cur_pc, 508 | }); 509 | self.cur_pc = None; 510 | r1 511 | } 512 | 513 | /// r1 = r2 & imm 514 | pub fn andi(&mut self, r1: PReg, r2: PReg, imm: i32) -> PReg { 515 | self.instrs.push( Instruction { 516 | op: Operation::And, 517 | i_reg: vec![Reg(r2), Imm(imm)], 518 | o_reg: Some(r1), 519 | flags: Flag::NoFlag, 520 | pc: self.cur_pc, 521 | }); 522 | self.cur_pc = None; 523 | r1 524 | } 525 | 526 | /// r1 = r2 << r3 527 | pub fn shl(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg { 528 | self.instrs.push( Instruction { 529 | op: Operation::Shl, 530 | i_reg: vec![Reg(r2), Reg(r3)], 531 | o_reg: Some(r1), 532 | flags, 533 | pc: self.cur_pc, 534 | }); 535 | self.cur_pc = None; 536 | r1 537 | } 538 | 539 | /// r1 = r2 << imm 540 | pub fn shli(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg { 541 | self.instrs.push( Instruction { 542 | op: Operation::Shl, 543 | i_reg: vec![Reg(r2), Imm(imm)], 544 | o_reg: Some(r1), 545 | flags, 546 | pc: self.cur_pc, 547 | }); 548 | self.cur_pc = None; 549 | r1 550 | } 551 | 552 | /// r1 = r2 >> r3 (Logical) 553 | pub fn shr(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg { 554 | self.instrs.push( Instruction { 555 | op: Operation::Shr, 556 | i_reg: vec![Reg(r2), Reg(r3)], 557 | o_reg: Some(r1), 558 | flags, 559 | pc: self.cur_pc, 560 | }); 561 | self.cur_pc = None; 562 | r1 563 | } 564 | 565 | /// r1 = r2 >> imm (Logical) 566 | pub fn shri(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg { 567 | self.instrs.push( Instruction { 568 | op: Operation::Shr, 569 | i_reg: vec![Reg(r2), Imm(imm)], 570 | o_reg: Some(r1), 571 | flags, 572 | pc: self.cur_pc, 573 | }); 574 | self.cur_pc = None; 575 | r1 
576 | } 577 | 578 | /// r1 = r2 >> r3 (Arithmetic) 579 | pub fn sar(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg { 580 | self.instrs.push( Instruction { 581 | op: Operation::Sar, 582 | i_reg: vec![Reg(r2), Reg(r3)], 583 | o_reg: Some(r1), 584 | flags, 585 | pc: self.cur_pc, 586 | }); 587 | self.cur_pc = None; 588 | r1 589 | } 590 | 591 | /// r1 = r2 >> imm (Arithmetic) 592 | pub fn sari(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg { 593 | self.instrs.push( Instruction { 594 | op: Operation::Sar, 595 | i_reg: vec![Reg(r2), Imm(imm)], 596 | o_reg: Some(r1), 597 | flags, 598 | pc: self.cur_pc, 599 | }); 600 | self.cur_pc = None; 601 | r1 602 | } 603 | 604 | /// Syscall instruction 605 | pub fn syscall(&mut self) { 606 | self.instrs.push( Instruction { 607 | op: Operation::Syscall, 608 | i_reg: Vec::new(), 609 | o_reg: None, 610 | flags: Flag::NoFlag, 611 | pc: self.cur_pc, 612 | }); 613 | self.cur_pc = None; 614 | } 615 | 616 | /// Return a hashmap that tracks the starting pc of each cfg block of this function 617 | pub fn get_leaders(&self) -> FxHashMap { 618 | let mut leader_set: FxHashMap = FxHashMap::default(); 619 | 620 | // First instruction is always a block-leader 621 | leader_set.insert(self.instrs[0].pc.unwrap(), 0); 622 | 623 | // Next insert all labels that indicate the start of a block 624 | for i in 0..self.instrs.len() { 625 | if let Some(pc) = self.instrs[i].pc { 626 | if self.labels.get(&pc).is_some() { 627 | leader_set.insert(pc, 0); 628 | } 629 | } 630 | } 631 | leader_set 632 | } 633 | } 634 | --------------------------------------------------------------------------------