├── tools
    ├── program_generator
    │   ├── target
    │   │   ├── debug
    │   │   │   ├── .cargo-lock
    │   │   │   ├── .fingerprint
    │   │   │   │   ├── rand_core-e25943b7452e6ba6
    │   │   │   │   │   ├── dep-lib-rand_core
    │   │   │   │   │   ├── lib-rand_core
    │   │   │   │   │   ├── invoked.timestamp
    │   │   │   │   │   └── lib-rand_core.json
    │   │   │   │   ├── program_generator-97a5f69bcae5c740
    │   │   │   │   │   ├── lib-program_generator
    │   │   │   │   │   ├── invoked.timestamp
    │   │   │   │   │   ├── dep-lib-program_generator
    │   │   │   │   │   ├── lib-program_generator.json
    │   │   │   │   │   └── output-lib-program_generator
    │   │   │   │   ├── program_generator-9f3b4da5251921fa
    │   │   │   │   │   ├── bin-program_generator
    │   │   │   │   │   ├── invoked.timestamp
    │   │   │   │   │   └── bin-program_generator.json
    │   │   │   │   ├── rand_xoshiro-826023e1e10f2594
    │   │   │   │   │   ├── dep-lib-rand_xoshiro
    │   │   │   │   │   ├── lib-rand_xoshiro
    │   │   │   │   │   ├── invoked.timestamp
    │   │   │   │   │   └── lib-rand_xoshiro.json
    │   │   │   │   ├── program_generator-55bed8c75a40676d
    │   │   │   │   │   ├── test-bin-program_generator
    │   │   │   │   │   ├── invoked.timestamp
    │   │   │   │   │   └── test-bin-program_generator.json
    │   │   │   │   └── program_generator-71b575e69d616725
    │   │   │   │   │   ├── test-lib-program_generator
    │   │   │   │   │   ├── invoked.timestamp
    │   │   │   │   │   ├── dep-test-lib-program_generator
    │   │   │   │   │   ├── test-lib-program_generator.json
    │   │   │   │   │   └── output-test-lib-program_generator
    │   │   │   ├── incremental
    │   │   │   │   ├── program_generator-1szwh0nemnj33
    │   │   │   │   │   ├── s-gbtrymypnm-1krtpfm.lock
    │   │   │   │   │   ├── s-gbtrynbds6-1471w9c.lock
    │   │   │   │   │   ├── s-gbtryo2kyy-1rhu27j.lock
    │   │   │   │   │   ├── s-gbtrymypnm-1krtpfm-working
    │   │   │   │   │   │   └── dep-graph.part.bin
    │   │   │   │   │   ├── s-gbtrynbds6-1471w9c-working
    │   │   │   │   │   │   └── dep-graph.part.bin
    │   │   │   │   │   └── s-gbtryo2kyy-1rhu27j-working
    │   │   │   │   │   │   └── dep-graph.part.bin
    │   │   │   │   └── program_generator-3ph2xy5u2qtvt
    │   │   │   │   │   ├── s-gbtrymyg1k-hbm5ig.lock
    │   │   │   │   │   ├── s-gbtrynbgb3-y2hf5i.lock
    │   │   │   │   │   ├── s-gbtryo1cy8-7215i9.lock
    │   │   │   │   │   ├── s-gbtrymyg1k-hbm5ig-working
    │   │   │   │   │       └── dep-graph.part.bin
    │   │   │   │   │   ├── s-gbtrynbgb3-y2hf5i-working
    │   │   │   │   │       └── dep-graph.part.bin
    │   │   │   │   │   └── s-gbtryo1cy8-7215i9-working
    │   │   │   │   │       └── dep-graph.part.bin
    │   │   │   └── deps
    │   │   │   │   ├── librand_core-e25943b7452e6ba6.rmeta
    │   │   │   │   ├── librand_xoshiro-826023e1e10f2594.rmeta
    │   │   │   │   ├── program_generator-71b575e69d616725.d
    │   │   │   │   ├── program_generator-97a5f69bcae5c740.d
    │   │   │   │   ├── rand_core-e25943b7452e6ba6.d
    │   │   │   │   └── rand_xoshiro-826023e1e10f2594.d
    │   │   ├── CACHEDIR.TAG
    │   │   └── .rustc_info.json
    │   ├── src
    │   │   ├── main.rs
    │   │   ├── rng.rs
    │   │   ├── compile.rs
    │   │   └── lib.rs
    │   ├── Cargo.toml
    │   ├── Cargo.lock
    │   └── README.md
    ├── trace_full_regs.sh
    └── scraper.py
├── test_cases
    ├── aa
    ├── simple_test
    ├── generated_program
    ├── simple_test.c
    └── b.c
├── resources
    ├── graph.png
    ├── domtree.png
    ├── afl_test.png
    ├── printout.png
    ├── sfuzz_test.png
    ├── domtree.dot
    ├── ssa.dot
    └── notes.md
├── Cargo.toml
├── LICENSE.md
├── README.md
├── docs
    ├── benchmarking.md
    ├── memory_management.md
    ├── fuzzing.md
    └── code_gen.md
├── src
    ├── syscalls.rs
    ├── pretty_printing.rs
    ├── main.rs
    ├── config.rs
    ├── mutator.rs
    └── irgraph.rs
└── fuzzing.md


/tools/program_generator/target/debug/.cargo-lock:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test_cases/aa:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/test_cases/aa


--------------------------------------------------------------------------------
/resources/graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/resources/graph.png


--------------------------------------------------------------------------------
/resources/domtree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/resources/domtree.png


--------------------------------------------------------------------------------
/resources/afl_test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/resources/afl_test.png


--------------------------------------------------------------------------------
/resources/printout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/resources/printout.png


--------------------------------------------------------------------------------
/resources/sfuzz_test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/resources/sfuzz_test.png


--------------------------------------------------------------------------------
/test_cases/simple_test:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/test_cases/simple_test


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/rand_core-e25943b7452e6ba6/dep-lib-rand_core:
--------------------------------------------------------------------------------
1 |         


--------------------------------------------------------------------------------
/test_cases/generated_program:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/test_cases/generated_program


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-97a5f69bcae5c740/lib-program_generator:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-9f3b4da5251921fa/bin-program_generator:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/rand_core-e25943b7452e6ba6/lib-rand_core:
--------------------------------------------------------------------------------
1 | e156a19efd59b159


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/rand_xoshiro-826023e1e10f2594/dep-lib-rand_xoshiro:
--------------------------------------------------------------------------------
1 |         


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrymypnm-1krtpfm.lock:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrynbds6-1471w9c.lock:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtryo2kyy-1rhu27j.lock:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrymyg1k-hbm5ig.lock:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrynbgb3-y2hf5i.lock:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtryo1cy8-7215i9.lock:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-55bed8c75a40676d/test-bin-program_generator:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-71b575e69d616725/test-lib-program_generator:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/rand_xoshiro-826023e1e10f2594/lib-rand_xoshiro:
--------------------------------------------------------------------------------
1 | b86706463e38296f


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/rand_core-e25943b7452e6ba6/invoked.timestamp:
--------------------------------------------------------------------------------
1 | This file has an mtime of when this was started.


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/rand_xoshiro-826023e1e10f2594/invoked.timestamp:
--------------------------------------------------------------------------------
1 | This file has an mtime of when this was started.


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-55bed8c75a40676d/invoked.timestamp:
--------------------------------------------------------------------------------
1 | This file has an mtime of when this was started.


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-71b575e69d616725/invoked.timestamp:
--------------------------------------------------------------------------------
1 | This file has an mtime of when this was started.


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-97a5f69bcae5c740/invoked.timestamp:
--------------------------------------------------------------------------------
1 | This file has an mtime of when this was started.


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-9f3b4da5251921fa/invoked.timestamp:
--------------------------------------------------------------------------------
1 | This file has an mtime of when this was started.


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-97a5f69bcae5c740/dep-lib-program_generator:
--------------------------------------------------------------------------------
1 |     
2 |    src/lib.rs 
3 |    src/rng.rs    src/compile.rs    


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-71b575e69d616725/dep-test-lib-program_generator:
--------------------------------------------------------------------------------
1 |     
2 |    src/lib.rs 
3 |    src/rng.rs    src/compile.rs    


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/deps/librand_core-e25943b7452e6ba6.rmeta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/deps/librand_core-e25943b7452e6ba6.rmeta


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/deps/librand_xoshiro-826023e1e10f2594.rmeta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/deps/librand_xoshiro-826023e1e10f2594.rmeta


--------------------------------------------------------------------------------
/tools/program_generator/src/main.rs:
--------------------------------------------------------------------------------
 1 | use program_generator::{
 2 |     compile::compile,
 3 |     Program, 
 4 | };
 5 | 
 6 | /// Builds a `Program` with `Program::create_program` and hands it to `compile`.
 7 | fn main() {
 8 |     compile(Program::create_program());
 9 | }
10 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/CACHEDIR.TAG:
--------------------------------------------------------------------------------
1 | Signature: 8a477f597d28d172789f06886806bc55
2 | # This file is a cache directory tag created by cargo.
3 | # For information about cache directory tags see https://bford.info/cachedir/
4 | 


--------------------------------------------------------------------------------
/tools/program_generator/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "program_generator"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 7 | 
 8 | [dependencies]
 9 | rand_xoshiro = "0.6.0"
10 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrymypnm-1krtpfm-working/dep-graph.part.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrymypnm-1krtpfm-working/dep-graph.part.bin


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrynbds6-1471w9c-working/dep-graph.part.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtrynbds6-1471w9c-working/dep-graph.part.bin


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtryo2kyy-1rhu27j-working/dep-graph.part.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-1szwh0nemnj33/s-gbtryo2kyy-1rhu27j-working/dep-graph.part.bin


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrymyg1k-hbm5ig-working/dep-graph.part.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrymyg1k-hbm5ig-working/dep-graph.part.bin


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrynbgb3-y2hf5i-working/dep-graph.part.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtrynbgb3-y2hf5i-working/dep-graph.part.bin


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtryo1cy8-7215i9-working/dep-graph.part.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seal9055/sfuzz/HEAD/tools/program_generator/target/debug/incremental/program_generator-3ph2xy5u2qtvt/s-gbtryo1cy8-7215i9-working/dep-graph.part.bin


--------------------------------------------------------------------------------
/resources/domtree.dot:
--------------------------------------------------------------------------------
 1 | digraph {
 2 |     0 [ label = "Label @ 0x1000\l " ]
 3 |     1 [ label = "Label @ 0x100C\l " ]
 4 |     2 [ label = "Label @ 0x1018\l " ]
 5 |     3 [ label = "Label @ 0x1028\l " ]
 6 |     4 [ label = "Label @ 0x1034\l " ]
 7 |     0 -> 1 [ ]
 8 |     0 -> 3 [ ]
 9 |     0 -> 2 [ ]
10 |     2 -> 4 [ ]
11 | }
12 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/rand_core-e25943b7452e6ba6/lib-rand_core.json:
--------------------------------------------------------------------------------
1 | {"rustc":15069735523673833430,"features":"[]","target":14554634924080965175,"profile":3735503092003429423,"path":15235452744257207087,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/rand_core-e25943b7452e6ba6/dep-lib-rand_core"}}],"rustflags":[],"metadata":3275543247315060703,"config":2202906307356721367,"compile_kind":0}


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/deps/program_generator-71b575e69d616725.d:
--------------------------------------------------------------------------------
1 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/program_generator-71b575e69d616725.rmeta: src/lib.rs src/rng.rs src/compile.rs
2 | 
3 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/program_generator-71b575e69d616725.d: src/lib.rs src/rng.rs src/compile.rs
4 | 
5 | src/lib.rs:
6 | src/rng.rs:
7 | src/compile.rs:
8 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/deps/program_generator-97a5f69bcae5c740.d:
--------------------------------------------------------------------------------
1 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/program_generator-97a5f69bcae5c740.rmeta: src/lib.rs src/rng.rs src/compile.rs
2 | 
3 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/program_generator-97a5f69bcae5c740.d: src/lib.rs src/rng.rs src/compile.rs
4 | 
5 | src/lib.rs:
6 | src/rng.rs:
7 | src/compile.rs:
8 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/rand_xoshiro-826023e1e10f2594/lib-rand_xoshiro.json:
--------------------------------------------------------------------------------
1 | {"rustc":15069735523673833430,"features":"[]","target":12700624167688026310,"profile":3735503092003429423,"path":4654909132326164140,"deps":[[1974880700686797828,"rand_core",false,6463045886076344033]],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/rand_xoshiro-826023e1e10f2594/dep-lib-rand_xoshiro"}}],"rustflags":[],"metadata":10630710798610827097,"config":2202906307356721367,"compile_kind":0}


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-97a5f69bcae5c740/lib-program_generator.json:
--------------------------------------------------------------------------------
1 | {"rustc":15069735523673833430,"features":"[]","target":5404270602498279417,"profile":7309141686862299243,"path":17523903030608720598,"deps":[[15191189118421006103,"rand_xoshiro",false,8009995252369090488]],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/program_generator-97a5f69bcae5c740/dep-lib-program_generator"}}],"rustflags":[],"metadata":7797948686568424061,"config":2202906307356721367,"compile_kind":0}


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-71b575e69d616725/test-lib-program_generator.json:
--------------------------------------------------------------------------------
1 | {"rustc":15069735523673833430,"features":"[]","target":5404270602498279417,"profile":1021633075455700787,"path":17523903030608720598,"deps":[[15191189118421006103,"rand_xoshiro",false,8009995252369090488]],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/program_generator-71b575e69d616725/dep-test-lib-program_generator"}}],"rustflags":[],"metadata":7797948686568424061,"config":2202906307356721367,"compile_kind":0}


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-9f3b4da5251921fa/bin-program_generator.json:
--------------------------------------------------------------------------------
1 | {"rustc":15069735523673833430,"features":"[]","target":6365363593584776832,"profile":7309141686862299243,"path":1684066648322511884,"deps":[[11263871373454206889,"program_generator",false,11722842124356941761],[15191189118421006103,"rand_xoshiro",false,8009995252369090488]],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/program_generator-9f3b4da5251921fa/dep-bin-program_generator"}}],"rustflags":[],"metadata":7797948686568424061,"config":2202906307356721367,"compile_kind":0}


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-55bed8c75a40676d/test-bin-program_generator.json:
--------------------------------------------------------------------------------
1 | {"rustc":15069735523673833430,"features":"[]","target":6365363593584776832,"profile":1021633075455700787,"path":1684066648322511884,"deps":[[11263871373454206889,"program_generator",false,11722842124356941761],[15191189118421006103,"rand_xoshiro",false,8009995252369090488]],"local":[{"CheckDepInfo":{"dep_info":"debug/.fingerprint/program_generator-55bed8c75a40676d/dep-test-bin-program_generator"}}],"rustflags":[],"metadata":7797948686568424061,"config":2202906307356721367,"compile_kind":0}


--------------------------------------------------------------------------------
/test_cases/simple_test.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <fcntl.h>
 3 | #include <unistd.h>
 4 | 
 5 | /*
 6 |  * Fuzzing test case: reads up to 100 bytes from the file named by argv[1] and
 7 |  * writes through the invalid pointer 0x4141414141414141 when the first six
 8 |  * bytes are 0x41..0x46 ("ABCDEF"). Returns 0 whenever that write is not reached.
 9 |  */
10 | int main(int argc, char **argv) {
11 |     /* Zero-initialised so that a short or failed read() never leaves the
12 |      * comparisons below inspecting indeterminate stack bytes. */
13 |     char buf[100] = {0};
14 |     int fd = open(argv[1], O_RDONLY);
15 | 
16 |     /* The result is not checked: a failed read() simply leaves buf zeroed. */
17 |     read(fd, buf, 100);
18 | 
19 |     if (buf[0] == 0x41) {
20 |       if (buf[1] == 0x42) {
21 |         if (buf[2] == 0x43) {
22 |           if (buf[3] == 0x44) {
23 |             if (buf[4] == 0x45) {
24 |               if (buf[5] == 0x46) {
25 |                 /* Crash site (presumably the fault the fuzzer is expected to find). */
26 |                 *(unsigned long*)0x4141414141414141 = 0;
27 |               }
28 |             }
29 |           }
30 |         }
31 |       }
32 |     }
33 |     return 0;
34 | }
35 | 


--------------------------------------------------------------------------------
/test_cases/b.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <fcntl.h>
 3 | #include <unistd.h>
 4 | 
 5 | /*
 6 |  * Fuzzing test case: reads up to 100 bytes from the file named by argv[1] and
 7 |  * writes through the invalid pointer 0x4141414141414141 when buf[0] equals
 8 |  * buf[8] * 123 / 17 (int arithmetic) and bytes 1..5 are 0x42..0x46 ("BCDEF").
 9 |  * Returns 0 whenever that write is not reached.
10 |  */
11 | int main(int argc, char **argv) {
12 |     /* Zero-initialised so that a short or failed read() never leaves the
13 |      * computations below inspecting indeterminate stack bytes. */
14 |     char buf[100] = {0};
15 |     int fd = open(argv[1], O_RDONLY);
16 | 
17 |     /* The result is not checked: a failed read() simply leaves buf zeroed. */
18 |     read(fd, buf, 100);
19 | 
20 |     /* Value the first input byte is compared against; derived from byte 8. */
21 |     int v = buf[8] * 123 / 17;
22 | 
23 |     if (buf[0] == v) {
24 |       if (buf[1] == 0x42) {
25 |         if (buf[2] == 0x43) {
26 |           if (buf[3] == 0x44) {
27 |             if (buf[4] == 0x45) {
28 |               if (buf[5] == 0x46) {
29 |                 /* Crash site (presumably the fault the fuzzer is expected to find). */
30 |                 *(unsigned long*)0x4141414141414141 = 0;
31 |               }
32 |             }
33 |           }
34 |         }
35 |       }
36 |     }
37 |     return 0;
38 | }
39 | 


--------------------------------------------------------------------------------
/tools/program_generator/Cargo.lock:
--------------------------------------------------------------------------------
 1 | # This file is automatically @generated by Cargo.
 2 | # It is not intended for manual editing.
 3 | version = 3
 4 | 
 5 | [[package]]
 6 | name = "program_generator"
 7 | version = "0.1.0"
 8 | dependencies = [
 9 |  "rand_xoshiro",
10 | ]
11 | 
12 | [[package]]
13 | name = "rand_core"
14 | version = "0.6.3"
15 | source = "registry+https://github.com/rust-lang/crates.io-index"
16 | checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
17 | 
18 | [[package]]
19 | name = "rand_xoshiro"
20 | version = "0.6.0"
21 | source = "registry+https://github.com/rust-lang/crates.io-index"
22 | checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa"
23 | dependencies = [
24 |  "rand_core",
25 | ]
26 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "sfuzz"
 3 | version = "0.2.0"
 4 | edition = "2021"
 5 | 
 6 | [profile.release]
 7 | lto = true
 8 | codegen-units = 1
 9 | panic = 'abort'
10 | 
11 | [dependencies]
12 | elfparser = { git = "https://github.com/seal9055/local_crates/", branch = "main" }
13 | my_libs = { git = "https://github.com/seal9055/local_crates/", branch = "main" }
14 | clap = { version = "3.1.18", features = ["derive"] }
15 | rustc-hash = "1.1.0"
16 | num-traits = "0.2.14"
17 | byteorder = "1.4.3"
18 | fasthash = "0.4.0"
19 | num-format = "0.4.0"
20 | parking_lot = "0.12.0"
21 | rand_xoshiro = "0.6.0"
22 | console = "0.15.0"
23 | parse_int = "0.6.0"
24 | reqwest = { version = "*", features = ["json"] }
25 | serde = { version = "*", features = ["derive"] }
26 | 
27 | [dependencies.iced-x86]
28 | version = "1.15.0"
29 | features = ["code_asm"]
30 | 


--------------------------------------------------------------------------------
/resources/ssa.dot:
--------------------------------------------------------------------------------
 1 | digraph {
 2 |     0 [ label = "		Label @ 0x1000\l\l0x001000  A0(1) = 0x14\l0x001004  A1(1) = 0xA\l0x001008  if A0(1) == A1(1) (0x100C, 0x1028)\l\l " ]
 3 |     1 [ label = "\l		Label @ 0x100C\l\l0x00100C  A2(1) = A0(1) + A1(1)\l0x001010  A3(1) = 0x1\l0x001014  Jmp 0x1018\l\l " ]
 4 |     2 [ label = "\l		Label @ 0x1018\l\l0x000000  A3(2) = φ(A3(1), A3(3))\l0x000000  A2(2) = φ(A2(1), A2(3))\l0x001018  Z1(1) = 0x5\l0x000000  A4(1) = A2(2) + Z1(1)\l0x00101C  Z1(2) = 0x1\l0x000000  A5(1) = A4(1) + Z1(2)\l0x001020  Z1(3) = 0x0\l0x000000  A6(1) = A3(2) + Z1(3)\l0x001024  Jmp 0x1034\l\l " ]
 5 |     3 [ label = "\l		Label @ 0x1028\l\l0x001028  A2(3) = A0(1) - A1(1)\l0x00102C  A3(3) = 0x2\l0x001030  Jmp 0x1018\l\l " ]
 6 |     4 [ label = "\l		Label @ 0x1034\l\l0x001034  Ret\l\l " ]
 7 |     0 -> 1 [ ]
 8 |     0 -> 3 [ ]
 9 |     1 -> 2 [ ]
10 |     2 -> 4 [ ]
11 |     3 -> 2 [ ]
12 | }
13 | 


--------------------------------------------------------------------------------
/tools/program_generator/README.md:
--------------------------------------------------------------------------------
1 | # Program Generator
2 | 
3 | SFUZZ currently only supports RISC-V, and I had trouble compiling many real-world programs to RISC-V to test it with. Because of this, I decided to write this program generator so that I could randomly generate test cases of varying complexity. This testing method is far from perfect, but it was a fun little side project while working on SFUZZ, and I think it is still a decent way to quickly test basic fuzzer capabilities.
4 | 
5 | The generated program reads a specified number of bytes from an input file into a buffer, which is then passed through a series of comparisons. These eventually lead to a crash if enough checks are passed. The complexity of the generated program can easily be adjusted through a configuration variable, which enables the generation of programs ranging from ~100 to several million lines of code, depending on the user's testing preferences. The depth of the generated comparisons is also derived from this complexity setting.


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 seal9055
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/tools/trace_full_regs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/zsh
 2 | 
 3 | # Script used to trace the register state of a program running under qemu and format it so that I can
 4 | # just run `diff` on the traces generated by qemu and my fuzzer to find bugs in my JIT implementation
 5 | #
 6 | # Usage: ./trace_full_regs.sh <target> <last_pc>
 7 | #   <target>   program to run under qemu-riscv64 (gdb stub on port 1234)
 8 | #   <last_pc>  pc value at which tracing stops
 9 | #
10 | # Writes the formatted trace to ./trace; ./script and ./gdb.output are temporary files.
11 | 
12 | # Requires target program and the pc to stop tracing at
13 | if [ "$#" -ne 2 ]; then
14 |     echo "Usage: ./trace_full_regs.sh <target> <last_pc>" >&2
15 |     exit 1
16 | fi
17 | 
18 | # gdb appends to an existing log file by default, so drop any leftover from an aborted run
19 | rm -f gdb.output
20 | 
21 | # gdb-script that runs `info reg` on every instruction until $last_pc. The redirect truncates,
22 | # so a stale ./script is replaced instead of extended; \$pc is escaped so the shell leaves it for gdb.
23 | cat > script <<EOF
24 | set pagination off
25 | set logging file gdb.output
26 | set logging on
27 | 
28 | target remote :1234
29 | 
30 | while(\$pc != $2)
31 |     info reg
32 |     si
33 | end
34 | 
35 | set logging off
36 | quit
37 | EOF
38 | 
39 | # Run qemu with gdb
40 | qemu-riscv64 -g 1234 "./$1" &
41 | gdb-multiarch "./$1" --command=script
42 | 
43 | # Format output: drop lines containing "in", squeeze spaces, keep the first two fields,
44 | # then add an empty line after every line mentioning pc
45 | grep -v 'in' gdb.output | tr -s ' ' | cut -d ' ' -f1,2 | cut -d$'\t' -f1 > trace
46 | sed -i '/pc/ s/$/\n/' trace
47 | 
48 | rm gdb.output
49 | rm script
50 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/deps/rand_core-e25943b7452e6ba6.d:
--------------------------------------------------------------------------------
 1 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/rand_core-e25943b7452e6ba6.rmeta: /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/lib.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/block.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/error.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/impls.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/le.rs
 2 | 
 3 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/rand_core-e25943b7452e6ba6.d: /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/lib.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/block.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/error.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/impls.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/le.rs
 4 | 
 5 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/lib.rs:
 6 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/block.rs:
 7 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/error.rs:
 8 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/impls.rs:
 9 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_core-0.6.3/src/le.rs:
10 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/.rustc_info.json:
--------------------------------------------------------------------------------
1 | {"rustc_fingerprint":15743540453928543069,"outputs":{"4614504638168534921":{"success":true,"status":"","code":0,"stdout":"rustc 1.67.0-nightly (11ebe6512 2022-11-01)\nbinary: rustc\ncommit-hash: 11ebe6512b4c77633c59f8dcdd421df3b79d1a9f\ncommit-date: 2022-11-01\nhost: x86_64-unknown-linux-gnu\nrelease: 1.67.0-nightly\nLLVM version: 15.0.4\n","stderr":""},"10376369925670944939":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/home/seal/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"llvm14-builtins-abi\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_has_atomic_equal_alignment=\"16\"\ntarget_has_atomic_equal_alignment=\"32\"\ntarget_has_atomic_equal_alignment=\"64\"\ntarget_has_atomic_equal_alignment=\"8\"\ntarget_has_atomic_equal_alignment=\"ptr\"\ntarget_has_atomic_load_store=\"16\"\ntarget_has_atomic_load_store=\"32\"\ntarget_has_atomic_load_store=\"64\"\ntarget_has_atomic_load_store=\"8\"\ntarget_has_atomic_load_store=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_thread_local\ntarget_vendor=\"unknown\"\nunix\n","stderr":""},"15697416045686424142":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n","stderr":""}},"successes":{}}


--------------------------------------------------------------------------------
/tools/program_generator/src/rng.rs:
--------------------------------------------------------------------------------
 1 | //! This is used to expose an api for an rng-object that can safely be used in a global variable.
 2 | //! This required locks, which makes it quite slow. 
 3 | 
 4 | use std::sync::Mutex;
 5 | 
 6 | use rand_xoshiro::rand_core::RngCore;
 7 | use rand_xoshiro::Xoroshiro64Star;
 8 | use rand_xoshiro::rand_core::SeedableRng;
 9 | 
/// Reads the cpu timestamp counter; the value is used to seed the random number generator
fn rdtsc() -> u64 {
    use std::arch::x86_64::_rdtsc;

    // SAFETY: the intrinsic takes no arguments and only reads the timestamp counter
    unsafe { _rdtsc() }
}
14 | 
/// Randomness exposing api that can be used in a global and uses a faster rand implementation than
/// the standard Rand crate.
///
/// The generator is kept behind a `Mutex`, which is what allows every method to take `&self`.
pub struct Rng {
    /// Underlying xoroshiro generator, locked for the duration of each individual draw
    rng: Mutex<Xoroshiro64Star>,
}
20 | 
21 | impl Rng {
22 |     /// Create a new Rand object
23 |     pub fn new() -> Self {
24 |         Self {
25 |             rng: Mutex::new(Xoroshiro64Star::seed_from_u64(rdtsc()))
26 |         }
27 |     }
28 | 
29 |     /// Return a random number
30 |     pub fn gen(&self) -> usize {
31 |         self.rng.lock().unwrap().next_u64() as usize
32 |     }
33 | 
34 |     /// Return a random byte-string, ascii-range 1-0xff (inclusive)
35 |     pub fn next_string(&self, max_length: usize, min: usize, max: usize) -> Vec<u8> {
36 |         // Create a random byte-string
37 |         let rand_bytes = (1..max_length).map(|_| {
38 |                 self.gen_range(min, max) as u8
39 |             }).collect::<Vec<u8>>();
40 |         
41 |         rand_bytes
42 |     }
43 | 
44 |     /// Return 2 random 32-bit unsigned integers
45 |     pub fn get2_rand(&self) -> (usize, usize) {
46 |         let tmp = self.rng.lock().unwrap().next_u64();
47 |         ((tmp & 0xffffffff) as usize, (tmp >> 32) as usize)
48 |     }
49 | 
50 |     /// Return a random number up to `max`
51 |     pub fn next_num(&self, max: usize) -> usize {
52 |         if max == 0 {
53 |             return 0;
54 |         }
55 |         self.rng.lock().unwrap().next_u64() as usize % max
56 |     }
57 | 
58 |     /// Return a random value in the range min..max, inclusive of min and exclusive of max
59 |     pub fn gen_range(&self, min: usize, max: usize) -> usize {
60 |         if max == min {
61 |             return min;
62 |         }
63 | 
64 |         (self.rng.lock().unwrap().next_u64() as usize % (max - min)) + min
65 |     }
66 | }
67 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-97a5f69bcae5c740/output-lib-program_generator:
--------------------------------------------------------------------------------
1 | {"message":"`#![feature]` may not be used on the stable release channel","code":{"code":"E0554","explanation":"Feature attributes are only allowed on the nightly release channel. Stable or\nbeta compilers will not comply.\n\nErroneous code example:\n\n```ignore (depends on release channel)\n#![feature(lang_items)] // error: `#![feature]` may not be used on the\n                        //        stable release channel\n```\n\nIf you need the feature, make sure to use a nightly release of the compiler\n(but be warned that the feature may be removed or altered in the future).\n"},"level":"error","spans":[{"file_name":"src/lib.rs","byte_start":11,"byte_end":24,"line_start":1,"line_end":1,"column_start":12,"column_end":25,"is_primary":true,"text":[{"text":"#![feature(variant_count)]","highlight_start":12,"highlight_end":25}],"label":null,"suggested_replacement":null,"suggestion_applicability":null,"expansion":null}],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror[E0554]\u001b[0m\u001b[0m\u001b[1m: `#![feature]` may not be used on the stable release channel\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m--> \u001b[0m\u001b[0msrc/lib.rs:1:12\u001b[0m\n\u001b[0m  \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m\u001b[1m\u001b[38;5;12m1\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m#![feature(variant_count)]\u001b[0m\n\u001b[0m  \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m           \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;9m^^^^^^^^^^^^^\u001b[0m\n\n"}
2 | {"message":"`#![feature]` may not be used on the stable release channel","code":{"code":"E0554","explanation":"Feature attributes are only allowed on the nightly release channel. Stable or\nbeta compilers will not comply.\n\nErroneous code example:\n\n```ignore (depends on release channel)\n#![feature(lang_items)] // error: `#![feature]` may not be used on the\n                        //        stable release channel\n```\n\nIf you need the feature, make sure to use a nightly release of the compiler\n(but be warned that the feature may be removed or altered in the future).\n"},"level":"error","spans":[{"file_name":"src/lib.rs","byte_start":38,"byte_end":47,"line_start":2,"line_end":2,"column_start":12,"column_end":21,"is_primary":true,"text":[{"text":"#![feature(once_cell)]","highlight_start":12,"highlight_end":21}],"label":null,"suggested_replacement":null,"suggestion_applicability":null,"expansion":null}],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror[E0554]\u001b[0m\u001b[0m\u001b[1m: `#![feature]` may not be used on the stable release channel\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m--> \u001b[0m\u001b[0msrc/lib.rs:2:12\u001b[0m\n\u001b[0m  \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m\u001b[1m\u001b[38;5;12m2\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m#![feature(once_cell)]\u001b[0m\n\u001b[0m  \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m           \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;9m^^^^^^^^^\u001b[0m\n\n"}
3 | {"message":"aborting due to 2 previous errors","code":null,"level":"error","spans":[],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror\u001b[0m\u001b[0m\u001b[1m: aborting due to 2 previous errors\u001b[0m\n\n"}
4 | {"message":"For more information about this error, try `rustc --explain E0554`.","code":null,"level":"failure-note","spans":[],"children":[],"rendered":"\u001b[0m\u001b[1mFor more information about this error, try `rustc --explain E0554`.\u001b[0m\n"}
5 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/.fingerprint/program_generator-71b575e69d616725/output-test-lib-program_generator:
--------------------------------------------------------------------------------
1 | {"message":"`#![feature]` may not be used on the stable release channel","code":{"code":"E0554","explanation":"Feature attributes are only allowed on the nightly release channel. Stable or\nbeta compilers will not comply.\n\nErroneous code example:\n\n```ignore (depends on release channel)\n#![feature(lang_items)] // error: `#![feature]` may not be used on the\n                        //        stable release channel\n```\n\nIf you need the feature, make sure to use a nightly release of the compiler\n(but be warned that the feature may be removed or altered in the future).\n"},"level":"error","spans":[{"file_name":"src/lib.rs","byte_start":11,"byte_end":24,"line_start":1,"line_end":1,"column_start":12,"column_end":25,"is_primary":true,"text":[{"text":"#![feature(variant_count)]","highlight_start":12,"highlight_end":25}],"label":null,"suggested_replacement":null,"suggestion_applicability":null,"expansion":null}],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror[E0554]\u001b[0m\u001b[0m\u001b[1m: `#![feature]` may not be used on the stable release channel\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m--> \u001b[0m\u001b[0msrc/lib.rs:1:12\u001b[0m\n\u001b[0m  \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m\u001b[1m\u001b[38;5;12m1\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m#![feature(variant_count)]\u001b[0m\n\u001b[0m  \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m           \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;9m^^^^^^^^^^^^^\u001b[0m\n\n"}
2 | {"message":"`#![feature]` may not be used on the stable release channel","code":{"code":"E0554","explanation":"Feature attributes are only allowed on the nightly release channel. Stable or\nbeta compilers will not comply.\n\nErroneous code example:\n\n```ignore (depends on release channel)\n#![feature(lang_items)] // error: `#![feature]` may not be used on the\n                        //        stable release channel\n```\n\nIf you need the feature, make sure to use a nightly release of the compiler\n(but be warned that the feature may be removed or altered in the future).\n"},"level":"error","spans":[{"file_name":"src/lib.rs","byte_start":38,"byte_end":47,"line_start":2,"line_end":2,"column_start":12,"column_end":21,"is_primary":true,"text":[{"text":"#![feature(once_cell)]","highlight_start":12,"highlight_end":21}],"label":null,"suggested_replacement":null,"suggestion_applicability":null,"expansion":null}],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror[E0554]\u001b[0m\u001b[0m\u001b[1m: `#![feature]` may not be used on the stable release channel\u001b[0m\n\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m--> \u001b[0m\u001b[0msrc/lib.rs:2:12\u001b[0m\n\u001b[0m  \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m|\u001b[0m\n\u001b[0m\u001b[1m\u001b[38;5;12m2\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m#![feature(once_cell)]\u001b[0m\n\u001b[0m  \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;12m| \u001b[0m\u001b[0m           \u001b[0m\u001b[0m\u001b[1m\u001b[38;5;9m^^^^^^^^^\u001b[0m\n\n"}
3 | {"message":"aborting due to 2 previous errors","code":null,"level":"error","spans":[],"children":[],"rendered":"\u001b[0m\u001b[1m\u001b[38;5;9merror\u001b[0m\u001b[0m\u001b[1m: aborting due to 2 previous errors\u001b[0m\n\n"}
4 | {"message":"For more information about this error, try `rustc --explain E0554`.","code":null,"level":"failure-note","spans":[],"children":[],"rendered":"\u001b[0m\u001b[1mFor more information about this error, try `rustc --explain E0554`.\u001b[0m\n"}
5 | 


--------------------------------------------------------------------------------
/tools/program_generator/target/debug/deps/rand_xoshiro-826023e1e10f2594.d:
--------------------------------------------------------------------------------
 1 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/rand_xoshiro-826023e1e10f2594.rmeta: /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/lib.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/common.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/splitmix64.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64star.rs
 2 | 
 3 | /home/seal/github/sfuzz/tools/program_generator/target/debug/deps/rand_xoshiro-826023e1e10f2594.d: /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/lib.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/common.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/splitmix64.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plusplus.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64starstar.rs /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64star.rs
 4 | 
 5 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/lib.rs:
 6 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/common.rs:
 7 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/splitmix64.rs:
 8 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128starstar.rs:
 9 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plusplus.rs:
10 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro128plus.rs:
11 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256starstar.rs:
12 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plusplus.rs:
13 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro256plus.rs:
14 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512starstar.rs:
15 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plusplus.rs:
16 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoshiro512plus.rs:
17 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plus.rs:
18 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128plusplus.rs:
19 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro128starstar.rs:
20 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64starstar.rs:
21 | /home/seal/.cargo/registry/src/github.com-1ecc6299db9ec823/rand_xoshiro-0.6.0/src/xoroshiro64star.rs:
22 | 


--------------------------------------------------------------------------------
/tools/program_generator/src/compile.rs:
--------------------------------------------------------------------------------
  1 | use crate::{
  2 |     Program, Type, Block, Operation
  3 | };
  4 | 
/// When set, `compile` prints both the received intermediate representation and the generated
/// c-code to stdout
const DEBUG_PRINT: bool = false;

/// Can be set to false to not actually write cases to disk & compile while debugging
const WRITE_TO_DISK: bool = true;

/// Compiler used to compile the c-code once generated (name of the executable that is spawned)
const COMPILER: &str = "gcc";
 13 | 
/// Contains information that the compiler functions require while generating the c code from the
/// intermediate representation
#[derive(Default, Debug)]
pub struct Compiler {
    /// Scope depth, used to handle indentation (four spaces are emitted per level)
    cur_depth: usize,

    /// The actual c code that will be compiled; translation appends to this string
    code: String,

    /// The program being generated in an intermediate representation
    program: Program,
}
 27 | 
 28 | impl Compiler {
 29 |     /// Create a new compiler object
 30 |     pub fn new(program: Program) -> Self {
 31 |         Self {
 32 |             cur_depth: 0,
 33 |             code: String::new(),
 34 |             program,
 35 |         }
 36 |     }
 37 | 
 38 |     /// Debug print for the c-code
 39 |     pub fn print_code(&self) {
 40 |         println!("Generated the following code:");
 41 |         println!("+-------------------------------------------------+");
 42 |         println!("{}", self.code);
 43 |         println!("+-------------------------------------------------+");
 44 |     }
 45 | 
 46 |     /// Insert indentation into the code based on the current depth
 47 |     fn insert_indent(&mut self) {
 48 |         for _ in 0..self.cur_depth {
 49 |             self.code.push_str("    ");
 50 |         }
 51 |     }
 52 | 
 53 |     /// Setup headers and begin actual program translation
 54 |     pub fn translate_program(&mut self) {
 55 |         self.code.push_str("#include <stdio.h>\n");
 56 |         self.code.push_str("#include <stdlib.h>\n");
 57 |         self.code.push_str("#include <string.h>\n\n");
 58 | 
 59 |         for func in &self.program.function_list {
 60 |             let name = &func.name;
 61 |             if name != "main" {
 62 |                 self.code.push_str(&format!("void {}(unsigned char *buf);\n", name));
 63 |             }
 64 |         }
 65 |         self.code.push('\n');
 66 | 
 67 |         // Traverse function list in reverse and translate each to C
 68 |         for i in 0..self.program.function_list.len() {
 69 |             self.translate_function_header(i);
 70 |             self.translate_function_body(i);
 71 |         }
 72 |     }
 73 | 
 74 |     /// Translate the header of a function to c
 75 |     fn translate_function_header(&mut self, index: usize) {
 76 |         let mut first = true;
 77 |         let func = self.program.function_list[index].clone();
 78 |         self.code.push_str(&format!("{} {}(", func.typ, func.name));
 79 |         for arg in func.arguments {
 80 |             if first {
 81 |                 first = false;
 82 |             } else {
 83 |                 self.code.push_str(", ");
 84 |             }
 85 |             if arg.0 == Type::Argv {
 86 |                 self.code.push_str(&format!("char **{}", arg.1));
 87 |             } else {
 88 |                 self.code.push_str(&format!("{} {}", arg.0, arg.1));
 89 |             }
 90 |             
 91 |         }
 92 |         self.code.push_str(")\n");
 93 |     }
 94 | 
 95 |     /// Emit an operation to the c-code
 96 |     fn emit_operation(&mut self, operation: &Operation) {
 97 |         self.insert_indent();
 98 |         match operation {
 99 |             Operation::If(_, b) => {
100 |                 self.code.push_str(&format!("{}\n", operation));
101 |                 self.translate_block(b);
102 |             },
103 |             _ => self.code.push_str(&format!("{};\n", operation)),
104 |         };
105 |     }
106 | 
107 |     /// Translate an entire block to c-code while taking care of handling proper scoping and
108 |     /// indentation
109 |     fn translate_block(&mut self, block: &Block) {
110 |         self.insert_indent();
111 |         self.code.push_str("{\n");
112 |         self.cur_depth += 1;
113 | 
114 |         for operation in &block.stmt_list {
115 |             self.emit_operation(operation);
116 |         }
117 | 
118 |         self.cur_depth -= 1;
119 |         self.insert_indent();
120 |         self.code.push_str("}\n\n");
121 |     }
122 | 
123 |     /// Translate the body of a function
124 |     fn translate_function_body(&mut self, index: usize) {
125 |         let body = self.program.function_list[index].body.clone();
126 |         self.translate_block(&body);
127 |     }
128 | }
129 | 
130 | /// Compile the previously generated program to an elf binary
131 | pub fn compile(program: Program) {
132 |     if DEBUG_PRINT {
133 |         println!("Received the following program: \n{:#?}", program);
134 |     }
135 | 
136 |     let mut compiler = Compiler::new(program);
137 |     compiler.translate_program();
138 | 
139 |     if DEBUG_PRINT {
140 |         compiler.print_code();
141 |     }
142 | 
143 |     if WRITE_TO_DISK {
144 |         // Write the program to disk
145 |         std::fs::write("generated_program.c", &compiler.code)
146 |             .expect("Failed to write generated program to disk");
147 | 
148 |         // Compile the generated program
149 |         assert!(std::process::Command::new(COMPILER)
150 |             .arg("generated_program.c")
151 |             .arg("-o")
152 |             .arg("generated_program")
153 |             .arg("-g")
154 |             .status()
155 |             .unwrap()
156 |             .success());
157 |     }
158 | 
159 |     println!("[+] Done");
160 | }
161 | 


--------------------------------------------------------------------------------
/resources/notes.md:
--------------------------------------------------------------------------------
  1 | #### These are just some random notes I'm taking while thinking about how I want to develop certain parts.
  2 | 
  3 | 
  4 | ## Mutations
  5 | 
  6 | > https://lcamtuf.blogspot.com/2014/08/binary-fuzzing-strategies-what-works.html  
  7 | > https://www.usenix.org/system/files/sec19-lyu.pdf  
  8 | > https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf [21 , 28, 29 , 37 , 54]
  9 | 
 10 | 1. bitflips (1, 2, 4)
 11 | 2. Byte flips (1, 2, 4)
 12 | 3. add/sub integers (+-35), byte, word, dword, qword (signed & unsigned)
 13 | 4. Insert common breaking points (-1, MAXINT, etc)
 14 | 5. Increase/decrease size
 15 | 6. Splice test cases together
 16 | x. Havoc
 17 | 
 18 | Setup dictionary  
 19 | > https://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html  
 20 | 
 21 | ## Seed Scheduling
 22 | 
 23 | Analysis:
 24 | 1. Graph centrality
 25 | 2. Mutation history to determine when to stop focusing on "hard" edges
 26 | 
 27 | Dynamic:
 28 | 1. Execution time
 29 | 2. Less bytes
 30 | 3. Coverage
 31 | 4. How often the seed has been chosen
 32 | 5. Number of inputs with same cov
 33 | 6. Generated test cases based on this input with same cov
 34 | 
 35 | -
 36 | 1. Decrease whenever no new cov is found
 37 | 
 38 | ## Coverage eval
 39 | 
 40 | > [Cov-sensitivity] https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf
 41 |     - Tracking call stack seems kinda sick. n=2-gram cov seems nice too
 42 |     - Assign different coverage metrics to different threads and synch corpus
 43 |     - Assign different coverage metrics to different threads and dont synch
 44 | 
 45 | > [Cerberos] https://dl.acm.org/doi/pdf/10.1145/3338906.3338975
 46 |     - Complexity score calculated for each function that can be correlated to inputs via their 
 47 |     coverage trace
 48 | 
 49 |     - Idea is to assign rank using the 5 metrics below, and to then queue up all seeds based on 
 50 |     their weight. Uses Pareto frontier
 51 | 
 52 |     - exec time, number of covs, unique edges, file size, complexity score
 53 | 
 54 | > [Directed fuzzing] https://dl.acm.org/doi/pdf/10.1145/3133956.3134020
 55 |     - Analyze program callgraph/cfg to direct the fuzzer to specific target points in the program
 56 | 
 57 | > https://www.ndss-symposium.org/wp-content/uploads/2020/02/24422-paper.pdf
 58 |     - Don't treat all coverage equally, label security-relevant edges and assign weights based
 59 |     on their path to vulnerable functions (eg. memcpy or a lot of memory operations)
 60 | 
 61 | > https://arxiv.org/pdf/2203.12064.pdf
 62 |     - Graph centrality analysis
 63 | 
 64 | > https://mboehme.github.io/paper/CCS16.pdf
 65 |     - AFLFast
 66 |     - More energy to low frequency paths
 67 |     - Model seed scheduling as markov chains
 68 | 
 69 | > https://www.usenix.org/system/files/woot20-paper-fioraldi.pdf
 70 | > https://dl.acm.org/doi/pdf/10.1145/3133956.3134073
 71 | > https://www.ndss-symposium.org/wp-content/uploads/2017/09/ndss2017_10-2_Rawat_paper.pdf
 72 | 
 73 | Things to potentially maintain for each input to determine energy:
 74 |     - Size
 75 |     - Execution time
 76 |     - Which cov-units it hits & how rare each of them is (potentially complexity score for each
 77 |         cov-unit as well)
 78 |     - 
 79 |     
 80 | Cull corpus:
 81 |     - Track which cov points an input hits to potentially dedup/remove obsolete cases
 82 | 
 83 |     - Periodically cull entries that are superseded by other entries (maybe check how often the 
 84 |         entry was hit too)
 85 | 
 86 | Add case to inputs:
 87 |     If a case produces same cov, but block is executed different number of times as
 88 |         previous cases, it is regarded as interesting
 89 | 
 90 | Timeout:
 91 |     - Timeout: 5x initial calibrated exec speed
 92 | 
 93 | Scheduling:
 94 |     - Whenever a seed is chosen, execute it n times instead of just once
 95 |     - Alternatively just go through all seeds sequentially and execute each seed n times
 96 | 
 97 | Calculate score:
 98 |     Initial:
 99 |         - Base it on size & exec-time
100 | 
101 |     Dynamic:
102 |         1. 
103 |             - weight = number of cov points an input hits
104 |             - weight += 25% / unique/new cov
105 |             - weight += bonus (based on shorter exec/shorter input)
106 |             - avg_weight = average of all input weights
107 |             - Assign each input weight based on how far above/below the avg they are
108 |         2.
109 |             ```rs
110 |             fn calculate_energy(input: Input) {
111 | 
112 |             }
113 | 
114 |             fn fuzz_loop() {
115 |                 loop {
116 |                     id = get_next_seed();
117 |                     p = calculate_energy(corpus.inputs[id]);
118 | 
119 |                     for i in 0..p {
120 |                         mutate(corpus.inputs[id]);
121 |                         fuzz(corpus.inputs[id]);
122 |                     }
123 |                 }
124 |             }
125 |             ```
126 | 
127 | 
128 | ## Crash deduping
129 | 1. AFL
130 |     - Crash trace includes a tuple not seen before
131 |     - Crash trace is missing a tuple seen before
132 | 
133 | 2. stuff
134 |     - Dont threadshare the crash_map, and instead work by sending crashing inputs to the main 
135 |     thread to handle
136 | 
137 | 
138 | ## Other
139 | > https://arxiv.org/abs/2009.06124 - Scaling  
140 | 
141 | > Coverage-tracking
142 |     - priority queue, increase priority of input whenever it finds new coverage
143 | ```rs
144 |     let hash = calculate_hash(from, to);
145 |     hash &= bitmap_size;
146 |     let idx = hash / 64;
147 |     let bit = 1 << (hash % 64);
148 |     if (state->cov_bitmap[idx] & bit) == 0 {
149 |         state->cov_bitmap[idx] |= bit;
150 |         state->exit_reason = 7;
151 |         state->cov_from = from;
152 |         state->cov_to   = to;
153 |         state->reenter_pc = pc+4;
154 |         return
155 |     }
156 | ```
157 | 


--------------------------------------------------------------------------------
/tools/scraper.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | import requests
  3 | import urllib
  4 | from requests_html import HTML
  5 | from requests_html import HTMLSession
  6 | import os
  7 | import sys
  8 | import random
  9 | import string
 10 | import subprocess
 11 | from subprocess import DEVNULL
 12 | import signal
 13 | import time
 14 | from datetime import timedelta
 15 | import threading
 16 | 
 17 | '''
 18 | Web scraper that makes use of Google's search engine to collect files of a specific filetype. Let
 19 | it run as long as you wish, and ctrl-c to stop execution. A signal handler will then remove all
 20 | incorrectly downloaded files and dedup the collection to create a unique set of files given a
 21 | specific type.
 22 | 
 23 | Just modify FILE_TYPE to specify what type of file you want. Only filetypes that are indexable by
 24 | Google's `filetype:XXX` search operator are supported.
 25 | '''
 26 | 
# File type to collect (used both in the `filetype:` search query and when filtering downloads)
FILE_TYPE = "pdf"

# Delay between google-requests, can help reduce throttling (in seconds). 0-30 seconds seem to be
# best in most cases, the optimal delay varies
REQUEST_DELAY = 0
 
# Number of urls returned by the searches for which download attempts were made
num_downloads = 0

# Number of downloads that were attempted but failed
failed_downloads = 0

# Time that has elapsed since program start, in seconds (updated by `timer`)
elapsed = 0
 42 | 
 43 | # Get the source code of a requested page
 44 | def get_source(url):
 45 |     session = HTMLSession()
 46 |     response = session.get(url)
 47 |     return response
 48 | 
 49 | # Return google search results for a query
 50 | def scrape_google(query):
 51 |     query = urllib.parse.quote_plus(query)
 52 |     response = get_source("https://www.google.co.uk/search?q=" + query)
 53 | 
 54 |     links = list(response.html.absolute_links)
 55 |     google_domains = ('https://www.google.', 
 56 |                       'https://google.', 
 57 |                       'https://webcache.googleusercontent.', 
 58 |                       'http://webcache.googleusercontent.', 
 59 |                       'https://policies.google.',
 60 |                       'https://support.google.',
 61 |                       'https://maps.google.')
 62 | 
 63 |     # Remove all irrelevant links
 64 |     for url in links[:]:
 65 |         if url.startswith(google_domains):
 66 |             links.remove(url)
 67 |     return links
 68 | 
 69 | # Download requested url
 70 | def download_file(file_name, url):
 71 |     response = requests.get(url)
 72 |     open(file_name, "wb").write(response.content)
 73 | 
 74 | # Remove all files that were mistakenly downloaded and don't have the correct type
 75 | def remove_duds():
 76 |     ret = 0
 77 |     for file_name in os.listdir("seeds"):
 78 |         output = str(subprocess.check_output(f"file seeds/{file_name}", shell=True, stderr=DEVNULL))
 79 |         # If the file we downloaded has the incorrect file-type, remove
 80 |         if FILE_TYPE not in output and FILE_TYPE.upper() not in output:
 81 |             os.remove(f"seeds/{file_name}")
 82 |             ret += 1
 83 |     return ret
 84 | 
 85 | # Remove all duplicates in the downloaded files
 86 | def dedup():
 87 |     ret = 0
 88 |     unique = []
 89 |     for file_name in os.listdir("seeds"):
 90 |         if os.path.isfile(file_name):
 91 |             filehash = md5.md5(file(file_name).read()).hexdigest()
 92 |             if filehash not in unique: 
 93 |                 unique.append(filehash)
 94 |             else: 
 95 |                 os.remove(file_name)
 96 |                 ret += 1
 97 |     return ret
 98 | 
 99 | # Print out the overall results
100 | def print_results(duds_removed, deduped):
101 |     global elapsed
102 |     global num_downloads
103 |     global failed_downloads
104 | 
105 |     num_files = len([name for name in os.listdir('seeds')])
106 |     print("\n\n\n+===================================================+")
107 |     print(f"Runtime: {str(timedelta(seconds=elapsed))}")
108 |     print(f"Total initial download attempts: {num_downloads}")
109 |     print(f"Failed downloads: {failed_downloads}")
110 |     print(f"Incorrect file-types removed: {duds_removed}")
111 |     print(f"Duplicate files removed: {deduped}")
112 |     print(f"A total of {num_files} unique files now exist in the `seeds` directory")
113 |     print("+===================================================+\n\n")
114 | 
# Signal handler: run the filtering passes over the seed collection (wrong file-types first,
# then duplicates), print the results, and finally terminate the process with SIGQUIT
def signal_handler(sig, frame):
    print_results(remove_duds(), dedup())
    os.kill(os.getpid(), signal.SIGQUIT)
123 | 
# Print out how much time has passed, refreshing the same terminal line once per second.
# Never returns; meant to run in its own thread. Keeps the global `elapsed` (whole seconds
# since this function started) up to date for the final report.
def timer():
    global elapsed
    # Measure against the monotonic clock instead of counting loop iterations, so time spent
    # writing the output does not accumulate as drift
    start = time.monotonic()
    while True:
        elapsed = int(time.monotonic() - start)
        sys.stdout.write("\rRuntime: " + str(timedelta(seconds=elapsed)))
        # The text has no trailing newline, so flush explicitly or it stays in the buffer
        sys.stdout.flush()
        time.sleep(1)
132 | 
# Main loop: keep sending random Google queries restricted to FILE_TYPE and download every
# returned url into the `seeds` directory. Never returns; the run is ended through the
# SIGINT handler.
def main():
    global num_downloads
    global failed_downloads

    # Start timer thread. It is a daemon so it can never keep the process alive by itself
    threading.Thread(target=timer, daemon=True).start()

    # Create seed directory if it doesnt already exist
    os.makedirs("seeds", exist_ok=True)

    while True:
        # Collect a list of urls that should point to files of the requested type
        rand_search = ''.join(random.choice(string.ascii_lowercase) for _ in range(10))
        query = f"filetype:{FILE_TYPE} {rand_search}"
        try:
            urls = scrape_google(query)
        except Exception:
            # Deliberately best-effort: a failed search simply contributes no urls.
            # `Exception` (not a bare except) so SystemExit/KeyboardInterrupt still propagate.
            urls = []

        # Try to download every url that was found
        num_downloads += len(urls)
        for url in urls:
            rand_name = ''.join(random.choice(string.ascii_lowercase) for _ in range(10))
            try:
                download_file(f"seeds/{rand_name}", url)
            except Exception:
                failed_downloads += 1

        time.sleep(REQUEST_DELAY)
162 | 
if __name__ == "__main__":
    # Route SIGINT (CTRL-C) to signal_handler, which cleans up the seeds and prints the results
    signal.signal(signal.SIGINT, signal_handler)
    print("Hit CTRL-C to stop execution at any time")
    main()
167 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # SFUZZ
  2 | Start date: Dec, 2021
  3 | 
  4 | This is a coverage-guided, emulation based greybox fuzzer that makes use of a custom Just-In-Time compiler to achieve near-native performance. It works by lifting RISC-V elf binaries to an intermediate representation before JIT compiling them to x86 during execution. During JIT compilation the code is instrumented to enable fuzzing-improvements such as coverage tracking, asan, cmpcov, or snapshot-based fuzzing.
  5 | 
  6 | <br>
  7 | 
  8 | #### Features
  9 | - Multi-threaded, supporting an arbitrary amount of threads and scaling almost linearly
 10 | - Custom JIT compiler for high performance and more importantly customizability that is harder 
 11 |   to achieve with other solutions such as qemu
 12 | - Custom memory management unit to once again allow high customization and highly beneficial features
 13 |   such as byte-level permission checks and dirty-bit based emulator resets. Additionally hooks to allow for
 14 |   safe usage of heap-routines are implemented.
 15 | - Virtualized file management to allow easy in memory fuzzing
 16 | - Snapshot based fuzzing, so a target's memory/register state can be snapshotted during execution to
 17 |   base all future fuzz cases off of this baseline
 18 | - Edge-level coverage tracking, and coverage guided fuzzing based on this feedback
 19 | - Various mutators, crash deduplication, and a simple seed scheduling algorithm
 20 | 
 21 | <br>
 22 | <p style="text-align:center;"><img src="resources/printout.png" alt="" height="75%" width="75%"/></p>
 23 | 
 24 | #### Description
 25 | 
 26 | The objective of this project is to highlight the benefits of using an emulated environment for
 27 | fuzzing. Many previous fuzzers based on emulation exist, but they all almost exclusively use the qemu
 28 | emulation engine for the underlying emulation. While this engine does have a fairly mature
 29 | just-in-time compiler and generates very good code, it is not designed for fuzzing. During fuzzing, 
 30 | we intend to run the same process thousands of times per second. This makes room for specialized 
 31 | optimizations that qemu does not make strong use of such as reusing the same memory space for each 
 32 | process run and only resetting a limited amount of memory via dirty bit mechanics.
 33 | 
 34 | In many ways, this is more of a proof-of-concept that I wanted to work on to learn about compiler internals, and have an emulation-based playground to play around with various fuzzing techniques such as different coverage metrics, seed schedulers, and snapshot-based fuzzing. With more JIT optimizations and most importantly, extensions to include more popular architectures such as mips or arm this could however certainly be used to efficiently fuzz closed source code that cannot simply be instrumented through recompilation.
 35 | 
 36 | Given the testing I have done so far, sfuzz has significantly less overhead than many other popular fuzzers, which results in very fast performance, especially for small fuzz cases.
 37 | 
 38 | More details on the features/choices made for this fuzzer are listed in the accompanying blogpost (https://seal9055.com/blog/fuzzing/sfuzz) and the documentation files listed below:
 39 | - Memory Management - [memory_management.md](https://github.com/seal9055/sfuzz/tree/main/docs/memory_management.md)
 40 | - Code Generation - [code_gen.md](https://github.com/seal9055/sfuzz/tree/main/docs/code_gen.md)
 41 | - Fuzzer implementation/features - [fuzzing.md](https://github.com/seal9055/sfuzz/tree/main/docs/fuzzing.md)
 42 | - Some simple tests - [benchmarking.md](https://github.com/seal9055/sfuzz/tree/main/docs/benchmarking.md)
 43 | 
 44 | #### Usage
 45 | 
 46 | This entire fuzzer is written in rust, so after cloning the repository, just run `cargo build --release` to compile.
 47 | 
 48 | Since the fuzzer currently only supports RISC-V, the target needs to be compiled to RISC-V using the below toolchain (or a similar one). Alternatively if you already have a RISC-V binary that will work perfectly fine too.
 49 | 
 50 | Once this is set up, just create input/output directories, add some initial seed files to the input directory and start up the fuzzer.
 51 | 
 52 | `./sfuzz -i in -o out -- ./test_cases/simple_test @@`
 53 | 
 54 | Additional flags can be passed in via commandline options to specify the number of threads, enable snapshot fuzzing, add a dictionary to the mutator, etc. The additional options can be listed by running sfuzz with the `-h` flag.
 55 | 
 56 | If you wish to test the fuzzer against some targets of varying complexity, the program_generator at `tools/program_generator` can be used to automatically generate programs of varying complexity. Note that you will require a RISC-V toolchain to then compile the target.
 57 | 
 58 | #### Riscv toolchain to compile binaries for the fuzzer
 59 | 
 60 | This sets up a toolchain to compile riscv binaries that can be loaded/used by this project.
 61 | ```
 62 | Riscv compiler/tooling:
 63 |     sudo apt-get install autoconf automake autotools-dev curl python3 libmpc-dev libmpfr-dev \
 64 |     libgmp-dev gawk build-essential bison flex texinfo gperf libtool patchutils bc zlib1g-dev \
 65 |     libexpat-dev
 66 |     git clone https://github.com/riscv/riscv-gnu-toolchain && cd riscv-gnu-toolchain
 67 |     ./configure --prefix=/opt/riscv --with-arch=rv64i
 68 |     sudo make
 69 | 
 70 | Debugger:
 71 |     gdb-multiarch
 72 | ```
 73 | 
 74 | #### TODO
 75 | 
 76 | This list represents a set of features that I plan on implementing in the future.
 77 | 
 78 | - [X] Working Memory management unit
 79 | - [X] JIT Compiler
 80 | - [X] Virtualized files for in-memory fuzzing
 81 | - [X] Byte level permission checks + hooked/safe allocators
 82 | - [X] Track edge level coverage
 83 | - [X] Persistent mode to fuzz in small loops around target functions
 84 | - [X] Crash deduping / unique crashes
 85 | - [X] Update mutators to include more options
 86 | - [X] Seed Scheduling
 87 | - [X] CmpCov to get past magic values and checksums
 88 | - [X] Add some tooling around the fuzzer
 89 | - [ ] Proper benchmarking
 90 | - [ ] Implement RISC-V M & A extensions, so that the JIT can use glibc instead of newlib
 91 | - [ ] Replace assembler to improve compilation speed
 92 | - [ ] Support more architectures (eg. mips, arm)
 93 | - [ ] JIT optimizations, and another attempt at register allocation
 94 | 
 95 | #### References
 96 | 
 97 | * All papers listed in [fuzzing.md](https://github.com/seal9055/sfuzz/blob/main/docs/fuzzing.md)
 98 | * Emulation based fuzzing - Brandon Falk [GamozoLabs](https://gamozolabs.github.io/)
 99 | * Intel Software Developer Manuals
100 | * RISCV User ISA specification
101 | * Rv8: a high performance RISC-V to x86 binary translator - Michael Clark & Bruce Hoult
102 | * Engineering a Compiler - Keith D. Cooper & Linda Torczon
103 | * Cranelift [https://cfallin.org/blog/] - Chris Fallin
104 | * Generating Low-Overhead Dynamic Binary Translators - Mathias Payer & Thomas R. Gross
105 | * Efficiently Computing Static Single Assignment Form and the Control Dependence Graph - Cytron et al
106 | * Computing Liveness Sets for SSA-Form Programs - Brandner et al
107 | * Linear Scan Register Allocation on SSA Form - Christian Wimmer & Michael Franz
108 | * http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf
109 | * AddressSanitizer: A Fast Address Sanity Checker
110 |     https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/37752.pdf
111 | 


--------------------------------------------------------------------------------
/docs/benchmarking.md:
--------------------------------------------------------------------------------
  1 | # Benchmarks & Testing
  2 | 
  3 | ##### &nbsp;Since the fuzzer is not at a point where it can run proper benchmarks (eg. [fuzzbench](https://google.github.io/fuzzbench/)), this pretty much consists solely of sample programs I wrote and other programs I chose to include. Note that this means that my conclusions may be biased.
  4 | <br>
  5 | 
  6 | #### Performance against a very Simple Target
  7 | 
  8 | **Experiment-Setup**  
  9 | This initial test just compares the fuzzer's performance when used on a very simple test binary. This test showcases the low overhead of the fuzzer when it comes to resetting memory and running many very short cases. Arguably this is not a very important test-case since no real program will be this simple, but I found it interesting nonetheless. The program basically just has the fuzzer jump through some small if-comparisons before segfaulting, thus giving the fuzzer a crash. Any coverage guided fuzzer should be able to trivially find the crash within seconds. The corpus consists of a 100-byte file generated from `/dev/urandom`.
 10 | 
 11 | For this test-case, all of my fuzzer's features are enabled, including coverage tracking, byte-level permissions, allocator-hooks, in-memory fuzzing, cmpcov, and snapshot based fuzzing. I set the snapshot right after the call to `open`.
 12 | 
 13 | I will be comparing my fuzzer's performance to AFL++ while testing both qemu-emulation and compile-time instrumentation for AFL. Since this is a state-less target, I will also instrument the below code for AFL's persistent-mode fuzzing and in-memory input generation as described in AFL's [docs](https://github.com/AFLplusplus/AFLplusplus/blob/stable/instrumentation/README.persistent_mode.md).
 14 | ```c
 15 | int main(int argc, char **argv) {
 16 |     char buf[100];
 17 |     int fd = open(argv[1], O_RDONLY);
 18 | 
 19 |     read(fd, buf, 100);
 20 | 
 21 |     if (buf[0] == 0x41) {
 22 |       if (buf[1] == 0x42) {
 23 |         if (buf[2] == 0x43) {
 24 |           if (buf[3] == 0x44) {
 25 |             if (buf[4] == 0x45) {
 26 |               if (buf[5] == 0x46) {
 27 |                 *(unsigned long*)0x4141414141414141 = 0;
 28 |               }
 29 |             }
 30 |           }
 31 |         }
 32 |       }
 33 |     }
 34 |     return 0;
 35 | }
 36 | ```
 37 | 
 38 | **Results**
 39 | | Setup | Result |
 40 | | --- | --- |
 41 | | SFUZZ - Snapshot | 1.1 million fuzz cases per second |
 42 | | SFUZZ - No-Snapshot | 650,000 fuzz cases per second |
 43 | | AFL++ - QEMU | 3,500 fuzz cases per second |
 44 | | AFL++ - Source-Instr. | 3,500 fuzz cases per second |
 45 | | AFL++ - Source-Instr. + Persistent/in-memory | 33,000 fuzz cases per second |
 46 | <br> 
 47 | 
 48 | SFUZZ finds the crash within the first second of running and executes about 1.1 million fuzz cases per second. Disabling snapshot based fuzzing and starting each test case at the `_start` function still finds the crash immediately, but performance drops to 650,000 per second. This massive gap is because this is a very small program for which the initialization routines make up the majority of the code, so being able to skip these is very beneficial.
 49 | 
 50 | I tested AFL++ in 2 modes, qemu and source code instrumented. Starting with qemu-mode and a -O3 compiled binary (without any snapshot/persistent fuzzing mechanisms enabled), AFL requires 2 and a half minutes to find the crash and runs at about 3500 fuzz cases per second. Taking the non-snapshot version of my fuzzer, this is a \~185x speedup. 
 51 | 
 52 | With source based instrumentation using the afl-clang-fast compiler with the flags shown below, AFL finds the crash in 3 minutes, and runs at 3500 fuzz cases per second once again. I would have expected this to run a lot faster than the qemu-based approach, but I believe that the setup should be correct. Things start to look a little different with persistent-mode/in memory fuzzing enabled. AFL++ is now able to generate 33,000 fuzz cases per second and also finds the crash in the first second.
 53 | 
 54 | ```
 55 | AFL_USE_ASAN=1 LLVM_CONFIG=llvm-config-11 ~/AFLplusplus/afl-clang-fast ../test_cases/simple_test.c -o simple_test_afl -O3
 56 | 
 57 | ~/AFLplusplus/afl-fuzz (-D) -i in -o out -- ./simple_test_afl @@
 58 | ```
 59 | 
 60 | I believe that the emulated version of AFL using qemu is the fairer comparison since my fuzzer does not require source code for its instrumentation and fully emulates. I include both metrics though since my fuzzer currently only supports RISC-V which is not a very popular architecture, and will thus generally require source code as well to recompile to RISC-V.
 61 | 
 62 | #### More Complex Randomly Generated Program
 63 | 
 64 | **Experiment-Setup**  
 65 | In this case the target was automatically generated using the [program_generator](https://github.com/seal9055/sfuzz/tree/main/tools/program_generator) tool I wrote. It automatically generates a C program that takes its fuzz-input from a file and uses it to attempt to pass various `if`-checks.
 66 | 
 67 | The program was generated with a complexity configuration of `9` and generated a 6200 loc target. The generated code is also saved [here](https://github.com/seal9055/sfuzz/blob/main/test_cases/generated_program.c).
 68 | 
 69 | Apart from that the remaining setup is the exact same as for the first case, using a 500-byte randomly generated input-seed. For the below results, both afl and SFUZZ were run for 15mins
 70 | 
 71 | **Results**
 72 | | Setup | Result |
 73 | | --- | --- |
 74 | | SFUZZ - Snapshot | 142,000 fuzz cases per second, 114 unique crashes |
 75 | | SFUZZ - No-Snapshot | 105,000 fuzz cases per second, 108 unique crashes |
 76 | | AFL++ - QEMU | 2,400 fuzz cases per second, 85 unique crashes |
 77 | | AFL++ - Source-Instr. | 2,600 fuzz cases per second, 78 unique crashes |
 78 | | AFL++ - Source-Instr. + Persistent/in-memory | 30,000 fuzz cases per second, 107 unique crashes |
 79 | <br> 
 80 | Once again SFUZZ outperforms AFL by a pretty good margin. The performance gap is however much
 81 | smaller in this case than in the previous one since more instructions are being executed for each
 82 | fuzz-case resulting in clang's optimizations granting large benefits when compared to my naive JIT.
 83 | 
 84 | Once again persistent mode adds immense performance benefits to AFL, which is reflected very heavily
 85 | in its overall results. Default source code instrumentation once again seems to perform very similar 
 86 | to qemu. I have no idea why this is the case or if I might be messing something up in my AFL setup,
 87 | but these are the results I ended up recording.
 88 | 
 89 | For this simple target SFUZZ with snapshotting enabled seems to have found most possible crashes by 
 90 | the 18 minute mark. After this point it did not find any more coverage.
 91 | 
 92 | According to the final results, AFL seems to have kept up in terms of the unique crashes it was able
 93 | to find, however, AFL++ determines a crash's uniqueness based on the path taken to reach the crash.
 94 | Since there are multiple paths to reach each of the crashes inside the target that number is a
 95 | little misleading. Nevertheless, AFL still found a decent amount of crashes when considering the
 96 | performance deficit. I believe that this is in big parts due to AFL's corpus management and seed
 97 | prioritization algorithms. The ones currently in use for SFUZZ are super simple and don't perform
 98 | any corpus minimization.
 99 | 
100 | The output screens at the end of the timeframe for both AFL and SFUZZ are listed below
101 | (snapshot/persistent mode were enabled for both SFUZZ and AFL for the below results).
102 | 
103 | 
104 | <p><img src="../resources/sfuzz_test.png" alt="" height="75%" width="75%"/></p>
105 | <p><img src="../resources/afl_test.png" alt="" height="75%" width="75%"/></p>
106 | 
107 | 


--------------------------------------------------------------------------------
/docs/memory_management.md:
--------------------------------------------------------------------------------
 1 | # Memory Management
 2 | 
 3 | #### Overview
 4 | 
 5 | This component of the fuzzer is responsible for providing the memory space for the target. It provides each emulator thread an entirely separate mmu and makes sure that none of the target threads can access/corrupt the memory space of another thread.
 6 | 
 7 | Each mmu consists of 2 contiguous blocks of memory (one for the actual memory, and another one for permissions), and an api that exposes various operations on this memory such as allocations, frees, reads, and writes. The exposed functions make use of the permissions-map to achieve byte-level permission checks (similar to ASAN) on each memory access, in addition to an allocator that performs properly checked allocations/frees.
 8 | 
 9 | Most of the code pertaining to these features can be found in [mmu.rs](https://github.com/seal9055/sfuzz/blob/main/src/mmu.rs). More detailed descriptions of some of these features are provided below.
10 | 
11 | #### Byte Level Permission Checks
12 | On most architectures, permissions are handled at the hardware level through page tables. This means that whenever an instruction tries to access a memory region, without possessing the correct permissions, an abort is generated which is then handled at the software level. Since these permissions are handled at the page table level, it prevents any incorrect access from crossing page boundaries. When it comes to exploitation, however, a couple of out-of-bounds bytes can oftentimes already be enough to compromise the security of an application, which this type of permission checking cannot handle.
13 | 
14 | A tool commonly used while fuzzing is address sanitizer (also referred to as asan). When a binary is compiled using asan, it is instrumented at compile time with extra checks that make sure that every memory access has the correct access permissions. This tool however has a few very relevant issues. For one it requires access to the binary's source code to recompile it with proper instrumentation. This makes it useful only when the source code is available, which, especially when fuzzing embedded systems, is often not the case. Secondly, asan has a significant performance overhead. According to a study conducted by Google in 2012 (AddressSanitizer: A Fast Address Sanity Checker), it resulted in a 73% slowdown, which is quite a bit, especially when considering how reliant fuzzers are on their performance. This slowdown however was worth it due to the power of byte-level permission checks and led to 300 new bugs being discovered in the Chrome browser at the time.
15 | 
16 | In this case, since the binary is being run in a custom JIT compiler, both of these drawbacks can be almost entirely mitigated. Not having source code available is not an issue at all anymore since all of the code is being generated based on the binary. As for the performance aspects, EXECUTE permissions are almost entirely free since they are checked once when a function is first compiled, and then assumed to be true for the rest of the program's execution. This would need some changes when dealing with JIT compilers that frequently change their executable memory mappings, but for 99% of use cases, it should suffice. As for load and store instructions (that require the READ and WRITE permissions), the checks consist of 5 assembly instructions (1 memory load, 1 conditional jmp, and 3 arithmetic instructions). While this results in some additional overhead when performing frequent memory accesses, it is nowhere near as expensive as address sanitizer.
17 | 
18 | These permission bits mean that every out-of-bounds memory access (even if it is just a single byte) instantly results in a notification to the fuzzer which can then modify its corpus to focus on this bug and attempt to increase the out of bounds bug. This permission model also applies to library functions such as malloc & free. These are hooked to instead call custom malloc/free implementations that support this byte-level memory model. These hooked functions also include additional checks to completely destruct free'd memory so common heap bugs such as use after free's or double free's are instantly reported as well instead of leading to undefined behavior.
19 | 
20 | #### Dirty-bit Memory Resets
21 | In the current implementation, each new address space is 64mb large (although this can easily be changed depending on the complexity of the target). This means that on each new fuzz case, this entire space needs to be reset to its initial state. Doing a massive 64mb memcpy() on each new fuzz case is very expensive and leads to completely unacceptable performance. Here we can borrow a concept that is common in the operating systems world: dirty bits. In operating systems, these are maintained at the page table level similar to the permissions. This bit is set whenever a write to memory occurs. This means that when copying memory between different cache levels, or just clearing memory, the page table can be traversed, and only pages with the dirty bit set need to have work done on them.
22 | 
23 | The same principle applies to this fuzzer. When a fuzzer is run, only a very small percentage of this 64mb address space is actually overwritten. This means that by maintaining a dirty bit list, we can selectively choose which pages are reset while leaving most of the memory intact. The memory space, in this case, is not maintained in a page table so some of the implementation details differ, but the principle remains.
24 | 
25 | The implementation of memory resets in this project was heavily influenced by Brandon Falk's prior research into obtaining extremely fast memory resets and his implementation in his fuzz_with_emus project. Two arrays are maintained. Whenever memory is dirtied, the address is pushed to an initially empty array that contains a listing of all dirtied memory regions. Additionally, a dirty bitmap is maintained that is used to verify that only 1 address from each page (4096 bytes in this case) is pushed to this array to avoid duplicates. Populating this vector during execution is very simple and only requires 6 additional instructions during store operations. While resetting, the fuzzer can then just iterate through the previously populated vector and free the address ranges that were pushed to the vector.
26 | 
27 | #### Virtualized Files
28 | Many potential fuzz-targets read in their input from files stored on disk. This requires syscalls and disk access, which while fuzzing quickly gets extremely expensive. Instead, the fuzzer emulates all syscalls in user-space and stores files within the emulator as byte-arrays & a cursor into the current position within the file. This means that file operations now no longer require a context-swap into the kernel or disk access and are instead quickly emulated resulting in massive performance increases.
29 | 
30 | #### Glibc String Functions
31 | The standard Glibc implementation used on most Linux distributions makes use of specialized optimizations for string operations (eg. strlen/strcmp). These functions make sure that they are page-aligned when called and then read in 8 bytes at a time. This can easily go out of bounds (eg. when calling strlen on a 3-byte string), however since the access is page aligned the 8-byte access cannot trigger a page fault and thus does not lead to any security bugs. Since this fuzzer has byte-level permission checks though, this results in unnecessary crashes being recorded. My solution was to write up custom "safe" implementations for some of these functions in assembly, dynamically recognize libc-string functions within the target, and compile in my own version instead of the default ones. This defeats the problem without adding any performance overhead.
32 | 
33 | #### Future Work
34 | For RISC-V the current memory/permission model is totally sufficient, but if this fuzzer were to be
35 | used against x86_64 for example, issues would quickly come up. X86 uses a much larger memory space/area,
36 | so simply loading the entire space into memory is impractical and will cause many cache-related 
37 | performance slowdowns. In that light, I would like to eventually implement a page-table structure to only
38 | map in pages that are actually used to more easily support larger memory spaces.
39 | 


--------------------------------------------------------------------------------
/src/syscalls.rs:
--------------------------------------------------------------------------------
  1 | use crate::{
  2 |     mmu::Perms,
  3 |     emulator::{Emulator, Register, FileType::{self, STDOUT, STDERR, INVALID}, Fault},
  4 |     config::FUZZ_INPUT,
  5 | };
  6 | 
  7 | // Helper Structs for syscalls {{{
  8 | 
/// `stat` record that `fstat` fills in for the guest.
///
/// `fstat` builds one of these and (per its own comment) casts it to raw bytes, so the
/// `#[repr(C)]` layout — field order, widths and the explicit padding members — is part of the
/// contract and must not be rearranged.
///
/// NOTE(review): the field set looks like the RISC-V Linux `struct stat` — confirm against the
/// libc the target is built with.
#[repr(C)]
#[derive(Debug)]
struct Stat {
    st_dev:     u64,
    st_ino:     u64,
    st_mode:    u32,
    st_nlink:   u32,
    st_uid:     u32,
    st_gid:     u32,
    st_rdev:    u64,
    // Explicit padding member; `fstat` writes it as 0
    __pad1:     u64,

    // `fstat` sets this to the length of the current fuzz input
    st_size:    i64,
    st_blksize: i32,
    // Explicit padding member; `fstat` writes it as 0
    __pad2:     i32,

    // `fstat` sets this to the input length rounded up to 512-byte units
    st_blocks: i64,

    st_atime:     u64,
    st_atimensec: u64,
    st_mtime:     u64,
    st_mtimensec: u64,
    st_ctime:     u64,
    st_ctimensec: u64,

    __glibc_reserved: [i32; 2],
}
 36 | 
 37 | // }}}
 38 | 
 39 | 
 40 | /// Exit syscall: unconditionally reports `Fault::Exit` to the caller.
 41 | pub fn exit() -> Option<Fault> {
 42 |     let fault = Fault::Exit;
 43 |     Some(fault)
 44 | }
 43 | 
 44 | /// Emulated `fstat` syscall: A0 holds the file descriptor, A1 the guest address of the stat
 45 | /// buffer. On return A0 is 0 on success or `!0` (all bits set) on failure.
 46 | ///
 47 | /// Only the fuzz-input file is supported. Its size fields are derived from the current fuzz
 48 | /// input, every other field is a constant taken from qemu's output for the syscall.
 49 | ///
 50 | /// # Panics
 51 | /// Panics if the descriptor refers to a `FileType::OTHER` file or if writing the stat buffer
 52 | /// into guest memory fails.
 53 | pub fn fstat(emu: &mut Emulator) -> Option<Fault> {
 54 |     let fd      = emu.get_reg(Register::A0) as usize;
 55 |     let statbuf = emu.get_reg(Register::A1);
 56 | 
 57 |     // Classify the descriptor up front; an out-of-range fd is reported as an error
 58 |     let (is_fuzz_input, is_other) = match emu.fd_list.get(fd) {
 59 |         Some(file) => (file.ftype == FileType::FUZZINPUT, file.ftype == FileType::OTHER),
 60 |         None => {
 61 |             emu.set_reg(Register::A0, !0);
 62 |             return None;
 63 |         },
 64 |     };
 65 | 
 66 |     if !is_fuzz_input {
 67 |         // NOTE(review): `open` hands out `FileType::OTHER` descriptors, so this branch looks
 68 |         // reachable from guest code -- confirm whether the panic is intended
 69 |         if is_other {
 70 |             unreachable!();
 71 |         }
 72 | 
 73 |         // Every remaining file type is answered with an error
 74 |         emu.set_reg(Register::A0, !0);
 75 |         return None;
 76 |     }
 77 | 
 78 |     // qemu output for the syscall + correct input lengths
 79 |     let input_len = emu.fuzz_input.len() as i64;
 80 |     let stat = Stat {
 81 |         st_dev:           0x803,
 82 |         st_ino:           0x81889,
 83 |         st_mode:          0x81a4,
 84 |         st_nlink:         0x1,
 85 |         st_uid:           0x3e8,
 86 |         st_gid:           0x3e8,
 87 |         st_rdev:          0x0,
 88 |         __pad1:           0,
 89 |         st_size:          input_len,
 90 |         st_blksize:       0x1000,
 91 |         __pad2:           0,
 92 |         st_blocks:        (input_len + 511) / 512,
 93 |         st_atime:         0x5f0fe246,
 94 |         st_atimensec:     0,
 95 |         st_mtime:         0x5f0fe244,
 96 |         st_mtimensec:     0,
 97 |         st_ctime:         0x5f0fe244,
 98 |         st_ctimensec:     0,
 99 |         __glibc_reserved: [0, 0],
100 |     };
101 | 
102 |     // SAFETY: `stat` is a live, fully initialized `#[repr(C)]` struct made only of integer
103 |     // fields, and the slice covers exactly `size_of::<Stat>()` bytes of it while it is in scope.
104 |     let raw = unsafe {
105 |         core::slice::from_raw_parts(
106 |             (&stat as *const Stat).cast::<u8>(),
107 |             core::mem::size_of::<Stat>())
108 |     };
109 | 
110 |     // Copy the serialized structure into the guest buffer and signal success
111 |     emu.memory.write_mem(statbuf as usize, raw, raw.len()).unwrap();
112 |     emu.set_reg(Register::A0, 0);
113 | 
114 |     None
115 | }
 98 | 
 99 | /// Emulated `lseek` syscall: A0 = file descriptor, A1 = signed offset, A2 = whence.
100 | ///
101 | /// Only the fuzz-input file can be repositioned. The new cursor is clamped to
102 | /// `[0, fuzz_input.len()]`, stored on the descriptor and returned in A0. An out-of-range or
103 | /// closed descriptor, or an unknown `whence`, yields `!0` in A0.
104 | ///
105 | /// # Panics
106 | /// Panics (`unreachable!`) when called on an open descriptor that is not the fuzz input.
107 | pub fn lseek(emu: &mut Emulator) -> Option<Fault> {
108 |     let fd     = emu.get_reg(Register::A0) as usize;
109 |     let offset = emu.get_reg(Register::A1) as i64;
110 |     let whence = emu.get_reg(Register::A2) as i32;
111 | 
112 |     // `get` also rejects `fd == fd_list.len()`; a `len() < fd` comparison lets that value through
113 |     // to the indexing below, which would then panic out of bounds
114 |     let is_open = emu.fd_list.get(fd).map_or(false, |file| file.ftype != FileType::INVALID);
115 |     if !is_open {
116 |         emu.set_reg(Register::A0, !0);
117 |         return None;
118 |     }
119 | 
120 |     if emu.fd_list[fd].ftype == FileType::FUZZINPUT {
121 |         let cur = emu.fd_list[fd].cursor.unwrap();
122 |         let len = emu.fuzz_input.len() as i64;
123 | 
124 |         // The offset is chosen by the guest, so use saturating adds to avoid an i64 overflow; the
125 |         // result is clamped into the file right afterwards anyway
126 |         let new_pos: i64 = match whence {
127 |             0 => offset,                              // SEEK_SET
128 |             1 => (cur as i64).saturating_add(offset), // SEEK_CUR
129 |             2 => len.saturating_add(offset),          // SEEK_END
130 |             _ => {
131 |                 emu.set_reg(Register::A0, !0);
132 |                 return None;
133 |             }
134 |         };
135 | 
136 |         // Keep the cursor inside the fuzz input
137 |         let new_pos = core::cmp::max(0i64, new_pos);
138 |         let new_pos = core::cmp::min(new_pos, len) as usize;
139 | 
140 |         emu.fd_list[fd].cursor = Some(new_pos);
141 |         emu.set_reg(Register::A0, new_pos);
142 |     } else {
143 |         // NOTE(review): seeking on any other open file type is treated as impossible here, yet
144 |         // `open` can hand out such descriptors -- confirm this panic is intended
145 |         unreachable!();
146 |     }
147 |     None
148 | }
132 | 
133 | /// Emulated `open` syscall: A0 points at a NUL-terminated path in guest memory.
134 | ///
135 | /// The path (terminator included) is compared against the configured `FUZZ_INPUT` name. A match
136 | /// allocates a `FileType::FUZZINPUT` descriptor, anything else a `FileType::OTHER` one. The
137 | /// value returned by `alloc_file` is placed in A0. Flags (A1) and mode (A2) are read but unused.
138 | ///
139 | /// # Panics
140 | /// Panics if a path byte cannot be read from guest memory or if `FUZZ_INPUT` is not set.
141 | pub fn open(emu: &mut Emulator) -> Option<Fault> {
142 |     let filename = emu.get_reg(Register::A0) as usize;
143 |     let _flags   = emu.get_reg(Register::A1);
144 |     let _mode    = emu.get_reg(Register::A2);
145 | 
146 |     // Copy the path out of guest memory byte by byte, stopping after the NUL terminator (which is
147 |     // kept in the buffer)
148 |     let mut path: Vec<u8> = Vec::new();
149 |     for offset in 0.. {
150 |         let byte: u8 = emu.memory.read_at(filename + offset, Perms::READ).unwrap();
151 |         path.push(byte);
152 |         if byte == 0 {
153 |             break;
154 |         }
155 |     }
156 | 
157 |     // Decide which kind of file the guest is opening before allocating the descriptor
158 |     let ftype = if path == FUZZ_INPUT.get().unwrap().as_bytes() {
159 |         FileType::FUZZINPUT
160 |     } else {
161 |         FileType::OTHER
162 |     };
163 |     let fd = emu.alloc_file(ftype);
164 | 
165 |     emu.set_reg(Register::A0, fd);
166 |     None
167 | }
159 | 
160 | /// Emulated `read` syscall: A0 = file descriptor, A1 = guest buffer, A2 = byte count.
161 | ///
162 | /// Reads from the fuzz-input file copy up to `count` bytes from the descriptor's cursor into the
163 | /// guest buffer, advance the cursor and return the number of bytes copied in A0. Reads from any
164 | /// other open file copy nothing and report `count`. An out-of-range or closed descriptor yields
165 | /// `!0` in A0.
166 | ///
167 | /// # Panics
168 | /// Panics if the fuzz input cannot be written into the guest buffer.
169 | pub fn read(emu: &mut Emulator) -> Option<Fault> {
170 |     let fd    = emu.get_reg(Register::A0) as usize;
171 |     let buf   = emu.get_reg(Register::A1);
172 |     let count = emu.get_reg(Register::A2);
173 | 
174 |     // A descriptor that does not exist or has already been closed is an error
175 |     let is_open = match emu.fd_list.get(fd) {
176 |         Some(file) => file.ftype != FileType::INVALID,
177 |         None => false,
178 |     };
179 |     if !is_open {
180 |         emu.set_reg(Register::A0, !0);
181 |         return None;
182 |     }
183 | 
184 |     // Any file other than the fuzz input: pretend the full read succeeded without copying data
185 |     if emu.fd_list[fd].ftype != FileType::FUZZINPUT {
186 |         emu.set_reg(Register::A0, count);
187 |         return None;
188 |     }
189 | 
190 |     // Fuzz input: hand out at most the bytes remaining behind the cursor
191 |     let start = emu.fd_list[fd].cursor.unwrap();
192 |     let len   = core::cmp::min(count, emu.fuzz_input.len() - start);
193 |     let end   = start + len;
194 | 
195 |     emu.memory.write_mem(buf, &emu.fuzz_input[start..end], len)
196 |         .expect("Error occured while trying to read in fuzz-input");
197 | 
198 |     emu.set_reg(Register::A0, len);
199 |     emu.fd_list[fd].cursor = Some(end);
200 | 
201 |     None
202 | }
190 | 
191 | /// Emulated `write` syscall: A0 = file descriptor, A1 = guest buffer, A2 = byte count.
192 | ///
193 | /// The data is discarded and `count` is reported back in A0 as if everything had been written.
194 | /// An out-of-range or closed descriptor yields `!0` in A0. Flipping `show_output` to `true`
195 | /// echoes stdout/stderr writes to the host terminal and panics on writes to any other file.
196 | pub fn write(emu: &mut Emulator) -> Option<Fault> {
197 |     let fd    = emu.get_reg(Register::A0) as usize;
198 |     let buf   = emu.get_reg(Register::A1);
199 |     let count = emu.get_reg(Register::A2);
200 | 
201 |     // A descriptor that does not exist or has already been closed is an error
202 |     let is_open = emu.fd_list.get(fd).map_or(false, |file| file.ftype != FileType::INVALID);
203 |     if !is_open {
204 |         emu.set_reg(Register::A0, !0);
205 |         return None;
206 |     }
207 | 
208 |     // Set to true if you wish to see the actual stdout output of this syscall
209 |     let show_output = false;
210 |     if show_output {
211 |         let is_console = {
212 |             let file = &emu.fd_list[fd];
213 |             file.ftype == STDOUT || file.ftype == STDERR
214 |         };
215 |         if !is_console {
216 |             panic!("Write to unsupported file occured");
217 |         }
218 | 
219 |         // Pull the bytes out of guest memory and print them, as text when they are valid UTF-8
220 |         let mut data = vec![0u8; count];
221 |         emu.memory.read_into(buf, &mut data, count, Perms::READ).unwrap();
222 | 
223 |         match std::str::from_utf8(&data) {
224 |             Ok(text) => print!("{}", text),
225 |             Err(_) => print!("{:?}", data),
226 |         }
227 |     }
228 | 
229 |     emu.set_reg(Register::A0, count);
230 |     None
231 | }
222 | 
223 | /// Emulated `brk` syscall. Only the query form (`A0 == 0`) is handled, and it answers with 0.
224 | ///
225 | /// # Panics
226 | /// Panics with "Not supporting brk" for any non-zero argument.
227 | pub fn brk(emu: &mut Emulator) -> Option<Fault> {
228 |     let requested = emu.get_reg(Register::A0);
229 |     if requested != 0 {
230 |         panic!("Not supporting brk");
231 |     }
232 | 
233 |     emu.set_reg(Register::A0, 0);
234 |     None
235 | }
232 | 
233 | /// Stubbed `gettimeofday` syscall: nothing is written to guest memory, A0 is simply set to 20.
234 | pub fn gettimeofday(emu: &mut Emulator) -> Option<Fault> {
235 |     let ret = 20;
236 |     emu.set_reg(Register::A0, ret);
237 |     None
238 | }
237 | 
238 | /// Emulated `close` syscall: marks the descriptor in A0 as `INVALID`.
239 | ///
240 | /// A0 is set to 0 when the descriptor index exists in `fd_list` and to `!0` otherwise.
241 | pub fn close(emu: &mut Emulator) -> Option<Fault> {
242 |     let fd = emu.get_reg(Register::A0) as usize;
243 | 
244 |     // Invalidate the slot if it exists and remember which status to report
245 |     let status = match emu.fd_list.get_mut(fd) {
246 |         Some(file) => {
247 |             file.ftype = INVALID;
248 |             0
249 |         },
250 |         None => !0,
251 |     };
252 | 
253 |     emu.set_reg(Register::A0, status);
254 |     None
255 | }
255 | 


--------------------------------------------------------------------------------
/docs/fuzzing.md:
--------------------------------------------------------------------------------
 1 | # Fuzzing Capabilities
 2 | 
 3 | #### Overview
 4 | This will probably be the most interesting aspect for most people looking to use this fuzzer. Here
 5 | I will describe the details of which features this fuzzer currently supports and their basic 
 6 | implementation details. 
 7 | 
 8 | #### Byte Level Permission Checks
 9 | While this is an extremely important part of why this fuzzer is so effective, this capability was already covered in the [memory_management.md](https://github.com/seal9055/sfuzz/tree/main/docs/memory_management.md) section, so I will not repeat the information here.
10 | 
11 | #### Coverage Tracking
12 | This fuzzer implements edge, block, and call-stack based coverage tracking. Coverage is currently being tracked in a very simple way. A bytemap is maintained to determine which edges/blocks have already been hit. At the beginning of each block, a fast hash is generated to index into the bytemap and check if the block/edge has already previously been hit. If it has, we just move on. If it is a new edge/block, however, the byte is set in the map, and the coverage counter is incremented to showcase that new coverage has been hit. For edge coverage, this hash consists of a quick xorshift hash, and for block-level coverage, the lower 24 bits of the address are just used.
13 | 
14 | Callstack-based coverage tracking adds an additional field to the fuzzer. An evolving hash that is maintained throughout an entire input, and has new edges xor'd in. While this is far from perfect, it does allow the fuzzer to reason about what path has been taken to reach the current edge and track new coverage for new paths.
15 | 
16 | By default, the fuzzer uses edge coverage because call-stack coverage can quickly snowball out of control in some cases, but against some targets it may be worth considering, especially since some [papers](https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf) have rated it higher than basic edge coverage against many targets.
17 | 
18 | #### Compare Coverage Tracking
19 | Coverage tracking already greatly improves fuzzers and allows them to reach much more complex code paths. Unfortunately, it does not help fuzzers with multi-byte comparisons (eg. `if (buf[3] == 0xdeadbeef)`), since statements such as these are handled in a single cmp instruction that isn't instrumented by basic coverage tracking. This is where CmpCov comes in. At runtime, branch-if-equal & branch-if-not-equal instructions are replaced with several separate single-byte comparisons. This results in a \~5-15% performance decrease (depending on the amount of cmp's within the target), but greatly improves the fuzzer's ability to find magic values without having to brute-force 2^32+ values, since it can now instrument these comparisons with coverage tracking instructions. CmpCov is enabled by default.
20 | 
21 | #### Coverage Guided Fuzzing
22 | This is done in pretty much the simplest way possible. Whenever a case finds new coverage, the case is added to the corpus and mutated off of for future fuzz cases. This includes both code coverage and compare coverage and makes the fuzzer much better at traversing targets.
23 | 
24 | #### Persistent-mode/Snapshot Fuzzing
25 | This is mostly a performance optimization, but since it is very specific to fuzzing I figured this category probably suits it best. The main reason for this optimization is that the standard `fork() + execve()` routine used by basic fuzzers is slow and does not scale, thus making room for improved case reset techniques.
26 | 
27 | One initial improvement AFL++ uses is the forkserver optimization, where new processes are cloned from a copy-on-write master that is kept in the original state. This reduces a lot of the overhead, but still requires the expensive fork() syscall. A better alternative is to instrument the api with a custom-written, single-process loop, therefore removing all of the `execve()/fork()` overhead. AFL mostly automates this, but still requires the user to write a small harness to designate where this loop should be positioned.
28 | 
29 | In the case of SFUZZ, since the fuzzer is running in an emulator, this becomes almost trivial. We can specify a specific address as the snapshot starting point, run the JIT up to that point, and take a snapshot of the entire register/memory state. All future fuzz-cases can now use this snapshot as their starting location instead of having to restart the process from the very beginning. This can be used to avoid a lot of setup that is disconnected from our fuzzing input and thus greatly speed up the fuzzing process. This becomes especially useful when dealing with larger targets, for which we can take a snapshot right before the interesting function, set an exit point right afterward, and then fuzz this function in a very tight/fast loop.
30 | 
31 | This can oftentimes easily get at least a 30-50% speed improvement against simple targets, and even bigger speed improvements against larger targets where more code can be cut out of the snapshot, which makes it almost always worth it to go through the manual effort of choosing a good address to snapshot at.
32 | 
33 | To enable snapshot-based fuzzing in SFUZZ, simply add the `-s` flag followed by the address at which you wish to take the snapshot, e.g. `-s 0x1234`.
34 | 
35 | #### Seed Scheduling
36 | Seed scheduling is implemented based on power schedules, with the inputs sitting in a queue that is iterated through. Before an input is executed, its energy is calculated. This determines how often an input will be executed (20000 to 150000 times based on its energy). The energy is kept within a reasonable range to make sure no cases are completely left out, and that a case executes often enough that the cost of this seed scheduling does not matter. This simply gives slight priority to favored cases.
37 | 
38 | The energy of a case is determined based on the input size (in bytes), execution time (measured in instructions executed), how frequently the case has found new coverage, and how often this case has found a crash. Small sizes/execution times are favored, with new coverage providing additional bonus points. While crashes are good, a case may lie in a situation where it always results in the exact same crash, in which case its energy is slowly lowered.
39 | 
40 | For the most part, I don't think this strategy matters too much (at least in a generic sense without considering the target), so I decided to only slightly favor "better" cases over others since especially at the start of a fuzzing campaign with an unfamiliar target, it is very hard to generalize which metrics are actually important. Slower inputs could end up finding many more new code paths than faster inputs and so on.
41 | 
42 | #### Mutation Strategies
43 | The fuzzer currently has 9 different mutation strategies that are listed and described below.
44 | 
45 | - ByteReplace - This strategy replaces 1-128 bytes in the input with random other bytes. Smaller corruptions are 
46 |   heavily favored over larger corruptions to avoid potentially destroying a good initial corpus.
47 | - Bitflip - This strategy flips 1-128 random bits in the target. Smaller corruptions are once again heavily favored.
48 | - MagicNum - This strategy maintains a small dictionary of hardcoded useful values. These are 1-8 byte values that lie on the boundaries of integer over/underflows, and can thus frequently find integer bugs.
49 | - SimpleArithmetic - This strategy simply adds or subtracts a random value from 1-32 to 0-128 random bytes in 
50 |   the fuzzcase. This technique has proven to be very useful in the past and can often find integer bugs or corrupt 
51 |   length fields.
52 | - RemoveBlock - This strategy removes a random block from the input. It is more expensive than many 
53 |   of the other strategies.
54 | - DupBlock - This strategy duplicates a random block from the input. It is more expensive than many 
55 |   of the other strategies.
56 | - Resize - This strategy resizes the input. Decreasing the size simply truncates the input, while increasing the size
57 |   adds random bytes to the end.
58 | - Dictionary - This is an optional feature that is enabled with the `-d` flag. It allows a dictionary file
59 |   to be passed in, that the mutator will then use to randomly splice entries from the dictionary into the target.
60 | - Havoc - This strategy is invoked every 100 cases and simply combines multiple of the above listed strategies 
61 |   together for a single case.
62 | 
63 | These mutation strategies are weighted. By default the cheaper/less destructive mutation strategies are favored (ByteReplace, Bitflip, MagicNum, SimpleArithmetic), while the more expensive/more destructive strategies are prioritized a lot less (RemoveBlock, DupBlock, Resize, Dictionary).
64 | 
65 | #### Crashes
66 | 
67 | Crashes are saved using a couple of different methods to differentiate between different crashes. The different crash causes are ReadFaults, WriteFaults, ExecFaults, OutOfBounds accesses, Timeouts, and various heap bugs. Timeouts occur when a fuzz case executes more instructions than the timeout allows. This is automatically calibrated using the initial seeds, but can also be manually overridden using the `-t` flag.
68 | 
69 | Unique crashes are based on the type of crash and the address that the crash occurred at. Only unique crashes are saved off. 


--------------------------------------------------------------------------------
/src/pretty_printing.rs:
--------------------------------------------------------------------------------
  1 | use crate::{
  2 |     config::{COV_METHOD, NO_PERM_CHECKS, SNAPSHOT_ADDR, NUM_THREADS, DEBUG_PRINT, CMP_COV, 
  3 |         RUN_CASES, SEND_REMOTE},
  4 |     Statistics, Corpus,
  5 | };
  6 | 
  7 | use core::fmt;
  8 | use std::sync::Arc;
  9 | use std::time::Duration;
 10 | use std::collections::HashMap;
 11 | 
 12 | use console::Term;
 13 | use num_format::{Locale, ToFormattedString};
 14 | 
 15 | /// Different log-types that can be used to print out messages in different colors
 16 | pub enum LogType {
 17 |     /// Informational message; `log` prefixes it with a blue `[-]`
 18 |     Neutral = 0,
 19 |     /// Positive outcome; `log` prefixes it with a green `[+]`
 20 |     Success = 1,
 21 |     /// Problem report; `log` prefixes it with a red `[!]`
 22 |     Failure = 2,
 23 | }
 21 | 
 22 | /// Wrapper whose `Display` output is the inner string surrounded by the ANSI escape codes for
 23 | /// green text and for resetting the color afterwards
 24 | pub struct Green(pub &'static str);
 25 | impl fmt::Display for Green {
 26 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 27 |         // Color on, text, color reset, emitted in a single write
 28 |         write!(f, "\x1B[32m{}\x1B[0m", self.0)
 29 |     }
 30 | }
 32 | 
 33 | /// Wrapper whose `Display` output is the inner string surrounded by the ANSI escape codes for
 34 | /// blue text and for resetting the color afterwards
 35 | pub struct Blue(pub &'static str);
 36 | impl fmt::Display for Blue {
 37 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 38 |         // Color on, text, color reset, emitted in a single write
 39 |         write!(f, "\x1B[34m{}\x1B[0m", self.0)
 40 |     }
 41 | }
 43 | 
 44 | /// Wrapper whose `Display` output is the inner string surrounded by the ANSI escape codes for
 45 | /// red text and for resetting the color afterwards
 46 | pub struct Red(pub &'static str);
 47 | impl fmt::Display for Red {
 48 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 49 |         // Color on, text, color reset, emitted in a single write
 50 |         write!(f, "\x1B[31m{}\x1B[0m", self.0)
 51 |     }
 52 | }
 54 | 
 55 | /// Print `msg` to stdout behind a colored tag chosen by `color`: blue `[-]` for neutral, green
 56 | /// `[+]` for success and red `[!]` for failure. Nothing is printed unless `DEBUG_PRINT` is set
 57 | /// to true.
 58 | pub fn log(color: LogType, msg: &str) {
 59 |     // Logging is only active in debug-print mode
 60 |     if !*DEBUG_PRINT.get().unwrap() {
 61 |         return;
 62 |     }
 63 | 
 64 |     let tag = match color {
 65 |         LogType::Neutral => Blue("[-]").to_string(),
 66 |         LogType::Success => Green("[+]").to_string(),
 67 |         LogType::Failure => Red("[!]").to_string(),
 68 |     };
 69 |     println!("{} {}", tag, msg);
 70 | }
 71 | 
 72 | /// Render the full-screen statistics dashboard onto `term`.
 73 | ///
 74 | /// * `stats` - counters accumulated so far
 75 | /// * `elapsed_time` - run time in seconds
 76 | /// * `timeout` - value shown in the config panel as "Timeout"
 77 | /// * `corpus` - shared corpus, only its number of inputs is displayed
 78 | /// * `last_cov` - subtracted from `elapsed_time` to obtain the "time since last cov" field
 79 | ///
 80 | /// Per-second and per-case rates are shown as 0 while their divisor is still zero (first second
 81 | /// of the run, no completed fuzz case yet) instead of panicking on a division by zero. If the
 82 | /// terminal is too small, only a resize hint is printed.
 83 | ///
 84 | /// # Panics
 85 | /// Panics if writing to the terminal fails or if one of the config globals has not been set.
 86 | fn pretty_stats(term: &Term, stats: &Statistics, elapsed_time: f64, timeout: u64, corpus: 
 87 |                 &Arc<Corpus>, last_cov: f64) {
 88 | 
 89 |     term.clear_screen().unwrap();
 90 |     term.move_cursor_to(0, 2).unwrap();
 91 | 
 92 |     // Print out error message instead of standard output if the terminal size is too small to
 93 |     // properly display output
 94 |     let (x, y) = term.size();
 95 |     if x < 25 || y < 95 {
 96 |         term.write_line(&format!("Increase terminal size to 25:95 (Cur: {}:{})", x, y)).unwrap();
 97 |         term.flush().unwrap();
 98 |         return;
 99 |     }
100 | 
101 |     // Rates derived from the raw counters; a zero divisor yields 0 rather than a panic
102 |     let cases_per_sec = stats.total_cases.checked_div(elapsed_time as usize).unwrap_or(0);
103 |     let mil_instrs_per_sec = (stats.instr_count / 1_000_000)
104 |         .checked_div(elapsed_time as u64)
105 |         .unwrap_or(0);
106 |     let instrs_per_case = stats.instr_count.checked_div(stats.total_cases as u64).unwrap_or(0);
107 | 
108 |     term.write_line(
109 |         &format!("{}", Green("\t\t[ SFUZZ - https://github.com/seal9055/sfuzz ]\n"))
110 |     ).unwrap();
111 | 
112 |     let duration    = Duration::from_secs_f64(elapsed_time);
113 |     let elapsed_sec = duration.as_secs() % 60;
114 |     let elapsed_min = (duration.as_secs() / 60) % 60;
115 |     let elapsed_hr  = (duration.as_secs() / 60) / 60;
116 | 
117 |     // Progress information
118 |     term.write_line(
119 |         &format!("\t{}\n\t   Run time: {:02}:{:02}:{:02}\n\t   Total fuzz cases: {:12} \
120 |                 \n\t   Instrs execd [mil]: {:12}",
121 |         Blue("Progression"),
122 |         elapsed_hr, elapsed_min, elapsed_sec,
123 |         stats.total_cases.to_formatted_string(&Locale::en),
124 |         (stats.instr_count / 1_000_000).to_formatted_string(&Locale::en),
125 |         )
126 |     ).unwrap();
127 | 
128 |     // Results
129 |     term.move_cursor_to(54, 4).unwrap();
130 |     term.write_line(&format!("{}", Blue("Overall Results"))).unwrap();
131 |     term.move_cursor_to(54, 5).unwrap();
132 |     term.write_line(&format!("   Unique Crashes: {}", stats.ucrashes)).unwrap();
133 |     term.move_cursor_to(54, 6).unwrap();
134 |     term.write_line(&format!("   Crashes: \t{}", stats.crashes.to_formatted_string(&Locale::en)))
135 |         .unwrap();
136 |     term.move_cursor_to(54, 7).unwrap();
137 |     term.write_line(&format!("   Timeouts: \t{}", stats.timeouts.to_formatted_string(&Locale::en)))
138 |         .unwrap();
139 | 
140 |     // Performance numbers
141 |     term.move_cursor_down(2).unwrap();
142 |     term.write_line(
143 |         &format!("\t{}\n\t   Fuzz cases per second: {:12}\n\t   \
144 |                 Instrs per second [mil]: {:12}",
145 |         Blue("Performance measurements"),
146 |         cases_per_sec.to_formatted_string(&Locale::en),
147 |         mil_instrs_per_sec.to_formatted_string(&Locale::en),
148 |         )
149 |     ).unwrap();
150 | 
151 |     // `Duration::from_secs_f64` panics on negative input, so never let the difference drop
152 |     // below zero
153 |     let duration  = Duration::from_secs_f64((elapsed_time - last_cov).max(0.0));
154 |     let cov_sec   = duration.as_secs() % 60;
155 |     let cov_min   = (duration.as_secs() / 60) % 60;
156 |     let cov_hr    = (duration.as_secs() / 60) / 60;
157 | 
158 |     // Coverage
159 |     term.move_cursor_to(54, 10).unwrap();
160 |     term.write_line(&format!("{}", Blue("Coverage"))).unwrap();
161 |     term.move_cursor_to(54, 11).unwrap();
162 |     term.write_line(&format!("   Coverage: {}", stats.coverage)).unwrap();
163 |     term.move_cursor_to(54, 12).unwrap();
164 |     term.write_line(&format!("   CmpCov: {}", stats.cmpcov)).unwrap();
165 |     term.move_cursor_to(54, 13).unwrap();
166 |     term.write_line(&format!("   Time since last cov: {:02}:{:02}:{:02}",
167 |                     cov_hr, cov_min, cov_sec)).unwrap();
168 | 
169 |     let run_cases = match RUN_CASES.get().unwrap() {
170 |         Some(v) => format!("{}", v),
171 |         None => "No Limit".to_string(),
172 |     };
173 | 
174 |     // Config information
175 |     term.move_cursor_down(1).unwrap();
176 |     term.write_line(
177 |         &format!("\t{}\n\t   Num Threads: {}\n\t   Coverage type: {:?}\n\t   \
178 |         Snapshots enabled: {}\n\t   ASAN: {}\n\t   Timeout: {}\n\t   CmpCov: {}\n\t   Max runs: {}",
179 |         Blue("Config"),
180 |         NUM_THREADS.get().unwrap(),
181 |         COV_METHOD.get().unwrap(),
182 |         SNAPSHOT_ADDR.get().unwrap().is_some(),
183 |         !NO_PERM_CHECKS.get().unwrap(),
184 |         timeout.to_formatted_string(&Locale::en),
185 |         CMP_COV.get().unwrap(),
186 |         run_cases,
187 |     )).unwrap();
188 | 
189 |     // Corpus stats
190 |     term.move_cursor_to(54, 15).unwrap();
191 |     term.write_line(&format!("{}", Blue("Corpus"))).unwrap();
192 |     term.move_cursor_to(54, 16).unwrap();
193 |     term.write_line(&format!("   Num Entries: {}", corpus.inputs.read().len())).unwrap();
194 |     term.move_cursor_to(54, 17).unwrap();
195 |     term.write_line(&format!("   Avg Instrs per case: {}", instrs_per_case)).unwrap();
196 | 
197 |     // Flush buffer and write to terminal
198 |     term.flush().unwrap();
199 | }
181 | 
182 | /// Simple debug view of statistics: prints one two-line record to stdout.
183 | ///
184 | /// `elapsed_time` is the run time in seconds. The per-second rates are reported as 0 while less
185 | /// than one full second has elapsed instead of panicking on a division by zero.
186 | fn basic_stats(stats: &Statistics, elapsed_time: f64) {
187 |     // Whole elapsed seconds are the divisor; guard against the zero of the first second
188 |     let cases_per_sec  = stats.total_cases.checked_div(elapsed_time as usize).unwrap_or(0);
189 |     let instrs_per_sec = stats.instr_count.checked_div(elapsed_time as u64).unwrap_or(0);
190 | 
191 |     println!(
192 |         "[{:8.2}] fuzz cases: {:12} : fcps: {:8} : coverage: {:6} : crashes: {:8} \
193 |         \n\t   instr_cnt: {:13} : ips: {:9} : ucrashes: {:6} : timeouts: {:8}",
194 |         elapsed_time,
195 |         stats.total_cases.to_formatted_string(&Locale::en),
196 |         cases_per_sec.to_formatted_string(&Locale::en),
197 |         stats.coverage,
198 |         stats.crashes,
199 |         stats.instr_count.to_formatted_string(&Locale::en),
200 |         instrs_per_sec.to_formatted_string(&Locale::en),
201 |         stats.ucrashes,
202 |         stats.timeouts
203 |     );
204 | }
198 | 
199 | /// Build a JSON stats payload and issue a POST to `http://<ip>:<port>/stats`.
200 | ///
201 | /// All counters are sent as `usize`. `exec_time` is the whole number of elapsed seconds
202 | /// multiplied by 1000. The outcome of the request is ignored.
203 | ///
204 | /// NOTE(review): if `reqwest::Client` is the async client, `send()` only creates a future that
205 | /// is dropped here without being polled, so the request may never go out -- confirm.
206 | fn send_remote(ip: String, port: usize, stats: &Statistics, elapsed_time: f64) {
207 |     let request_url = format!("http://{}:{}/stats", ip, port);
208 |     let client = reqwest::Client::new();
209 | 
210 |     // Field name -> value pairs that make up the JSON body
211 |     let payload: HashMap<&str, usize> = [
212 |         ("total_cases", stats.total_cases),
213 |         ("crashes", stats.crashes),
214 |         ("ucrashes", stats.ucrashes),
215 |         ("coverage", stats.coverage),
216 |         ("cmpcov", stats.cmpcov),
217 |         ("instr_count", stats.instr_count as usize),
218 |         ("timeouts", stats.timeouts as usize),
219 |         ("exec_time", elapsed_time as usize * 1_000),
220 |     ].iter().cloned().collect();
221 | 
222 |     let _ = client.post(request_url).json(&payload).send();
223 | }
215 | 
216 | /// Entry point for statistics output: prints the plain debug view when `DEBUG_PRINT` is set
217 | /// and the full-screen view otherwise, then forwards the stats to a remote endpoint if
218 | /// `SEND_REMOTE` holds an `ip:port` string.
219 | ///
220 | /// # Panics
221 | /// Panics if the remote string lacks an ip or port, if the port is not a number, or if it is
222 | /// not below 65536.
223 | pub fn print_stats(term: &Term, stats: &Statistics, elapsed_time: f64, timeout: u64, 
224 |                    corpus: &Arc<Corpus>, last_cov: f64) {
225 |     let debug_view = *DEBUG_PRINT.get().unwrap();
226 |     if debug_view {
227 |         basic_stats(stats, elapsed_time);
228 |     } else {
229 |         pretty_stats(term, stats, elapsed_time, timeout, corpus, last_cov);
230 |     }
231 | 
232 |     // Remote reporting is optional; without a configured endpoint there is nothing left to do
233 |     let endpoint = match SEND_REMOTE.get().unwrap() {
234 |         Some(info) => info,
235 |         None => return,
236 |     };
237 | 
238 |     // Split "ip:port" into its two components
239 |     let mut parts = endpoint.split(":");
240 |     let ip = parts.next().expect("Given ip in incorrect format").to_string();
241 |     let port = parts.next()
242 |         .expect("Given port in incorrect format")
243 |         .parse::<usize>()
244 |         .expect("Given port in incorrect format");
245 | 
246 |     assert!(port < 65536, "Invalid port number");
247 |     send_remote(ip, port, stats, elapsed_time);
248 | }
235 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
  1 | #![feature(once_cell)]
  2 | 
  3 | use sfuzz::{
  4 |     mmu::Perms,
  5 |     emulator::{Emulator, Register, Fault, ExitType},
  6 |     jit::{Jit, LibFuncs},
  7 |     pretty_printing::{print_stats, log, LogType},
  8 |     Input, Corpus, Statistics, error_exit, load_elf_segments, worker, snapshot, calibrate_seeds,
  9 |     config::{handle_cli, Cli, SNAPSHOT_ADDR, OVERRIDE_TIMEOUT, NUM_THREADS, MAX_GUEST_ADDR, 
 10 |         RUN_CASES},
 11 | };
 12 | use std::thread;
 13 | use std::sync::{Arc, Mutex};
 14 | use std::sync::mpsc::{self, Receiver, Sender};
 15 | use std::time::{Duration, Instant};
 16 | 
 17 | use byteorder::{LittleEndian, WriteBytesExt};
 18 | use rustc_hash::FxHashMap;
 19 | use console::Term;
 20 | use clap::Parser;
 21 | 
 22 | /// Replacement for the target's malloc that allocates through sfuzz's mmu.
 23 | ///
 24 | /// The requested size is taken from A1. On success the new readable and writable allocation's
 25 | /// address is placed in A0 and execution resumes at the return address held in Ra. A failed
 26 | /// allocation is reported as `Fault::OOM`.
 27 | fn malloc_hook(emu: &mut Emulator) -> Result<(), Fault> {
 28 |     let alloc_size = emu.get_reg(Register::A1);
 29 | 
 30 |     match emu.memory.allocate(alloc_size, Perms::READ | Perms::WRITE) {
 31 |         None => Err(Fault::OOM),
 32 |         Some(addr) => {
 33 |             // Hand back the pointer and jump straight to the caller
 34 |             emu.set_reg(Register::A0, addr);
 35 |             let ret_addr = emu.get_reg(Register::Ra);
 36 |             emu.set_reg(Register::Pc, ret_addr);
 37 |             Ok(())
 38 |         },
 39 |     }
 40 | }
 34 | 
 35 | /// Hook that makes use of sfuzz's mmu to perform a memory safe calloc operation, pretty much same
 36 | /// as malloc apart from how the size is calculated.
 37 | ///
 38 | /// The element count is read from A1 and the element size from A2. On success the allocation's
 39 | /// address is placed in A0 and execution resumes at the return address held in Ra.
 40 | ///
 41 | /// # Errors
 42 | /// Returns `Fault::OOM` if `nmemb * size` overflows or if the allocation itself fails.
 43 | fn calloc_hook(emu: &mut Emulator) -> Result<(), Fault> {
 44 |     let nmemb = emu.get_reg(Register::A1);
 45 |     let size  = emu.get_reg(Register::A2);
 46 | 
 47 |     // Both factors are controlled by the target; an overflowing product can never be satisfied,
 48 |     // so report it like any other failed allocation instead of panicking or wrapping around
 49 |     let alloc_size = size.checked_mul(nmemb).ok_or(Fault::OOM)?;
 50 | 
 51 |     let addr = emu.memory.allocate(alloc_size, Perms::READ | Perms::WRITE).ok_or(Fault::OOM)?;
 52 |     emu.set_reg(Register::A0, addr);
 53 |     emu.set_reg(Register::Pc, emu.get_reg(Register::Ra));
 54 |     Ok(())
 55 | }
 50 | 
 51 | /// Replacement for the target's free that releases memory through sfuzz's mmu.
 52 | ///
 53 | /// The pointer is taken from A1. Any fault reported by the mmu is propagated to the caller,
 54 | /// otherwise execution resumes at the return address held in Ra.
 55 | fn free_hook(emu: &mut Emulator) -> Result<(), Fault> {
 56 |     let target = emu.get_reg(Register::A1);
 57 |     emu.memory.free(target)?;
 58 | 
 59 |     // Skip the original function body by returning directly to the caller
 60 |     let ret_addr = emu.get_reg(Register::Ra);
 61 |     emu.set_reg(Register::Pc, ret_addr);
 62 |     Ok(())
 63 | }
 59 | 
 60 | /// Inserts various hooks into binary.
 61 | ///
 62 | /// Looks up each supported symbol in `sym_map` (symbol name -> address). The allocator entry
 63 | /// points `_free_r`, `_malloc_r` and `_calloc_r` are registered in `emu.hooks`, while `strlen`
 64 | /// and `strcmp` are registered in `emu.custom_lib` so that safe replacements are used. Every
 65 | /// lookup result is logged, and a missing symbol is simply skipped.
 66 | fn insert_hooks(sym_map: &FxHashMap<String, usize>, emu: &mut Emulator) {
 67 |     if let Some(addr) = sym_map.get("_free_r") {
 68 |         log(LogType::Success, "_free_r hooked");
 69 |         emu.hooks.insert(*addr, free_hook);
 70 |     } else {
 71 |         log(LogType::Neutral, "_free_r does not exist in target so it could not be hooked");
 72 |     }
 73 | 
 74 |     if let Some(addr) = sym_map.get("_malloc_r") {
 75 |         log(LogType::Success, "_malloc_r hooked");
 76 |         emu.hooks.insert(*addr, malloc_hook);
 77 |     } else {
 78 |         log(LogType::Neutral, "_malloc_r does not exist in target so it could not be hooked");
 79 |     }
 80 | 
 81 |     if let Some(addr) = sym_map.get("_calloc_r") {
 82 |         log(LogType::Success, "_calloc_r hooked");
 83 |         emu.hooks.insert(*addr, calloc_hook);
 84 |     } else {
 85 |         log(LogType::Neutral, "_calloc_r does not exist in target so it could not be hooked");
 86 |     }
 87 | 
 88 |     // Hooks for strlen and strcmp are required because the default libc variants go out of bounds.
 89 |     // This is not a security issue since the functions verify that everything is properly aligned,
 90 |     // but since this fuzzer notices byte level permission violations these are required.
 91 | 
 92 |     if let Some(addr) = sym_map.get("strlen") {
 93 |         log(LogType::Success, "strlen replaced with safe implementation");
 94 |         emu.custom_lib.insert(*addr, LibFuncs::STRLEN);
 95 |     } else {
 96 |         log(LogType::Neutral, "strlen does not exist in target so it could not be hooked");
 97 |     }
 98 | 
 99 |     if let Some(addr) = sym_map.get("strcmp") {
100 |         log(LogType::Success, "strcmp replaced with safe implementation");
101 |         emu.custom_lib.insert(*addr, LibFuncs::STRCMP);
102 |     } else {
103 |         log(LogType::Neutral, "strcmp does not exist in target so it could not be hooked");
104 |     }
105 | }
116 | 
/// Fuzzer entry point: sets up the root emulator's segments and stack before forking the emulator
/// into multiple threads so several emulators run at the same time.
///
/// In order, this parses the command line, loads the target's ELF segments, reads the seed corpus
/// from the input directory, builds the guest stack (argc/argv/envp/auxp), installs the libc
/// hooks, optionally takes a snapshot, calibrates the timeout and spawns the worker threads. The
/// main thread then loops over the `Statistics` messages sent by the workers, accumulates them
/// and periodically prints them.
///
/// I/O errors while clearing the terminal, reading the seed directory or serializing stack
/// values are returned to the caller through `?`.
fn main() -> std::io::Result<()> {
    // Thread-shared jit backing
    let jit = Arc::new(Jit::new(16 * 1024 * 1024));

    // Thread-shared mutex that is used to lock JIT-compilation
    let prevent_rc: Arc<Mutex<usize>> = Arc::new(Mutex::new(0));

    // Thread-shared structure that holds fuzz-inputs and coverage information
    let mut corpus: Corpus = Corpus::new(16*1024*1024);

    // Each thread gets its own forked emulator. The jit-cache is shared between them however
    let mut emu = Emulator::new(MAX_GUEST_ADDR, jit, prevent_rc);

    // Statistics structure. This is kept local to the main thread and updated via message passing
    // from the worker threads
    let mut stats = Statistics::default();

    // Messaging objects used to transfer statistics between worker threads and main thread
    let (tx, rx): (Sender<Statistics>, Receiver<Statistics>) = mpsc::channel();

    let term = Term::buffered_stdout();
    term.clear_screen()?;

    // Parse commandline-args and set config variables based on them
    let mut args = Cli::parse();
    handle_cli(&mut args);

    // Insert loadable segments into emulator address space and retrieve symbol table information
    let sym_map = load_elf_segments(&args.fuzzed_app[0], &mut emu).unwrap_or_else(||{
        error_exit("Unrecoverable error while loading elf segments");
    });

    // Initialize corpus with files from input directory
    let mut w = corpus.inputs.write();
    for filename in std::fs::read_dir(args.input_dir)? {
        let filename = filename?.path();
        let data = std::fs::read(filename)?;

        // Add the corpus input to the corpus
        w.push(Input::new(data, None));
    }
    if w.is_empty() { panic!("Please supply at least 1 initial seed"); }
    // Release the write guard before the corpus is used again further down
    drop(w);

    // Setup Stack: a 1 MiB read/write region with the stack pointer placed 8 bytes below its end
    // (assumes `allocate` returns the lowest address of the region - TODO confirm)
    let stack = emu.allocate(1024 * 1024, Perms::READ | Perms::WRITE)
        .expect("Error allocating stack");
    emu.set_reg(Register::Sp, (stack + (1024 * 1024)) - 8);

    // Setup arguments: every entry of `fuzzed_app` is copied into its own 64-byte guest allocation
    // NOTE(review): only `e.len()` bytes are written, so an argument longer than 64 bytes does not
    // fit its allocation, and NUL-termination depends on the string itself (the fuzz-input name
    // set up by `handle_cli` carries a trailing `\0`) - confirm how `allocate` initializes memory
    //let arguments = vec!["test_cases/harder_test\0".to_string(), "fuzz_input\0".to_string()];
    let argv: Vec<usize> = args.fuzzed_app.iter().map(|e| {
        let addr = emu.allocate(64, Perms::READ | Perms::WRITE)
            .expect("Allocating an argument failed");
        emu.memory.write_mem(addr, e.as_bytes(), e.len()).expect("Writing to argv[0] failed");
        addr
    }).collect();

    // Macro to push 64-bit integers onto the stack: decrements Sp by 8, writes the value in
    // little-endian byte order at the new Sp and stores the new Sp back into the register
    macro_rules! push {
        ($expr:expr) => {
            let sp = emu.get_reg(Register::Sp) - 8;
            let mut wtr = vec![];
            wtr.write_u64::<LittleEndian>($expr as u64)?;
            emu.memory.write_mem(sp, &wtr, 8).unwrap();
            emu.set_reg(Register::Sp, sp);
        }
    }

    // Setup argc, argv & envp. Values are pushed in reverse so that argc ends up at the lowest
    // address, followed by the argv pointers in their original order
    push!(0u64);            // Auxp
    push!(0u64);            // Envp
    push!(0u64);            // Null-terminate Argv
    for arg in argv.iter().rev() {
        push!(*arg);
    }
    push!(argv.len());    // Argc

    // Insert various hooks into binary
    insert_hooks(&sym_map, &mut emu);

    // Setup snapshot fuzzing at a point before the fuzz-input is read in
    if let Some(addr) = SNAPSHOT_ADDR.get().unwrap() {
        println!("Activated snapshot-based fuzzing");

        // Insert snapshot fuzzer exit condition
        emu.exit_conds.insert(*addr, ExitType::Snapshot);

        // Snapshot the emulator
        snapshot(&mut emu, &corpus);
    }

    // Calibrate the emulator for the timeout.
    // Alternatively configs can be used to override automatically determined timeout
    emu.timeout = calibrate_seeds(&mut emu, &corpus);
    if let Some(v) = OVERRIDE_TIMEOUT.get().unwrap() {
        emu.timeout = *v;
    }

    // Reset coverage collected during initial calibration so it is in a default state once
    // fuzzing actually starts. This also removes the coverage generated while capturing the
    // initial snapshot
    corpus.reset_coverage();

    let emu = Arc::new(emu);
    let corpus = Arc::new(corpus);

    // Spawn worker threads to do the actual fuzzing. Each worker receives its own fork of the
    // root emulator plus clones of the corpus handle and the statistics sender
    for thr_id in 0..*NUM_THREADS.get().unwrap() {
        let emu_cp = emu.fork();
        let corpus = corpus.clone();
        let tx = tx.clone();

        thread::spawn(move || worker(thr_id, emu_cp, corpus, tx));
    }

    // Continuous statistic tracking via message passing in main thread
    let start = Instant::now();
    let mut last_time = Instant::now();
    // Elapsed time (in seconds since `start`) at which new coverage was last reported
    let mut last_cov_event: f64 = 0.0;

    // Sleep for short duration on startup before printing statistics, otherwise elapsed time might
    // be 0, leading to a crash while printing statistics
    thread::sleep(Duration::from_millis(1000));

    // Update stats structure whenever a thread sends a new message
    for received in rx {
        let elapsed_time = start.elapsed().as_secs_f64();


        // Check if we got new coverage
        if received.coverage != 0 || received.cmpcov != 0 {
            last_cov_event = elapsed_time;
        }

        // Messages carry increments, so they are summed into the running totals
        stats.coverage    += received.coverage;
        stats.cmpcov      += received.cmpcov;
        stats.total_cases += received.total_cases;
        stats.crashes     += received.crashes;
        stats.ucrashes    += received.ucrashes;
        stats.instr_count += received.instr_count;
        stats.timeouts    += received.timeouts;

        // Print out updated statistics once at least 500 ms have passed since the last print
        if last_time.elapsed() >= Duration::from_millis(500) {
            print_stats(&term, &stats, elapsed_time, emu.timeout, &corpus, last_cov_event);
            last_time = Instant::now();
        }

        // Stop once the user-requested number of cases has been executed
        if let Some(max_cases) = RUN_CASES.get().unwrap() {
            if stats.total_cases >= *max_cases {
                error_exit("Fuzzer reached specified maximum number of total cases");

            }
        }
    }

    Ok(())
}
278 | 


--------------------------------------------------------------------------------
/src/config.rs:
--------------------------------------------------------------------------------
  1 | use crate::error_exit;
  2 | 
  3 | use std::sync::OnceLock;
  4 | 
  5 | use clap::Parser;
  6 | use parse_int::parse;
  7 | 
/// Method used to track coverage. Set once by `handle_cli` from the `-c` option to edge, block or
/// call-stack based coverage
pub static COV_METHOD: OnceLock<CovMethod> = OnceLock::new();

/// Address at which the fuzzer attempts to create a snapshot once reached, `None` if snapshot
/// fuzzing was not requested
pub static SNAPSHOT_ADDR: OnceLock<Option<usize>> = OnceLock::new();

/// Number of worker threads to run the fuzzer with
pub static NUM_THREADS: OnceLock<usize> = OnceLock::new();

/// Path to directory to which fuzzer-outputs are saved
pub static OUTPUT_DIR: OnceLock<String> = OnceLock::new();

/// File that contains the user-supplied dictionary, `None` if no dictionary was supplied
pub static DICT_FILE: OnceLock<Option<String>> = OnceLock::new();

/// Name of the fuzz-input file that is passed to the target in place of `@@`. The stored string
/// ends in a `\0` byte
pub static FUZZ_INPUT: OnceLock<String> = OnceLock::new();

/// Toggle-able permission checks. Should be left on, except for very special cases/debugging
pub static NO_PERM_CHECKS: OnceLock<bool> = OnceLock::new();

/// Additional information is printed out, alongside rolling statistics. Some parts of this only
/// work while running single-threaded
pub static DEBUG_PRINT: OnceLock<bool> = OnceLock::new();

/// In addition to the default printouts, the fuzzer will now also send the data to a remote server.
/// Implemented to interact with api of "https://github.com/rsalz47/cs326-final-gimel"
pub static SEND_REMOTE: OnceLock<Option<String>> = OnceLock::new();

/// Separates branch-if-equal comparisons into multiple separate compares that benefit from
/// coverage tracking so larger magic numbers can still be found through fuzzing
pub static CMP_COV: OnceLock<bool> = OnceLock::new();

/// Manually override the automatically calibrated timeout
pub static OVERRIDE_TIMEOUT: OnceLock<Option<u64>> = OnceLock::new();

/// Collect a full register trace of program execution, for large programs, it can take several
/// hours to write out a single case, only enable when debugging the JIT. Only works when fuzzer is
/// being run single-threaded
pub static FULL_TRACE: OnceLock<bool> = OnceLock::new();

/// Amount of cases that will be run before the fuzzer automatically shuts down, `None` for no
/// limit
pub static RUN_CASES: OnceLock<Option<usize>> = OnceLock::new();

/// Size of memory space allocated for each thread's virtual address space (64 MiB)
pub const MAX_GUEST_ADDR: usize = 64 * 1024 * 1024;
 54 | 
 55 | #[derive(Eq, PartialEq, Copy, Clone, Debug)]
 56 | pub enum CovMethod {
 57 |     /// Don't track coverage
 58 |     None,
 59 | 
 60 |     /// Track block level coverage 
 61 |     Block,
 62 | 
 63 |     /// Track edge level coverage
 64 |     Edge,
 65 | 
 66 |     /// Track edge level coverage alongside a call-stack hash to get more fine-grained coverage
 67 |     /// results
 68 |     CallStack,
 69 | }
 70 | 
// NOTE(review): this struct is parsed through clap's derive API. The `///` comments on the struct
// and its fields are presumably picked up by clap as help text, so editing them changes what the
// program prints for its help output - confirm against the clap version in use. Everything after
// `--` on the command line is collected into `fuzzed_app` (`last = true`).
/// Used by clap to parse command-line arguments
#[derive(Debug, Parser)]
#[clap(author = "seal9055", version, about = "Coverage-guided emulation based fuzzer")]
#[clap(override_usage = "sfuzz [OPTION] -- /path/to/fuzzed_app [ ... ] (use `@@` to specify \
    position of fuzz-input in target-argv)\n\n    ex: sfuzz -i in -o out -n 16 -- \
    ./test_cases/test @@")]
pub struct Cli {
    #[clap(short, value_name = "DIR", forbid_empty_values = true, display_order = 1)]
    /// - Input directory that should contain the initial seed files
    pub input_dir: String,

    #[clap(short, value_name = "DIR", forbid_empty_values = true, display_order = 2)]
    /// - Output directory that will be used to eg. save crashes
    pub output_dir: String,

    #[clap(short = 'V', takes_value = false)]
    /// - Print version information
    pub version: bool,

    #[clap(short = 'h', takes_value = false)]
    /// - Print help information
    pub help: bool,

    #[clap(default_value_t=1, short, help_heading = "CONFIG")]
    /// - The number of threads to run this fuzzer with
    pub num_threads: usize,

    #[clap(short = 'p', help_heading = "CONFIG", takes_value = false)]
    /// - Disables permission checking, highly discouraged since it will cause the fuzzer itself to
    /// segfault when the target crashes due to being run in an emulator
    pub no_perm_checks: bool,

    #[clap(short = 'C', help_heading = "CONFIG", takes_value = false)]
    /// - Disables CmpCov, results in slight performance increase, but makes it almost impossible
    /// for the fuzzer to get past large magic value comparisons
    pub no_cmp_cov: bool,

    #[clap(short = 'e', help_heading = "CONFIG")]
    /// - File extension for the fuzz test input file if the target requires it
    pub extension: Option<String>,

    #[clap(short = 'D', help_heading = "CONFIG", takes_value = false)]
    /// - Enable a rolling debug-print and information on which functions are lifted instead of the
    /// default print-window
    pub debug_print: bool,

    #[clap(short = 'k', help_heading = "CONFIG", takes_value = true)]
    /// - In addition to displaying the data on the screen, also send it to a remote api. Provide 
    /// port and ip to send it to in format "127.0.0.1:9055". Implemented to interact with api of
    /// https://github.com/rsalz47/cs326-final-gimel
    pub send_remote: Option<String>,

    // Kept as a string here; `handle_cli` converts it to a number
    #[clap(short = 's', help_heading = "CONFIG")]
    /// - Take a snapshot of the target at specified address and launch future fuzz-cases off of this
    /// snapshot
    pub snapshot: Option<String>,

    #[clap(short = 't', help_heading = "CONFIG")]
    /// - Override the timeout that is otherwise dynamically set during calibration phase
    pub override_timeout: Option<u64>,

    // Kept as a string here; `handle_cli` converts it to a number
    #[clap(short = 'r', help_heading = "CONFIG")]
    /// - Optionally set the amount of cases to be run before the fuzzer shuts down
    pub run_cases: Option<String>,

    #[clap(short = 'f', help_heading = "CONFIG", takes_value = false)]
    /// - Collect a full register trace of program execution. Only enable while debugging, majorly
    /// slows down performance. Only works when fuzzer is run single-threaded
    pub full_trace: bool,

    #[clap(short = 'd', value_name = "DICT", help_heading = "CONFIG", forbid_empty_values = true)]
    /// - Optionally supply a new-line separated list of inputs that will be mutated into the 
    /// fuzz-inputs
    pub dictionary: Option<String>,

    #[clap(short = 'c', help_heading = "CONFIG", default_value = "edge")]
    /// - Coverage method, currently supports `edge`, `block`, and `call-stack` based coverage
    pub cov_method: String,

    #[clap(last = true)]
    /// The target to be fuzzed alongside its arguments
    pub fuzzed_app: Vec<String>,
}
154 | 
155 | /// Initialize configuration variables based on passed in commandline arguments, and verify that
156 | /// the user properly setup their fuzz-case
157 | pub fn handle_cli(args: &mut Cli) {
158 |     NUM_THREADS.set(args.num_threads).unwrap();
159 |     NO_PERM_CHECKS.set(args.no_perm_checks).unwrap();
160 |     DEBUG_PRINT.set(args.debug_print).unwrap();
161 |     SEND_REMOTE.set(args.send_remote.clone()).unwrap();
162 |     OVERRIDE_TIMEOUT.set(args.override_timeout).unwrap();
163 |     CMP_COV.set(!args.no_cmp_cov).unwrap();
164 | 
165 |     if args.fuzzed_app.is_empty() {
166 |         error_exit("You need to specify the target to be fuzzed");
167 |     }
168 | 
169 |     // Verify that the input and output directories are valid
170 |     if !std::path::Path::new(&args.input_dir).is_dir() {
171 |         error_exit("You need to specify a valid input directory");
172 |     }
173 |     if !std::path::Path::new(&args.output_dir).is_dir() {
174 |         error_exit("You need to specify a valid output directory");
175 |     }
176 |     OUTPUT_DIR.set(args.output_dir.clone()).unwrap();
177 | 
178 |     if let Some(dict) = &args.dictionary {
179 |         if !std::path::Path::new(&dict).is_file() {
180 |             error_exit("You need to specify a valid dictionary file");
181 |         }
182 |         DICT_FILE.set(Some(dict.to_string())).unwrap();
183 |     } else {
184 |         DICT_FILE.set(None).unwrap();
185 |     }
186 | 
187 |     // Create the directory to save crashes too
188 |     let mut crash_dir = args.output_dir.clone();
189 |     crash_dir.push_str("/crashes");
190 |     std::fs::create_dir_all(crash_dir).unwrap();
191 | 
192 |     // Set the fuzz-input. If the user specified an extension, add that too
193 |     FUZZ_INPUT.set(
194 |         if let Some(ext) = &args.extension {
195 |             format!("fuzz_input.{}\0", ext)
196 |         } else {
197 |             "fuzz_input\0".to_string()
198 |         }
199 |     ).unwrap();
200 | 
201 |     // Verify that the user supplied `@@` and use it to setup the fuzz-input's argv
202 |     let index = args.fuzzed_app.iter().position(|e| e == "@@").unwrap_or_else(|| {
203 |         error_exit("You need to specify how the fuzz-case input files should be passed in. This \
204 |                    can be done using the `@@` flag as shown in the example under `Usage`.");
205 |     });
206 |     args.fuzzed_app[index] = FUZZ_INPUT.get().unwrap().to_string();
207 | 
208 |     // Set snapshot address if requested
209 |     if let Some(ss) = &args.snapshot {
210 |         let num_repr = parse::<usize>(&ss).unwrap();
211 |         SNAPSHOT_ADDR.set(Some(num_repr)).unwrap();
212 |     } else {
213 |         SNAPSHOT_ADDR.set(None).unwrap();
214 |     }
215 | 
216 |     // Set max number of cases if requested
217 |     if let Some(runs) = &args.run_cases {
218 |         let num_repr = parse::<usize>(&runs).unwrap();
219 |         RUN_CASES.set(Some(num_repr)).unwrap();
220 |     } else {
221 |         RUN_CASES.set(None).unwrap();
222 |     }
223 | 
224 |     // Set the coverage collection method
225 |     match args.cov_method.as_str() {
226 |         "edge" => {
227 |             COV_METHOD.set(CovMethod::Edge).unwrap();
228 |         },
229 |         "block" => {
230 |             COV_METHOD.set(CovMethod::Block).unwrap();
231 |         },
232 |         "call-stack" => {
233 |             COV_METHOD.set(CovMethod::CallStack).unwrap();
234 |         },
235 |         _ => {
236 |             error_exit("You're specified coverage method is not supported, please chose `edge`, \
237 |                        `block`, or `call-stack`")
238 |         },
239 |     }
240 | 
241 |     // Trace mode
242 |     if args.full_trace == true && args.num_threads != 1 {
243 |         error_exit("Full Trace mode only works when running single-threaded");
244 |     } else {
245 |         FULL_TRACE.set(args.full_trace).unwrap();
246 |     }
247 | 
248 |     if false {
249 |         println!("cov_method: {:?}", COV_METHOD);
250 |         println!("snapshot_addr: {:?}", SNAPSHOT_ADDR);
251 |         println!("num_threads: {:?}", NUM_THREADS);
252 |         println!("output_dir: {:?}", OUTPUT_DIR);
253 |         println!("fuzz_input: {:?}", FUZZ_INPUT);
254 |         println!("no_perm_checks: {:?}", NO_PERM_CHECKS);
255 |         println!("debug_print: {:?}", DEBUG_PRINT);
256 |         println!("send_remote: {:?}", SEND_REMOTE);
257 |         println!("override_timeout: {:?}", OVERRIDE_TIMEOUT);
258 |         println!("full_trace: {:?}", FULL_TRACE);
259 |     }
260 | }
261 | 
262 | 


--------------------------------------------------------------------------------
/src/mutator.rs:
--------------------------------------------------------------------------------
  1 | use crate::config::DICT_FILE;
  2 | 
  3 | use rand_xoshiro::rand_core::RngCore;
  4 | use rand_xoshiro::Xoroshiro64Star;
  5 | use rand_xoshiro::rand_core::SeedableRng;
  6 | 
  7 | const MUTATE_SIMPLE: bool = false;
  8 | 
  9 | #[derive(Copy, Clone, Debug)]
 10 | pub enum Mutation {
 11 |     ByteReplace,
 12 |     BitFlip,
 13 |     MagicNum,
 14 |     SimpleArithmetic,
 15 |     RemoveBlock,
 16 |     DupBlock,
 17 |     Resize,
 18 |     Dictionary,
 19 | }
 20 | 
/// Mutation engine: owns the rng, the weighted list of mutation strategies and the optional
/// dictionary that the individual mutation routines operate with
#[derive(Debug, Clone)]
pub struct Mutator {
    /// Fast Rng
    rng: Xoroshiro64Star,

    /// Available mutation strategies. Strategies are drawn uniformly from this list, so the weight
    /// of a strategy is the number of times it occurs in it
    mutation_strats: Vec<Mutation>,

    /// Counts the mutated cases; once it reaches 100, havoc mode is used for that case and the
    /// counter is reset to 0
    havoc_counter: usize,

    /// Entries of the user-supplied dictionary, `None` if no dictionary was configured
    dictionary: Option<Vec<String>>,
}
 34 | 
 35 | use std::fs::File;
 36 | use std::io::{self, BufRead};
 37 | use std::path::Path;
 38 | 
 39 | fn read_lines<P>(file_name: P) -> io::Result<io::Lines<io::BufReader<File>>>
 40 | where P: AsRef<Path>, {
 41 |     let file = File::open(file_name)?;
 42 |     Ok(io::BufReader::new(file).lines())
 43 | }
 44 | 
 45 | pub fn parse_dict(file_name: &str) -> Vec<String> {
 46 |     let mut dict: Vec<String> = Vec::new();
 47 |     if let Ok(lines) = read_lines(file_name) {
 48 |         for line in lines {
 49 |             dict.push(line.unwrap());
 50 |         }
 51 |     }
 52 |     dict
 53 | }
 54 | 
 55 | impl Mutator {
 56 |     pub fn default() -> Self {
 57 |         // Initialize the individual strategies for the mutation_strats array alongside their
 58 |         // weight. This creates a larger array since weight is created by inserting new
 59 |         // elements into the array, but I believe that this should be much faster than
 60 |         // alternatives
 61 |         let mut mut_strats: Vec<Mutation> = Vec::new();
 62 |         mut_strats.append(&mut (0..1000).map(|_| { Mutation::ByteReplace }).collect());
 63 |         mut_strats.append(&mut (0..1000).map(|_| { Mutation::BitFlip }).collect());
 64 |         mut_strats.append(&mut (0..200).map(|_|  { Mutation::MagicNum }).collect());
 65 |         mut_strats.append(&mut (0..500).map(|_|  { Mutation::SimpleArithmetic }).collect());
 66 |         mut_strats.append(&mut (0..30).map(|_|   { Mutation::RemoveBlock }).collect());
 67 |         mut_strats.append(&mut (0..30).map(|_|   { Mutation::DupBlock }).collect());
 68 |         mut_strats.append(&mut (0..10).map(|_|   { Mutation::Resize }).collect());
 69 | 
 70 |         // If the user specified a dictionary to be used while fuzzing, parse it and add dictionary
 71 |         // replacements to the fuzz methods
 72 |         let dict_vec = if let Some(dict) = DICT_FILE.get().unwrap() {
 73 |             mut_strats.append(&mut (0..30).map(|_|   { Mutation::Dictionary }).collect());
 74 |             Some(parse_dict(dict))
 75 |         } else {
 76 |             None
 77 |         };
 78 | 
 79 |         Self {
 80 |             rng: Xoroshiro64Star::seed_from_u64(0),
 81 |             mutation_strats: mut_strats,
 82 |             havoc_counter: 0,
 83 |             dictionary: dict_vec,
 84 |         }
 85 |     }
 86 | 
 87 |     /// Return 2 random 32-bit unsigned integers
 88 |     fn get2_rand(&mut self) -> (usize, usize) {
 89 |         let tmp = self.rng.next_u64();
 90 |         ((tmp & 0xffffffff) as usize, (tmp >> 32) as usize)
 91 |     }
 92 | 
 93 |     /// Chose a set of random bytes and mutate them. Prefer small corruption over larger one's
 94 |     fn byte_replace(&mut self, input: &mut [u8]) -> Result<(),()> {
 95 |         let input_length = input.len();
 96 |         let (r1, r2) = self.get2_rand();
 97 | 
 98 |         if (r1 % 1000) < 950 {
 99 |             // Small corruption, 0-32 bytes
100 |             for _ in 1..(r2 % 32) {
101 |                 let (r1, r2) = self.get2_rand();
102 |                 input[(r1 % input_length)] = r2 as u8;
103 |             }
104 |         } else {
105 |             // Larger corruption, 64-128 bytes
106 |             for _ in 64..(64 + (r2 % 64)) {
107 |                 let (r1, r2) = self.get2_rand();
108 |                 input[(r1 % input_length)] = r2 as u8;
109 |             }
110 |         }
111 |         Ok(())
112 |     }
113 | 
114 |     /// Flip some random bits in the input
115 |     fn bit_flip(&mut self, input: &mut [u8]) -> Result<(),()> {
116 |         let input_length = input.len();
117 |         let (r1, r2) = self.get2_rand();
118 | 
119 |         if (r1 % 1000) < 950 {
120 |             // Small corruption, flip up to 32 bits
121 |             for _ in 1..(r2 % 32) {
122 |                 let (r1, r2) = self.get2_rand();
123 |                 let bit_idx = r1 % 8;
124 |                 input[(r2 % input_length)] ^= 1 << bit_idx;
125 |             }
126 |         } else {
127 |             // Larger corruption, flip 64-128 bits
128 |             for _ in 64..(64 + (r2 % 64)) {
129 |                 let (r1, r2) = self.get2_rand();
130 |                 let bit_idx = r1 % 8;
131 |                 input[(r2 % input_length)] ^= 1 << bit_idx;
132 |             }
133 |         }
134 |         Ok(())
135 |     }
136 | 
137 |     /// Replace 1/2/4/8 bytes in the program with values that are likely to cause bugs (eg. 0 or
138 |     /// INT_MAX)
139 |     fn magic_nums(&mut self, input: &mut Vec<u8>) -> Result<(),()> {
140 |         // Just return if input is too small to operate on in a useful manner
141 |         if input.len() < 32 { return Err(()); }
142 | 
143 |         let (r1, r2) = self.get2_rand();
144 |         let splice_start = r1 % (input.len() - 8);
145 |         let magic_nums: Vec<Vec<u8>> = vec![
146 |             vec![0x0], vec![0x0; 2], vec![0x0; 4], vec![0x0; 8],
147 |             vec![0xff], vec![0xff; 2], vec![0xff; 4], vec![0xff; 8],
148 |             vec![0x7f], vec![0x7f, 0xff], vec![0x7f, 0xff, 0xff, 0xff],
149 |             vec![0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], vec![0x01, 0x0, 0x0],
150 |         ];
151 | 
152 |         input.splice(splice_start.., magic_nums[r2 % magic_nums.len()].iter().cloned());
153 |         Ok(())
154 |     }
155 | 
156 |     /// Add or subtract some bytes to attempt to cause an integer over/underflow
157 |     fn simple_arithmetic(&mut self, input: &mut [u8]) -> Result<(),()> {
158 |         let input_length = input.len();
159 | 
160 |         let (r1, r2) = self.get2_rand();
161 | 
162 |         if (r1 % 1000) < 950 {
163 |             // Small corruption, 0-32 bytes, 50% chance to either add or sub a value 0-32
164 |             for i in 1..(r2 % 32) {
165 |                 let (r1, r2) = self.get2_rand();
166 |                 if i & 1 == 0 {
167 |                     input[(r1 % input_length)] = 
168 |                         input[(r1 % input_length)].wrapping_add((r2 % 32) as u8);
169 |                 } else {
170 |                     input[(r1 % input_length)] = 
171 |                         input[(r1 % input_length)].wrapping_sub((r2 % 32) as u8);
172 |                 }
173 |             }
174 |         } else {
175 |             // Larger corruption, 64-128 bytes, 50% chance to either add or sub a value 0-32
176 |             for i in 64..(64 + (r2 % 64)) {
177 |                 let (r1, r2) = self.get2_rand();
178 |                 if i & 1 == 0 {
179 |                     input[(r1 % input_length)] = 
180 |                         input[(r1 % input_length)].wrapping_add((r2 % 32) as u8);
181 |                 } else {
182 |                     input[(r1 % input_length)] = 
183 |                         input[(r1 % input_length)].wrapping_sub((r2 % 32) as u8);
184 |                 }
185 |             }
186 |         }
187 |         Ok(())
188 |     }
189 | 
190 |     /// Remove part of the input
191 |     fn remove_block(&mut self, input: &mut Vec<u8>) -> Result<(),()> {
192 |         let input_length = input.len();
193 | 
194 |         // Just return if input is already extremely small
195 |         if input_length < 32 { return Err(()); }
196 | 
197 |         let (r1, r2) = self.get2_rand();
198 | 
199 |         let start = r1 % input_length;
200 |         let end   = start + core::cmp::min(input_length - start, r2 % 512);
201 | 
202 |         // Refuse to perform this mutation if input would end up too small
203 |         if (input_length - (end - start)) < 32 { return Err(()); }
204 | 
205 |         input.drain(start..end);
206 |         Ok(())
207 |     }
208 | 
209 |     /// Take a random block out of the input and duplicate it into a different location of the
210 |     /// input
211 |     fn duplicate_block(&mut self, input: &mut Vec<u8>) -> Result<(),()> {
212 |         let input_length = input.len();
213 | 
214 |         // Just return if input is too small to operate on in a useful manner
215 |         if input_length < 32 { return Err(()); }
216 | 
217 |         let (r1, r2) = self.get2_rand();
218 | 
219 |         // Calculate a random range within the input
220 |         let start = r1 % input_length;
221 |         let end   = start + core::cmp::min(input_length - start, r2 % 128);
222 | 
223 |         // Chose random location to insert dup'd block into
224 |         let idx = self.rng.next_u32() as usize % input_length;
225 | 
226 |         // Extract block to be dup'd and split the input at a random location
227 |         let mut v = input[start..end].to_vec();
228 |         let mut p2 = input.split_off(idx);
229 | 
230 |         // Rebuild the input [(0..idx) + v + (idx..end)]
231 |         input.append(&mut v);
232 |         input.append(&mut p2);
233 |         Ok(())
234 |     }
235 | 
236 |     /// Resize the input, can both truncate, or add random bytes into the middle of an input
237 |     fn resize(&mut self, input: &mut Vec<u8>) -> Result<(),()> {
238 |         let input_length = input.len();
239 |         let (r1, r2) = self.get2_rand();
240 | 
241 |         if r1 & 1 == 0 { // Truncate
242 |             // Just return if input is too small to operate on in a useful manner
243 |             if input_length < 32 { return Err(()); }
244 |             let trunc_val = (r2 % (input_length / 2)) % 512;
245 | 
246 |             // Refuse to perform this mutation if input would end up too small
247 |             if trunc_val < 32 { return Err(()); }
248 |             
249 |             input.truncate(trunc_val);
250 |         } else { // Increase size
251 |             let size = if input_length < 32 {
252 |                 32
253 |             } else {
254 |                 (r2 % (input_length / 2)) % 512
255 |             };
256 | 
257 |             // Create a set of random bytes that we can append to the input
258 |             let rand_bytes = (0..(size / 8)).map(|_| {
259 |                 self.rng.next_u64()
260 |             }).collect::<Vec<u64>>();
261 | 
262 |             // Transform these bytes from Vec<u64> to Vec<u8>
263 |             let mut as_u8: Vec<u8> = unsafe {
264 |                 std::slice::from_raw_parts(
265 |                     rand_bytes.as_ptr() as *const u8,
266 |                     rand_bytes.len() * std::mem::size_of::<u64>(),
267 |                 ).to_vec()
268 |             };
269 |             input.append(&mut as_u8);
270 |         }
271 |         Ok(())
272 |     }
273 | 
274 |     /// Replace some of the input bytes with a provided dictionary entry
275 |     fn dict_replace(&mut self, input: &mut Vec<u8>) -> Result<(), ()> {
276 |         let dict_idx = self.rng.next_u32() as usize % self.dictionary.as_ref().unwrap().len();
277 |         let entry = self.dictionary.as_ref().unwrap()[dict_idx].as_bytes();
278 | 
279 |         if input.len() <= entry.len() { return Err(()); }
280 |         let input_idx = self.rng.next_u64() as usize % (input.len() - entry.len());
281 |         for (i, j) in (input_idx..(input_idx + entry.len())).enumerate() {
282 |             input[j] = entry[i];
283 |         }
284 | 
285 |         Ok(())
286 |     }
287 | 
288 |     /// Pick a random entry from `self.mutation_strats`
289 |     fn chose_mut(&mut self) -> Mutation {
290 |         let pick = (self.rng.next_u32() as usize) % self.mutation_strats.len();
291 |         self.mutation_strats[pick]
292 |     }
293 | 
294 |     /// Apply one of the implemented mutation strategies to `input`. Every 100th call runs in
295 |     /// 'havoc-mode', which applies several strategies to the same input
296 |     fn mutate_complex(&mut self, input: &mut Vec<u8>) {
297 |         let mut muts = Vec::new();
298 |         self.havoc_counter += 1;
299 | 
300 |         // Usually only perform 1 mutation, but if havoc is invoked, we queue up multiple
301 |         // mutations onto the input in this fuzz-case
302 |         if self.havoc_counter == 100 {
303 |             self.havoc_counter = 0;
304 |             // `1..n` is empty for n <= 1, which used to leave the input unmutated whenever the
305 |             // draw was 0 or 1; clamp the bound so that 1 to 6 mutations are always queued
306 |             let upper = (self.rng.next_u32() % 8).max(2);
307 |             for _ in 1..upper {
308 |                 muts.push(self.chose_mut());
309 |             }
310 |         } else {
311 |             muts.push(self.chose_mut());
312 |         }
313 | 
314 |         for mutation in &mut muts {
315 |             // NOTE(review): this retry loop only exits on success, so it never terminates if
316 |             // every strategy keeps rejecting `input` - confirm that cannot happen in practice
317 |             'inner: loop {
318 |                 let res = match mutation {
319 |                     Mutation::ByteReplace      => self.byte_replace(input),
320 |                     Mutation::BitFlip          => self.bit_flip(input),
321 |                     Mutation::MagicNum         => self.magic_nums(input),
322 |                     Mutation::SimpleArithmetic => self.simple_arithmetic(input),
323 |                     Mutation::RemoveBlock      => self.remove_block(input),
324 |                     Mutation::DupBlock         => self.duplicate_block(input),
325 |                     Mutation::Resize           => self.resize(input),
326 |                     Mutation::Dictionary       => self.dict_replace(input),
327 |                 };
328 |                 // Keep the result on success, otherwise retry with a newly chosen strategy
329 |                 if res.is_ok() { break 'inner; }
330 |                 *mutation = self.chose_mut();
331 |             }
332 |         }
333 |     }
334 | 
335 |     /// Perform extremely simple/fast mutations: up to 7 random single-byte overwrites
336 |     fn mutate_simple(&mut self, input: &mut [u8]) {
337 |         // Nothing to overwrite, and `r1 % 0` below would panic on an empty input
338 |         if input.is_empty() { return; }
339 |         for _ in 0..(self.rng.next_u32() % 8) {
340 |             let (r1, r2) = self.get2_rand();
341 |             input[r1 % input.len()] = r2 as u8;
342 |         }
343 |     }
344 | 
345 |     /// Entry point of the mutator: dispatches to the simple or the complex mutation routine
346 |     pub fn mutate(&mut self, input: &mut Vec<u8>) {
347 |         if !MUTATE_SIMPLE {
348 |             self.mutate_complex(input);
349 |         } else {
350 |             self.mutate_simple(input);
351 |         }
352 |     }
353 | }
354 | 
355 | 


--------------------------------------------------------------------------------
/docs/code_gen.md:
--------------------------------------------------------------------------------
  1 | # Code Generation
  2 | 
  3 | 
  4 | **Small Note**
  5 | ```
  6 | This was by far the most time-consuming and difficult aspect of this entire project. I initially 
  7 | spent about 3 months trying to format this like a proper optimizing compiler might. This included
  8 | lifting the code to an intermediate representation, transforming it to static-single-assignment form,
  9 | performing register allocation and finally compiling it to x86_64 machine code. I implemented all of
 10 | these, but in the end I decided to fall back to a simpler approach due to multiple reasons I outline
 11 | below. I still believe that this approach is possible though and holds decent performance gains, so 
 12 | I will most likely reattempt this in the future.
 13 | ```
 14 | 
 15 | #### Overview
 16 | 
 17 | This emulator makes use of a custom just-in-time compiler for all of its execution. The code generation is a multi-step process that leads to a 20-50x performance increase over pure emulation. 
 18 | 
 19 | Once execution is started, each individual emulator thread has the ability to compile new code. Whenever the emulator runs into a function that we have not yet compiled it invokes a lock on the JIT code backend and attempts to compile the entire function into the JIT backend before resuming execution. This lock only stops other threads from adding new code to the JIT-backing during compilation without stopping them from using the JIT-backing. This means that one thread compiling new code has basically no impact on any of the other threads, making this lock mostly free while providing 1 uniform memory region that contains all of the compiled code for all threads. Once the compilation is completed, the mutex is unlocked and the addresses of the newly generated code are added to the JIT lookup table. At this point, the compiling thread can resume fuzzer execution and all other threads can access this newly compiled code via the translation table.
 20 | 
 21 | Most of the code pertaining to code-generation can be found in [jit.rs](https://github.com/seal9055/sfuzz/blob/main/src/jit.rs), [irgraph.rs](https://github.com/seal9055/sfuzz/blob/main/src/irgraph.rs), and [emulator.rs](https://github.com/seal9055/sfuzz/blob/main/src/emulator.rs). More detailed descriptions of some of these processes are provided below.
 22 | 
 23 | #### Lifting a Function to Custom IR
 24 | The first step of actual code generation is to lift the entire function into an intermediate representation. The size of the function is determined during the initialization phase when first loading the target. This is done by parsing the elf metadata and setting up a hashmap mapping function start addresses to their sizes.<br>
 25 | 
 26 | The IR-lifting just iterates through the original instructions and creates an IR instruction based on the original instruction using a large switch statement. The example below illustrates what the intermediate representation may look like for a very minimal function that pretty much just performs a branch based on a comparison in the first block.
 27 | ```
 28 | Label @ 0x1000
 29 | Label @ 0x1000
 30 | 0x001000 A0 = 0x14
 31 | 0x001004 A1 = 0xA
 32 | 0x001008 if A0 == A1 (0x100C, 0x1028)
 33 | 
 34 | Label @ 0x100C
 35 | 0x00100C A2 = A0 + A1
 36 | 0x001010 A3 = 0x1
 37 | 0x001014 Jmp 0x1018
 38 | 
 39 | Label @ 0x1018
 40 | 0x001024 Jmp 0x1034
 41 | 
 42 | Label @ 0x1028
 43 | 0x001028 A2 = A0 - A1
 44 | 0x00102C A3 = 0x2
 45 | 0x001030 Jmp 0x1018
 46 | 
 47 | Label @ 0x1034
 48 | 0x001034 Ret
 49 | ```
 50 | <p style="text-align:center;"><i>F1</i></p>
 51 | 
 52 | At this point, I attempted a couple of different approaches before settling on the current code generation procedure. My first approach was to first transform the above IR code into static single assignment (SSA) form. This allows for stronger optimizations and is a very popular choice for modern compilers. Next, I used a linear scan register allocator to assign registers to the code and compile the final code.
 53 | 
 54 | This approach resulted in multiple issues that led to me eventually abandoning it in favor of the current implementation. Some of the reasons as to why I changed my approach are listed below.
 55 | 
 56 | 1. **Debugging** - Since this is meant to be a fuzzer, being able to properly debug crashes, or at least 
 57 |     print out register states is important. After doing register allocation, determining which x86 register is allocated to each RISCV register at runtime to print out useful information is very difficult.
 58 | 
 59 | 2. **Extendability** - When it comes to register allocation, a lot of the backend features (eg. A0-A7 for 
 60 |     arguments, or syscall number in A7) are architecture-dependent. This makes it a lot harder to write the backend in a way that can be extended with new architectures by just adding a front end.
 61 | 
 62 | 3. **Performance** - In theory, the ssa/regalloc approach will lead to better final code. In this case, 
 63 |     however, since it's a binary translator, a lot of registers such as function arguments or stack pointers have to be hardcoded to x86 registers since we don't have important information such as the number of arguments when translating binary -> binary. This in addition to the meta-data required by the JIT (pointer to memory, permissions, JIT lookup table, register spill-stack, etc) led to most x86 registers being in use, leaving only 4 x86 registers available for the actual register allocation in my approach. This could obviously be greatly improved upon, but this would require a lot more time to achieve comparable results.
 64 | 
 65 | 4. **Complexity** - This approach added a lot of extra complexity to the project which caused major 
 66 |     issues, and debugging all of them would have delayed the completion of this project by several months.
 67 | 
 68 | Nevertheless, I did implement both ssa-generation and register allocation before eventually abandoning it, and since it was a very large part of my time investment I decided to still keep notes on it. The implementation details are listed in the below 'Optimizing Compiler' section, and the final code for this approach can be viewed at commit 7d129ab847d171b66901f4c936dd2ad5c5a1b79a on the Github repository.
 69 | 
 70 | #### Compiling to x86 Machine Code
 71 | 
 72 | This phase pretty much just loops through all the previously lifted IR instructions and compiles them to x86 code. Whenever a syscall or a hooked function is encountered, appropriate instructions are generated to leave the JIT and handle the procedure. All registers are currently memory-mapped within the emulator. While this would have a very significant performance impact for normal programs, in the case of a fuzzer I can use the x86 registers freed up by this approach to point to other important, frequently accessed fields such as dirty lists or instruction counters, so in the end, the performance overhead incurred by this is negligible.
 73 | 
 74 | In addition to the previously mentioned actual code compilation, a lot of other very important steps are taken at this point. Mainly, the RISC-V to x86 translation table is populated, and instructions to instrument the code for fuzzing are inserted to enable snapshotting, coverage, hooks and proper permission checks. 
 75 | <br>  
 76 | 
 77 | ## Optimizing Compiler
 78 | 
 79 | #### Generate SSA-form for the IR
 80 | 
 81 | The next step is to lift the previously generated code into static single assignment (SSA) form. In this
 82 | form each variable is assigned exactly once. This is where the second field of each register comes
 83 | in. It is basically a counter for each register used to "create" a new register each time the
 84 | register is redefined. This creates some problems if a join point after a branch needs to make use
 85 | of a register that differs depending on which branch was taken (eg. in branch 1, `A1(1) = 5` is
 86 | executed while in branch 2 `A1(2) = 10` is executed). In this case the succeeding block does not
 87 | know which version of A1 to operate on. This is solved by placing a phi function at the join point,
 88 | which selects the version defined on the branch that was taken: `A1(3) = Phi(A1(1), A1(2))`. The
 89 | computer obviously does not have such an instruction, or ssa-form register usage, so it needs to
 90 | eventually be deconstructed. Nevertheless, this ssa representation is very frequently used in
 91 | compilers because it provides many advantages when attempting to run optimization passes on the code.
 92 | 
 93 | The below graph showcases what this form would look like for the above program. Note how the second
 94 | field of each register is now filled to make sure each register is only defined once, and that the
 95 | final block in the function now has phi-functions at its beginning for each register that it may be
 96 | required for.<br><br>
 97 | 
 98 | <p style="text-align:center;"><img src="../resources/graph.png" alt="Dominator Tree" height="75%"
 99 | width="75%"/></p>
100 | <p style="text-align:center;"><i>F2</i></p>
101 | 
102 | In this project ssa form is generated using the techniques proposed in
103 | [Efficiently Computing Static Single Assignment Form and the Control Dependence
104 | Graph](https://www.cs.utexas.edu/~pingali/CS380C/2010/papers/ssaCytron.pdf) by Cytron et al.
105 | 
106 | This algorithm makes use of dominance frontiers to compute a semipruned ssa representation that has
107 | fewer phi-functions than more naive implementations that may just place phi-functions in
108 | succeeding blocks for every register that survives block boundaries.
109 | 
110 | In my implementation, the steps to generate this ssa form are divided up into 4 main phases.
111 | 
112 | * Generate dominator tree
113 | 
114 |     In this phase, given a block b in the control flow graph, the set of blocks that strictly
115 |     dominate b are given by (Dom(b)-b) where Dom(b) determines all blocks that must be traversed
116 |     starting at the root of the cfg to get to block b. In this set the block that is closest
117 |     to b is b's immediate dominator which is what we care to extract in this phase. This means
118 |     that each cfg block appears as a node in the resulting tree and that, if a is the immediate dominator of b,
119 |     an edge exists from a to b.
120 | 
121 |     The corresponding dominator tree for the above program is shown below. The first block
122 |     dominates
123 |     the 2 branching blocks as expected, but unlike in the cfg representation, here an edge exists
124 |     from the first block to the join block because it is the earliest block that strictly dominates
125 |     it.
126 | 
127 | <p style="text-align:center;"><img src="../resources/domtree.png" alt="Dominator Tree" height="75%"
128 | width="75%"/></p>
129 | <p style="text-align:center;"><i>F3</i></p>
130 | 
131 | * Find the dominance frontier
132 | 
133 |     The dominance frontier is used to determine which registers require phi-functions for a given
134 |     block. It starts by identifying all join points j in the graph since these are the only blocks
135 |     that may potentially require phi-functions. Next it loops through all of the cfg-predecessors
136 |     of each block j until iDom(j) is found. During this traversal, block j is added to the
137 |     dominance frontier set of each block encountered in this process with the exception of iDom(j).
138 | 
139 |     This leads to the following dominance frontier for the above program which tells us that
140 |     block 1
141 |     and 2 may need phi functions to be placed in block 2 (block 1 & 2 represent the 2 branches from
142 |     the original CFG as indicated by the labels).
143 |     ```
144 |     Label @ 0x1000 : {}
145 |     Label @ 0x100c : {2}
146 |     Label @ 0x1018 : {}
147 |     Label @ 0x1028 : {2}
148 |     Label @ 0x1034 : {}
149 |     ```
150 | <p style="text-align:center;"><i>F4</i></p>
151 | 
152 | * Insert phi functions into the graph
153 | 
154 |     Now that we know where we want to place phi functions, they need to actually be placed for
155 |     registers that require them. Since we have the dominance frontiers we can determine this fairly
156 |     well without accidentally placing many unnecessary phi-functions. For every definition x in
157 |      block b, a phi-function needs to be inserted at every node in the dominance frontier of
158 |     b. Since
159 |     the insertion of a phi-function alters the instruction state, it may force the insertion of
160 |     additional phi-functions. This process needs to restart after every phi-function insertion.
161 | 
162 |     This results in 2 phi-functions being inserted at the start of block 2, as showcased in
163 |     F2.
164 | 
165 | * Rename all registers to their appropriate ssa form
166 | 
167 |     In this phase the ssa form is completed by finally renaming all registers to their ssa-form
168 |     name. Each register R with multiple definitions will thus be renamed R(1), R(2), ... R(n). This
169 |     is done by maintaining a count of the highest-count definition of a register that is
170 |     incremented whenever a new version of the register is defined alongside a stack that has
171 |     the most recently defined version of the register on top of it.
172 | 
173 |     The algorithm used here walks through the dominator tree and for each block it starts by
174 |     renaming all phi-function definitions. Next it walks through each block in the
175 |     program and rewrites the operands and declarations using the currently active ssa name for
176 |     each register. For declarations, a newly generated ssa name must be created by incrementing its
177 |     count variable and pushing it onto the register's stack. Finally the parameters of the phi
178 |     functions of blocks succeeding the current block are renamed.
179 | 
180 |     Next it starts recursively calling the rename procedure on all children of the current
181 |     block in the dominator tree. After this recursive call completes, all newly defined ssa
182 |     registers are popped from each register's stack, thus resetting the register states back to
183 |     the state prior to this block's renaming procedure.
184 | 
185 | In the current state of the compiler, ssa representation does not yet serve much of a purpose
186 | (although it can lead to better register allocation) since no optimizations have been written. This
187 | form does however allow for powerful optimizations to be added in the future.<br><br>
188 | 
189 | #### Potential Optimizations
190 | 
191 | Modern compiler backends employ many different optimizations to produce the best code possible. In
192 | this case, due to limited time I will stick to very simple optimizations that are fairly
193 | straightforward to implement while providing decent performance benefits such as eliminating all
194 | instructions that attempt to write to the Zero register (basically a nop), or some basic constant
195 | propagation to eliminate all temporary instructions that my IR added.<br><br>
196 | 
197 | #### Register Allocation
198 | 
199 | The goal of this phase is to replace the previously set ssa instruction operands with standard
200 | X86\_64 registers. The main difficulty of this process is to correctly determine efficient register
201 | allocation strategies that result in the least amount of registers being spilled to memory. This
202 | phase is still very early in development, and I am not entirely sure how I want to implement
203 | it yet.
204 | 
205 | * Instruction Numbering
206 | 
207 | The first step is to number the instructions. This assigns a unique id to each instruction. The main
208 | thing to consider here is that instructions need to be ordered in order of execution. This means
209 | that every instruction A that is executed before instruction B needs to have a lower id. This can be
210 | accomplished using the previously generated dominator tree.
211 | 
212 | * Register Live Intervals
213 | 
214 | The goal of this phase is to determine how long each register is alive. For each used register it
215 | computes an interval from the point that the register is first defined to its last usage according
216 | to the previously marked id numbers during the instruction numbering phase.
217 | 
218 | * Linear Scan Register Allocation
219 | 
220 | This algorithm is pretty much the simplest way to do register allocation across block boundaries.
221 | Nevertheless it is the most popularly used register allocation algorithm in JIT compilers since it
222 | results in low compile time which is an important metric for JIT compilers. Additionally it only
223 | produces slightly worse code than much slower algorithms such as graph coloring approaches.
224 | 
225 | The pseudo-code for this register allocation approach is listed below. We loop through all
226 | previously determined register liveness intervals and allocate an X86 register as long as
227 | free registers are available. If there is no free register available, the last used register is
228 | spilled to memory to obtain a free register.
229 | 
230 | ```rs
231 | for (reg, interval) in live_intervals { // in order of increasing starting point
232 |     // Start by expiring old intervals by removing all no longer in use registers from the active
233 |     // mapping and adding it to the free registers instead.
234 |     expire_old_intervals();
235 | 
236 |     if free_regs.is_empty() {
237 |         // Need to spill register to memory if there are no more free registers available
238 |         // Spill the register with the farthest use
239 |         spill_reg = active.pop();
240 | 
241 |         // Use the now free'd register for the current register
242 |         mapping.insert(reg, spill_reg);
243 | 
244 |         // Insert new range to active range
245 |         active.insert(spill_reg, interval);
246 |     } else {
247 |         // Free register available, so just add it to the mapping
248 |         preg = free_regs.pop();
249 |         active.insert(preg, interval);
250 |         mapping.insert(reg, preg);
251 |     }
252 | }
253 | return mapping;
254 | ```
255 | 
256 | #### Future Work
257 | As mentioned previously I would like to re-explore the optimizing compiler approach in the future. I believe it has a lot more potential than the more naive implementation, but it is not an immediate priority because there are more important improvements that I want to tackle first.
258 | 


--------------------------------------------------------------------------------
/tools/program_generator/src/lib.rs:
--------------------------------------------------------------------------------
  1 | #![feature(variant_count)]
  2 | #![feature(once_cell)]
  3 | 
  4 | pub mod rng;
  5 | pub mod compile;
  6 | 
  7 | use rng::Rng;
  8 | 
  9 | use std::fmt;
 10 | use std::lazy::SyncLazy;
 11 | 
 12 | /// This program takes an input file via argv[1], this variable specifies the amount of bytes that
 13 | /// are read in and available for use from the input, larger values should make finding the bugs a
 14 | /// little harder
 15 | const INPUT_SIZE: usize = 500;
 16 | 
 17 | /// Maximum depth that scopes can go to before early returning. Without this, blocks would
 18 | /// recursively create new blocks until a stack overflow occurs. Recommended: 8-12 for approximately
 19 | /// 2,000 - 200,000 lines of code. For larger complexity scores, the INPUT_SIZE should also be
 20 | /// increased to reduce duplication
 21 | const COMPLEXITY: usize = 9;
 22 | 
 23 | /// Minimum depth of functions, prevents too shallow functions that just immediately crash on base
 24 | /// case
 25 | const MIN_DEPTH: usize = 1;
 26 | 
 27 | /// Minimum and maximum sizes for buffer allocations in the program.
 28 | const MIN_ALLOC_SIZE: usize = 0x20;
 29 | const MAX_ALLOC_SIZE: usize = 0x100; // both are also passed to `RNG.gen_range` in `get_simple_op`
 30 | 
 31 | /// Maximum length for strings that can be used in comparisons. This needs to be smaller than
 32 | /// `INPUT_SIZE`, since string-compare indices are drawn below `INPUT_SIZE - MAX_STRING_LEN`
 33 | const MAX_STRING_LEN: usize = 0x20;
 34 | 
 35 | /// Index into the provided user input (the `buf` array of the generated program)
 36 | #[derive(Debug, Clone, Copy)]
 37 | pub struct Index(usize);
 38 | 
 39 | impl fmt::Display for Index { // prints the bare number so it can be spliced into `buf[..]`
 40 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 41 |         write!(f, "{}", self.0)
 42 |     }
 43 | }
 44 | 
 45 | /// Shared rng object; `SyncLazy` constructs it on first access rather than at program startup
 46 | pub static RNG: SyncLazy<Rng> = SyncLazy::new(
 47 |     Rng::new
 48 | );
 49 | 
 50 | /// Supported values; `Display` renders them as literals (strings are wrapped in double quotes)
 51 | #[derive(Debug, Clone)]
 52 | pub enum Value {
 53 |     Number(usize),
 54 |     StringLiteral(String),
 55 |     Arr(Vec<Value>),
 56 | }
 57 | 
 58 | impl fmt::Display for Value {
 59 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 60 |         match self {
 61 |             Value::Number(v) => write!(f, "{}", v),
 62 |             Value::StringLiteral(v) => write!(f, "\"{}\"", v), // NOTE: `v` is not escaped
 63 |             _ => unreachable!(), // `Value::Arr` has no rendering here; displaying one panics
 64 |         }
 65 |     }
 66 | }
 67 | 
 68 | /// Supported types; `Display` renders the matching C type name
 69 | #[derive(Debug, Clone, Copy, Eq, PartialEq)]
 70 | pub enum Type {
 71 |     Void,
 72 |     Number,
 73 |     Str,
 74 |     Argv,
 75 |     Buffer,
 76 | }
 77 | 
 78 | impl fmt::Display for Type {
 79 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 80 |         match self {
 81 |             Type::Void => write!(f, "void"),
 82 |             Type::Number => write!(f, "int"),
 83 |             Type::Str => write!(f, "unsigned char *"),
 84 |             Type::Buffer => write!(f, "unsigned char*"),
 85 |             _ => unreachable!(), // `Type::Argv` has no rendering here; displaying it panics
 86 |         }
 87 |     }
 88 | }
 89 | 
 90 | /// Comparison expressions that are used as the conditions of generated if-statements
 91 | #[derive(Debug, Clone)]
 92 | enum Expr {
 93 |     /// Index into input-array and 8-bit value
 94 |     ByteCmp(Index, u8), 
 95 | 
 96 |     /// Index into input-array and name of a variable containing an 8-bit Value
 97 |     VarByteCmp(Index, String), 
 98 | 
 99 |     /// Index into input-array and 16-bit value
100 |     WordCmp(Index, u16), 
101 | 
102 |     /// Index into input-array and name of a variable containing a 16-bit Value
103 |     VarWordCmp(Index, String), 
104 | 
105 |     /// Index into input-array and 32-bit value
106 |     DWordCmp(Index, u32), 
107 | 
108 |     /// Index into input-array and name of a variable containing a 32-bit Value
109 |     VarDWordCmp(Index, String), 
110 | 
111 |     /// Index into input-array and 64-bit value to be used for comparison operation
112 |     QWordCmp(Index, u64), 
113 | 
114 |     /// Index into input-array and name of a variable containing a 64-bit Value
115 |     VarQWordCmp(Index, String), 
116 | 
117 |     /// Index into input-array and a ByteString used for comparison operation
118 |     StrCmp(Index, Value), 
119 | 
120 |     /// Index into input-array and name of a string variable used for the comparison operation
121 |     VarStrCmp(Index, String), 
122 | }
123 | 
124 | impl Expr {
125 |     /// Return a random Expression; indices are chosen so every read stays inside the input
126 |     fn get_rand_expr(vars: &Vec<(String, Type)>) -> Self {
127 |         let num_entries = std::mem::variant_count::<Expr>();
128 |         let rstr = std::str::from_utf8(&RNG.next_string(16, 0x61, 0x7b)).unwrap().to_string();
129 |         let rnum = RNG.gen();
130 | 
131 |         let num_vars = vars.iter().filter(|e| e.1 == Type::Number)
132 |             .map(|e| e.0.clone()).collect::<Vec<String>>();
133 | 
134 |         let str_vars = vars.iter().filter(|e| e.1 == Type::Str)
135 |             .map(|e| e.0.clone()).collect::<Vec<String>>();
136 | 
137 |         loop { // variants that need a variable `continue` (redraw) when none is available
138 |             match RNG.next_num(num_entries) {
139 |                 0 => {
140 |                     return Expr::ByteCmp(Index(RNG.next_num(INPUT_SIZE)), rnum as u8);
141 |                 },
142 |                 1 => {
143 |                     if num_vars.is_empty() { continue; }
144 |                     return Expr::VarByteCmp(Index(RNG.next_num(INPUT_SIZE)), 
145 |                                             num_vars[RNG.next_num(num_vars.len())].clone());
146 |                 },
147 |                 2 => { // 2-byte read: the last valid start index is INPUT_SIZE - 2
148 |                     return Expr::WordCmp(Index(RNG.next_num(INPUT_SIZE - 1)), rnum as u16);
149 |                 },
150 |                 3 => {
151 |                     if num_vars.is_empty() { continue; }
152 |                     return Expr::VarWordCmp(Index(RNG.next_num(INPUT_SIZE)), 
153 |                                             num_vars[RNG.next_num(num_vars.len())].clone());
154 |                 },
155 |                 4 => { // 4-byte read: the last valid start index is INPUT_SIZE - 4
156 |                     return Expr::DWordCmp(Index(RNG.next_num(INPUT_SIZE - 3)), rnum as u32);
157 |                 },
158 |                 5 => {
159 |                     if num_vars.is_empty() { continue; }
160 |                     return Expr::VarDWordCmp(Index(RNG.next_num(INPUT_SIZE)), 
161 |                                             num_vars[RNG.next_num(num_vars.len())].clone());
162 |                 },
163 |                 6 => { // 8-byte read: the last valid start index is INPUT_SIZE - 8
164 |                     return Expr::QWordCmp(Index(RNG.next_num(INPUT_SIZE - 7)), rnum as u64);
165 |                 },
166 |                 7 => {
167 |                     if num_vars.is_empty() { continue; }
168 |                     return Expr::VarQWordCmp(Index(RNG.next_num(INPUT_SIZE)), 
169 |                                             num_vars[RNG.next_num(num_vars.len())].clone());
170 |                 },
171 |                 8 => {
172 |                     return Expr::StrCmp(Index(RNG.next_num(INPUT_SIZE-MAX_STRING_LEN)), 
173 |                                   Value::StringLiteral(rstr));
174 |                 }
175 |                 9 => {
176 |                     if str_vars.is_empty() { continue; }
177 |                     return Expr::VarStrCmp(Index(RNG.next_num(INPUT_SIZE-MAX_STRING_LEN)), 
178 |                                     str_vars[RNG.next_num(str_vars.len())].clone());
179 |                 }
180 |                 _ => unreachable!(),
181 |             };
182 |         }
183 |     }
184 | }
185 | 
186 | impl fmt::Display for Expr { // renders the expression as the condition of a C if-statement
187 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
188 |         let signs = ["=="];
189 |         let s = signs[RNG.next_num(signs.len())];
190 |         match self { // NOTE(review): `unsigned long` is assumed to be 64 bits (LP64 target)
191 |             Expr::VarByteCmp(a, b) |
192 |             Expr::VarWordCmp(a, b) |
193 |             Expr::VarDWordCmp(a, b) |
194 |             Expr::VarQWordCmp(a, b) |
195 |             Expr::VarStrCmp(a, b) => {
196 |                 write!(f, "buf[{}] == {}", a, b)
197 |             },
198 |             Expr::ByteCmp(a, b) => write!(f, "buf[{}] {} {}", a, s, b),
199 |             Expr::WordCmp(a, b) => write!(f, "*(unsigned short*)(buf + {}) {} {}", a, s, b),
200 |             Expr::DWordCmp(a, b) => write!(f, "*(unsigned int*)(buf + {}) {} {}U", a, s, b),
201 |             Expr::QWordCmp(a, b) => write!(f, "*(unsigned long*)(buf + {}) {} {}ULL", a, s, b),
202 |             Expr::StrCmp(a, b) => write!(f, "!strcmp(&buf[{}], {})", a, b),
203 |         }
204 |     }
205 | }
206 | 
207 | const NUM_SIMPLE_OPS: usize = 2; // number of match arms drawn from in `get_simple_op`
208 | const NUM_COMPLEX_OPS: usize = 1; // upper bound passed to `RNG.next_num` in `get_complex_op`
209 | 
210 | /// Operations that can occur in the code
211 | #[derive(Debug, Clone)]
212 | enum Operation {
213 |     // Simple Operations
214 |     // These are operations that occur at the start of a block, and solely exist to setup some
215 |     // random local variables that can then later be used by some more complex operations
216 |     
217 |     /// Declare variable .1 of type .0 and assign `input[.2] + .3` to it
218 |     AddInts(Type, String, Index, usize),
219 | 
220 |     /// Declare variable .1 of type .0 and assign `input[.2] - .3` to it
221 |     SubInts(Type, String, Index, usize),
222 | 
223 |     // Complex Operations
224 |     // NOTE(review): this text used to be a copy of the 'Simple Operations' comment; the variants
225 |     // below are the control-flow operations (if-blocks, function calls and crashes)
226 | 
227 |     /// If expression alongside a true-block
228 |     If(Expr, Block),
229 | 
230 |     /// Used to call generated functions (name, type, args)
231 |     CallFunc(String, Type, Vec<Type>),
232 | 
233 |     /// Insert a crash
234 |     Crash,
235 | 
236 |     // All operations below this point should not be returned by the `get_rand_op()` function, and
237 |     // are solely used for special cases such as program initialization or inserting crashes
238 |     
239 |     /// Used in `main` to allocate the input buffer based on argv
240 |     AllocInputBuf,
241 | 
242 |     /// Used to check that argv was properly provided in main
243 |     ArgvCheck,
244 | 
245 |     /// Used to open the file provided by argv in main
246 |     OpenFile,
247 | 
248 |     /// Used to read in the fuzz-input from the provided file
249 |     ReadFile,
250 | }
251 | 
252 | impl fmt::Display for Operation { // renders the operation as C source, without a trailing `;`
253 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
254 |         match self {
255 |             Operation::AddInts(a, b, c, d) => write!(f, "{} {} = buf[{}] + {}", a, b, c, d),
256 |             Operation::SubInts(a, b, c, d) => write!(f, "{} {} = buf[{}] - {}", a, b, c, d),
257 |             Operation::If(a, _) => write!(f, "if ({}) ", a), // the block itself is not printed here
258 |             Operation::AllocInputBuf => write!(f, "unsigned char *buf = malloc({})", INPUT_SIZE),
259 |             Operation::ArgvCheck => write!(f, "if (argc != 2) return"),
260 |             Operation::OpenFile => write!(f, "FILE *fd = fopen(argv[1], \"r\")"),
261 |             Operation::ReadFile => write!(f, "fgets(buf, {}, fd)", INPUT_SIZE),
262 |             Operation::Crash => write!(f, "*(unsigned long*)0x{:x} = 0", RNG.gen()), // new address per call
263 |             Operation::CallFunc(a, _, _) => write!(f, "{}(buf)", a), // `buf` is the only argument
264 |         }
265 |     }
266 | }
267 | 
268 | impl Operation {
269 |     /// Return a random simple operation
270 |     fn get_simple_op() -> Self {
271 |         let var_name = std::str::from_utf8(&RNG.next_string(16, 0x61, 0x7b)).unwrap().to_string();
272 | 
273 |         match RNG.next_num(NUM_SIMPLE_OPS)  {
274 |             0 => Operation::AddInts(Type::Number, var_name,
275 |                     Index(RNG.next_num(INPUT_SIZE)), RNG.gen_range(MIN_ALLOC_SIZE, MAX_ALLOC_SIZE)),
276 |             1 => Operation::SubInts(Type::Number, var_name,
277 |                     Index(RNG.next_num(INPUT_SIZE)), RNG.gen_range(MIN_ALLOC_SIZE, MAX_ALLOC_SIZE)),
278 |             _ => unreachable!(),
279 |         }
280 |     }
281 | 
282 |     /// Return a random more complex operation
283 |     fn get_complex_op(program: &mut Program, vars: &Vec<(String, Type)>, complexity: usize, 
284 |                       depth: usize) -> Self {
285 |         let _var_name = std::str::from_utf8(&RNG.next_string(16, 0x41, 0x7b)).unwrap().to_string();
286 | 
287 |         loop {
288 |             match RNG.next_num(NUM_COMPLEX_OPS)  {
289 |                 0 => { 
290 |                     return Operation::If(Expr::get_rand_expr(vars), 
291 |                                    Block::init_new_block(program, complexity - 1, depth + 1));
292 |                 },
293 |                 _ => unreachable!(),
294 |             }
295 |         }
296 |     }
297 | }
298 | 
299 | /// Scoped block with allocated variables and a list of statements to be executed
300 | #[derive(Debug, Default, Clone)]
301 | pub struct Block {
302 |     /// Statements contained in a block
303 |     stmt_list: Vec<Operation>,
304 | 
305 |     /// (Name, Type)
306 |     variables: Vec<(String, Type)>,
307 | }
308 | 
309 | impl Block {
310 |     /// Create a new block initialized with random operations
311 |     pub fn init_new_block(program: &mut Program, complexity: usize, depth: usize) -> Self {
312 |         let mut block = Block::default();
313 | 
314 |         // If the minimum depth has been reached, there's a chance that the block will be terminated
315 |         // on a crash or by calling a different function
316 |         if depth >= MIN_DEPTH {
317 |             let num = RNG.gen_range(0, complexity);
318 |             if num < 5 {
319 |                 if num < 2 {
320 |                     // Insert crash
321 |                     block.stmt_list.push(Operation::Crash);
322 |                 } else {
323 |                     // Insert function call
324 |                     let func = program.function_list.get(RNG.next_num(program.function_list.len()));
325 |                     
326 |                     // Insert a function-call unless 'main' was retrieved, or no functions exist
327 |                     // yet, in which case just insert a crash
328 |                     if let Some(f) = func {
329 |                         if f.name == "main" {
330 |                             block.stmt_list.push(Operation::Crash);
331 |                         } else {
332 |                             block.stmt_list.push(
333 |                                 Operation::CallFunc(
334 |                                     f.name.clone(),
335 |                                     Type::Void,
336 |                                     Vec::new(),
337 |                             ));
338 |                         }
339 |                     } else {
340 |                         block.stmt_list.push(Operation::Crash);
341 |                     }
342 |                 }
343 | 
344 |                 return block;
345 |             }
346 |         }
347 | 
348 |         // Start by inserting some simple operations to setup some variables that can later be used
349 |         for _ in 0..RNG.gen_range(2, 5) {
350 |             let op = Operation::get_simple_op();
351 | 
352 |             // If this operation produces a value, add it to this blocks variables
353 |             match &op {
354 |                 Operation::AddInts(typ, name, ..) |
355 |                 Operation::SubInts(typ, name, ..) => {
356 |                     block.variables.push((name.clone(), *typ));
357 |                 },
358 |                 _ => {},
359 |             }
360 |             block.stmt_list.push(op);
361 |         }
362 | 
363 |         // Next insert some more complex operations
364 |         for _ in 0..RNG.gen_range(5, 10) {
365 |             let op = Operation::get_complex_op(program, &block.variables, complexity, depth);
366 |             block.stmt_list.push(op);
367 |         }
368 |         block
369 |     }
370 | 
371 |     /// Create the main block. This just handles initial setup and calls the functions that should
372 |     /// be fuzzed
373 |     pub fn init_main_block(functions: &[Function]) -> Self {
374 |         let mut block = Block::default();
375 | 
376 |         // Allocate a global buffer to hold argv and write fuzz-input it
377 |         block.stmt_list.push(Operation::ArgvCheck);
378 |         block.stmt_list.push(Operation::OpenFile);
379 |         block.stmt_list.push(Operation::AllocInputBuf);
380 |         block.stmt_list.push(Operation::ReadFile);
381 | 
382 |         // Create a call to all functions
383 |         for func in functions {
384 |             block.stmt_list.push(Operation::CallFunc(
385 |                         func.name.clone(),
386 |                         func.typ,
387 |                         func.arguments.iter().map(|e| e.0).collect(),
388 |                     ));
389 |         }
390 |         block
391 |     }
392 | }
393 | 
/// Intermediate representation of functions
#[derive(Debug, Clone)]
pub struct Function {
    /// Name used to define and call the function
    name: String,
    /// Type of the function, presumably its return type (every function created in this file
    /// uses `Type::Void`) - TODO confirm
    typ: Type,
    /// Parameters of the function as (Type, Name)
    arguments: Vec<(Type, String)>,
    /// Statements that make up the function
    body: Block,
}
402 | 
403 | /// The actual program being modelled
404 | #[derive(Debug, Default, Clone)]
405 | pub struct Program {
406 |     /// List of generated functions
407 |     function_list: Vec<Function>,
408 | }
409 | 
410 | impl Program {
411 |     pub fn default() -> Self {
412 |         Self {
413 |             function_list: Vec::new(),
414 |         }
415 |     }
416 | 
417 |     /// Start creation of the program
418 |     pub fn create_program() -> Program {
419 |         let mut program = Program::default();
420 | 
421 |         // Create random generated functions that can be called from main
422 |         for i in 0..COMPLEXITY {
423 |             let func_name = format!("func_{}", i+1);
424 |             let func = Function {
425 |                     name: func_name.to_string(),
426 |                     typ:  Type::Void,
427 |                     arguments: vec![(Type::Buffer, "buf".to_string())],
428 |                     body: Block::init_new_block(&mut program.clone(), COMPLEXITY, 0)
429 |                 };
430 |             program.function_list.push(func);
431 |         }
432 | 
433 |         // Create main function
434 |         program.create_main();
435 | 
436 |         program
437 |     }
438 | 
439 |     /// Create main function. It has a special case since it requires additional initialization 
440 |     /// routines
441 |     fn create_main(&mut self) {
442 |         self.function_list.push(
443 |             Function {
444 |                 name: "main".to_string(),
445 |                 typ:  Type::Void,
446 |                 arguments: vec![(Type::Number, "argc".to_string()), (Type::Argv, "argv".to_string())],
447 |                 body: Block::init_main_block(&self.function_list), 
448 |             });
449 |     }
450 | }
451 | 
452 | 


--------------------------------------------------------------------------------
/fuzzing.md:
--------------------------------------------------------------------------------
  1 | #### This document is not specific to this fuzzer, but rather just a list of resources I found helpful while developing this fuzzer alongside some simple notes on some important fuzzing related topics from these papers.
  2 | 
  3 | ### Fuzzing Reading List
  4 |     1.  Basics          https://www.fuzzingbook.org/
  5 |     2.  AFL++           https://www.usenix.org/system/files/woot20-paper-fioraldi.pdf
  6 |     3.  Afl-dev         https://lcamtuf.blogspot.com/
  7 |     4.  Afl-dev         https://lcamtuf.coredump.cx/afl/technical_details.txt
  8 |     5.  Afl-study       https://www.s3.eurecom.fr/docs/fuzzing22_fioraldi_report.pdf
  9 |     6.  Cov-sensitivity https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf
 10 |     7.  Valued coverage https://www.ndss-symposium.org/wp-content/uploads/2020/02/24422-paper.pdf
 11 |     8.  CFG-Seed sched  https://arxiv.org/pdf/2203.12064.pdf
 12 |     9.  Seed selection  https://dl.acm.org/doi/pdf/10.1145/3460319.3464795
 13 |     10. Directed fuzz   https://dl.acm.org/doi/pdf/10.1145/3133956.3134020
 14 |     11. Havoc           https://shadowmydx.github.io/papers/icse22-main-1314.pdf
 15 |     12. Feedback-muts   https://link.springer.com/article/10.1007/s10664-020-09927-3
 16 |     13. Snapshots/state https://arxiv.org/pdf/2202.03643.pdf
 17 |     14. Snapshots/state https://github.com/fgsect/FitM/blob/main/fitm.pdf
 18 |     15. Benchmarking    https://github.com/google/fuzzbench/issues/654
 19 |     16. Benchmarking    https://hexgolems.com/2020/08/on-measuring-and-visualizing-fuzzer-performance/
 20 |     17. Crash-triaging  https://www.usenix.org/system/files/sec20-blazytko.pdf
 21 |     18. Hash-collisions https://chao.100871.net/papers/oakland18.pdf
 22 |     19. Bigmap-covmap   https://alifahmed.github.io/res/BigMap_DSN.pdf
 23 |     20. Redqueen        https://synthesis.to/papers/NDSS19-Redqueen.pdf
 24 |     21. Nautilus        https://wcventure.github.io/FuzzingPaper/Paper/NDSS19_Nautilus.pdf
 25 |     22. Nyx             https://www.usenix.org/system/files/sec21summer_schumilo.pdf
 26 |     23. AFLFast         https://mboehme.github.io/paper/CCS16.pdf
 27 |     24. Baseband-emu    https://arxiv.org/pdf/2005.07797.pdf
 28 |     25. Cerberos        https://dl.acm.org/doi/pdf/10.1145/3338906.3338975
 29 |     26. Fuzzilli        https://saelo.github.io/papers/thesis.pdf
 30 |     27. Symbolic/Taint  https://edmcman.github.io/papers/oakland10.pdf
 31 |     28. OS-modif perf   https://gts3.org/assets/papers/2017/xu:os-fuzz.pdf
 32 | 
 33 | ### Corpus Management
 34 | 
 35 | ##### Corpus Minimization
 36 |     > Some fuzzers such as afl trim their corpora to discard long inputs that take the same path as
 37 |         shorter inputs.
 38 | 
 39 |     > Pros
 40 |         - Cut down duplicate entries to not waste time on cases that don't provide more information
 41 |         - Smaller inputs are executed faster leading to higher performance
 42 |     > Cons
 43 |         - You potentially discard corpus entries that contained a valuable input
 44 |         - Reducing the size of inputs can greatly reduce the "state" the input has going into a
 45 |             specific block, thus leading to fewer bugs even if the same edges are covered
 46 | 
 47 |     > Specific techniques
 48 |         - Minset: compute weight by execution-time / file-size
 49 |         - Afl-cmin: Uses coverage information using tracked edge frequency counts
 50 |         - OptMin: Generates potentially optimal solutions unlike previous 2 approximations
 51 | 
 52 | ##### Seed Selection
 53 |     > High quality initial seeds are very important because the originals can carry a lot of
 54 |         semantics that the fuzzer now no longer has to randomly generate or know. Any part that
 55 |         isn't covered by the corpus requires additional work on the side of the fuzzer to get there.
 56 |         This has a significant impact on expanding code coverage since a larger corpus already 
 57 |         covers many more cases as its base
 58 | 
 59 | ##### Seed Collection
 60 |     > Web crawler to collect input files
 61 |     > Seed-collections:
 62 |         - https://www.nist.gov/itl/ssd/software-quality-group/national-software-reference-library-nsrl
 63 |         - https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/
 64 |         - https://lcamtuf.coredump.cx/afl/demo/
 65 | 
 66 | ### Coverage Tracking 
 67 | 
 68 | ##### Basic Block Coverage
 69 |     > Track coverage whenever a new basic block is hit
 70 | 
 71 | ##### Edge Coverage 
 72 |     (<id of cur code location>, <id of previous code location>)
 73 |     > Generate tuples of the above form for each piece of code. If a new tuple is encountered, add
 74 |         the mutated input as a new corpus entry (A -> B, simplest hash would be A ^ B)
 75 |     > Generally better than block coverage since it provides more insight into program execution
 76 |         - Can trivially distinguish between the following 2 paths
 77 |           A -> B -> C -> D -> E (tuples: AB, BC, CD, DE)
 78 |           A -> B -> D -> C -> E (tuples: AB, BD, DC, CE)
 79 | 
 80 |     Example hash functions:
 81 |         - hash = (prev_block << 1) ^ cur_block
 82 |         - AFL Implementation:
 83 |             cur_location = <COMPILE_TIME_RANDOM>;
 84 |             shared_mem[cur_location ^ prev_location]++; 
 85 |             prev_location = cur_location >> 1;
 86 | 
 87 | ##### N-gram Edge Coverage
 88 |     > Track latest n edges taken. Tracking only the current edge offers little information about the
 89 |         actually taken path, while tracking an infinite amount of edges could result in path
 90 |         explosion. Common values for n are 2, 4 or 8
 91 | 
 92 | ##### Path Coverage
 93 |     > Number of logical paths in the program that were taken during execution
 94 |     > Measures progress by computing a hash over the branches exercised by the input
 95 |     > Can be used to estimate how much more coverage will be gained/time with further fuzzing
 96 |     > Can potentially lead to path explosion if eg. a large loop is found
 97 | 
 98 | ##### Collision Free Coverage
 99 |     > Generally accomplished by assigning a unique value to each edge during instrumentation, so a
100 |         coverage bitmap can be efficiently accessed using this hardcoded value instead of computing
101 |         a hash that risks collisions
102 | 
103 | ##### BigMap
104 |     > Common strategy to lower hash collisions is to increase the table size, this however results
105 |         in lower cache locality and can greatly reduce perf.
106 |     > BigMap adds an additional level of indirection so randomly scattered coverage metrics are
107 |         instead stored in a sequential bitmap to maintain the currently active region in caches
108 | 
109 | ##### Data Coverage
110 |     > Distinguish test cases from a data accessing perspective
111 | 
112 | ##### Collection Methods
113 |     > Code instrumentation to report coverage information
114 |     > Intel PIN - jit compiles program as soon as it is loaded into memory while adding additional
115 |         instructions to track coverage
116 |     > Randomly request current location of fuzzer at certain time intervals to track which code is
117 |         executed
118 |     > Intel PT - Hardware branch tracer
119 | 
120 | ### Seed Scheduling
121 | ##### Metrics:
122 |     > Vulnerable paths 
123 |         - Weight of each branch is based upon vulnerable functions (eg. memcpy) it can reach and the
124 |             amount of loads/stores given different paths
125 |     > Number of edges reachable from a given seed
126 |     > Mutation history can be used to determine when one should stop focusing on "hard" edges
127 |     > Graph centrality analysis - approximate number of reachable edges from given seed and give
128 |         weight depending on how "central" a seed is.
129 | 
130 | ##### Coverage Guided / Power Schedules
131 |     > Assign different weights to inputs in the corpus to "smartly" distribute fuzzing time
132 |         - Execution time
133 |         - Shorter
134 |         - More frequent coverage increases
135 | 
136 | ### Mutational Strategies
137 | ##### General Approach
138 |     > Feedback loop approach
139 |         - Measure what type of mutations result in new coverage and use them more frequently
140 |     > Start with sequential deterministic mutations before moving on to randomness
141 |     > Target specific mutations will generally outperform generic mutation strategies. This can be
142 |         enhanced by developing a state-conscious fuzzer
143 |     > Havoc: apply multiple randomly selected mutators simultaneously on some inputs
144 | 
145 | ##### Individual Strategies
146 |     > Walking bit flips - sequential ordered bitflips
147 |         Pros:
148 |             - Pretty good at finding low hanging fruit because it goes through entire input and gets
149 |                 a good bit of initial coverage
150 |         Cons:
151 |             - Expensive to keep up since each test requires 8 execve() calls per byte of the input
152 |                 file. Has diminishing returns, so only used for a short initial period.
153 |     > Walking byte flips - sequential ordered byte-flips
154 |         Pros/Cons: Much less expensive than bit flips, but also not very effective in the long run
155 |     > Simple arithmetics - inc/dec integers in the input according to be/le and different sizes
156 |         Pros:
157 |             - Good to spot a lot of bugs pertaining to integer over/underflows or incorrect size
158 |                 checks
159 |         Cons:
160 |             - Relatively high costs (~20 execve calls per byte)
161 |     > Known Integers - hardcoded set of integers that commonly trigger bugs (-1, 0, MAX_INT, etc)
162 |         Pros/Cons: Very expensive, but can quickly find some common bugs before being disabled while
163 |                      going through the small hardcoded list of known values
164 |     > Stacked Tweaks - non deterministic random mutations
165 |         - bit flips
166 |         - random incs/decs for 8/16/32/64 bit values
167 |         - random single byte sets
168 |         - block deletion
169 |         - block duplication
170 |         Pros:
171 |             Extremely simple to implement
172 |             Surprisingly very effective at generating new coverage
173 |     > Changing size of input
174 |     > Dictionary: Maintain a dictionary (either statically defined or dynamically created during
175 |         runtime) of interesting strings, that can be added to the input at random positions.
176 |     > Splicing: Combine two different inputs at random positions
177 | 
178 | ### Triaging Crashes
179 | ##### Crash Exploration
180 |     > Used to more easily understand what exactly caused a crash
181 |     > Entirely separate mode that takes a crashing input and looks for more inputs that cause the
182 |         same crash by mutating this input. This process uses very similar methods as the main
183 |         fuzzer. Eventually it will have generated a small corpus of inputs related to the bug that
184 |         can be triaged together to better understand the bug
185 |     > Once a lot of crashing inputs are gathered, statistical analysis can be performed on the
186 |         crashing inputs to find common cases, and automatically extract a lot of possible crash
187 |         reasons.
188 | 
189 | ##### Deduping Crashes
190 |     > Group "similar" crashes together to avoid looking at hundreds of similar crashes
191 |         - With edge based coverage this can be done whenever a new tuple is found that hasn't been
192 |             used to achieve this crash before, or if a tuple is missing
193 | 
194 | ##### Debugging
195 |     > The simplest, but also most manual labor intensive approach is to just load the crashing input
196 |         into a debugger and to manually attempt to figure out the root cause.
197 |     > This can be improved upon with modern timeless debuggers that provide reverse execution 
198 |         functionality. This can be used to traverse the program backwards starting at the crash
199 |         location, which can often make bug triaging a lot more comfortable.
200 | 
201 | ### Performance
202 | ##### Persistent Mode / Snapshot Fuzzing
203 |     > Fuzz in a short loop around the specific target functions by saving the state right before the
204 |         execution of the target, and then basing future fuzz cases off of this specific starting
205 |         state instead of fully resetting the program on each run.
206 |     > Can additionally implement mechanisms similar to copy-on-write/dirty bit memory resets to 
207 |         avoid having to reset large amounts of memory. This allows for much faster fuzzing.
208 | 
209 | ##### In-memory Fuzzing
210 |     > Many fuzz-targets read input from disk before starting to operate on the
211 |         data. This leads to poor scaling due to the heavy io usage. Instead a fuzzer can just load
212 |         the corpus into memory and directly pass it into the binary to avoid the disk performance
213 |         overhead.
214 | 
215 | ##### Scaling
216 |     > When used in the real world, fuzzers are generally run on at least 50-100 cores. This means
217 |         that not only does the fuzzer need good single-core performance, but it also has to scale
218 |         well with a large number of cores.
219 |     > If coverage information and corpus are meant to be shared between cores, they need to be 
220 |         implemented in ways that can be shared between the threads without incurring high costs.
221 |         This means that certain techniques that track massive amounts of additional information to 
222 |         make improved decisions suddenly become unviable when attempting to scale because all of the
223 |         information needs to be passed between cores.
224 |     > Another common pitfall of scaling attempts is the kernel. If the main fuzzing loop contains 
225 |         frequent syscalls, the kernel starts taking up a good chunk of the time that should be spent 
226 |         fuzzing. This becomes increasingly relevant when running the fuzzer on a high number of
227 |         threads, which can easily result in >40% of total execution time being wasted in random
228 |         kernel locks.
229 | 
230 | ### Symbolic Execution in Fuzzing
231 | 
232 |     > Heavy symbolic analysis is still too slow for fuzzers, however using approximations one can
233 |     gain many of the benefits without the massive performance hit
234 | 
235 | ##### CMP-Deconstruction
236 |     > Extract values used in cmp instructions affecting control-flow and add these to a known
237 |         dictionary that fuzz cases can use to achieve new control flow
238 |     > Mostly useful when dealing with a lot of magic values that need to be bypassed to achieve 
239 |         more coverage
240 |     > Can be done via input-to-state correspondence between inputs and current program state. Start
241 |         by finding all cmp instructions in an initial run and hooking all of them to retrieve the
242 |         arguments. Using the compare operand, values that are likely to pass the check can be
243 |         calculated (eg. zero/sign-extend, equal, slightly larger/less than, etc). The input is
244 |         colorized to create lightweight approximation to taint tracking that can be used to track
245 |         which part of the input finds itself in the cmp instruction.
246 |     > Another approach is to transform multi-byte comparisons into multiple single byte comparisons,
247 |         thus being able to leverage coverage guided fuzzing to bypass the check
248 | 
249 | ##### Checksums
250 |     > Checksums can be very challenging for fuzzers to handle since unlike magic-byte checks, they
251 |         can change from one run to the other based on the provided input and greatly halt fuzzer
252 |         progress.
253 |     > One possible method is to statically identify checksum checks and patch them to a check that
254 |         always returns true.
255 | 
256 | ##### Concolic Execution 
257 |     > Track all conditionals during execution, and collect conditional constraints. These can be 
258 |         used to then produce inputs that take the non-traversed path. Still has a very real 
259 |         performance impact, but it does not suffer from state explosion and can thus be implemented 
260 |         in a scaling manner.
261 | 
262 | ##### Taint-based Fuzzing
263 |     > Tracks input-flow throughout a target to learn which parts of the input have an effect on
264 |         certain operations. Can be used to eg. find magic bytes or integer overflow vulnerabilities,
265 |         but has mostly been replaced in fuzzers by techniques that accomplish similar goals without
266 |         the massive performance overhead that proper taint-tracking results in.
267 | 
268 | ### Benchmarking Fuzzers
269 |     > When profiling new algorithms in fuzzers, algorithmic performance (eg. coverage/cases) is much
270 |         more relevant than timed performance (eg. coverage/time) due to the high variances that can
271 |         occur using random fuzz-inputs. Time-performance is the most important aspect for finished
272 |         fuzzers, but while benchmarking fuzzers in development it is unreasonable since it would
273 |         require prototypes to be highly optimized to compete. This assumes that the developer can
274 |         make reasonable assumptions about the performance implications of the algorithm once
275 |         optimized.
276 |     > Minor variables at the start of the fuzzer run can have massive impact on the rest. Eg. high
277 |         corruption can lead to initially high coverage with strongly diminishing returns once the high
278 |         corruption hits required bytes for further progress.
279 |     > When properly evaluating fuzzers, debugging/introspection ability is extremely important
280 |         rather than just running benchmarks/reviewing coverage graphs
281 |     > Log Scale vs Linear Scale
282 |         - Linear scale describes where a fuzzer flatlines, but doesn't produce much data otherwise
283 |         - Much more coverage at the beginning of fuzzer-runs than at the end so a linear scale 
284 |             results in a vertical increase at t=0 and an almost horizontal line for the rest of the 
285 |             run which provides almost no information.
286 |         - Log scales can make for easier interpretation of specific spikes during fuzzer runs
287 |     > When benchmarking, don't focus on short fuzzer runs, but rather let the fuzzer run for eg. 24
288 |         hours since some changes will have short term benefits but longterm drawbacks
289 |     > Scaling is extremely important for real world fuzzer metrics. If a fuzzer performs better on
290 |         single core metrics but then completely falls off when scaled to 50-100 cores it becomes
291 |         unusable for proper fuzzing campaigns. Another point would be that it doesn't just scale
292 |         across cores but also across multiple servers, even though this is potentially harder to
293 |         test. A lot of proposed high introspection fuzzing techniques suddenly fall apart when faced
294 |         with scaling because all of this data needs to be shared between cores.
295 | 
296 | ##### Metrics
297 |     > # of bugs is basically worthless because it relates more to the amount of hours spent using
298 |         the fuzzer on bad targets instead of the actual fuzzer performance
299 |     > Evaluating based on known bugs is useful if you are already familiar with the bugs and can
300 |         thus determine if your fuzzer works as expected.
301 |     > Coverage is probably the most popular metric to measure fuzzers. The proficiency of a fuzzer
302 |         is often directly correlated with the amount of coverage it achieves. It might be misleading
303 |         in certain cases such as grammar based fuzzers that only test a certain subset of an
304 |         application.
305 |     > Sampling based measurement to count how often individual blocks are hit by input. This
306 |         provides information about how often blocks are reached, which is more valuable than
307 |         single-hit coverage tracking.
308 |     > State-aware coverage tracking: Measure which target states of a specific stateful target the
309 |         fuzzer manages to hit.
310 | 
311 | ### Grammar-based Fuzzing
312 |     > Many applications that require highly structured inputs (eg. compilers) make fuzzing using
313 |         mutational fuzzer implementations difficult. Grammar fuzzers in comparison generate input
314 |         from scratch instead of modifying existing input. When fuzzing a javascript interpreter for
315 |         example, a grammar based fuzzer would generate random but valid javascript code and use this
316 |         as fuzz input. This greatly reduces the number of fuzz cases that would otherwise be
317 |         immediately thrown out due to syntax errors with mutational engines.
318 | 
319 | ### Misc
320 | 
321 | ##### Crash Amplification
322 |     > The goal of fuzzing is usually to find potentially exploitable bugs on a target.
323 |         Unfortunately fuzzers are generally only capable of finding these bugs if they actually cause
324 |         a crash. The goal of crash amplification is to more easily crash the program if a bug
325 |         occurs.
326 | 
327 |     > Compile-time instrumentation
328 |         - ASAN: Address sanitization can be used to add runtime checks to the binary that track out
329 |             of bounds accesses or heap bugs. Approximately 2x performance hit, but generally worth
330 |             the extra cost.
331 | 
332 |     > Emulation
333 |         - Byte level permission checks to catch off-by-one errors similar to asan
334 |         - Hooking various functions such as malloc/free to instead replace them with safe
335 |             alternatives that crash on any misuse
336 | 
337 | 


--------------------------------------------------------------------------------
/src/irgraph.rs:
--------------------------------------------------------------------------------
  1 | use crate::{
  2 |     emulator::Register as PReg,
  3 |     irgraph::Val::{Reg, Imm, Imm64},
  4 | };
  5 | 
  6 | use std::fmt::{self, Formatter, UpperHex};
  7 | use num_traits::Signed;
  8 | use rustc_hash::FxHashMap;
  9 | 
 10 | /// Small helper type that is used to print out hex value eg. -0x20 instead of 0xffffffe0
 11 | struct ReallySigned<T: PartialOrd + Signed + UpperHex>(T);
 12 | impl<T: PartialOrd + Signed + UpperHex> UpperHex for ReallySigned<T> {
 13 |     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
 14 |         let prefix = if f.alternate() { "0x" } else { "" };
 15 |         let bare_hex = format!("{:X}", self.0.abs());
 16 |         f.pad_integral(self.0 >= T::zero(), prefix, &bare_hex)
 17 |     }
 18 | }
 19 | 
 20 | /// Operand of an IR instruction (the element type of `Instruction::i_reg`): either a register
 21 | /// or a signed immediate.
 22 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
 23 | pub enum Val {
 24 |     /// Register operand
 25 |     Reg(PReg),
 26 |     /// 32-bit signed immediate
 27 |     Imm(i32),
 28 |     /// 64-bit signed immediate
 29 |     Imm64(i64),
 30 | }
 31 | 
 32 | /// Registers are printed through their `Debug` form, immediates as plain decimal numbers.
 33 | impl fmt::Display for Val {
 34 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 35 |         // `Val` is `Copy`, so matching on the dereferenced value binds the payloads by value
 36 |         match *self {
 37 |             Reg(reg) => write!(f, "{:?}", reg),
 38 |             Imm(imm) => write!(f, "{}", imm),
 39 |             Imm64(imm) => write!(f, "{}", imm),
 40 |         }
 41 |     }
 42 | }
 43 | 
 44 | #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 45 | pub enum Operation { // Opcode of an IR instruction; the operands live in `Instruction`
 46 |     Undefined,            // Placeholder; this is the `Default` value
 47 |     Jmp(usize),           // Jump to the given address
 48 |     JmpOff(i32),          // Jump to (register operand + the given offset)
 49 |     Branch(usize, usize), // Conditional branch: (true target, false target)
 50 |     Syscall,
 51 |     Store,                // [base + offset] = value
 52 |     Load,                 // dst = [base + offset]
 53 |     Mov,
 54 |     Add,
 55 |     Sub,
 56 |     Mul,
 57 |     Div,
 58 |     And,
 59 |     Or,
 60 |     Xor,
 61 |     Shl,
 62 |     Shr,                  // Logical shift right
 63 |     Sar,                  // Arithmetic shift right
 64 |     Slt,                  // dst = (lhs < rhs) ? 1 : 0
 65 |     Nop,
 66 | }
 67 | 
 68 | impl Default for Operation {
 69 |     fn default() -> Self { Operation::Undefined }
 70 | }
 71 | 
 72 | /// Bit flags that give instructions extra information such as signed/unsigned or the type of
 73 | /// comparison for branch instructions. Each flag is a distinct bit, so flags can be OR-ed.
 74 | #[derive(Debug, Clone, Copy)]
 75 | pub struct Flag;
 76 | #[allow(non_upper_case_globals)] // the constants below use CamelCase names
 77 | impl Flag {
 78 |     pub const NoFlag:   u16 = 0x0;
 79 |     pub const Signed:   u16 = 0x1;
 80 |     pub const Unsigned: u16 = 0x2;
 81 |     pub const Equal:    u16 = 0x4;   // Equal..Greater: comparison of a branch; `Display` prints
 82 |     pub const NEqual:   u16 = 0x8;   // Less|Equal as `<=` and Greater|Equal as `>=`
 83 |     pub const Less:     u16 = 0x10;
 84 |     pub const Greater:  u16 = 0x20;
 85 |     pub const Byte:     u16 = 0x40;  // NOTE(review): Byte..QWord look like operand sizes; confirm
 86 |     pub const Word:     u16 = 0x80;
 87 |     pub const DWord:    u16 = 0x100;
 88 |     pub const QWord:    u16 = 0x200;
 89 | }
 90 | 
 91 | /// The instructions used in the IR. Laid out in a way that is efficient memory wise and lets us
 92 | /// easily determine if the instruction has input/output fields.
 93 | #[derive(Debug, Clone, Default)]
 94 | pub struct Instruction {
 95 |     pub op:    Operation,     // what the instruction does
 96 |     pub i_reg: Vec<Val>,      // input operands; their order depends on `op`
 97 |     pub o_reg: Option<PReg>,  // destination register, `None` if nothing is written
 98 |     pub flags: u16,           // `Flag` bits
 99 |     pub pc:    Option<usize>, // original pc, set only on the first IR instr lifted from it
100 | }
101 | 
102 | impl Instruction {
103 |     pub fn is_jump(&self) -> bool { // true for `Jmp` and `Branch`; `JmpOff` is not matched
104 |         matches!(self.op, Operation::Jmp(_) | Operation::Branch(..))
105 |     }
106 | }
107 | 
108 | /// Pretty printing for the instructions: the pc (0 when the instruction has none) followed by
109 | /// a pseudo-code rendering of the operation. Every `Operation` variant has a rendering.
110 | ///
111 | /// # Panics
112 | /// Panics if a `Branch` carries a comparison-flag combination without a printable operator, or
113 | /// if an instruction lacks the operands / output register its operation reads.
114 | impl fmt::Display for Instruction {
115 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
116 |         let pc = self.pc.unwrap_or(0);
117 | 
118 |         match self.op {
119 |             Operation::Jmp(x) => write!(f, "{:#08X}  Jmp {:#0x?}", pc, x),
120 |             Operation::JmpOff(x) => {
121 |                 write!(f, "{:#08X}  Jmp ({:?} + {:#X})", pc, self.i_reg[0], ReallySigned(x))
122 |             },
123 |             Operation::Branch(x, y) => {
124 |                 // Only the comparison bits decide which operator is printed
125 |                 const CMP_MASK: u16 = Flag::Equal | Flag::NEqual | Flag::Less | Flag::Greater;
126 |                 const LESS_EQ: u16 = Flag::Less | Flag::Equal;
127 |                 const GREATER_EQ: u16 = Flag::Greater | Flag::Equal;
128 |                 let cmp = match self.flags & CMP_MASK {
129 |                     Flag::Equal   => "==",
130 |                     Flag::NEqual  => "!=",
131 |                     Flag::Less    => "<",
132 |                     Flag::Greater => ">",
133 |                     GREATER_EQ    => ">=",
134 |                     LESS_EQ       => "<=",
135 |                     other => panic!("branch with flag: {}", other),
136 |                 };
137 |                 // The two targets are printed in `(y, x)` order
138 |                 write!(f, "{:#08X}  if {} {} {} ({:#0X?}, {:#0X?})", pc,
139 |                        self.i_reg[0], cmp, self.i_reg[1], y, x)
140 |             },
141 |             Operation::Syscall => write!(f, "{:#08X}  Syscall", pc),
142 |             Operation::Store => {
143 |                 // Operands are [base, value, offset]
144 |                 write!(f, "{:#08X}  [{}+{}] = {}", pc, self.i_reg[0], self.i_reg[2],
145 |                        self.i_reg[1])
146 |             },
147 |             Operation::Load => {
148 |                 write!(f, "{:#08X}  {:?} = [{}+{}]", pc, self.o_reg.unwrap(),
149 |                        self.i_reg[0], self.i_reg[1])
150 |             },
151 |             Operation::Mov => {
152 |                 write!(f, "{:#08X}  {:?} = {}", pc, self.o_reg.unwrap(), self.i_reg[0])
153 |             },
154 |             Operation::Add => {
155 |                 write!(f, "{:#08X}  {:?} = {} + {}", pc, self.o_reg.unwrap(),
156 |                        self.i_reg[0], self.i_reg[1])
157 |             },
158 |             Operation::Sub => {
159 |                 write!(f, "{:#08X}  {:?} = {} - {}", pc, self.o_reg.unwrap(),
160 |                        self.i_reg[0], self.i_reg[1])
161 |             },
162 |             Operation::Mul => {
163 |                 write!(f, "{:#08X}  {:?} = {} * {}", pc, self.o_reg.unwrap(),
164 |                        self.i_reg[0], self.i_reg[1])
165 |             },
166 |             Operation::Div => {
167 |                 write!(f, "{:#08X}  {:?} = {} / {}", pc, self.o_reg.unwrap(),
168 |                        self.i_reg[0], self.i_reg[1])
169 |             },
170 |             Operation::And => {
171 |                 write!(f, "{:#08X}  {:?} = {} & {}", pc, self.o_reg.unwrap(),
172 |                        self.i_reg[0], self.i_reg[1])
173 |             },
174 |             Operation::Or => {
175 |                 write!(f, "{:#08X}  {:?} = {} | {}", pc, self.o_reg.unwrap(),
176 |                        self.i_reg[0], self.i_reg[1])
177 |             },
178 |             Operation::Xor => {
179 |                 write!(f, "{:#08X}  {:?} = {} ^ {}", pc, self.o_reg.unwrap(),
180 |                        self.i_reg[0], self.i_reg[1])
181 |             },
182 |             Operation::Shl => {
183 |                 write!(f, "{:#08X}  {:?} = {} << {}", pc, self.o_reg.unwrap(),
184 |                        self.i_reg[0], self.i_reg[1])
185 |             },
186 |             Operation::Shr => {
187 |                 write!(f, "{:#08X}  {:?} = {} >> {}", pc, self.o_reg.unwrap(),
188 |                        self.i_reg[0], self.i_reg[1])
189 |             },
190 |             Operation::Sar => {
191 |                 write!(f, "{:#08X}  {:?} = {} >> {} [A]", pc, self.o_reg.unwrap(),
192 |                        self.i_reg[0], self.i_reg[1])
193 |             },
194 |             Operation::Slt => {
195 |                 write!(f, "{:#08X}  {:?} = {} < {} ? 1 : 0", pc, self.o_reg.unwrap(),
196 |                        self.i_reg[0], self.i_reg[1])
197 |             },
198 |             Operation::Nop => write!(f, "{:#08X}  Nop", pc),
199 |             Operation::Undefined => write!(f, "{:#08X}  Undefined", pc),
200 |         }
201 |     }
202 | }
203 | 
204 | /// Basic wrapper around instructions that keeps track of cur_pc.
205 | #[derive(Debug)]
206 | pub struct IRGraph {
207 |     /// List of all instructions
208 |     pub instrs: Vec<Instruction>,
209 | 
210 |     /// Labels indicating controlflow (pc -> index into `instrs`)
211 |     pub labels: FxHashMap<usize, usize>,
212 | 
213 |     /// Since multiple IR instructions can be mapped to a single original instruction, this is used
214 |     /// to only assign the pc to the first IR instruction generated for an original instruction.
215 |     cur_pc: Option<usize>,
216 | }
217 | 
218 | impl Default for IRGraph {
219 |     fn default() -> Self {
220 |         Self::new()
221 |     }
222 | }
223 | 
224 | impl IRGraph {
225 |     /// Create an empty graph: no instructions, no labels and no pending pc
226 |     pub fn new() -> Self {
227 |         IRGraph {
228 |             instrs: Vec::new(),
229 |             labels: FxHashMap::default(),
230 |             cur_pc: None,
231 |         }
232 |     }
233 | 
234 |     /// Initialize the cur_pc variable which is used to set the pc value in the IR instructions
235 |     pub fn init_instr(&mut self, pc: usize) {
236 |         self.cur_pc = Some(pc);
237 |     }
238 | 
239 |     /// Insert a label for `pc` that points at the index the next pushed instruction will get
240 |     pub fn set_label(&mut self, pc: usize) {
241 |         self.labels.insert(pc, self.instrs.len());
242 |     }
243 | 
244 |     /// Append an instruction to the graph. The pending cur_pc (if any) is moved into the
245 |     /// instruction, so only the first instruction pushed after `init_instr` carries a pc.
246 |     fn push_instr(&mut self, op: Operation, i_reg: Vec<Val>, o_reg: Option<PReg>, flags: u16) {
247 |         let pc = self.cur_pc.take();
248 |         self.instrs.push(Instruction { op, i_reg, o_reg, flags, pc });
249 |     }
250 | 
251 |     /// r1 = imm
252 |     pub fn movi32(&mut self, r1: PReg, imm: i32, flag: u16) -> PReg {
253 |         self.push_instr(Operation::Mov, vec![Imm(imm)], Some(r1), flag);
254 |         r1
255 |     }
256 | 
257 |     /// r1 = imm
258 |     pub fn movi64(&mut self, r1: PReg, imm: i64, flag: u16) -> PReg {
259 |         self.push_instr(Operation::Mov, vec![Imm64(imm)], Some(r1), flag);
260 |         r1
261 |     }
262 | 
263 |     /// r1 = r2
264 |     pub fn mov(&mut self, r1: PReg, r2: PReg, flag: u16) -> PReg {
265 |         self.push_instr(Operation::Mov, vec![Reg(r2)], Some(r1), flag);
266 |         r1
267 |     }
268 | 
269 |     /// Jmp addr
270 |     pub fn jmp(&mut self, addr: usize) {
271 |         self.push_instr(Operation::Jmp(addr), Vec::new(), None, Flag::NoFlag);
272 |     }
273 | 
274 |     /// Jmp (r1 + addr)
275 |     pub fn jmp_offset(&mut self, r1: PReg, addr: i32) {
276 |         self.push_instr(Operation::JmpOff(addr), vec![Reg(r1)], None, Flag::NoFlag);
277 |     }
278 | 
279 |     /// Branch to either false_part or true_part, flags determine what kind of compare instruction
280 |     /// is supposed to be inserted
281 |     pub fn branch(&mut self, r2: PReg, r3: PReg, true_part: usize, false_part: usize, flags: u16) {
282 |         let op = Operation::Branch(true_part, false_part);
283 |         self.push_instr(op, vec![Reg(r2), Reg(r3)], None, flags);
284 |     }
285 | 
286 |     /// r1 = [r2 + off]
287 |     pub fn load(&mut self, r1: PReg, r2: PReg, off: i32, flags: u16) -> PReg {
288 |         self.push_instr(Operation::Load, vec![Reg(r2), Imm(off)], Some(r1), flags);
289 |         r1
290 |     }
291 | 
292 |     /// [r1 + off] = r2
293 |     pub fn store(&mut self, r1: PReg, r2: PReg, off: i32, flags: u16) {
294 |         self.push_instr(Operation::Store, vec![Reg(r1), Reg(r2), Imm(off)], None, flags);
295 |     }
296 | 
297 |     /// r1 = (r2 < r3) ? 1 : 0
298 |     pub fn slt(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg {
299 |         self.push_instr(Operation::Slt, vec![Reg(r2), Reg(r3)], Some(r1), flags);
300 |         r1
301 |     }
302 | 
303 |     /// r1 = (r2 < imm) ? 1 : 0
304 |     pub fn slti(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg {
305 |         self.push_instr(Operation::Slt, vec![Reg(r2), Imm(imm)], Some(r1), flags);
306 |         r1
307 |     }
308 | 
309 |     /// r1 = r2 + r3
310 |     pub fn add(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg {
311 |         self.push_instr(Operation::Add, vec![Reg(r2), Reg(r3)], Some(r1), flags);
312 |         r1
313 |     }
314 | 
315 |     /// r1 = r2 * r3
316 |     pub fn mul(&mut self, r1: PReg, r2: PReg, r3: PReg, flag: u16) -> PReg {
317 |         self.push_instr(Operation::Mul, vec![Reg(r2), Reg(r3)], Some(r1), flag);
318 |         r1
319 |     }
320 | 
321 |     /// r1 = r2 / r3
322 |     pub fn div(&mut self, r1: PReg, r2: PReg, r3: PReg, flag: u16) -> PReg {
323 |         self.push_instr(Operation::Div, vec![Reg(r2), Reg(r3)], Some(r1), flag);
324 |         r1
325 |     }
326 | 
327 |     /// r1 = r2 + imm
328 |     pub fn addi(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg {
329 |         self.push_instr(Operation::Add, vec![Reg(r2), Imm(imm)], Some(r1), flags);
330 |         r1
331 |     }
332 | 
333 |     /// r1 = r2 - r3
334 |     pub fn sub(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg {
335 |         self.push_instr(Operation::Sub, vec![Reg(r2), Reg(r3)], Some(r1), flags);
336 |         r1
337 |     }
338 | 
339 |     /// r1 = r2 - imm
340 |     pub fn subi(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg {
341 |         self.push_instr(Operation::Sub, vec![Reg(r2), Imm(imm)], Some(r1), flags);
342 |         r1
343 |     }
344 | 
345 |     /// r1 = r2 ^ r3
346 |     pub fn xor(&mut self, r1: PReg, r2: PReg, r3: PReg) -> PReg {
347 |         self.push_instr(Operation::Xor, vec![Reg(r2), Reg(r3)], Some(r1), Flag::NoFlag);
348 |         r1
349 |     }
350 | 
351 |     /// r1 = r2 ^ imm
352 |     pub fn xori(&mut self, r1: PReg, r2: PReg, imm: i32) -> PReg {
353 |         self.push_instr(Operation::Xor, vec![Reg(r2), Imm(imm)], Some(r1), Flag::NoFlag);
354 |         r1
355 |     }
356 | 
357 |     /// r1 = r2 | r3
358 |     pub fn or(&mut self, r1: PReg, r2: PReg, r3: PReg) -> PReg {
359 |         self.push_instr(Operation::Or, vec![Reg(r2), Reg(r3)], Some(r1), Flag::NoFlag);
360 |         r1
361 |     }
362 | 
363 |     /// r1 = r2 | imm
364 |     pub fn ori(&mut self, r1: PReg, r2: PReg, imm: i32) -> PReg {
365 |         self.push_instr(Operation::Or, vec![Reg(r2), Imm(imm)], Some(r1), Flag::NoFlag);
366 |         r1
367 |     }
368 | 
369 |     /// r1 = r2 & r3
370 |     pub fn and(&mut self, r1: PReg, r2: PReg, r3: PReg) -> PReg {
371 |         self.push_instr(Operation::And, vec![Reg(r2), Reg(r3)], Some(r1), Flag::NoFlag);
372 |         r1
373 |     }
374 | 
375 |     /// r1 = r2 & imm
376 |     pub fn andi(&mut self, r1: PReg, r2: PReg, imm: i32) -> PReg {
377 |         self.push_instr(Operation::And, vec![Reg(r2), Imm(imm)], Some(r1), Flag::NoFlag);
378 |         r1
379 |     }
380 | 
381 |     /// r1 = r2 << r3
382 |     pub fn shl(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg {
383 |         self.push_instr(Operation::Shl, vec![Reg(r2), Reg(r3)], Some(r1), flags);
384 |         r1
385 |     }
386 | 
387 |     /// r1 = r2 << imm
388 |     pub fn shli(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg {
389 |         self.push_instr(Operation::Shl, vec![Reg(r2), Imm(imm)], Some(r1), flags);
390 |         r1
391 |     }
392 | 
393 |     /// r1 = r2 >> r3 (Logical)
394 |     pub fn shr(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg {
395 |         self.push_instr(Operation::Shr, vec![Reg(r2), Reg(r3)], Some(r1), flags);
396 |         r1
397 |     }
398 | 
399 |     /// r1 = r2 >> imm (Logical)
400 |     pub fn shri(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg {
401 |         self.push_instr(Operation::Shr, vec![Reg(r2), Imm(imm)], Some(r1), flags);
402 |         r1
403 |     }
404 | 
405 |     /// r1 = r2 >> r3 (Arithmetic)
406 |     pub fn sar(&mut self, r1: PReg, r2: PReg, r3: PReg, flags: u16) -> PReg {
407 |         self.push_instr(Operation::Sar, vec![Reg(r2), Reg(r3)], Some(r1), flags);
408 |         r1
409 |     }
410 | 
411 |     /// r1 = r2 >> imm (Arithmetic)
412 |     pub fn sari(&mut self, r1: PReg, r2: PReg, imm: i32, flags: u16) -> PReg {
413 |         self.push_instr(Operation::Sar, vec![Reg(r2), Imm(imm)], Some(r1), flags);
414 |         r1
415 |     }
416 | 
417 |     /// Syscall instruction
418 |     pub fn syscall(&mut self) {
419 |         self.push_instr(Operation::Syscall, Vec::new(), None, Flag::NoFlag);
420 |     }
421 | 
422 |     /// Return a hashmap that tracks the starting pc of each cfg block of this function.
423 |     ///
424 |     /// The keys are the pc of the first instruction and the pc of every instruction that has a
425 |     /// label; every value is 0. A graph without instructions yields an empty map.
426 |     ///
427 |     /// # Panics
428 |     /// Panics if the first instruction has no pc.
429 |     pub fn get_leaders(&self) -> FxHashMap<usize, usize> {
430 |         let mut leader_set: FxHashMap<usize, usize> = FxHashMap::default();
431 | 
432 |         // First instruction is always a block-leader; no instructions means no blocks
433 |         let first = match self.instrs.first() {
434 |             Some(first) => first,
435 |             None => return leader_set,
436 |         };
437 |         leader_set.insert(first.pc.expect("first IR instruction has no pc"), 0);
438 | 
439 |         // Next insert all labels that indicate the start of a block
440 |         let labeled_pcs = self.instrs.iter()
441 |             .filter_map(|instr| instr.pc)
442 |             .filter(|pc| self.labels.contains_key(pc));
443 |         leader_set.extend(labeled_pcs.map(|pc| (pc, 0)));
444 |         leader_set
445 |     }
446 | }
447 | 


--------------------------------------------------------------------------------