├── .gitignore ├── COPYING ├── Cargo.lock ├── Cargo.toml ├── README.md ├── TODO ├── logo.png ├── rustfmt.toml ├── src ├── backends │ ├── C │ │ ├── codegen.rs │ │ ├── formatter.rs │ │ ├── grammar.rs │ │ └── mod.rs │ ├── interpreter │ │ └── mod.rs │ ├── json │ │ ├── generator.rs │ │ └── mod.rs │ └── mod.rs ├── bin │ ├── compile.rs │ ├── dump.rs │ ├── fuzz.rs │ ├── gen.rs │ └── merge.rs ├── components │ ├── ffi.rs │ ├── generator.rs │ ├── input.rs │ ├── mod.rs │ └── mutator.rs ├── error.rs ├── grammar │ ├── builder.rs │ ├── cfg.rs │ └── mod.rs ├── lib.rs └── parser │ ├── gramatron.rs │ ├── mod.rs │ └── peacock.rs ├── template.c └── test-data ├── C ├── .gitignore ├── bench_generation.c ├── fuzz_mutate.c ├── fuzz_unparse.c ├── test_generation.c ├── test_mutate.c └── test_unparse.c ├── benchmarks ├── .gitignore ├── Makefile ├── README.md ├── baseline.c ├── gramatron-patch ├── grammar.json ├── grammar.postcard ├── nop.c ├── patch-libafl ├── source_automata.json └── throughput.c ├── fuzz ├── .gitignore └── main.c ├── grammars ├── duplicate_rules.json ├── gramatron.json ├── invalid-refs.json ├── mixed_rules.json ├── recursion.json ├── test-peacock.json ├── unit_rules.json └── unused_rules.json ├── libfuzzer ├── .gitignore ├── Makefile └── harness.c └── static_loading ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── build.rs ├── php.json └── src └── main.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "peacock-fuzz" 3 | description = "Library to parse context-free grammars and create grammar-based fuzzing tools" 4 | version = "0.2.4" 5 | edition = "2021" 6 | license = "GPL-3.0-only" 7 | readme = "README.md" 8 | repository = "https://github.com/z2-2z/peacock" 9 | keywords = ["fuzzing", "grammars"] 10 | include = [ 
11 | "/src", 12 | "/COPYING", 13 | "/README.md", 14 | ] 15 | 16 | [profile.release] 17 | lto = true 18 | codegen-units = 1 19 | debug = true 20 | panic = "abort" 21 | 22 | [dependencies] 23 | serde_json = { version = "1.0", features = ["default", "preserve_order"] } 24 | json_comments = "0.2" 25 | thiserror = "1.0" 26 | ahash = { version = "0.8", default-features = false, features = ["std", "compile-time-rng"] } 27 | petgraph = "0.6" 28 | itertools = "0.12" 29 | libloading = "0.8" 30 | clap = { version = "4.4", features = ["derive"] } 31 | serde = "1.0" 32 | libafl = "0.13" 33 | postcard = "1.0" 34 | libafl_bolts = "0.13" 35 | nix = "0.29" 36 | 37 | [[bin]] 38 | name = "peacock-dump" 39 | path = "src/bin/dump.rs" 40 | 41 | [[bin]] 42 | name = "peacock-fuzz" 43 | path = "src/bin/fuzz.rs" 44 | 45 | [[bin]] 46 | name = "peacock-compile" 47 | path = "src/bin/compile.rs" 48 | 49 | [[bin]] 50 | name = "peacock-merge" 51 | path = "src/bin/merge.rs" 52 | 53 | [[bin]] 54 | name = "peacock-gen" 55 | path = "src/bin/gen.rs" 56 | 57 | [features] 58 | default = ["components"] 59 | 60 | # Enables debugging of generated code by the C backend by inserting printf() statements 61 | # at the beginning of each function call. 62 | debug-codegen = [] 63 | 64 | docs-rs = ["libafl/document-features"] 65 | 66 | # For the LibAFL components: Activate this when a generator is statically compiled into the fuzzer 67 | static-loading = ["components"] 68 | 69 | # Include LibAFL components in library 70 | components = [] 71 | 72 | [package.metadata.docs.rs] 73 | features = ["docs-rs"] 74 | rustc-args = ["--cfg", "docsrs"] 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | ~~~ fuzzing with grammar-based mutations ~~~ 4 |
5 | 6 |
7 | 8 | This project is a reimplementation of [Gramatron](https://github.com/HexHive/Gramatron) that is 9 | 10 | - __performant__: 4x higher throughput than LibAFL's Gramatron implementation 11 | - __versatile__: usable with LibAFL, libfuzzer, in a custom AFL++ mutator or standalone 12 | - __easy to use__: no more orchestration of different scripts to get the fuzzing campaign running, everything is batteries-included 13 | - __extendable__: at its core, peacock is a library that you can use at your leisure to customize every step of the grammar fuzzing process 14 | - __backwards compatible__: it works with grammars that you have already written for other tools 15 | 16 | ## What's inside 17 | 1. A __standalone fuzzer__ similar to afl-fuzz that employs grammar-based mutations 18 | 2. __LibAFL components__ to build your own grammar-based fuzzer 19 | 3. Grammar mutation procedure generated as __C code__ that you can employ in other contexts 20 | 21 | ## How to use it 22 | Clone the repo and execute 23 | ``` 24 | cargo build --release 25 | ``` 26 | This creates 5 ready-to-use tools: 27 | 28 | 1. `peacock-fuzz`: A coverage-guided fuzzer that can fuzz any binary compiled with AFL++'s compilers or anything that speaks AFL's forkserver protocol 29 | 2. `peacock-dump`: peacock-fuzz saves crashes and queue items in a raw, binary format to disk. Use this tool to get a human readable output from any such file. All these binary files have the prefix `peacock-raw-` 30 | 3. `peacock-compile`: Takes a grammar and compiles it to C code 31 | 4. `peacock-merge`: Merge multiple grammar files into one or convert a grammar file from one format into another 32 | 5. `peacock-gen`: Generate individual inputs from a grammar 33 | 34 | If you want more fine-grained control you can use the crate `peacock_fuzz`, which is the backbone of all the tools from above. 35 | See the documentation at [docs.rs](https://docs.rs/peacock-fuzz) in order to get started with peacock as a library. 
36 | 37 | ## How it works 38 | Peacock is a fuzzer that implements so-called "grammar-based mutations". This means that it will mutate its inputs in such a way that they will always adhere to a given [grammar](https://en.wikipedia.org/wiki/Formal_grammar). 39 | 40 | The way mutations work is the same as in Gramatron. A grammar is converted to a [PDA](https://en.wikipedia.org/wiki/Pushdown_automaton) such that an input can be represented as a walk through the automaton. Then, a mutation of an input is simply a modification of an automaton walk. We cut off the walk at a random point and let it find a new random path through the automaton from there. 41 | 42 | While Gramatron and LibAFL realize the automaton as an adjacency matrix, 43 | peacock generates C code that encodes the automaton in its control flow. This saves us a lot of memory accesses and makes the mutation procedure faster. 44 | 45 | The generated C code exposes a certain API that can be used by any application, e.g. a libfuzzer harness, an AFL++ custom mutator or even Rust code. 46 | 47 | ## How to write grammars 48 | Peacock accepts its context-free grammars in JSON format. 49 | A context-free grammar has production rules of the form: 50 | ``` 51 | A -> X Y Z ... 52 | ``` 53 | where `A` _must_ be a non-terminal and `X`,`Y`,`Z` can be non-terminals or terminals. The right-hand-side must contain at least one symbol. 54 | 55 | Non-terminals are enclosed in `<>`, so the non-terminal `A` would be represented as `<A>`. Terminals are enclosed in `''`. 56 | 57 | The set of rules 58 | ``` 59 | A -> a B 60 | A -> a 61 | B -> b B 62 | B -> Ɛ 63 | ``` 64 | would be written as 65 | ```jsonc 66 | { 67 | // Comments are also possible :) 68 | "<A>": [ 69 | ["'a'", "<B>"], 70 | ["'a'"] 71 | ], 72 | "<B>": [ 73 | ["'b'", "<B>"], 74 | ["''"] // Ɛ = '' 75 | ] 76 | } 77 | ``` 78 | and corresponds to the regular expression `a(b*)`. 
79 | 80 | Peacock also supports the Gramatron format, which is a bit different and does not allow for comments. 81 | 82 | The non-terminal `<ENTRYPOINT>` is the entrypoint of the grammar. 83 | 84 | ## C API Documentation 85 | - `void seed_generator (size_t new_seed)` 86 | Supply a seed for the RNG of the mutator. 87 | - `size_t unparse_sequence (size_t* seq_buf, size_t seq_capacity, unsigned char* input, size_t input_len)` 88 | Given an input that adheres to the grammar, find the corresponding automaton walk. _This function may be slow, use outside of hot loop._ 89 | - `seq_buf`: Automaton walk will be written into this buffer 90 | - `seq_capacity`: Maximum number of elements that `seq_buf` can hold (not number of bytes) 91 | - `input`: User input adhering to grammar 92 | - `input_len`: Length of `input` 93 | 94 | Returns the number of elements written to `seq_buf` or 0 if input does not adhere to grammar. 95 | - `size_t mutate_sequence (size_t* buf, size_t len, size_t capacity)` 96 | Given an automaton walk, create a random mutant of the walk. 97 | - `buf`: Pointer to array that holds automaton walk 98 | - `len`: Number of items in `buf` (not number of bytes) 99 | - `capacity`: Maximum number of items that `buf` can hold (not number of bytes) 100 | 101 | Returns the length of the new walk. 102 | - `size_t serialize_sequence (size_t* seq, size_t seq_len, unsigned char* out, size_t out_len)` 103 | Given an automaton walk, create the corresponding output. 104 | - `seq`: Pointer to automaton walk 105 | - `seq_len`: Number of items in `seq` (not number of bytes) 106 | - `out`: Output will be written into that buffer 107 | - `out_len`: Number of bytes in `out` 108 | 109 | Returns how many bytes have been written to `out`. 
110 | 111 | 112 | Macros: 113 | - `MAKE_THREAD_SAFE`: Define this to make the mutator completely thread-safe 114 | - `MAKE_VISIBLE`: Define this to explicitly set the visibility of the functions from above to "default" 115 | - `STATIC_SEED=`: Compile-time seed for the RNG 116 | - `DISABLE_rand`: Don't include the internal `rand` function and use an external one with the signature `size_t rand (void)` 117 | - `DISABLE_seed_generator`: Don't include the function `seed_generator` 118 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | - comment functions in CGenerator: headline, annotation 2 | - peacock-gen take input on stdin if stdin is pipe, parse it and print mutation 3 | - reproducible benchmarks 4 | - rename peacock-raw filename to peacock-seq 5 | - better handling of static-loading in components/ffi.rs 6 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z2-2z/peacock/f2d053cf0e198be03220b6d2e8be4e1fd26be86a/logo.png -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 120 2 | use_small_heuristics = "Max" 3 | edition = "2021" 4 | match_block_trailing_comma = true 5 | use_field_init_shorthand = true 6 | struct_lit_width = 0 7 | imports_granularity = "Crate" 8 | imports_layout = "Vertical" 9 | -------------------------------------------------------------------------------- /src/backends/C/codegen.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | use std::{ 3 | fs::File, 4 | io::Write, 5 | path::Path, 6 | }; 7 | 8 | use crate::{ 9 | backends::C::{ 10 | formatter::CFormatter, 11 | 
grammar::{ 12 | LLSymbol, 13 | LowLevelGrammar, 14 | }, 15 | }, 16 | grammar::ContextFreeGrammar, 17 | }; 18 | 19 | fn rule_has_nonterminals(rule: &[LLSymbol]) -> bool { 20 | for symbol in rule { 21 | if matches!(symbol, LLSymbol::NonTerminal(_)) { 22 | return true; 23 | } 24 | } 25 | 26 | false 27 | } 28 | 29 | fn rules_have_nonterminals(rules: &[Vec]) -> bool { 30 | for rule in rules { 31 | if rule_has_nonterminals(rule) { 32 | return true; 33 | } 34 | } 35 | 36 | false 37 | } 38 | 39 | fn rule_has_terminals(rule: &[LLSymbol]) -> bool { 40 | for symbol in rule { 41 | if matches!(symbol, LLSymbol::Terminal(_)) { 42 | return true; 43 | } 44 | } 45 | 46 | false 47 | } 48 | 49 | fn rules_have_terminals(rules: &[Vec]) -> bool { 50 | for rule in rules { 51 | if rule_has_terminals(rule) { 52 | return true; 53 | } 54 | } 55 | 56 | false 57 | } 58 | 59 | fn emit_includes(fmt: &mut CFormatter) { 60 | #[cfg(feature = "debug-codegen")] 61 | fmt.write("#include "); 62 | 63 | fmt.write("#include "); 64 | fmt.blankline(); 65 | } 66 | 67 | fn emit_macros(fmt: &mut CFormatter) { 68 | fmt.write("/* Helper Macros */"); 69 | 70 | fmt.write("#undef THREAD_LOCAL"); 71 | fmt.write("#ifdef MAKE_THREAD_SAFE"); 72 | fmt.write("#define THREAD_LOCAL __thread"); 73 | fmt.write("#else"); 74 | fmt.write("#define THREAD_LOCAL"); 75 | fmt.write("#endif"); 76 | fmt.blankline(); 77 | 78 | fmt.write("#undef UNLIKELY"); 79 | fmt.write("#define UNLIKELY(x) __builtin_expect(!!(x), 0)"); 80 | fmt.write("#undef LIKELY"); 81 | fmt.write("#define LIKELY(x) __builtin_expect(!!(x), 1)"); 82 | fmt.blankline(); 83 | 84 | fmt.write("#ifndef __clang__"); 85 | fmt.write("#undef __builtin_memcpy_inline"); 86 | fmt.write("#define __builtin_memcpy_inline __builtin_memcpy"); 87 | fmt.write("#endif"); 88 | fmt.blankline(); 89 | 90 | fmt.write("#undef EXPORT_FUNCTION"); 91 | fmt.write("#ifdef MAKE_VISIBLE"); 92 | fmt.write("#define EXPORT_FUNCTION __attribute__((visibility (\"default\")))"); 93 | fmt.write("#else"); 
94 | fmt.write("#define EXPORT_FUNCTION"); 95 | fmt.write("#endif"); 96 | fmt.blankline(); 97 | } 98 | 99 | fn emit_rand(fmt: &mut CFormatter) { 100 | fmt.write("/* RNG */"); 101 | 102 | fmt.write("#ifndef STATIC_SEED"); 103 | fmt.write(" #define STATIC_SEED 0x35c6be9ba2548264"); 104 | fmt.write("#endif"); 105 | fmt.blankline(); 106 | 107 | fmt.write("static THREAD_LOCAL size_t rand_state = STATIC_SEED;"); 108 | fmt.blankline(); 109 | 110 | fmt.write("#ifndef DISABLE_rand"); 111 | fmt.write("static inline size_t rand (void) {"); 112 | fmt.indent(); 113 | fmt.write("size_t x = rand_state;"); 114 | fmt.write("x ^= x << 13;"); 115 | fmt.write("x ^= x >> 7;"); 116 | fmt.write("x ^= x << 17;"); 117 | fmt.write("return rand_state = x;"); 118 | fmt.unindent(); 119 | fmt.write("}"); 120 | fmt.write("#else"); 121 | fmt.write("size_t rand (void);"); 122 | fmt.write("#endif"); 123 | fmt.blankline(); 124 | 125 | fmt.write("#ifndef DISABLE_seed_generator"); 126 | fmt.write("EXPORT_FUNCTION"); 127 | fmt.write("void seed_generator (size_t new_seed) {"); 128 | fmt.indent(); 129 | fmt.write("if (!new_seed) {"); 130 | fmt.indent(); 131 | fmt.write("new_seed = 0xDEADBEEF;"); 132 | fmt.unindent(); 133 | fmt.write("}"); 134 | fmt.blankline(); 135 | fmt.write("rand_state = new_seed;"); 136 | fmt.unindent(); 137 | fmt.write("}"); 138 | fmt.write("#else"); 139 | fmt.write("void seed_generator (size_t);"); 140 | fmt.write("#endif"); 141 | fmt.blankline(); 142 | } 143 | 144 | fn emit_types(fmt: &mut CFormatter) { 145 | fmt.write("// Used to represent a sequence of rules"); 146 | fmt.write("typedef struct {"); 147 | fmt.indent(); 148 | fmt.write("size_t* buf;"); 149 | fmt.write("size_t len;"); 150 | fmt.write("size_t capacity;"); 151 | fmt.unindent(); 152 | fmt.write("} Sequence;"); 153 | fmt.blankline(); 154 | } 155 | 156 | fn emit_mutation_declarations(grammar: &LowLevelGrammar, fmt: &mut CFormatter) { 157 | fmt.write("/* Forward declarations for sequence mutation functions */"); 158 | 159 
| for nonterm in grammar.rules().keys() { 160 | fmt.write(format!( 161 | "static int mutate_seq_nonterm{} (size_t* const, size_t* const, const size_t, size_t* const);", 162 | *nonterm 163 | )); 164 | } 165 | 166 | fmt.blankline(); 167 | } 168 | 169 | fn emit_mutation_function_rule(rule: &[LLSymbol], fmt: &mut CFormatter) { 170 | for symbol in rule { 171 | if let LLSymbol::NonTerminal(dst) = symbol { 172 | fmt.write(format!("if (UNLIKELY(!mutate_seq_nonterm{}(buf, len, capacity, step))) {{", dst.id())); 173 | fmt.indent(); 174 | fmt.write("return 0;"); 175 | fmt.unindent(); 176 | fmt.write("}"); 177 | fmt.blankline(); 178 | } 179 | } 180 | } 181 | 182 | fn emit_mutation_function_single(rule: &[LLSymbol], fmt: &mut CFormatter) { 183 | fmt.write("size_t idx = *len;"); 184 | fmt.blankline(); 185 | fmt.write("if (*step >= idx) {"); 186 | fmt.indent(); 187 | fmt.write("if (UNLIKELY(idx >= capacity)) {"); 188 | fmt.indent(); 189 | fmt.write("return 0;"); 190 | fmt.unindent(); 191 | fmt.write("}"); 192 | fmt.blankline(); 193 | fmt.write("buf[idx] = 0;"); 194 | fmt.write("*len = idx + 1;"); 195 | fmt.unindent(); 196 | fmt.write("}"); 197 | fmt.blankline(); 198 | 199 | fmt.write("*step += 1;"); 200 | fmt.blankline(); 201 | 202 | emit_mutation_function_rule(rule, fmt); 203 | 204 | fmt.write("return 1;"); 205 | } 206 | 207 | fn emit_mutation_function_multiple(rules: &[Vec], fmt: &mut CFormatter) { 208 | let have_nonterminals = rules_have_nonterminals(rules); 209 | 210 | fmt.write("size_t idx = *len;"); 211 | fmt.write("size_t target;"); 212 | fmt.blankline(); 213 | 214 | if have_nonterminals { 215 | fmt.write("if (*step < idx) {"); 216 | fmt.indent(); 217 | fmt.write("target = buf[*step];"); 218 | fmt.unindent(); 219 | fmt.write("} else {"); 220 | } else { 221 | fmt.write("if (*step >= idx) {"); 222 | } 223 | 224 | fmt.indent(); 225 | fmt.write("if (UNLIKELY(idx >= capacity)) {"); 226 | fmt.indent(); 227 | fmt.write("return 0;"); 228 | fmt.unindent(); 229 | fmt.write("}"); 230 
| fmt.blankline(); 231 | fmt.write(format!("target = rand() % {};", rules.len())); 232 | fmt.write("buf[idx] = target;"); 233 | fmt.write("*len = idx + 1;"); 234 | fmt.unindent(); 235 | fmt.write("}"); 236 | fmt.blankline(); 237 | 238 | fmt.write("*step += 1;"); 239 | fmt.blankline(); 240 | 241 | if have_nonterminals { 242 | fmt.write("switch (target) {"); 243 | fmt.indent(); 244 | 245 | for (i, rule) in rules.iter().enumerate() { 246 | fmt.write(format!("case {}: {{", i)); 247 | fmt.indent(); 248 | 249 | emit_mutation_function_rule(rule, fmt); 250 | 251 | fmt.write("break;"); 252 | fmt.unindent(); 253 | fmt.write("}"); 254 | } 255 | 256 | fmt.write("default: {"); 257 | fmt.indent(); 258 | fmt.write("__builtin_unreachable();"); 259 | fmt.unindent(); 260 | fmt.write("}"); 261 | 262 | fmt.unindent(); 263 | fmt.write("}"); 264 | fmt.blankline(); 265 | } 266 | 267 | fmt.write("return 1;"); 268 | } 269 | 270 | fn emit_mutation_function( 271 | nonterm: usize, 272 | rules: &[Vec], 273 | grammar: &LowLevelGrammar, 274 | fmt: &mut CFormatter, 275 | ) { 276 | fmt.write(format!( 277 | "// This is the sequence mutation function for non-terminal {:?}", 278 | grammar.nonterminals()[nonterm] 279 | )); 280 | fmt.write(format!("static int mutate_seq_nonterm{} (size_t* const buf, size_t* const len, const size_t capacity, size_t* const step) {{", nonterm)); 281 | fmt.indent(); 282 | 283 | if rules.is_empty() { 284 | unreachable!() 285 | } else if rules.len() == 1 { 286 | emit_mutation_function_single(&rules[0], fmt); 287 | } else { 288 | emit_mutation_function_multiple(rules, fmt); 289 | } 290 | 291 | fmt.unindent(); 292 | fmt.write("}"); 293 | fmt.blankline(); 294 | } 295 | 296 | fn emit_mutation_entrypoint(grammar: &LowLevelGrammar, fmt: &mut CFormatter) { 297 | fmt.write("EXPORT_FUNCTION"); 298 | fmt.write("size_t mutate_sequence (size_t* buf, size_t len, const size_t capacity) {"); 299 | fmt.indent(); 300 | 301 | #[cfg(feature = "debug-codegen")] 302 | fmt.write("printf(\"Calling 
mutate_sequence(%p, %lu, %lu)\\n\", buf, len, capacity);"); 303 | 304 | fmt.write("if (UNLIKELY(!buf | !capacity)) {"); 305 | fmt.indent(); 306 | fmt.write("return 0;"); 307 | fmt.unindent(); 308 | fmt.write("}"); 309 | 310 | fmt.write("size_t step = 0;"); 311 | fmt.write(format!("mutate_seq_nonterm{}(buf, &len, capacity, &step);", grammar.entrypoint().id())); 312 | fmt.write("return len;"); 313 | 314 | fmt.unindent(); 315 | fmt.write("}"); 316 | fmt.blankline(); 317 | } 318 | 319 | fn emit_mutation_code(grammar: &LowLevelGrammar, fmt: &mut CFormatter) { 320 | emit_mutation_declarations(grammar, fmt); 321 | 322 | for (nonterm, rules) in grammar.rules() { 323 | emit_mutation_function(*nonterm, rules, grammar, fmt); 324 | } 325 | 326 | emit_mutation_entrypoint(grammar, fmt); 327 | } 328 | 329 | fn emit_terminals(grammar: &LowLevelGrammar, fmt: &mut CFormatter) { 330 | fmt.write("/* Terminals */"); 331 | 332 | for (i, term) in grammar.terminals().iter().enumerate() { 333 | let term = term.as_bytes(); 334 | 335 | fmt.write(format!("static const unsigned char TERM{}[{}] = {{", i, term.len())); 336 | fmt.indent(); 337 | 338 | for chunk in term.chunks(8) { 339 | let x: Vec = chunk.iter().map(|x| format!("{:#02X},", *x)).collect(); 340 | fmt.write(x.join(" ")); 341 | } 342 | 343 | fmt.unindent(); 344 | fmt.write("};"); 345 | } 346 | 347 | fmt.blankline(); 348 | } 349 | 350 | fn emit_serialization_declarations(grammar: &LowLevelGrammar, fmt: &mut CFormatter) { 351 | fmt.write("/* Forward declarations for serialization functions */"); 352 | 353 | for nonterm in grammar.rules().keys() { 354 | fmt.write(format!("static size_t serialize_seq_nonterm{} (const size_t* const, const size_t, unsigned char*, size_t, size_t* const);", *nonterm)); 355 | } 356 | 357 | fmt.blankline(); 358 | } 359 | 360 | fn emit_serialization_function_rule(rule: &[LLSymbol], fmt: &mut CFormatter) { 361 | for symbol in rule { 362 | match symbol { 363 | LLSymbol::NonTerminal(nonterm) => { 364 | 
fmt.write(format!("len = serialize_seq_nonterm{}(seq, seq_len, out, out_len, step);", nonterm.id())); 365 | fmt.write("out += len; out_len -= len;"); 366 | fmt.blankline(); 367 | }, 368 | LLSymbol::Terminal(term) => { 369 | fmt.write(format!("if (UNLIKELY(out_len < sizeof(TERM{}))) {{", term.id())); 370 | fmt.indent(); 371 | fmt.write("goto end;"); 372 | fmt.unindent(); 373 | fmt.write("}"); 374 | fmt.write(format!("__builtin_memcpy_inline(out, TERM{0}, sizeof(TERM{0}));", term.id())); 375 | fmt.write(format!("out += sizeof(TERM{0}); out_len -= sizeof(TERM{0});", term.id())); 376 | fmt.blankline(); 377 | }, 378 | } 379 | } 380 | } 381 | 382 | fn emit_serialization_function_single(rule: &[LLSymbol], fmt: &mut CFormatter) { 383 | let has_nonterminals = rule_has_nonterminals(rule); 384 | 385 | if !has_nonterminals { 386 | fmt.write("(void) seq;"); 387 | fmt.blankline(); 388 | } 389 | 390 | fmt.write("if (UNLIKELY(*step >= seq_len)) {"); 391 | fmt.indent(); 392 | fmt.write("return 0;"); 393 | fmt.unindent(); 394 | fmt.write("}"); 395 | fmt.blankline(); 396 | 397 | if has_nonterminals { 398 | fmt.write("size_t len;"); 399 | } 400 | 401 | fmt.write("unsigned char* original_out = out;"); 402 | fmt.write("*step += 1;"); 403 | fmt.blankline(); 404 | 405 | emit_serialization_function_rule(rule, fmt); 406 | 407 | if rule_has_terminals(rule) { 408 | fmt.write("end:"); 409 | } 410 | fmt.write("return (size_t) (out - original_out);"); 411 | } 412 | 413 | fn emit_serialization_function_multiple(rules: &[Vec], fmt: &mut CFormatter) { 414 | fmt.write("if (UNLIKELY(*step >= seq_len)) {"); 415 | fmt.indent(); 416 | fmt.write("return 0;"); 417 | fmt.unindent(); 418 | fmt.write("}"); 419 | fmt.blankline(); 420 | 421 | if rules_have_nonterminals(rules) { 422 | fmt.write("size_t len;"); 423 | } 424 | 425 | fmt.write("unsigned char* original_out = out;"); 426 | fmt.write("size_t target = seq[*step];"); 427 | fmt.write("*step += 1;"); 428 | fmt.blankline(); 429 | 430 | fmt.write("switch 
(target) {"); 431 | fmt.indent(); 432 | 433 | for (i, rule) in rules.iter().enumerate() { 434 | fmt.write(format!("case {}: {{", i)); 435 | fmt.indent(); 436 | 437 | emit_serialization_function_rule(rule, fmt); 438 | 439 | fmt.write("break;"); 440 | fmt.unindent(); 441 | fmt.write("}"); 442 | } 443 | 444 | fmt.write("default: {"); 445 | fmt.indent(); 446 | fmt.write("__builtin_unreachable();"); 447 | fmt.unindent(); 448 | fmt.write("}"); 449 | 450 | fmt.unindent(); 451 | fmt.write("}"); 452 | fmt.blankline(); 453 | 454 | if rules_have_terminals(rules) { 455 | fmt.write("end:"); 456 | } 457 | fmt.write("return (size_t) (out - original_out);"); 458 | } 459 | 460 | fn emit_serialization_function( 461 | nonterm: usize, 462 | rules: &[Vec], 463 | grammar: &LowLevelGrammar, 464 | fmt: &mut CFormatter, 465 | ) { 466 | fmt.write(format!("// This is the serialization function for non-terminal {:?}", grammar.nonterminals()[nonterm])); 467 | fmt.write(format!("static size_t serialize_seq_nonterm{} (const size_t* const seq, const size_t seq_len, unsigned char* out, size_t out_len, size_t* const step) {{", nonterm)); 468 | fmt.indent(); 469 | 470 | #[cfg(feature = "debug-codegen")] 471 | fmt.write(format!( 472 | "printf(\"Serializing %s (%lu/%lu)\\n\", {:?}, *step + 1, seq_len);", 473 | grammar.nonterminals()[nonterm] 474 | )); 475 | 476 | if rules.is_empty() { 477 | unreachable!() 478 | } else if rules.len() == 1 { 479 | emit_serialization_function_single(&rules[0], fmt); 480 | } else { 481 | emit_serialization_function_multiple(rules, fmt); 482 | } 483 | 484 | fmt.unindent(); 485 | fmt.write("}"); 486 | fmt.blankline(); 487 | } 488 | 489 | fn emit_serialization_entrypoint(grammar: &LowLevelGrammar, fmt: &mut CFormatter) { 490 | fmt.write("EXPORT_FUNCTION"); 491 | fmt.write("size_t serialize_sequence (const size_t* seq, const size_t seq_len, unsigned char* out, const size_t out_len) {"); 492 | fmt.indent(); 493 | 494 | fmt.write("if (UNLIKELY(!seq || !seq_len || !out || 
!out_len)) {"); 495 | fmt.indent(); 496 | fmt.write("return 0;"); 497 | fmt.unindent(); 498 | fmt.write("}"); 499 | 500 | fmt.write("size_t step = 0;"); 501 | fmt.write(format!("return serialize_seq_nonterm{}(seq, seq_len, out, out_len, &step);", grammar.entrypoint().id())); 502 | fmt.unindent(); 503 | fmt.write("}"); 504 | fmt.blankline(); 505 | } 506 | 507 | fn emit_serialization_code(grammar: &LowLevelGrammar, fmt: &mut CFormatter) { 508 | emit_terminals(grammar, fmt); 509 | emit_serialization_declarations(grammar, fmt); 510 | 511 | for (nonterm, rules) in grammar.rules() { 512 | emit_serialization_function(*nonterm, rules, grammar, fmt); 513 | } 514 | 515 | emit_serialization_entrypoint(grammar, fmt); 516 | } 517 | 518 | fn emit_header( 519 | mut outfile: File, 520 | mutations: bool, 521 | serializations: bool, 522 | unparsing: bool, 523 | ) -> Result<(), std::io::Error> { 524 | write!( 525 | &mut outfile, 526 | " 527 | #ifndef __PEACOCK_GENERATOR_H 528 | #define __PEACOCK_GENERATOR_H 529 | 530 | #include 531 | " 532 | )?; 533 | 534 | if mutations { 535 | writeln!(&mut outfile, "size_t mutate_sequence (size_t* buf, size_t len, const size_t capacity);")?; 536 | } 537 | 538 | if serializations { 539 | writeln!(&mut outfile, "size_t serialize_sequence (const size_t* seq, const size_t seq_len, unsigned char* out, const size_t out_len);")?; 540 | } 541 | 542 | if unparsing { 543 | writeln!(&mut outfile, "size_t unparse_sequence (size_t* seq_buf, const size_t seq_capacity, const unsigned char* input, const size_t input_len);")?; 544 | } 545 | 546 | write!( 547 | &mut outfile, 548 | " 549 | void seed_generator (size_t new_seed); 550 | 551 | 552 | #endif /* __PEACOCK_GENERATOR_H */ 553 | " 554 | )?; 555 | 556 | Ok(()) 557 | } 558 | 559 | fn emit_unparsing_declarations(grammar: &LowLevelGrammar, fmt: &mut CFormatter) { 560 | fmt.write("/* Forward declarations for unparsing functions */"); 561 | 562 | for nonterm in grammar.rules().keys() { 563 | 
fmt.write(format!("static int unparse_seq_nonterm{} (Sequence* const, const unsigned char* const, const size_t, size_t* const);", *nonterm)); 564 | } 565 | 566 | fmt.blankline(); 567 | } 568 | 569 | fn emit_unparsing_function( 570 | nonterm: usize, 571 | rules: &[Vec], 572 | grammar: &LowLevelGrammar, 573 | fmt: &mut CFormatter, 574 | ) { 575 | fmt.write(format!("// This is the unparsing function for non-terminal {:?}", grammar.nonterminals()[nonterm])); 576 | fmt.write(format!("static int unparse_seq_nonterm{} (Sequence* const seq, const unsigned char* const input, const size_t input_len, size_t* const cursor) {{", nonterm)); 577 | fmt.indent(); 578 | 579 | fmt.write("size_t seq_idx = seq->len;"); 580 | fmt.blankline(); 581 | fmt.write("if (UNLIKELY(seq_idx >= seq->capacity)) {"); 582 | fmt.indent(); 583 | fmt.write("return 0;"); 584 | fmt.unindent(); 585 | fmt.write("}"); 586 | fmt.blankline(); 587 | 588 | fmt.write("size_t target_cursor = 0;"); 589 | fmt.write("size_t target_id = (size_t) -1LL;"); 590 | fmt.write("size_t target_seq_len = seq_idx;"); 591 | fmt.blankline(); 592 | 593 | for (i, rule) in rules.iter().enumerate().sorted_by(|(_, a), (_, b)| b.len().cmp(&a.len())) { 594 | fmt.write(format!("// Rule #{}", i)); 595 | fmt.write("do {"); 596 | fmt.indent(); 597 | fmt.write("seq->len = seq_idx + 1;"); 598 | fmt.write("size_t tmp_cursor = *cursor;"); 599 | fmt.blankline(); 600 | 601 | for symbol in rule { 602 | match symbol { 603 | LLSymbol::Terminal(term) => { 604 | fmt.write(format!( 605 | "if (UNLIKELY(input_len - tmp_cursor < sizeof(TERM{0})) || __builtin_memcmp(&input[tmp_cursor], TERM{0}, sizeof(TERM{0})) != 0) {{", 606 | term.id() 607 | )); 608 | fmt.indent(); 609 | fmt.write("break;"); 610 | fmt.unindent(); 611 | fmt.write("}"); 612 | fmt.write(format!("tmp_cursor += sizeof(TERM{0});", term.id())); 613 | fmt.blankline(); 614 | }, 615 | LLSymbol::NonTerminal(nonterm) => { 616 | fmt.write(format!( 617 | "if (!unparse_seq_nonterm{}(seq, input, 
input_len, &tmp_cursor)) {{", 618 | nonterm.id() 619 | )); 620 | fmt.indent(); 621 | fmt.write("break;"); 622 | fmt.unindent(); 623 | fmt.write("}"); 624 | fmt.blankline(); 625 | }, 626 | } 627 | } 628 | 629 | fmt.write("if (tmp_cursor > target_cursor) {"); 630 | fmt.indent(); 631 | fmt.write(format!("target_id = {};", i)); 632 | fmt.write("target_cursor = tmp_cursor;"); 633 | fmt.write("target_seq_len = seq->len;"); 634 | fmt.unindent(); 635 | fmt.write("}"); 636 | 637 | fmt.unindent(); 638 | fmt.write("} while (0);"); 639 | fmt.blankline(); 640 | } 641 | 642 | fmt.write("seq->len = target_seq_len;"); 643 | fmt.blankline(); 644 | 645 | fmt.write(format!("if (target_id < {}) {{", rules.len())); 646 | fmt.indent(); 647 | fmt.write("*cursor = target_cursor;"); 648 | fmt.write("seq->buf[seq_idx] = target_id;"); 649 | fmt.write("return 1;"); 650 | fmt.unindent(); 651 | fmt.write("} else {"); 652 | fmt.indent(); 653 | fmt.write("return 0;"); 654 | fmt.unindent(); 655 | fmt.write("}"); 656 | 657 | fmt.unindent(); 658 | fmt.write("}"); 659 | fmt.blankline(); 660 | } 661 | 662 | fn emit_unparsing_entrypoint(grammar: &LowLevelGrammar, fmt: &mut CFormatter) { 663 | fmt.write("EXPORT_FUNCTION"); 664 | fmt.write("size_t unparse_sequence (size_t* seq_buf, const size_t seq_capacity, const unsigned char* input, const size_t input_len) {"); 665 | fmt.indent(); 666 | 667 | fmt.write("if (UNLIKELY(!seq_buf || !seq_capacity || !input || !input_len)) {"); 668 | fmt.indent(); 669 | fmt.write("return 0;"); 670 | fmt.unindent(); 671 | fmt.write("}"); 672 | 673 | fmt.write("Sequence seq = {"); 674 | fmt.indent(); 675 | fmt.write(".buf = seq_buf,"); 676 | fmt.write(".len = 0,"); 677 | fmt.write(".capacity = seq_capacity,"); 678 | fmt.unindent(); 679 | fmt.write("};"); 680 | fmt.write("size_t cursor = 0;"); 681 | fmt.write(format!("if (!unparse_seq_nonterm{}(&seq, input, input_len, &cursor)) {{", grammar.entrypoint().id())); 682 | fmt.indent(); 683 | fmt.write("return 0;"); 684 | 
fmt.unindent(); 685 | fmt.write("} else { "); 686 | fmt.indent(); 687 | fmt.write("return seq.len;"); 688 | fmt.unindent(); 689 | fmt.write("}"); 690 | fmt.unindent(); 691 | fmt.write("}"); 692 | fmt.blankline(); 693 | } 694 | 695 | fn emit_unparsing_code(grammar: &LowLevelGrammar, fmt: &mut CFormatter) { 696 | emit_unparsing_declarations(grammar, fmt); 697 | 698 | for (nonterm, rules) in grammar.rules() { 699 | emit_unparsing_function(*nonterm, rules, grammar, fmt); 700 | } 701 | 702 | emit_unparsing_entrypoint(grammar, fmt); 703 | } 704 | 705 | /// This is the main struct of the [`C`](crate::backends::C) backend that does all the heavy lifting and generates the code. 706 | /// 707 | /// For documentation of the generated C code see the [README](https://github.com/z2-2z/peacock#c-api-documentation) of this project. 708 | pub struct CGenerator { 709 | header: bool, 710 | mutations: bool, 711 | serializations: bool, 712 | unparsing: bool, 713 | } 714 | 715 | impl CGenerator { 716 | /// Create a new CGenerator. 717 | #[allow(clippy::new_without_default)] 718 | pub fn new() -> Self { 719 | Self { 720 | header: true, 721 | mutations: true, 722 | serializations: true, 723 | unparsing: true, 724 | } 725 | } 726 | 727 | /// Also generate a .h file with all the definitions of the public C API of the generated code. 728 | /// 729 | /// Default: `true` 730 | pub fn generate_header(mut self, flag: bool) -> Self { 731 | self.header = flag; 732 | self 733 | } 734 | 735 | /// Emit code that realizes the mutation of an automaton walk. 736 | /// 737 | /// Default: `true` 738 | pub fn emit_mutation_procedure(mut self, flag: bool) -> Self { 739 | self.mutations = flag; 740 | self 741 | } 742 | 743 | /// Emit code that realizes the serialization of automaton walks into human-readable output. 
744 | /// 745 | /// Default: `true` 746 | pub fn emit_serialization_procedure(mut self, flag: bool) -> Self { 747 | self.serializations = flag; 748 | self 749 | } 750 | 751 | /// Emit code that realizes the unparsing of user inputs into automaton walks. 752 | /// 753 | /// Default: `true` 754 | pub fn emit_unparsing_procedure(mut self, flag: bool) -> Self { 755 | self.unparsing = flag; 756 | self 757 | } 758 | 759 | /// Generate the C code for the given grammar `grammar` and write it to `output`. 760 | pub fn generate>(self, output: P, grammar: &ContextFreeGrammar) { 761 | let grammar = LowLevelGrammar::from_high_level_grammar(grammar); 762 | let outfile = File::create(output.as_ref()).expect("Could not create source file"); 763 | let mut formatter = CFormatter::new(outfile); 764 | 765 | emit_includes(&mut formatter); 766 | emit_macros(&mut formatter); 767 | emit_types(&mut formatter); 768 | emit_rand(&mut formatter); 769 | 770 | if self.mutations { 771 | emit_mutation_code(&grammar, &mut formatter); 772 | } 773 | 774 | if self.serializations { 775 | emit_serialization_code(&grammar, &mut formatter); 776 | } 777 | 778 | if self.unparsing { 779 | emit_unparsing_code(&grammar, &mut formatter); 780 | } 781 | 782 | if self.header { 783 | let mut outfile = output.as_ref().to_path_buf(); 784 | outfile.set_extension("h"); 785 | let outfile = File::create(outfile).expect("Could not create header file"); 786 | emit_header(outfile, self.mutations, self.serializations, self.unparsing) 787 | .expect("Could not write to header file"); 788 | } 789 | } 790 | } 791 | 792 | #[cfg(test)] 793 | mod tests { 794 | use super::*; 795 | 796 | #[test] 797 | fn test_generator() { 798 | let cfg = ContextFreeGrammar::builder() 799 | .gramatron_grammar("test-data/grammars/gramatron.json") 800 | .unwrap() 801 | .build() 802 | .unwrap(); 803 | CGenerator::new().generate("/tmp/out.c", &cfg); 804 | } 805 | } 806 | -------------------------------------------------------------------------------- 
/// Pretty-printer for generated C code that tracks the current
/// indentation level in steps of 4 spaces.
pub struct CFormatter<T: Write> {
    // Buffered destination for the generated code.
    stream: BufWriter<T>,
    // Current indentation in spaces (always a multiple of 4).
    indentation: usize,
}

impl<T: Write> CFormatter<T> {
    /// Wrap `writer` in a new formatter starting at indentation level 0.
    pub fn new(writer: T) -> Self {
        Self {
            stream: BufWriter::new(writer),
            indentation: 0,
        }
    }

    /// Increase the indentation by one level (4 spaces).
    pub fn indent(&mut self) {
        self.indentation += 4;
    }

    /// Decrease the indentation by one level, never going below column 0.
    pub fn unindent(&mut self) {
        self.indentation = self.indentation.saturating_sub(4);
    }

    /// Emit `line` at the current indentation, followed by a newline.
    pub fn write<S: AsRef<str>>(&mut self, line: S) {
        let pad = " ".repeat(self.indentation);
        writeln!(&mut self.stream, "{}{}", pad, line.as_ref()).expect("Could not write to outfile");
    }

    /// Emit a completely empty line (no indentation).
    pub fn blankline(&mut self) {
        writeln!(&mut self.stream).expect("Could not write to outfile");
    }
}
NonTerminal(LLNonTerminal), 30 | } 31 | 32 | pub struct LowLevelGrammar { 33 | rules: HashMap>>, 34 | terminals: Vec, 35 | nonterminals: Vec, 36 | entrypoint: LLNonTerminal, 37 | } 38 | 39 | impl LowLevelGrammar { 40 | pub fn from_high_level_grammar(grammar: &ContextFreeGrammar) -> Self { 41 | let mut rules = HashMap::new(); 42 | let mut nonterm_map = HashMap::new(); 43 | let mut nonterminals = Vec::new(); 44 | let mut term_map = HashMap::new(); 45 | let mut terminals = Vec::new(); 46 | 47 | for rule in grammar.rules() { 48 | let lhs_id = *nonterm_map.entry(rule.lhs().id()).or_insert_with(|| { 49 | let ret = nonterminals.len(); 50 | nonterminals.push(rule.lhs().id().to_string()); 51 | ret 52 | }); 53 | let mut ll_symbols = Vec::new(); 54 | 55 | for symbol in rule.rhs() { 56 | match symbol { 57 | Symbol::Terminal(term) => { 58 | let id = *term_map.entry(term.content()).or_insert_with(|| { 59 | let ret = terminals.len(); 60 | terminals.push(term.content().to_string()); 61 | ret 62 | }); 63 | ll_symbols.push(LLSymbol::Terminal(LLTerminal(id))); 64 | }, 65 | Symbol::NonTerminal(nonterm) => { 66 | let id = *nonterm_map.entry(nonterm.id()).or_insert_with(|| { 67 | let ret = nonterminals.len(); 68 | nonterminals.push(nonterm.id().to_string()); 69 | ret 70 | }); 71 | ll_symbols.push(LLSymbol::NonTerminal(LLNonTerminal(id))); 72 | }, 73 | } 74 | } 75 | 76 | rules.entry(lhs_id).or_insert_with(Vec::new).push(ll_symbols); 77 | } 78 | 79 | Self { 80 | rules, 81 | terminals, 82 | nonterminals, 83 | entrypoint: LLNonTerminal(*nonterm_map.get(grammar.entrypoint().id()).unwrap()), 84 | } 85 | } 86 | 87 | pub fn rules(&self) -> &HashMap>> { 88 | &self.rules 89 | } 90 | 91 | pub fn terminals(&self) -> &[String] { 92 | &self.terminals 93 | } 94 | 95 | pub fn nonterminals(&self) -> &[String] { 96 | &self.nonterminals 97 | } 98 | 99 | pub fn entrypoint(&self) -> &LLNonTerminal { 100 | &self.entrypoint 101 | } 102 | } 103 | 104 | #[cfg(test)] 105 | mod tests { 106 | use super::*; 107 | 
108 | #[test] 109 | fn test_ll() { 110 | let cfg = ContextFreeGrammar::builder() 111 | .peacock_grammar("test-data/grammars/unit_rules.json") 112 | .unwrap() 113 | .build() 114 | .unwrap(); 115 | let ll = LowLevelGrammar::from_high_level_grammar(&cfg); 116 | println!("{:#?}", ll.rules()); 117 | println!("terminals = {:?}", ll.terminals()); 118 | println!("nonterminals = {:?}", ll.nonterminals()); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/backends/C/mod.rs: -------------------------------------------------------------------------------- 1 | //! Generate a grammar-based mutator in C. 2 | //! 3 | //! Use it like so: 4 | //! ``` 5 | //! // First, load a grammar from disk 6 | //! let grammar = ContextFreeGrammar::builder() 7 | //! .peacock_grammar("my-grammar.json").unwrap() 8 | //! .build().unwrap(); 9 | //! 10 | //! // Then, generate grammar-based mutator code and write it into mutator.c 11 | //! CGenerator::new().generate("mutator.c", &grammar); 12 | //! ``` 13 | //! 14 | //! The API is documented in the [README](https://github.com/z2-2z/peacock#c-api-documentation) of this project. 15 | 16 | mod codegen; 17 | mod formatter; 18 | mod grammar; 19 | 20 | pub use codegen::CGenerator; 21 | pub(crate) use grammar::*; 22 | -------------------------------------------------------------------------------- /src/backends/interpreter/mod.rs: -------------------------------------------------------------------------------- 1 | //! Generate inputs by interpreting the rules of the grammar. 2 | //! 3 | //! Use it like so: 4 | //! ``` 5 | //! // First, load a grammar from disk 6 | //! let grammar = ContextFreeGrammar::builder() 7 | //! .peacock_grammar("my-grammar.json").unwrap() 8 | //! .build().unwrap(); 9 | //! 10 | //! // Then, generate one input and write it to a specified stream. 11 | //! let mut stream = std::io::stdout(); 12 | //! GrammarInterpreter::new(&grammar).interpret(&mut stream).unwrap(); 13 | //! 
``` 14 | 15 | use std::io::Write; 16 | 17 | use crate::{ 18 | backends::C::{ 19 | LLSymbol, 20 | LowLevelGrammar, 21 | }, 22 | grammar::ContextFreeGrammar, 23 | }; 24 | 25 | /// The GrammarInterpreter interprets the rules of a grammar to generate inputs. 26 | pub struct GrammarInterpreter { 27 | grammar: LowLevelGrammar, 28 | seed: usize, 29 | stack: Vec, 30 | } 31 | 32 | impl GrammarInterpreter { 33 | /// Create a new GrammarInterpreter. 34 | #[allow(clippy::new_without_default)] 35 | pub fn new(grammar: &ContextFreeGrammar) -> Self { 36 | Self { 37 | grammar: LowLevelGrammar::from_high_level_grammar(grammar), 38 | seed: 0xDEADBEEF, 39 | stack: Vec::with_capacity(4096), 40 | } 41 | } 42 | 43 | /// Seed the RNG of the GrammarInterpreter. 44 | pub fn seed(&mut self, seed: usize) { 45 | if seed == 0 { 46 | self.seed = 0xDEADBEEF; 47 | } else { 48 | self.seed = seed; 49 | } 50 | } 51 | 52 | /// Generate one input and write it to the given output stream `stream`. 53 | /// Returns the number of bytes written to `stream`. 
54 | pub fn interpret(&mut self, stream: &mut S) -> std::io::Result { 55 | let mut generated = 0; 56 | 57 | assert!(self.stack.is_empty()); 58 | self.stack.push(LLSymbol::NonTerminal(*self.grammar.entrypoint())); 59 | 60 | while let Some(symbol) = self.stack.pop() { 61 | match symbol { 62 | LLSymbol::Terminal(term) => { 63 | let term = &self.grammar.terminals()[term.id()].as_bytes(); 64 | generated += term.len(); 65 | stream.write_all(term)?; 66 | }, 67 | LLSymbol::NonTerminal(nonterm) => { 68 | let rules = self.grammar.rules().get(&nonterm.id()).unwrap(); 69 | 70 | // Inline RNG because of borrow problems 71 | let rand = { 72 | let mut x = self.seed; 73 | x ^= x << 13; 74 | x ^= x >> 7; 75 | x ^= x << 17; 76 | self.seed = x; 77 | x 78 | }; 79 | 80 | let rule = &rules[rand % rules.len()]; 81 | 82 | for symbol in rule.iter().rev() { 83 | self.stack.push(symbol.clone()); 84 | } 85 | }, 86 | } 87 | } 88 | 89 | Ok(generated) 90 | } 91 | } 92 | 93 | #[cfg(test)] 94 | mod tests { 95 | use super::*; 96 | 97 | #[test] 98 | fn test_interpreter() { 99 | let cfg = ContextFreeGrammar::builder() 100 | .gramatron_grammar("test-data/grammars/gramatron.json") 101 | .unwrap() 102 | .build() 103 | .unwrap(); 104 | let mut stdout = std::io::stdout(); 105 | let mut interpreter = GrammarInterpreter::new(&cfg); 106 | interpreter.seed(1238); 107 | let len = interpreter.interpret(&mut stdout).unwrap(); 108 | println!(); 109 | println!("Generated {} bytes", len); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/backends/json/generator.rs: -------------------------------------------------------------------------------- 1 | use serde::ser::Serialize; 2 | use serde_json::{ 3 | json, 4 | ser::PrettyFormatter, 5 | Serializer, 6 | Value, 7 | }; 8 | use std::{ 9 | fs::File, 10 | io::Write, 11 | path::Path, 12 | }; 13 | 14 | use crate::grammar::{ 15 | ContextFreeGrammar, 16 | Symbol, 17 | }; 18 | 19 | fn enclosed_in(s: &str, start: char, 
/// Check whether `s` starts with `start` and ends with `end`, where the same
/// character may not serve as both marker (so a one-character string is
/// never considered enclosed).
fn enclosed_in(s: &str, start: char, end: char) -> bool {
    match s.strip_prefix(start) {
        Some(rest) => rest.ends_with(end),
        None => false,
    }
}

/// Render a terminal for the peacock JSON format: anything that could be
/// mistaken for a non-terminal reference (`<...>`) or that already looks
/// quoted (`'...'`) gets wrapped in single quotes.
fn terminal_string(content: &str) -> String {
    let needs_quotes = enclosed_in(content, '<', '>') || enclosed_in(content, '\'', '\'');

    if needs_quotes {
        format!("'{}'", content)
    } else {
        content.to_string()
    }
}
.gramatron_grammar("test-data/grammars/gramatron.json") 83 | .unwrap() 84 | .optimize(false) 85 | .build() 86 | .unwrap(); 87 | JsonGenerator::new().generate("/tmp/new.json", &cfg); 88 | 89 | ContextFreeGrammar::builder().peacock_grammar("/tmp/new.json").unwrap().build().unwrap(); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/backends/json/mod.rs: -------------------------------------------------------------------------------- 1 | //! Generate a grammar in peacock format. 2 | //! 3 | //! Use it like so: 4 | //! ``` 5 | //! // First, load multiple grammars from disk. This will merge all the rules. 6 | //! let grammar = ContextFreeGrammar::builder() 7 | //! .peacock_grammar("my-grammar.json").unwrap() 8 | //! .peacock_grammar("common-definitions.json").unwrap() 9 | //! .gramatron_grammar("my-old-grammar.json").unwrap() 10 | //! .build().unwrap(); 11 | //! 12 | //! // Then, create a single new grammar in peacock format. 13 | //! JsonGenerator::new().generate("merged-grammar.json", &grammar); 14 | //! ``` 15 | 16 | mod generator; 17 | 18 | pub use generator::JsonGenerator; 19 | -------------------------------------------------------------------------------- /src/backends/mod.rs: -------------------------------------------------------------------------------- 1 | //! This module has all the available backends. 
2 | 3 | #[allow(non_snake_case)] 4 | pub mod C; 5 | 6 | pub mod json; 7 | 8 | pub mod interpreter; 9 | -------------------------------------------------------------------------------- /src/bin/compile.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use peacock_fuzz::{ 3 | backends::C::CGenerator, 4 | grammar::ContextFreeGrammar, 5 | }; 6 | 7 | pub mod fuzz; 8 | use fuzz::GrammarFormat; 9 | 10 | #[derive(Parser, Debug)] 11 | #[command(author, version, about, long_about = None)] 12 | struct Args { 13 | #[arg(long, value_name = "GRAMMAR")] 14 | grammar: String, 15 | 16 | #[arg(long)] 17 | output: String, 18 | 19 | #[arg(long, default_value_t = GrammarFormat::Peacock)] 20 | format: GrammarFormat, 21 | 22 | #[arg(short, long)] 23 | entrypoint: Option, 24 | } 25 | 26 | fn main() { 27 | let args = Args::parse(); 28 | 29 | let mut cfg = ContextFreeGrammar::builder(); 30 | 31 | match args.format { 32 | GrammarFormat::Peacock => cfg = cfg.peacock_grammar(&args.grammar).unwrap(), 33 | GrammarFormat::Gramatron => cfg = cfg.gramatron_grammar(&args.grammar).unwrap(), 34 | } 35 | 36 | if let Some(entrypoint) = args.entrypoint { 37 | cfg = cfg.entrypoint(entrypoint); 38 | } 39 | 40 | let cfg = cfg.build().unwrap(); 41 | 42 | CGenerator::new().generate(&args.output, &cfg); 43 | } 44 | -------------------------------------------------------------------------------- /src/bin/dump.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use libafl::prelude::{ 3 | HasTargetBytes, 4 | Input, 5 | }; 6 | use libafl_bolts::prelude::AsSlice; 7 | use peacock_fuzz::components::{ 8 | load_generator, 9 | PeacockInput, 10 | }; 11 | use std::io::Write; 12 | 13 | #[derive(Parser, Debug)] 14 | #[command(author, version, about, long_about = None)] 15 | struct Args { 16 | #[arg(short, long)] 17 | generator: String, 18 | 19 | file: String, 20 | } 21 | 22 | fn main() { 23 | let args = 
Args::parse(); 24 | load_generator(&args.generator); 25 | let input = PeacockInput::from_file(&args.file).expect("Could not load specified input file"); 26 | let input = input.target_bytes(); 27 | std::io::stdout().write_all(input.as_slice()).expect("Could not write to stdout"); 28 | } 29 | -------------------------------------------------------------------------------- /src/bin/fuzz.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use libafl::prelude::{ 3 | feedback_or, 4 | powersched::PowerSchedule, 5 | CachedOnDiskCorpus, 6 | CalibrationStage, 7 | CanTrack, 8 | Corpus, 9 | CrashFeedback, 10 | Error, 11 | EventConfig, 12 | ForkserverExecutor, 13 | Fuzzer, 14 | HasCorpus, 15 | HitcountsMapObserver, 16 | IndexesLenTimeMinimizerScheduler, 17 | Launcher, 18 | LlmpRestartingEventManager, 19 | MaxMapFeedback, 20 | OnDiskCorpus, 21 | StdFuzzer, 22 | StdMapObserver, 23 | StdMutationalStage, 24 | StdState, 25 | StdWeightedScheduler, 26 | TimeFeedback, 27 | TimeObserver, 28 | TimeoutFeedback, 29 | }; 30 | #[cfg(not(debug_assertions))] 31 | use libafl::prelude::{ 32 | tui::ui::TuiUI, 33 | tui::TuiMonitor, 34 | }; 35 | use libafl_bolts::prelude::{ 36 | current_nanos, 37 | tuple_list, 38 | AsSliceMut, 39 | CoreId, 40 | Cores, 41 | ShMem, 42 | ShMemProvider, 43 | StdRand, 44 | UnixShMemProvider, 45 | }; 46 | use nix::sys::signal::Signal; 47 | use peacock_fuzz::{ 48 | backends::C::CGenerator, 49 | components::{ 50 | load_generator, 51 | seed_generator, 52 | PeacockGenerator, 53 | PeacockInput, 54 | PeacockMutator, 55 | }, 56 | grammar::ContextFreeGrammar, 57 | }; 58 | use std::{ 59 | path::{ 60 | Path, 61 | PathBuf, 62 | }, 63 | process::Command, 64 | time::Duration, 65 | }; 66 | 67 | const PRELOAD_ENV: &str = "PEACOCK_PRELOAD"; 68 | const CC_ENV: &str = "CC"; 69 | const MAP_SIZE_ENV: &str = "PEACOCK_MAP_SIZE"; 70 | 71 | const DEFAULT_MAP_SIZE: usize = 2_621_440; 72 | const DEFAULT_CC: &str = "cc"; 73 | 74 | 
/// Create the directory `dir`, treating "already exists" as success.
///
/// Panics on any other failure so the fuzzer does not continue without its
/// output directory. The underlying OS error is included in the panic
/// message (the previous version dropped it, making e.g. permission or
/// missing-parent failures hard to diagnose).
fn mkdir(dir: &str) {
    match std::fs::create_dir(dir) {
        Ok(()) => {},
        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {},
        Err(err) => panic!("Could not create directory {}: {}", dir, err),
    }
}
| "-shared", 149 | &input.to_string_lossy(), 150 | "-nostdlib", 151 | ]) 152 | .output() 153 | .expect("Could not launch C compiler"); 154 | 155 | if !output.status.success() { 156 | panic!("Compiling grammar failed"); 157 | } 158 | } 159 | 160 | fn generate_source(args: &Args, c_file: &Path) { 161 | let mut cfg = ContextFreeGrammar::builder(); 162 | 163 | match &args.format { 164 | GrammarFormat::Peacock => cfg = cfg.peacock_grammar(&args.grammar).unwrap(), 165 | GrammarFormat::Gramatron => cfg = cfg.gramatron_grammar(&args.grammar).unwrap(), 166 | } 167 | 168 | if let Some(entrypoint) = &args.entrypoint { 169 | cfg = cfg.entrypoint(entrypoint); 170 | } 171 | 172 | let cfg = cfg.build().unwrap(); 173 | 174 | CGenerator::new().generate(c_file, &cfg); 175 | } 176 | 177 | fn load_grammar(args: &Args) { 178 | let generator_so = PathBuf::from(format!("{}/generator.so", &args.output)); 179 | let c_file = PathBuf::from(format!("{}/generator.c", &args.output)); 180 | 181 | mkdir(&args.output); 182 | if !generator_so.exists() || is_newer(&args.grammar, &generator_so) { 183 | println!("Compiling generator.so ..."); 184 | generate_source(args, &c_file); 185 | compile_source(&generator_so, &c_file); 186 | } 187 | 188 | load_generator(generator_so); 189 | } 190 | 191 | /* Harness */ 192 | fn fuzz(args: Args) -> Result<(), Error> { 193 | let mut map_size = if let Ok(value) = std::env::var(MAP_SIZE_ENV) { 194 | std::env::remove_var(MAP_SIZE_ENV); 195 | value.parse().expect("Invalid map size speficiation") 196 | } else { 197 | DEFAULT_MAP_SIZE 198 | }; 199 | 200 | if map_size % 64 != 0 { 201 | map_size = ((map_size + 63) >> 6) << 6; 202 | } 203 | 204 | let mut run_client = |state: Option<_>, mut mgr: LlmpRestartingEventManager<_, _, _>, core_id: CoreId| { 205 | let output_dir = Path::new(&args.output); 206 | let queue_dir = output_dir.join("queue"); 207 | let crashes_dir = output_dir.join("crashes"); 208 | let seed = current_nanos().rotate_left(core_id.0 as u32); 209 | let 
powerschedule = PowerSchedule::EXPLORE; 210 | let timeout = Duration::from_secs(10); 211 | let signal = str::parse::("SIGKILL").unwrap(); 212 | let debug_child = cfg!(debug_assertions); 213 | 214 | if let Ok(value) = std::env::var(PRELOAD_ENV) { 215 | std::env::set_var("LD_PRELOAD", value); 216 | std::env::remove_var(PRELOAD_ENV); 217 | } 218 | 219 | let mut shmem_provider = UnixShMemProvider::new()?; 220 | let mut shmem = shmem_provider.new_shmem(map_size)?; 221 | shmem.write_to_env("__AFL_SHM_ID")?; 222 | let shmem_buf = shmem.as_slice_mut(); 223 | std::env::set_var("AFL_MAP_SIZE", format!("{}", map_size)); 224 | 225 | let edges_observer = 226 | unsafe { HitcountsMapObserver::new(StdMapObserver::new("shared_mem", shmem_buf)).track_indices() }; 227 | 228 | let time_observer = TimeObserver::new("time"); 229 | 230 | let map_feedback = MaxMapFeedback::new(&edges_observer); 231 | 232 | let calibration = CalibrationStage::new(&map_feedback); 233 | 234 | let mut feedback = feedback_or!(map_feedback, TimeFeedback::new(&time_observer)); 235 | 236 | let mut objective = feedback_or!(CrashFeedback::new(), TimeoutFeedback::new()); 237 | 238 | seed_generator(seed as usize); 239 | 240 | let mut state = if let Some(state) = state { 241 | state 242 | } else { 243 | StdState::new( 244 | StdRand::with_seed(seed), 245 | CachedOnDiskCorpus::::new(&queue_dir, 128)?, 246 | OnDiskCorpus::new(crashes_dir)?, 247 | &mut feedback, 248 | &mut objective, 249 | )? 
250 | }; 251 | 252 | let mutator = PeacockMutator::new(); 253 | 254 | let mutational = StdMutationalStage::with_max_iterations(mutator, 1); 255 | 256 | let scheduler = IndexesLenTimeMinimizerScheduler::new( 257 | &edges_observer, 258 | StdWeightedScheduler::with_schedule(&mut state, &edges_observer, Some(powerschedule)), 259 | ); 260 | 261 | let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); 262 | 263 | let mut executor = ForkserverExecutor::builder() 264 | .program(&args.cmdline[0]) 265 | .debug_child(debug_child) 266 | .parse_afl_cmdline(args.cmdline.get(1..).unwrap_or(&[])) 267 | .coverage_map_size(map_size) 268 | .is_persistent(false) 269 | .timeout(timeout) 270 | .kill_signal(signal) 271 | .build_dynamic_map(edges_observer, tuple_list!(time_observer))?; 272 | 273 | if state.corpus().count() == 0 { 274 | if let Some(corpus) = &args.corpus { 275 | state.load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[PathBuf::from(corpus)])?; 276 | } 277 | 278 | state.load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[queue_dir])?; 279 | 280 | if state.corpus().count() == 0 { 281 | let mut generator = PeacockGenerator::new(); 282 | state.generate_initial_inputs_forced(&mut fuzzer, &mut executor, &mut generator, &mut mgr, 16)?; 283 | } 284 | } 285 | 286 | let mut stages = tuple_list!(calibration, mutational); 287 | 288 | fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; 289 | Ok(()) 290 | }; 291 | 292 | let shmem_provider = UnixShMemProvider::new()?; 293 | 294 | #[cfg(not(debug_assertions))] 295 | let monitor = { 296 | let tui = TuiUI::new("peacock".to_string(), true); 297 | TuiMonitor::new(tui) 298 | }; 299 | 300 | #[cfg(debug_assertions)] 301 | let monitor = libafl::prelude::MultiMonitor::new(|s| println!("{}", s)); 302 | 303 | let cores = Cores::from_cmdline(&args.cores).expect("Invalid core specification"); 304 | 305 | match Launcher::builder() 306 | .shmem_provider(shmem_provider) 307 | 
.configuration(EventConfig::AlwaysUnique) 308 | .monitor(monitor) 309 | .run_client(&mut run_client) 310 | .cores(&cores) 311 | .build() 312 | .launch() 313 | { 314 | Err(Error::ShuttingDown) | Ok(()) => Ok(()), 315 | e => e, 316 | } 317 | } 318 | 319 | pub fn main() { 320 | let args = Args::parse(); 321 | load_grammar(&args); 322 | fuzz(args).expect("Could not launch fuzzer"); 323 | } 324 | -------------------------------------------------------------------------------- /src/bin/gen.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use peacock_fuzz::{ 3 | backends::interpreter::GrammarInterpreter, 4 | grammar::ContextFreeGrammar, 5 | }; 6 | use std::{ 7 | io::{ 8 | stdout, 9 | BufWriter, 10 | Write, 11 | }, 12 | time::SystemTime, 13 | }; 14 | 15 | pub mod fuzz; 16 | use fuzz::GrammarFormat; 17 | 18 | #[derive(Parser, Debug)] 19 | #[command(author, version, about, long_about = None)] 20 | struct Args { 21 | #[arg(short, long)] 22 | grammar: String, 23 | 24 | #[arg(long, default_value_t = GrammarFormat::Peacock)] 25 | format: GrammarFormat, 26 | 27 | #[arg(short, long)] 28 | entrypoint: Option, 29 | 30 | #[arg(long, default_value_t = false)] 31 | dont_optimize: bool, 32 | 33 | #[arg(long, short, default_value_t = String::from("1"))] 34 | count: String, 35 | 36 | #[arg(long, short)] 37 | seed: Option, 38 | } 39 | 40 | fn main() { 41 | let args = Args::parse(); 42 | 43 | let count = args.count.parse::().unwrap(); 44 | 45 | let mut builder = ContextFreeGrammar::builder(); 46 | 47 | match args.format { 48 | GrammarFormat::Peacock => builder = builder.peacock_grammar(args.grammar).unwrap(), 49 | GrammarFormat::Gramatron => builder = builder.gramatron_grammar(args.grammar).unwrap(), 50 | } 51 | 52 | if let Some(entrypoint) = args.entrypoint { 53 | builder = builder.entrypoint(entrypoint); 54 | } 55 | 56 | builder = builder.optimize(!args.dont_optimize); 57 | 58 | let cfg = builder.build().unwrap(); 59 | 60 | let 
mut stream = BufWriter::new(stdout()); 61 | let mut interpreter = GrammarInterpreter::new(&cfg); 62 | 63 | if let Some(seed) = args.seed { 64 | let seed = seed.parse::().unwrap(); 65 | interpreter.seed(seed); 66 | } else { 67 | let seed = 68 | SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs() ^ std::process::id() as u64; 69 | interpreter.seed(seed as usize); 70 | } 71 | 72 | for _ in 0..count { 73 | interpreter.interpret(&mut stream).unwrap(); 74 | writeln!(&mut stream).unwrap(); 75 | } 76 | 77 | stream.flush().unwrap(); 78 | } 79 | -------------------------------------------------------------------------------- /src/bin/merge.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | 3 | use peacock_fuzz::{ 4 | backends::json::JsonGenerator, 5 | grammar::ContextFreeGrammar, 6 | }; 7 | 8 | #[derive(Parser, Debug)] 9 | #[command(author, version, about, long_about = None)] 10 | struct Args { 11 | #[arg(short, long, required = true)] 12 | output: String, 13 | 14 | #[arg(long, required = false)] 15 | peacock_grammar: Vec, 16 | 17 | #[arg(long, required = false)] 18 | gramatron_grammar: Vec, 19 | 20 | #[arg(short, long)] 21 | entrypoint: Option, 22 | 23 | #[arg(long, default_value_t = false)] 24 | optimize: bool, 25 | } 26 | 27 | fn main() { 28 | let args = Args::parse(); 29 | 30 | if args.peacock_grammar.is_empty() && args.gramatron_grammar.is_empty() { 31 | panic!("You need to supply at least one grammar"); 32 | } 33 | 34 | let mut builder = ContextFreeGrammar::builder(); 35 | 36 | for path in &args.peacock_grammar { 37 | builder = builder.peacock_grammar(path).unwrap(); 38 | } 39 | 40 | for path in &args.gramatron_grammar { 41 | builder = builder.gramatron_grammar(path).unwrap(); 42 | } 43 | 44 | builder = builder.optimize(args.optimize); 45 | 46 | if let Some(entrypoint) = args.entrypoint { 47 | builder = builder.entrypoint(entrypoint); 48 | } 49 | 50 | let cfg = builder.build().unwrap(); 
51 | JsonGenerator::new().generate(args.output, &cfg); 52 | } 53 | -------------------------------------------------------------------------------- /src/components/ffi.rs: -------------------------------------------------------------------------------- 1 | #[cfg(not(feature = "static-loading"))] 2 | use { 3 | std::ops::Deref, 4 | std::path::Path, 5 | }; 6 | 7 | type GrammarMutationFunc = unsafe extern "C" fn(buf: *mut usize, len: usize, capacity: usize) -> usize; 8 | type GrammarSerializationFunc = 9 | unsafe extern "C" fn(seq: *const usize, seq_len: usize, out: *mut u8, out_len: usize) -> usize; 10 | type GrammarSeedFunc = unsafe extern "C" fn(seed: usize); 11 | type GrammarUnparseFunc = 12 | unsafe extern "C" fn(seq: *mut usize, seq_capacity: usize, input: *const u8, input_len: usize) -> usize; 13 | 14 | #[allow(non_upper_case_globals)] 15 | static mut grammar_mutate: Option = None; 16 | #[allow(non_upper_case_globals)] 17 | static mut grammar_serialize: Option = None; 18 | #[allow(non_upper_case_globals)] 19 | static mut grammar_seed: Option = None; 20 | #[allow(non_upper_case_globals)] 21 | static mut grammar_unparse: Option = None; 22 | 23 | #[cfg(feature = "static-loading")] 24 | #[link(name = "generator")] 25 | extern "C" { 26 | fn mutate_sequence(buf: *mut usize, len: usize, capacity: usize) -> usize; 27 | fn serialize_sequence(seq: *const usize, seq_len: usize, out: *mut u8, out_len: usize) -> usize; 28 | fn seed_generator(seed: usize); 29 | fn unparse_sequence(seq: *mut usize, seq_capacity: usize, input: *const u8, input_len: usize) -> usize; 30 | } 31 | 32 | /// This function initializes the generator. Must be called before anything else. 33 | /// 34 | /// This is the __static__ version of this function, meaning that it expects you to link the generator 35 | /// functions statically into the binary. The generator must be an archive file called `libgenerator.a` 36 | /// otherwise symbol resolution will fail. 
/// This function initializes the generator. Must be called before anything else.
///
/// This is the __static__ version (feature `static-loading`): the generator
/// functions are linked into the binary itself, so no shared object is loaded.
#[cfg(feature = "static-loading")]
pub fn load_generator() {
    // SAFETY: called once before fuzzing starts; the statics are written only
    // here and read by the generator_* wrappers below.
    unsafe {
        grammar_mutate = Some(mutate_sequence);
        grammar_serialize = Some(serialize_sequence);
        grammar_seed = Some(seed_generator);
        grammar_unparse = Some(unparse_sequence);
    }
}

/// Look up one exported symbol in the generator shared object and copy it out
/// as a plain function pointer. Panics if the symbol does not exist.
#[cfg(not(feature = "static-loading"))]
fn get_function<T: Copy>(lib: &libloading::Library, name: &[u8]) -> T {
    let f: libloading::Symbol<T> = unsafe { lib.get(name) }.expect("Could not find function in generator.so");
    // Copy the raw function pointer out of the Symbol so it outlives `lib`'s borrow.
    let f = f.deref();
    *f
}

/// This function initializes the generator. Must be called before anything else.
///
/// This is the __dynamic__ version of this function, which gets a path to a
/// shared object as an argument and loads that via dlopen().
#[cfg(not(feature = "static-loading"))]
pub fn load_generator<P: AsRef<std::path::Path>>(path: P) {
    let path = path.as_ref();

    unsafe {
        let lib = libloading::Library::new(path).expect("Could not load generator.so");
        grammar_mutate = Some(get_function(&lib, b"mutate_sequence"));
        grammar_serialize = Some(get_function(&lib, b"serialize_sequence"));
        grammar_seed = Some(get_function(&lib, b"seed_generator"));
        grammar_unparse = Some(get_function(&lib, b"unparse_sequence"));
        // Intentional leak: the extracted function pointers must stay valid for
        // the lifetime of the process, so the library must never be unloaded.
        std::mem::forget(lib);
    }
}

/// Run one grammar mutation over `sequence` in place via the loaded generator.
///
/// Panics if [`load_generator`] was not called first.
pub(crate) fn generator_mutate(sequence: &mut Vec<usize>) {
    let len = sequence.len();
    let capacity = sequence.capacity();
    let buf = sequence.as_mut_ptr();

    let f = unsafe { grammar_mutate }.expect("load_generator() has not been called before fuzzing");

    // SAFETY: assumes the C side writes at most `capacity` elements and returns
    // the count actually written, so set_len() never exposes uninitialized data
    // — TODO confirm against the generated C code's contract.
    unsafe {
        let new_len = f(buf, len, capacity);
        sequence.set_len(new_len);
    }
}

/// Serialize an automaton walk into the raw byte buffer `out` (of size
/// `out_len`) and return the number of bytes written.
pub(crate) fn generator_serialize(sequence: &[usize], out: *mut u8, out_len: usize) -> usize {
    let seq = sequence.as_ptr();
    let seq_len = sequence.len();

    let f = unsafe { grammar_serialize }.expect("load_generator() has not been called before fuzzing");

    unsafe { f(seq, seq_len, out, out_len) }
}

/// Seed the RNG of the generator.
pub fn generator_seed(seed: usize) {
    let f = unsafe { grammar_seed }.expect("load_generator() has not been called before generator_seed()");

    unsafe {
        f(seed);
    }
}

/// Reconstruct an automaton walk from raw `input` bytes.
///
/// Returns `false` if the generator could not unparse the input with the
/// current grammar (it reports this as a zero-length result).
pub(crate) fn generator_unparse(sequence: &mut Vec<usize>, input: &[u8]) -> bool {
    let seq = sequence.as_mut_ptr();
    let seq_capacity = sequence.capacity();
    let input_len = input.len();
    let input = input.as_ptr();

    let f = unsafe { grammar_unparse }.expect("load_generator() has not been called before fuzzing");

    let new_len = unsafe { f(seq, seq_capacity, input, input_len) };

    if new_len == 0 {
        return false;
    }

    // SAFETY: assumes `new_len <= seq_capacity` elements were initialized by the
    // C side — TODO confirm against the generated C code's contract.
    unsafe {
        sequence.set_len(new_len);
    }

    true
}
impl PeacockGenerator {
    /// Create a new generator.
    #[allow(clippy::new_without_default)]
    pub fn new() -> Self {
        Self {}
    }
}

impl<S> Generator<PeacockInput, S> for PeacockGenerator {
    /// Generate a fresh input from scratch.
    ///
    /// Starting from an empty sequence, a single call into the grammar
    /// generator expands it into a complete automaton walk.
    fn generate(&mut self, _state: &mut S) -> Result<PeacockInput, Error> {
        let mut input = PeacockInput::default();
        generator_mutate(input.sequence_mut());
        Ok(input)
    }
}
/// This component represents an Input during fuzzing.
///
/// An input is a sequence of automaton transitions (a "walk"), not the
/// serialized bytes themselves; the bytes are produced on demand in
/// [`HasTargetBytes::target_bytes`].
#[derive(Serialize, Deserialize, Debug, Hash)]
pub struct PeacockInput {
    // The automaton walk produced/consumed by the generated C code.
    sequence: Vec<usize>,
}

impl PeacockInput {
    /// Read-only view of the underlying automaton walk.
    pub(crate) fn sequence(&self) -> &[usize] {
        &self.sequence
    }

    /// Mutable access to the underlying automaton walk.
    pub(crate) fn sequence_mut(&mut self) -> &mut Vec<usize> {
        &mut self.sequence
    }
}

impl Input for PeacockInput {
    /// Name inputs by a fixed-seed hash of their content so the name is stable
    /// across processes and identical inputs collide on the same name.
    fn generate_name(&self, _idx: Option<CorpusId>) -> String {
        let hash = RandomState::with_seeds(0, 0, 0, 0).hash_one(self);
        format!("{}{:016x}", BINARY_PREFIX, hash)
    }

    /// Load an input from disk.
    ///
    /// Files whose name starts with [`BINARY_PREFIX`] are raw postcard-encoded
    /// sequences; any other file is treated as target bytes and unparsed back
    /// into a sequence through the grammar.
    fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
        let path = path.as_ref();
        let mut file = File::open(path)?;
        let mut bytes: Vec<u8> = vec![];
        file.read_to_end(&mut bytes)?;

        // Raw-vs-text is decided purely by the file name prefix.
        let is_raw = if let Some(file_name) = path.file_name().and_then(|x| x.to_str()) {
            file_name.starts_with(BINARY_PREFIX)
        } else {
            false
        };

        if is_raw {
            Ok(postcard::from_bytes(&bytes)?)
        } else {
            let mut ret = Self::default();

            if !generator_unparse(&mut ret.sequence, &bytes) {
                return Err(Error::serialize(format!("Could not unparse sequence from input file {}", path.display())));
            }

            Ok(ret)
        }
    }
}

impl HasLen for PeacockInput {
    /// Length in automaton transitions, not in serialized bytes.
    fn len(&self) -> usize {
        self.sequence.len()
    }
}

impl HasTargetBytes for PeacockInput {
    /// Serialize the walk into a process-wide static scratch buffer and return
    /// a slice into it.
    ///
    /// NOTE(review): the shared SERIALIZATION_BUFFER makes this non-reentrant
    /// and not thread-safe — a later call invalidates a previously returned
    /// slice. Confirm this is only ever used from a single thread.
    fn target_bytes(&self) -> OwnedSlice<u8> {
        let len = generator_serialize(&self.sequence, unsafe { SERIALIZATION_BUFFER.as_mut_ptr() }, unsafe {
            SERIALIZATION_BUFFER.len()
        });

        unsafe { OwnedSlice::from_raw_parts(SERIALIZATION_BUFFER.as_ptr(), len) }
    }
}

impl Default for PeacockInput {
    /// Start with a generous preallocation — presumably so the C generator can
    /// grow the walk without reallocation (its capacity is passed over FFI);
    /// TODO confirm.
    fn default() -> Self {
        Self {
            sequence: Vec::with_capacity(4096 * 2),
        }
    }
}

impl Clone for PeacockInput {
    /// Manual Clone: build on Default so clones keep the preallocated capacity
    /// (a derived clone would allocate only `len` elements).
    fn clone(&self) -> Self {
        let mut clone = Self::default();
        clone.sequence.extend_from_slice(&self.sequence);
        clone
    }
}
16 | 17 | pub(crate) mod ffi; 18 | mod generator; 19 | mod input; 20 | mod mutator; 21 | 22 | pub use ffi::{ 23 | generator_seed as seed_generator, 24 | load_generator, 25 | }; 26 | 27 | pub use generator::PeacockGenerator; 28 | pub use input::PeacockInput; 29 | pub use mutator::PeacockMutator; 30 | -------------------------------------------------------------------------------- /src/components/mutator.rs: -------------------------------------------------------------------------------- 1 | use libafl::prelude::{ 2 | Error, 3 | HasRand, 4 | MutationResult, 5 | Mutator, 6 | }; 7 | use libafl_bolts::prelude::{ 8 | Named, 9 | Rand, 10 | }; 11 | use std::borrow::Cow; 12 | 13 | use crate::components::{ 14 | ffi::generator_mutate, 15 | PeacockInput, 16 | }; 17 | 18 | /// This component implements grammar-based mutations. 19 | pub struct PeacockMutator; 20 | 21 | impl PeacockMutator { 22 | /// Create a new mutator. 23 | #[allow(clippy::new_without_default)] 24 | pub fn new() -> Self { 25 | Self {} 26 | } 27 | } 28 | 29 | impl Named for PeacockMutator { 30 | fn name(&self) -> &Cow<'static, str> { 31 | static NAME: Cow<'static, str> = Cow::Borrowed("PeacockMutator"); 32 | &NAME 33 | } 34 | } 35 | 36 | impl Mutator for PeacockMutator 37 | where 38 | S: HasRand, 39 | { 40 | fn mutate(&mut self, state: &mut S, input: &mut PeacockInput) -> Result { 41 | let len = state.rand_mut().below(input.sequence().len()); 42 | input.sequence_mut().truncate(len); 43 | generator_mutate(input.sequence_mut()); 44 | Ok(MutationResult::Mutated) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | //! This module contains various error types. 2 | 3 | use std::path::PathBuf; 4 | use thiserror::Error; 5 | 6 | /// A ParsingError means that the syntax or format of the provided grammar is invalid. 
/// A ParsingError means that the syntax or format of the provided grammar is invalid.
#[derive(Debug, Error)]
pub struct ParsingError {
    // File that failed to parse; shown in the Display output.
    path: PathBuf,
    // Human-readable description of what was wrong.
    msg: String,
}

impl ParsingError {
    /// Create a new ParsingError for the grammar file `path` with message `msg`.
    pub(crate) fn new<P: Into<PathBuf>, S: Into<String>>(path: P, msg: S) -> Self {
        Self {
            path: path.into(),
            msg: msg.into(),
        }
    }
}

impl std::fmt::Display for ParsingError {
    // Manual Display (instead of thiserror's #[error]) so the path is rendered
    // via Path::display().
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "ParsingError in {}: {}", self.path.display(), self.msg)
    }
}

/// A GrammarError represents an error with the content of a grammar.
#[derive(Debug, Error)]
pub enum GrammarError {
    /// The grammar does not contain rules to expand the entrypoint
    #[error("The grammar does not contain an explicit entrypoint: {0}")]
    MissingEntrypoint(String),

    /// The grammar is referencing a non-terminal that has no rules to expand.
    #[error("The non-terminal '{0}' is referenced but never defined")]
    MissingNonTerminal(String),
}
27 | /// 28 | /// Use it like so: 29 | /// ``` 30 | /// // Load multiple grammars by joining their rules: 31 | /// let grammar = ContextFreeGrammar::builder() 32 | /// // Load a grammar in peacock format 33 | /// .peacock_grammar("my-grammar.json").unwrap() 34 | /// // Or a grammar in gramatron format 35 | /// .gramatron_grammar("my-old-grammar.json").unwrap() 36 | /// // Set the entrypoint 37 | /// .entrypoint("MY-ENTRYPOINT") 38 | /// .build().unwrap(); 39 | /// ``` 40 | pub struct GrammarBuilder { 41 | rules: Vec, 42 | optimize: bool, 43 | entrypoint: String, 44 | } 45 | 46 | impl GrammarBuilder { 47 | pub(crate) fn new() -> Self { 48 | Self { 49 | rules: Vec::new(), 50 | optimize: true, 51 | entrypoint: DEFAULT_ENTRYPOINT.to_string(), 52 | } 53 | } 54 | 55 | fn check_entrypoint(&self) -> bool { 56 | for rule in &self.rules { 57 | if rule.lhs().id() == self.entrypoint { 58 | return false; 59 | } 60 | } 61 | 62 | true 63 | } 64 | 65 | fn check_non_terminals(&self) -> Option { 66 | let mut defined_non_terms = HashSet::new(); 67 | 68 | for rule in &self.rules { 69 | defined_non_terms.insert(rule.lhs().id()); 70 | } 71 | 72 | for rule in &self.rules { 73 | for symbol in rule.rhs() { 74 | if let Symbol::NonTerminal(nonterm) = symbol { 75 | if !defined_non_terms.contains(nonterm.id()) { 76 | return Some(nonterm.id().to_string()); 77 | } 78 | } 79 | } 80 | } 81 | 82 | None 83 | } 84 | } 85 | 86 | impl GrammarBuilder { 87 | /// Load a grammar from disk that is in Peacock format. How the peacock format looks like is explained 88 | /// in the [README](https://github.com/z2-2z/peacock#how-to-write-grammars) of this project. 89 | pub fn peacock_grammar>(mut self, path: P) -> Result { 90 | let mut new_rules = peacock::parse_json(path.as_ref())?; 91 | self.rules.append(&mut new_rules); 92 | Ok(self) 93 | } 94 | 95 | /// Load a grammar from disk that is in [Gramatron](https://github.com/HexHive/Gramatron)'s format. 
96 | pub fn gramatron_grammar>(mut self, path: P) -> Result { 97 | let mut new_rules = gramatron::parse_json(path.as_ref())?; 98 | self.rules.append(&mut new_rules); 99 | Ok(self) 100 | } 101 | 102 | /// Apply Gramatron-style optimizations to this grammar that enable better mutation quality. 103 | /// 104 | /// Default: `true` 105 | pub fn optimize(mut self, optimize: bool) -> Self { 106 | self.optimize = optimize; 107 | self 108 | } 109 | 110 | /// Set the entrypoint of all loaded grammars to be the given non-terminal `entrypoint`. 111 | /// 112 | /// Default: [`DEFAULT_ENTRYPOINT`] 113 | pub fn entrypoint>(mut self, entrypoint: S) -> Self { 114 | self.entrypoint = entrypoint.into(); 115 | self 116 | } 117 | 118 | /// Create a [`ContextFreeGrammar`]. 119 | pub fn build(self) -> Result { 120 | if self.check_entrypoint() { 121 | return Err(GrammarError::MissingEntrypoint(self.entrypoint)); 122 | } 123 | 124 | if let Some(nonterm) = self.check_non_terminals() { 125 | return Err(GrammarError::MissingNonTerminal(nonterm)); 126 | } 127 | 128 | let mut cfg = ContextFreeGrammar::new(self.rules, NonTerminal::new(self.entrypoint)); 129 | 130 | if self.optimize { 131 | cfg.concatenate_terminals(); 132 | cfg.remove_duplicate_rules(); 133 | cfg.remove_unit_rules(); 134 | cfg.remove_unused_rules(); 135 | 136 | if !cfg.is_in_gnf() { 137 | cfg.remove_mixed_rules(); 138 | cfg.break_rules(); 139 | cfg.convert_to_gnf(); 140 | cfg.remove_unused_rules(); 141 | } 142 | } 143 | 144 | if cfg.count_entrypoint_rules() > 1 { 145 | cfg.set_new_entrypoint(); 146 | } 147 | 148 | Ok(cfg) 149 | } 150 | } 151 | 152 | #[cfg(test)] 153 | mod tests { 154 | use super::*; 155 | 156 | #[test] 157 | #[should_panic] 158 | fn test_missing_refs() { 159 | ContextFreeGrammar::builder().peacock_grammar("test-data/grammars/invalid-refs.json").unwrap().build().unwrap(); 160 | } 161 | 162 | #[test] 163 | fn test_gramatron_grammar() { 164 | let cfg = ContextFreeGrammar::builder() 165 | 
/// This type represents a [non-terminal](https://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols#Nonterminal_symbols) in a context-free grammar.
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub struct NonTerminal(String);

impl NonTerminal {
    pub(crate) fn new<S: Into<String>>(s: S) -> Self {
        Self(s.into())
    }

    /// The id of a non-terminal is its name from the grammar files.
    pub fn id(&self) -> &str {
        &self.0
    }
}

/// This type represents a [terminal](https://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols#Terminal_symbols) in a context-free grammar.
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub struct Terminal(String);

impl Terminal {
    pub(crate) fn new<S: Into<String>>(s: S) -> Self {
        Self(s.into())
    }

    /// The data of the terminal.
    pub fn content(&self) -> &str {
        &self.0
    }
}

/// A Symbol is either a terminal or a non-terminal and can be found on the right-hand-side
/// of a production rule.
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub enum Symbol {
    /// A terminal is a chunk of data that will be inserted into the output / parsed from the input.
    Terminal(Terminal),

    /// A non-terminal is a placeholder that will be replaced by other symbols.
    NonTerminal(NonTerminal),
}

impl Symbol {
    /// Return whether the Symbol is a terminal
    #[inline]
    pub fn is_terminal(&self) -> bool {
        matches!(self, Symbol::Terminal(_))
    }

    /// Return whether the Symbol is a non-terminal
    #[inline]
    pub fn is_non_terminal(&self) -> bool {
        matches!(self, Symbol::NonTerminal(_))
    }
}

/// A ProductionRule states how to expand a non-terminal.
///
/// The left-hand-side (lhs) of a rule is the non-terminal to expand.
/// The right-hand-side (rhs) of a rule is the sequence of Symbols that are replacing the lhs.
///
/// Please note that if a grammar has multiple ways to expand a non-terminal like so:
/// ```json
/// {
///     "<A>": [
///         ["'a'"],
///         ["'b'"],
///     ]
/// }
/// ```
/// then multiple `ProductionRules` will be generated, one for each variant.
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub struct ProductionRule {
    lhs: NonTerminal,
    rhs: Vec<Symbol>,
}

impl ProductionRule {
    pub(crate) fn new(lhs: NonTerminal, rhs: Vec<Symbol>) -> Self {
        Self {
            lhs,
            rhs,
        }
    }

    /// The left-hand-side of a production rule or the non-terminal that is to be expanded.
    pub fn lhs(&self) -> &NonTerminal {
        &self.lhs
    }

    /// The right-hand-side of a production rule or the sequence of Symbols that are replacing the left-hand-side.
    pub fn rhs(&self) -> &[Symbol] {
        &self.rhs
    }

    /// Hash with fixed seeds so the value is stable across runs and processes
    /// (used to deduplicate rules).
    pub(crate) fn fixed_hash(&self) -> u64 {
        RandomState::with_seeds(0, 0, 0, 0).hash_one(self)
    }
}

/// True if `rhs` contains at least one terminal AND at least one non-terminal.
fn is_mixed(rhs: &[Symbol]) -> bool {
    let mut terms = false;
    let mut non_terms = false;

    for symbol in rhs {
        terms |= symbol.is_terminal();
        non_terms |= symbol.is_non_terminal();
    }

    terms & non_terms
}

/// True if `rhs` consists exclusively of non-terminals.
fn is_only_non_terminals(rhs: &[Symbol]) -> bool {
    for symbol in rhs {
        if symbol.is_terminal() {
            return false;
        }
    }

    true
}

/// A ContextFreeGrammar is a set of production rules that describe how to construct an input.
///
/// Use the [`builder()`](ContextFreeGrammar::builder) method to actually create this struct.
pub struct ContextFreeGrammar {
    rules: Vec<ProductionRule>,
    entrypoint: NonTerminal,
}

impl ContextFreeGrammar {
    /// Build a ContextFreeGrammar.
    pub fn builder() -> GrammarBuilder {
        GrammarBuilder::new()
    }

    /// Access the production rules of this grammar.
    pub fn rules(&self) -> &[ProductionRule] {
        &self.rules
    }

    /// Access the entrypoint non-terminal of this grammar.
    pub fn entrypoint(&self) -> &NonTerminal {
        &self.entrypoint
    }

    pub(crate) fn new(rules: Vec<ProductionRule>, entrypoint: NonTerminal) -> Self {
        Self {
            rules,
            entrypoint,
        }
    }
}
154 | pub fn entrypoint(&self) -> &NonTerminal { 155 | &self.entrypoint 156 | } 157 | 158 | pub(crate) fn new(rules: Vec, entrypoint: NonTerminal) -> Self { 159 | Self { 160 | rules, 161 | entrypoint, 162 | } 163 | } 164 | } 165 | 166 | impl ContextFreeGrammar { 167 | pub(crate) fn concatenate_terminals(&mut self) { 168 | for rule in &mut self.rules { 169 | let mut i = 0; 170 | 171 | while i + 1 < rule.rhs.len() { 172 | if rule.rhs[i].is_terminal() && rule.rhs[i + 1].is_terminal() { 173 | let Symbol::Terminal(second) = rule.rhs.remove(i + 1) else { unreachable!() }; 174 | let Symbol::Terminal(first) = &mut rule.rhs[i] else { unreachable!() }; 175 | first.0.push_str(second.content()); 176 | } else { 177 | i += 1; 178 | } 179 | } 180 | } 181 | } 182 | 183 | pub(crate) fn remove_duplicate_rules(&mut self) { 184 | let mut hashes = HashSet::with_capacity(self.rules.len()); 185 | let mut i = 0; 186 | 187 | while i < self.rules.len() { 188 | let hash = self.rules[i].fixed_hash(); 189 | 190 | if !hashes.insert(hash) { 191 | self.rules.remove(i); 192 | } else { 193 | i += 1; 194 | } 195 | } 196 | } 197 | 198 | pub(crate) fn remove_unused_rules(&mut self) { 199 | let mut graph = Graph::<&str, ()>::new(); 200 | let mut nodes = HashMap::new(); 201 | 202 | /* Construct directed graph of non-terminals */ 203 | for rule in &self.rules { 204 | let src = rule.lhs().id(); 205 | let src = *nodes.entry(src).or_insert_with(|| graph.add_node(src)); 206 | 207 | for symbol in rule.rhs() { 208 | if let Symbol::NonTerminal(nonterm) = symbol { 209 | let dst = nonterm.id(); 210 | let dst = *nodes.entry(dst).or_insert_with(|| graph.add_node(dst)); 211 | 212 | graph.add_edge(src, dst, ()); 213 | } 214 | } 215 | } 216 | 217 | /* Do a BFS from entrypoint */ 218 | let entrypoint = *nodes.get(self.entrypoint.id()).unwrap(); 219 | let mut bfs = Bfs::new(&graph, entrypoint); 220 | 221 | while let Some(idx) = bfs.next(&graph) { 222 | let id = graph.node_weight(idx).unwrap(); 223 | nodes.remove(id); 
224 | } 225 | 226 | /* Now `nodes` contains all the non-terminals that are never used */ 227 | let nodes: HashSet = nodes.into_keys().map(|x| x.to_string()).collect(); 228 | let mut i = 0; 229 | 230 | while i < self.rules.len() { 231 | let lhs = self.rules[i].lhs().id(); 232 | 233 | if nodes.contains(lhs) { 234 | self.rules.remove(i); 235 | } else { 236 | i += 1; 237 | } 238 | } 239 | } 240 | 241 | pub(crate) fn remove_unit_rules(&mut self) { 242 | let mut i = 0; 243 | 244 | while i < self.rules.len() { 245 | let rule = &self.rules[i]; 246 | 247 | if rule.rhs().len() == 1 && rule.rhs()[0].is_non_terminal() { 248 | let old_rule = self.rules.remove(i); 249 | let Symbol::NonTerminal(to_expand) = &old_rule.rhs[0] else { unreachable!() }; 250 | let mut new_rules = Vec::new(); 251 | 252 | for other_rule in &self.rules { 253 | if to_expand.id() == other_rule.lhs().id() { 254 | new_rules.push(ProductionRule::new(old_rule.lhs().clone(), other_rule.rhs.clone())); 255 | } 256 | } 257 | 258 | self.rules.append(&mut new_rules); 259 | } else { 260 | i += 1; 261 | } 262 | } 263 | } 264 | 265 | pub(crate) fn remove_mixed_rules(&mut self) { 266 | let mut terms = HashMap::new(); 267 | 268 | for rule in &mut self.rules { 269 | if is_mixed(rule.rhs()) { 270 | for j in 0..rule.rhs().len() { 271 | if let Symbol::Terminal(term) = &rule.rhs()[j] { 272 | let non_term = terms 273 | .entry(term.clone()) 274 | .or_insert_with(|| NonTerminal(format!("(term:{})", term.content()))) 275 | .clone(); 276 | rule.rhs[j] = Symbol::NonTerminal(non_term); 277 | } 278 | } 279 | } 280 | } 281 | 282 | for (term, nonterm) in terms { 283 | self.rules.push(ProductionRule::new(nonterm, vec![Symbol::Terminal(term)])); 284 | } 285 | } 286 | 287 | pub(crate) fn break_rules(&mut self) { 288 | let mut nonterm_cursor = 0; 289 | let mut i = 0; 290 | 291 | while i < self.rules.len() { 292 | let rule = &mut self.rules[i]; 293 | 294 | if rule.rhs().len() > 2 && is_only_non_terminals(rule.rhs()) { 295 | let len = 
rule.rhs().len() - 1; 296 | let symbols: Vec = rule.rhs.drain(0..len).collect(); 297 | 298 | let nonterm = NonTerminal(format!("(break_rules:{})", nonterm_cursor)); 299 | nonterm_cursor += 1; 300 | 301 | rule.rhs.insert(0, Symbol::NonTerminal(nonterm.clone())); 302 | 303 | self.rules.push(ProductionRule::new(nonterm, symbols)); 304 | } 305 | 306 | i += 1; 307 | } 308 | } 309 | 310 | pub(crate) fn convert_to_gnf(&mut self) { 311 | let mut i = 0; 312 | 313 | while i < self.rules.len() { 314 | if self.rules[i].rhs()[0].is_non_terminal() { 315 | let mut new_rules = Vec::new(); 316 | let mut old_rule = self.rules.remove(i); 317 | let Symbol::NonTerminal(nonterm) = old_rule.rhs.remove(0) else { unreachable!() }; 318 | 319 | for other_rule in &self.rules { 320 | if other_rule.lhs().id() == nonterm.id() { 321 | let mut new_symbols = other_rule.rhs.clone(); 322 | new_symbols.extend_from_slice(old_rule.rhs()); 323 | new_rules.push(ProductionRule::new(old_rule.lhs().clone(), new_symbols)); 324 | } 325 | } 326 | 327 | self.rules.append(&mut new_rules); 328 | } else { 329 | i += 1; 330 | } 331 | } 332 | } 333 | 334 | pub(crate) fn set_new_entrypoint(&mut self) { 335 | let nonterm = NonTerminal("(real_entrypoint)".to_string()); 336 | 337 | self.rules.push(ProductionRule::new(nonterm.clone(), vec![Symbol::NonTerminal(self.entrypoint.clone())])); 338 | 339 | self.entrypoint = nonterm; 340 | } 341 | 342 | pub(crate) fn count_entrypoint_rules(&self) -> usize { 343 | let mut count = 0; 344 | 345 | for rule in &self.rules { 346 | if rule.lhs().id() == self.entrypoint.id() { 347 | count += 1; 348 | } 349 | } 350 | 351 | count 352 | } 353 | 354 | pub(crate) fn is_in_gnf(&self) -> bool { 355 | for rule in &self.rules { 356 | let rhs = rule.rhs(); 357 | 358 | if rhs[0].is_non_terminal() { 359 | return false; 360 | } 361 | 362 | if let Some(symbols) = rhs.get(1..) 
{ 363 | for symbol in symbols { 364 | if symbol.is_terminal() { 365 | return false; 366 | } 367 | } 368 | } 369 | } 370 | 371 | true 372 | } 373 | } 374 | 375 | #[cfg(test)] 376 | mod tests { 377 | use super::*; 378 | 379 | #[test] 380 | fn test_unused_rules() { 381 | let cfg = ContextFreeGrammar::builder() 382 | .peacock_grammar("test-data/grammars/unused_rules.json") 383 | .unwrap() 384 | .build() 385 | .unwrap(); 386 | 387 | println!("{:#?}", cfg.rules()); 388 | } 389 | 390 | #[test] 391 | fn test_duplicate_rules() { 392 | let cfg = ContextFreeGrammar::builder() 393 | .peacock_grammar("test-data/grammars/duplicate_rules.json") 394 | .unwrap() 395 | .build() 396 | .unwrap(); 397 | 398 | println!("{:#?}", cfg.rules()); 399 | } 400 | 401 | #[test] 402 | fn test_unit_rules() { 403 | let cfg = ContextFreeGrammar::builder() 404 | .peacock_grammar("test-data/grammars/unit_rules.json") 405 | .unwrap() 406 | .build() 407 | .unwrap(); 408 | 409 | println!("{:#?}", cfg.rules()); 410 | } 411 | 412 | #[test] 413 | #[should_panic] 414 | fn test_recursion() { 415 | let cfg = ContextFreeGrammar::builder() 416 | .peacock_grammar("test-data/grammars/recursion.json") 417 | .unwrap() 418 | .build() 419 | .unwrap(); 420 | 421 | println!("{:#?}", cfg.rules()); 422 | } 423 | 424 | #[test] 425 | #[ignore] 426 | fn test_mixed_rules() { 427 | let cfg = ContextFreeGrammar::builder() 428 | .peacock_grammar("test-data/grammars/mixed_rules.json") 429 | .unwrap() 430 | .build() 431 | .unwrap(); 432 | 433 | println!("{:#?}", cfg.rules()); 434 | } 435 | } 436 | -------------------------------------------------------------------------------- /src/grammar/mod.rs: -------------------------------------------------------------------------------- 1 | //! This is the frontend that loads grammars. 2 | //! 3 | //! Use it like so: 4 | //! ``` 5 | //! // Load multiple grammars by joining their rules: 6 | //! let grammar = ContextFreeGrammar::builder() 7 | //! // Load a grammar in peacock format 8 | //! 
.peacock_grammar("my-grammar.json").unwrap() 9 | //! // Or a grammar in gramatron format 10 | //! .gramatron_grammar("my-old-grammar.json").unwrap() 11 | //! // Set the entrypoint 12 | //! .entrypoint("MY-ENTRYPOINT") 13 | //! .build().unwrap(); 14 | //! ``` 15 | //! You can inspect the grammar contents like this: 16 | //! ``` 17 | //! // Since a grammar is nothing but a set of rules, traverse the rules 18 | //! for rule in grammar.rules() { 19 | //! // The left-hand-side (lhs) of a rule is a single non-terminal 20 | //! println!("lhs = {:?}", rule.lhs()); 21 | //! 22 | //! // The right-hand-side (rhs) of a rule is a sequence of terminals and non-terminals. 23 | //! // This is captured in the enum "Symbol". 24 | //! for symbol in rule.rhs() { 25 | //! match symbol { 26 | //! Symbol::Terminal(terminal) => println!("terminal: {}", terminal.content()), 27 | //! Symbol::NonTerminal(nonterminal) => println!("non-terminal {}", nonterminal.id()), 28 | //! } 29 | //! } 30 | //! } 31 | //! ``` 32 | 33 | mod builder; 34 | mod cfg; 35 | 36 | pub use builder::*; 37 | pub use cfg::*; 38 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This library contains everything you need to setup a grammar-based fuzzer. 2 | //! 3 | //! It consists of 4 | //! - __frontend__: Load grammars of different formats. Currently, the Gramatron and Peacock format are supported. 5 | //! - __backend__: Use the loaded grammar to do whatever you want. 6 | //! Current backends are 7 | //! - `C`: Generate a grammar-based mutator in C 8 | //! - `json`: Convert loaded grammar(s) into peacock format 9 | //! - `interpreter`: Generate inputs by interpreting a grammar 10 | //! 11 | //! but you can easily write your own. 12 | //! - __runtime__: LibAFL components that you can use in your fuzzer to realize grammar-based mutations. 13 | //! 14 | //! ## Grammars 15 | //! 
This library supports grammar files in two formats: 16 | //! 1. [Gramatron](https://github.com/HexHive/Gramatron) format for backwards compatibility 17 | //! 2. Its own "peacock format", which is documented in the [README](https://github.com/z2-2z/peacock#how-to-write-grammars) of this project 18 | //! 19 | //! ## Getting Started 20 | //! The first step always is to load grammars. To do this use the [`ContextFreeGrammar::builder()`](grammar::ContextFreeGrammar::builder) method 21 | //! that will give you access to a [`GrammarBuilder`](grammar::GrammarBuilder) like this: 22 | //! ``` 23 | //! // Load multiple grammars by joining their rules: 24 | //! let grammar = ContextFreeGrammar::builder() 25 | //! // Load a grammar in peacock format 26 | //! .peacock_grammar("my-grammar.json").unwrap() 27 | //! // Or a grammar in gramatron format 28 | //! .gramatron_grammar("my-old-grammar.json").unwrap() 29 | //! // Set the entrypoint 30 | //! .entrypoint("MY-ENTRYPOINT") 31 | //! .build().unwrap(); 32 | //! ``` 33 | //! Then, you can plug the grammar into one of the provided backends: 34 | //! ``` 35 | //! backends::C::CGenerator::new().generate("output-file.c", &grammar); 36 | //! // or 37 | //! backends::json::JsonGenerator::new().generate("output-file.json", &grammar); 38 | //! // or 39 | //! backends::interpreter::GrammarInterpreter::new(&grammar).interpret(&mut io::stdout()); 40 | //! ``` 41 | //! And that's it. 42 | //! 43 | //! ## Feature Flags 44 | //! - `components`: Include LibAFL components in this library. On by default. 45 | //! - `static-loading`: Activate this if you want to compile the generated C code into the fuzzer. For more details see the 46 | //! documentation of the `components`. 47 | //! - `debug-codegen`: This affects the C backend and inserts calls to printf() at the beginning of every generated function to 48 | //! help troubleshooting. 
49 | 50 | #![deny(missing_docs)] 51 | 52 | pub(crate) mod parser; 53 | 54 | pub mod backends; 55 | pub mod error; 56 | pub mod grammar; 57 | 58 | #[cfg(feature = "components")] 59 | pub mod components; 60 | -------------------------------------------------------------------------------- /src/parser/gramatron.rs: -------------------------------------------------------------------------------- 1 | use serde_json as json; 2 | use std::{ 3 | fs::File, 4 | io::BufReader, 5 | path::Path, 6 | }; 7 | 8 | use crate::{ 9 | error::ParsingError, 10 | grammar::{ 11 | NonTerminal, 12 | ProductionRule, 13 | Symbol, 14 | Terminal, 15 | }, 16 | }; 17 | 18 | #[inline] 19 | fn is_whitespace(c: u8) -> bool { 20 | matches!(c, b' ' | b'\t' | b'\n' | b'\r' | 12 | 11) 21 | } 22 | 23 | fn parse_until bool>(buf: &[u8], mut delim: F) -> &[u8] { 24 | let mut cursor = 0; 25 | 26 | while cursor < buf.len() { 27 | if delim(buf[cursor]) { 28 | break; 29 | } else { 30 | cursor += 1; 31 | } 32 | } 33 | 34 | &buf[..cursor] 35 | } 36 | 37 | fn parse_grammar(value: json::Value) -> Result, String> { 38 | let mut rules = Vec::new(); 39 | 40 | let object = match value { 41 | json::Value::Object(object) => object, 42 | _ => return Err("Gramatron grammar must be specified as an object".to_string()), 43 | }; 44 | 45 | for (key, value) in &object { 46 | let rhs = match value { 47 | json::Value::Array(rhs) => rhs, 48 | _ => return Err(format!("Right-hand-side of '{}' must be an array", key)), 49 | }; 50 | 51 | if rhs.is_empty() { 52 | return Err(format!("Invalid production rule '{}': Must not be empty", key)); 53 | } 54 | 55 | for rule in rhs { 56 | let rule = match rule.as_str() { 57 | Some(rule) => rule, 58 | _ => return Err(format!("Right-hand-side of '{}' must be an array of strings", key)), 59 | }; 60 | let mut symbols = Vec::new(); 61 | let rule = rule.as_bytes(); 62 | let mut cursor = 0; 63 | 64 | while cursor < rule.len() { 65 | match &rule[cursor] { 66 | b'\'' => { 67 | cursor += 1; 68 | let content 
= parse_until(&rule[cursor..], |x| x == b'\''); 69 | cursor += content.len() + 1; 70 | let content = String::from_utf8(content.to_vec()).unwrap(); 71 | symbols.push(Symbol::Terminal(Terminal::new(content))); 72 | }, 73 | b'"' => { 74 | cursor += 1; 75 | let content = parse_until(&rule[cursor..], |x| x == b'"'); 76 | cursor += content.len() + 1; 77 | let content = String::from_utf8(content.to_vec()).unwrap(); 78 | symbols.push(Symbol::Terminal(Terminal::new(content))); 79 | }, 80 | c => { 81 | if is_whitespace(*c) { 82 | cursor += 1; 83 | } else { 84 | let content = parse_until(&rule[cursor..], |x| is_whitespace(x) || x == b'"' || x == b'\''); 85 | cursor += content.len(); 86 | let content = String::from_utf8(content.to_vec()).unwrap(); 87 | symbols.push(Symbol::NonTerminal(NonTerminal::new(content))); 88 | } 89 | }, 90 | } 91 | } 92 | 93 | if symbols.is_empty() { 94 | return Err(format!("Right-hand-side of '{}' must not contain a string with no tokens", key)); 95 | } 96 | 97 | rules.push(ProductionRule::new(NonTerminal::new(key.clone()), symbols)); 98 | } 99 | } 100 | 101 | Ok(rules) 102 | } 103 | 104 | pub fn parse_json(path: &Path) -> Result, ParsingError> { 105 | let file = File::open(path).unwrap(); 106 | let reader = BufReader::new(file); 107 | 108 | let value: json::Value = match json::from_reader(reader) { 109 | Ok(value) => value, 110 | Err(_) => { 111 | return Err(ParsingError::new(path, "Invalid JSON syntax")); 112 | }, 113 | }; 114 | 115 | parse_grammar(value).map_err(|e| ParsingError::new(path, e)) 116 | } 117 | -------------------------------------------------------------------------------- /src/parser/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod gramatron; 2 | pub mod peacock; 3 | -------------------------------------------------------------------------------- /src/parser/peacock.rs: -------------------------------------------------------------------------------- 1 | use json_comments::{ 2 | 
CommentSettings, 3 | StripComments, 4 | }; 5 | use serde_json as json; 6 | use std::{ 7 | fs::File, 8 | io::BufReader, 9 | path::Path, 10 | }; 11 | 12 | use crate::{ 13 | error::ParsingError, 14 | grammar::{ 15 | NonTerminal, 16 | ProductionRule, 17 | Symbol, 18 | Terminal, 19 | }, 20 | }; 21 | 22 | fn parse_non_terminal(keyword: &str) -> Option<&str> { 23 | if keyword.len() > 2 && keyword.starts_with('<') && keyword.ends_with('>') { 24 | Some(&keyword[1..keyword.len() - 1]) 25 | } else { 26 | None 27 | } 28 | } 29 | 30 | fn parse_terminal(keyword: &str) -> &str { 31 | if keyword.len() >= 2 && keyword.starts_with('\'') && keyword.ends_with('\'') { 32 | &keyword[1..keyword.len() - 1] 33 | } else { 34 | keyword 35 | } 36 | } 37 | 38 | fn parse_grammar(value: json::Value) -> Result, String> { 39 | let mut rules = Vec::new(); 40 | 41 | let object = match value { 42 | json::Value::Object(object) => object, 43 | _ => return Err("Peacock grammar must be specified as an object".to_string()), 44 | }; 45 | 46 | for (key, value) in &object { 47 | // LHS must be a non-terminal 48 | let lhs = match parse_non_terminal(key) { 49 | Some(lhs) => lhs, 50 | None => return Err(format!("'{}' is not a valid non-terminal", key)), 51 | }; 52 | 53 | // RHS must be an array of an array of strings that are either terminals or non-terminals 54 | let rhs = match value { 55 | json::Value::Array(rhs) => rhs, 56 | _ => return Err(format!("Right-hand-side of '{}' must be an array", key)), 57 | }; 58 | 59 | if rhs.is_empty() { 60 | return Err(format!("Invalid production rule '{}': Must not be empty", key)); 61 | } 62 | 63 | for rule in rhs { 64 | let tokens = match rule { 65 | json::Value::Array(tokens) => tokens, 66 | _ => return Err(format!("Right-hand-side of '{}' must be an array of arrays", key)), 67 | }; 68 | 69 | if tokens.is_empty() { 70 | return Err(format!("Invalid production rule '{}': One of its variants is empty", key)); 71 | } 72 | 73 | let mut symbols = Vec::new(); 74 | 75 | for 
token in tokens { 76 | let token = match token.as_str() { 77 | Some(token) => token, 78 | _ => return Err(format!("Right-hand-side of '{}' must be an array of arrays of strings", key)), 79 | }; 80 | 81 | if let Some(nonterm) = parse_non_terminal(token) { 82 | symbols.push(Symbol::NonTerminal(NonTerminal::new(nonterm))); 83 | } else { 84 | let term = parse_terminal(token); 85 | symbols.push(Symbol::Terminal(Terminal::new(term))); 86 | } 87 | } 88 | 89 | rules.push(ProductionRule::new(NonTerminal::new(lhs), symbols)); 90 | } 91 | } 92 | 93 | Ok(rules) 94 | } 95 | 96 | pub fn parse_json(path: &Path) -> Result, ParsingError> { 97 | let file = File::open(path).unwrap(); 98 | let reader = BufReader::new(file); 99 | let reader = StripComments::with_settings(CommentSettings::c_style(), reader); 100 | 101 | let value: json::Value = match json::from_reader(reader) { 102 | Ok(value) => value, 103 | Err(_) => { 104 | return Err(ParsingError::new(path, "Invalid JSON syntax")); 105 | }, 106 | }; 107 | 108 | parse_grammar(value).map_err(|e| ParsingError::new(path, e)) 109 | } 110 | 111 | #[cfg(test)] 112 | mod tests { 113 | use super::*; 114 | 115 | #[test] 116 | fn test_peacock() { 117 | println!("{:#?}", parse_json(Path::new("test-data/grammars/test-peacock.json")).unwrap()); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /template.c: -------------------------------------------------------------------------------- 1 | #error "This is just for brainstorming" 2 | 3 | typedef struct { 4 | size_t* buf; 5 | size_t len; 6 | size_t capacity; 7 | } Sequence; 8 | 9 | static int generate_seq_SNGLE (Sequence* seq, size_t* step) { 10 | size_t idx = seq->len; 11 | 12 | if (*step >= idx) { 13 | if (idx >= seq->capacity) { 14 | return 0; 15 | } 16 | 17 | seq->buf[idx] = 0; 18 | seq->len = idx + 1; 19 | } 20 | 21 | *step += 1; 22 | 23 | // code inside case 24 | 25 | return 1; 26 | } 27 | 28 | static int generate_seq_ENTRYPOINT (Sequence* 
seq, size_t* step) { 29 | size_t idx = seq->len; 30 | size_t target; 31 | 32 | if (*step < idx) { 33 | target = seq->buf[*step]; 34 | } else { 35 | if (idx >= seq->capacity) { 36 | return 0; 37 | } 38 | 39 | target = rand() % 2; 40 | seq->buf[idx] = target; 41 | seq->len = idx + 1; 42 | } 43 | 44 | *step += 1; 45 | 46 | switch (target) { 47 | case 0: { 48 | if (!generate_seq_A(seq, step)) { 49 | return 0; 50 | } 51 | 52 | // repeat for all other non-terminals in rule 53 | 54 | break; 55 | } 56 | 57 | case 1: { 58 | // no non-terminals to explore 59 | 60 | break; 61 | } 62 | 63 | default: { 64 | __builtin_unreachable(); 65 | }, 66 | } 67 | 68 | return 1; 69 | } 70 | 71 | // In rust: Vec 72 | size_t generate_sequence (void* buf, size_t len, size_t capacity) { 73 | if (UNLIKELY(!buf || !capacity)) { 74 | return 0; 75 | } 76 | 77 | Sequence seq = { 78 | .buf = (size_t*) buf, 79 | .len = len, 80 | .capacity = capacity, 81 | }; 82 | size_t step = 0; 83 | 84 | generate_seq_ENTRYPOINT(&seq, &step); 85 | 86 | return seq.len; 87 | } 88 | 89 | static const unsigned char term0[] = {...}; 90 | 91 | static size_t serialize_seq_ENTRYPOINT (size_t* seq, size_t seq_len, unsigned char* out, size_t out_len, size_t* step) { 92 | if (UNLIKELY(*step >= seq_len)) { 93 | return 0; 94 | } 95 | 96 | unsigned char* original_out = out; 97 | size_t target = seq[*step]; 98 | *step += 1; 99 | 100 | switch (target) { 101 | case 0: { 102 | // non-terminal 103 | size_t len = serialize_seq_NONTERM(seq, seq_len, out, out_len, step); 104 | out += len; out_len -= len; 105 | 106 | // terminal 107 | if (UNLIKELY(out_len < sizeof(term0))) { 108 | goto end; 109 | } 110 | __builtin_memcpy_inline(out, term0, sizeof(term0)); 111 | out += sizeof(term0); out_len -= sizeof(term0); 112 | //TODO: optimize for 1, 2, 4, 8 113 | 114 | break; 115 | } 116 | 117 | default: { 118 | __builtin_unreachable(); 119 | } 120 | } 121 | 122 | end: 123 | return (size_t) (out - original_out); 124 | } 125 | 126 | size_t 
serialize_sequence (size_t* seq, size_t seq_len, unsigned char* out, size_t out_len) { 127 | if (UNLIKELY(!seq || !seq_len || !out || !out_len)) { 128 | return 0; 129 | } 130 | 131 | size_t step = 0; 132 | 133 | return serialize_seq_ENTRYPOINT(seq, seq_len, out, out_len, &step); 134 | } 135 | 136 | 137 | static int unparse_sequence_nontermXYZ (Sequence* seq, unsigned char* input, size_t input_len, size_t* cursor) { 138 | size_t seq_idx = seq->len; 139 | 140 | if (UNLIKELY(seq_idx >= seq->capacity)) { 141 | return 0; 142 | } 143 | 144 | size_t target_cursor = 0; 145 | size_t target_id = (size_t) -1LL; 146 | size_t target_seq_len = seq_idx; 147 | 148 | // Single rule 149 | do { 150 | seq->len = seq_idx + 1; 151 | size_t tmp_cursor = *cursor; 152 | 153 | // try item 1: terminal 154 | if (UNLIKELY(input_len - tmp_cursor < sizeof(TERMX)) || __builtin_memcmp(&input[tmp_cursor], TERMX, sizeof(TERMX)) != 0) { 155 | break; 156 | } 157 | tmp_cursor += sizeof(TERMX); 158 | 159 | // try item 2: non-terminal 160 | if (!unparse_sequence_nontermABC(seq, input, input_len, &tmp_cursor)) { 161 | break; 162 | } 163 | 164 | if (tmp_cursor > target_cursor) { 165 | target_id = 0; // index of rule 166 | target_cursor = tmp_cursor; 167 | target_seq_len = seq->len; 168 | } 169 | } while(0); 170 | 171 | seq->len = target_seq_len; 172 | 173 | if (LIKELY(target_id < NUM_RULES)) { 174 | *cursor = target_cursor; 175 | seq->buf[seq_idx] = target_id; 176 | return 1; 177 | } 178 | 179 | return 0; 180 | } 181 | 182 | size_t unparse_sequence (size_t* seq_buf, size_t seq_capacity, unsigned char* input, size_t input_len) { 183 | if (UNLIKELY(!seq_buf || !seq_capacity || !input || !input_len)) { 184 | return 0; 185 | } 186 | 187 | Sequence seq = { 188 | .buf = seq_buf, 189 | .len = 0, 190 | .capacity = seq_capacity, 191 | }; 192 | size_t cursor = 0; 193 | unparse_sequence_nontermXYZ(&seq, input, input_len, &cursor); 194 | return seq.len; 195 | } 196 | 
-------------------------------------------------------------------------------- /test-data/C/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !*.c 3 | !.gitignore 4 | -------------------------------------------------------------------------------- /test-data/C/bench_generation.c: -------------------------------------------------------------------------------- 1 | // gcc -o bench_generation -Wall -Wextra -Wpedantic -Werror -O3 bench_generation.c /tmp/out.c 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | size_t mutate_sequence (void* buf, size_t len, size_t capacity); 8 | size_t serialize_sequence (size_t* seq, size_t seq_len, unsigned char* out, size_t out_len); 9 | 10 | #define BUF_SIZE (1 * 1024 * 1024) 11 | 12 | int main (void) { 13 | size_t* sequence = calloc(4096, sizeof(size_t)); 14 | unsigned char* output = malloc(BUF_SIZE); 15 | size_t generated = 0; 16 | struct timespec start; 17 | struct timespec now; 18 | size_t trials = 0; 19 | 20 | clock_gettime(CLOCK_MONOTONIC, &start); 21 | 22 | while (1) { 23 | size_t seq_len = mutate_sequence(sequence, 0, 4096); 24 | size_t out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE); 25 | generated += out_len; 26 | trials++; 27 | 28 | if ((generated % 1048576) == 0) { 29 | clock_gettime(CLOCK_MONOTONIC, &now); 30 | 31 | double secs = (double) (now.tv_sec - start.tv_sec); 32 | double amount = (double) (generated / 1048576); 33 | printf("Generated >= %.02lf MiB/s | Avg. 
size: %lu\n", amount / secs, generated / trials); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /test-data/C/fuzz_mutate.c: -------------------------------------------------------------------------------- 1 | // clang -o fuzz_mutate -Wall -Wextra -Wpedantic -Werror -O0 -g -fsanitize=address,undefined fuzz_mutate.c /tmp/out.c 2 | 3 | #include 4 | #include 5 | 6 | size_t mutate_sequence (void* buf, size_t len, size_t capacity); 7 | 8 | int main (void) { 9 | while (1) { 10 | size_t capacity = rand() % 256; 11 | void* buf = calloc(capacity, sizeof(size_t)); 12 | 13 | size_t len = 0; 14 | 15 | if (capacity > 0 && (rand() % 4) == 3) { 16 | len = rand() % (capacity + 1); 17 | } 18 | 19 | size_t new_len = mutate_sequence(buf, len, capacity); 20 | 21 | printf("capacity=%lu old_len=%lu new_len=%lu\n", capacity, len, new_len); 22 | 23 | free(buf); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /test-data/C/fuzz_unparse.c: -------------------------------------------------------------------------------- 1 | // clang -o fuzz_unparse -Wall -Wextra -Wpedantic -Werror -O0 -g -fsanitize=address,undefined -I/tmp fuzz_unparse.c /tmp/out.c 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "out.h" 10 | 11 | #define SEQ_LEN 4096 12 | #define BUF_LEN (128 * 1024 * 1024) 13 | 14 | int main (void) { 15 | seed_generator(time(NULL)); 16 | size_t* generated = calloc(SEQ_LEN, sizeof(size_t)); 17 | size_t* unparsed = calloc(SEQ_LEN, sizeof(size_t)); 18 | unsigned char* output = malloc(BUF_LEN + 1); 19 | unsigned char* output2 = malloc(BUF_LEN + 1); 20 | size_t i = 0; 21 | 22 | while (1) { 23 | printf("Iter %lu\n", i++); 24 | 25 | size_t gen_len = mutate_sequence(generated, 0, SEQ_LEN); 26 | size_t out_len = serialize_sequence(generated, gen_len, output, BUF_LEN); 27 | size_t unp_len = unparse_sequence(unparsed, SEQ_LEN, output, out_len); 28 | 
size_t out2_len = serialize_sequence(unparsed, unp_len, output2, BUF_LEN); 29 | 30 | output[out_len] = 0; 31 | output2[out2_len] = 0; 32 | 33 | if (out_len != out2_len || strcmp((const char*) output, (const char*) output2)) { 34 | printf("out_len = %lu\n", out_len); 35 | printf("out2_len = %lu\n", out2_len); 36 | 37 | printf("--- GENERATED ---\n"); 38 | printf("%s\n", output); 39 | 40 | printf("--- UNPARSED ---\n"); 41 | printf("%s\n", output2); 42 | 43 | break; 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /test-data/C/test_generation.c: -------------------------------------------------------------------------------- 1 | // clang -o test_generation -Wall -Wextra -Wpedantic -Werror -O0 -g -fsanitize=address,undefined test_generation.c /tmp/out.c 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | size_t mutate_sequence (void* buf, size_t len, size_t capacity); 8 | size_t serialize_sequence (size_t* seq, size_t seq_len, unsigned char* out, size_t out_len); 9 | 10 | #define BUF_SIZE (16 * 1024 * 1024) 11 | 12 | int main (void) { 13 | size_t* sequence = calloc(4096, sizeof(size_t)); 14 | unsigned char* output = malloc(BUF_SIZE); 15 | char buf[2]; 16 | 17 | while (1) { 18 | size_t seq_len = mutate_sequence(sequence, 0, 4096); 19 | size_t out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE - 1); 20 | assert(out_len < BUF_SIZE); 21 | 22 | output[out_len] = 0; 23 | printf("%s\n", output); 24 | 25 | fgets(buf, 2, stdin); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /test-data/C/test_mutate.c: -------------------------------------------------------------------------------- 1 | // clang -o test_mutate -Wall -Wextra -Wpedantic -Werror -O0 -g -fsanitize=address,undefined test_mutate.c /tmp/out.c 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | size_t mutate_sequence (void* buf, size_t len, size_t capacity); 8 | size_t serialize_sequence (size_t* 
seq, size_t seq_len, unsigned char* out, size_t out_len); 9 | 10 | #define BUF_SIZE (16 * 1024 * 1024) 11 | 12 | int main (void) { 13 | size_t* sequence = calloc(4096, sizeof(size_t)); 14 | unsigned char* output = malloc(BUF_SIZE); 15 | 16 | // initial sequence 17 | size_t seq_len = mutate_sequence(sequence, 0, 4096); 18 | 19 | size_t out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE - 1); 20 | output[out_len] = 0; 21 | printf("Initial: %s\n\n", output); 22 | 23 | // Mutate 24 | seq_len = mutate_sequence(sequence, seq_len / 2, 4096); 25 | out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE - 1); 26 | output[out_len] = 0; 27 | printf("Mutation #1: %s\n\n", output); 28 | 29 | // Mutate 30 | seq_len = mutate_sequence(sequence, seq_len / 2, 4096); 31 | out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE - 1); 32 | output[out_len] = 0; 33 | printf("Mutation #2: %s\n\n", output); 34 | 35 | // Mutate 36 | seq_len = mutate_sequence(sequence, seq_len / 2, 4096); 37 | out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE - 1); 38 | output[out_len] = 0; 39 | printf("Mutation #3: %s\n\n", output); 40 | 41 | free(output); 42 | free(sequence); 43 | } 44 | -------------------------------------------------------------------------------- /test-data/C/test_unparse.c: -------------------------------------------------------------------------------- 1 | // gcc -O0 -g -o test_unparse -I/tmp test_unparse.c /tmp/out.c 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "out.h" 9 | 10 | #define SEQ_LEN 4096 11 | 12 | unsigned char* input = "var a=((((-9223372036854775808/-1++))));\\n"; 13 | 14 | int main (void) { 15 | size_t input_len = strlen(input); 16 | size_t* sequence = calloc(SEQ_LEN, sizeof(size_t)); 17 | 18 | size_t seq_len = unparse_sequence(sequence, SEQ_LEN, input, input_len); 19 | assert(seq_len > 0); 20 | 21 | for (size_t i = 0; i < seq_len; ++i) { 22 | printf(" seq[%lu] = %lu\n", i, sequence[i]); 23 | } 
24 | 25 | unsigned char* output = malloc(input_len); 26 | 27 | size_t out_len = serialize_sequence(sequence, seq_len, output, input_len); 28 | printf("input_len=%d out_len=%d\n", input_len, out_len); 29 | assert(out_len == input_len); 30 | 31 | output[out_len] = 0; 32 | printf("%s\n", output); 33 | } 34 | -------------------------------------------------------------------------------- /test-data/benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | output/ 2 | .cur_input* 3 | baseline 4 | nop 5 | nop-gramatron 6 | throughput 7 | perf.data 8 | -------------------------------------------------------------------------------- /test-data/benchmarks/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: baseline nop throughput 3 | 4 | baseline: baseline.c 5 | gcc -o $@ -O3 $< 6 | 7 | nop: nop.c 8 | $(AFL_PATH)/afl-clang-lto -o $@ -O3 $< 9 | 10 | throughput: throughput.c output/generator.c 11 | clang -g -Ofast -march=native -fomit-frame-pointer -fno-stack-protector -o $@ $^ -Ioutput 12 | -------------------------------------------------------------------------------- /test-data/benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | ## Execution speed 4 | - Baseline: 12k exec/s 5 | - Peacock: ~8.6k exec/s (29% overhead) 6 | - Original Gramatron: ~8.5k exec/s (30% overhead) 7 | - LibAFL Gramatron: N/A because GramatronInput does not implement HasTargetBytes, so it cannot be used with ForkServerExecutor 8 | 9 | ## Raw throughput 10 | Time to 1 GiB: 11 | - Peacock: secs=4 nsecs=763573725 => ~205 MiB/s 12 | - Original Gramatron: secs=17 nsecs=340090343 => ~60 MiB/s 13 | - LibAFL Gramatron: secs=26 nsecs=26327460838 => ~40 MiB/s 14 | -------------------------------------------------------------------------------- /test-data/benchmarks/baseline.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define TRIALS 50000 8 | 9 | int main (void) { 10 | struct timespec start, end; 11 | 12 | clock_gettime(CLOCK_MONOTONIC, &start); 13 | 14 | for (int i = 0; i < TRIALS; ++i) { 15 | pid_t child; 16 | switch (child = fork()) { 17 | case -1: return 1; 18 | case 0: _Exit(0); 19 | default: { 20 | if (waitpid(child, NULL, 0) == -1) { 21 | return 1; 22 | } 23 | } 24 | } 25 | } 26 | 27 | clock_gettime(CLOCK_MONOTONIC, &end); 28 | 29 | time_t diff_sec = end.tv_sec - start.tv_sec; 30 | 31 | printf("exec/s: %.02lf\n", (double)TRIALS / (double)diff_sec); 32 | } 33 | -------------------------------------------------------------------------------- /test-data/benchmarks/gramatron-patch: -------------------------------------------------------------------------------- 1 | diff --git a/src/gramfuzz-mutator/test.c b/src/gramfuzz-mutator/test.c 2 | index de00f9a..ed1fb7b 100644 3 | --- a/src/gramfuzz-mutator/test.c 4 | +++ b/src/gramfuzz-mutator/test.c 5 | @@ -1,5 +1,6 @@ 6 | /* This is the testing module for Gramatron 7 | */ 8 | +#include 9 | #include "afl-fuzz.h" 10 | #define NUMINPUTS 50 11 | 12 | @@ -99,6 +100,34 @@ void SanityCheck(char *automaton_path) { 13 | } 14 | } 15 | 16 | +void Benchmark(char* automaton_path) { 17 | + state* pda = create_pda(NULL, automaton_path); 18 | + size_t generated = 0; 19 | + struct timespec start, end; 20 | + 21 | + clock_gettime(CLOCK_MONOTONIC, &start); 22 | + 23 | + while (generated < 1 * 1024 * 1024 * 1024) { 24 | + Array* walk = gen_input(pda, NULL); 25 | + char* input = unparse_walk(walk); 26 | + generated += walk->inputlen; 27 | + free(walk->start); 28 | + free(walk); 29 | + free(input); 30 | + } 31 | + 32 | + clock_gettime(CLOCK_MONOTONIC, &end); 33 | + 34 | + time_t secs = end.tv_sec - start.tv_sec; 35 | + long nsecs = end.tv_nsec - start.tv_nsec; 36 | + 37 | + if (nsecs < 0) { 38 | + secs 
-= 1; 39 | + nsecs += 1000000000; 40 | + } 41 | + 42 | + printf("secs=%lu nsecs=%ld\n", secs, nsecs); 43 | +} 44 | 45 | int main(int argc, char*argv[]) { 46 | char *mode; 47 | @@ -121,6 +150,9 @@ int main(int argc, char*argv[]) { 48 | if (! strcmp(mode, "SanityCheck")) { 49 | SanityCheck(automaton_path); 50 | } 51 | + else if (! strcmp(mode, "Benchmark")) { 52 | + Benchmark(automaton_path); 53 | + } 54 | else { 55 | printf("\nUnrecognized mode"); 56 | return -1; 57 | -------------------------------------------------------------------------------- /test-data/benchmarks/grammar.json: -------------------------------------------------------------------------------- 1 | { 2 | "ARGS": [ 3 | "VAR", 4 | "VAR ',' ARGS", 5 | "' '" 6 | ], 7 | "IDENTIFIER": [ 8 | "'abcdef0123456789ABCDEF'", 9 | "'abcdefghijklmnopqrstuvwxyz'", 10 | "'abort'", 11 | "'abs'", 12 | "'accept'", 13 | "'acos'", 14 | "'acosh'", 15 | "'address'", 16 | "'alias'", 17 | "'alias_method'", 18 | "'allocation'", 19 | "'all_symbols'", 20 | "'ancestors'", 21 | "'and'", 22 | "'anum'", 23 | "'append'", 24 | "'append_features'", 25 | "'Apr'", 26 | "'aref_args'", 27 | "'arg'", 28 | "'arg0'", 29 | "'arg1'", 30 | "'arg2'", 31 | "'arg_rhs'", 32 | "'args'", 33 | "'argument'", 34 | "'ArgumentError'", 35 | "'arguments'", 36 | "'argv'", 37 | "'ARGV'", 38 | "'arity'", 39 | "'array'", 40 | "'Array'", 41 | "'ary'", 42 | "'__ary_cmp'", 43 | "'ary_concat'", 44 | "'__ary_eq'", 45 | "'ary_F'", 46 | "'__ary_index'", 47 | "'ary_replace'", 48 | "'ary_T'", 49 | "'asctime'", 50 | "'asin'", 51 | "'asinh'", 52 | "'__assert_fail'", 53 | "'assignment'", 54 | "'assoc'", 55 | "'assoc_list'", 56 | "'assocs'", 57 | "'assumed'", 58 | "'at'", 59 | "'atan'", 60 | "'atan2'", 61 | "'atanh'", 62 | "'__attached__'", 63 | "'attr'", 64 | "'attr_accessor'", 65 | "'attr_reader'", 66 | "'attrsym'", 67 | "'attr_writer'", 68 | "'available'", 69 | "'backref'", 70 | "'backtrace'", 71 | "'Backtrace'", 72 | "'BasicObject'", 73 | "'basic_symbol'", 74 | "'beg'", 
75 | "'begin'", 76 | "'BEGIN'", 77 | "'big'", 78 | "'BIT'", 79 | "'blkarg_mark'", 80 | "'block'", 81 | "'block_arg'", 82 | "'block_call'", 83 | "'block_command'", 84 | "'block_param'", 85 | "'block_param_def'", 86 | "'BMATZ0000IREP'", 87 | "'body'", 88 | "'bodystmt'", 89 | "'boundary'", 90 | "'brace_block'", 91 | "'break'", 92 | "'bsearch'", 93 | "'bsearch_index'", 94 | "'buf'", 95 | "'bvar'", 96 | "'bv_decls'", 97 | "'byte'", 98 | "'bytes'", 99 | "'bytesize'", 100 | "'byteslice'", 101 | "'call'", 102 | "'call_args'", 103 | "'caller'", 104 | "'call_op'", 105 | "'call_op2'", 106 | "'capitalize'", 107 | "'case'", 108 | "'case_body'", 109 | "'casecmp'", 110 | "'__case_eqq'", 111 | "'cases'", 112 | "'cbrt'", 113 | "'cdr'", 114 | "'ceil'", 115 | "'change_gen_gc_mode'", 116 | "'character'", 117 | "'chars'", 118 | "'chomp'", 119 | "'chop'", 120 | "'chr'", 121 | "'clamp'", 122 | "'Class'", 123 | "'class_eval'", 124 | "'__classname__'", 125 | "'class_variable_get'", 126 | "'class_variables'", 127 | "'class_variable_set'", 128 | "'clause'", 129 | "'clear_all_old'", 130 | "'clone'", 131 | "'closure'", 132 | "'cLVAR'", 133 | "'cmd_brace_block'", 134 | "'cmp'", 135 | "'cname'", 136 | "'codegen'", 137 | "'codepoints'", 138 | "'collect'", 139 | "'collect_concat'", 140 | "'color'", 141 | "'column_count'", 142 | "'column_index'", 143 | "'combination'", 144 | "'comma'", 145 | "'command'", 146 | "'command_args'", 147 | "'command_asgn'", 148 | "'command_call'", 149 | "'command_rhs'", 150 | "'compact'", 151 | "'Comparable'", 152 | "'compile'", 153 | "'compstmt'", 154 | "'concat'", 155 | "'constant'", 156 | "'CONSTANT'", 157 | "'constants'", 158 | "'const_get'", 159 | "'const_missing'", 160 | "'const_set'", 161 | "'cont'", 162 | "'context'", 163 | "'copyright'", 164 | "'corrupted'", 165 | "'cos'", 166 | "'cosh'", 167 | "'count'", 168 | "'count_objects'", 169 | "'cpath'", 170 | "'ctime'", 171 | "'__ctype_b_loc'", 172 | "'curr'", 173 | "'current'", 174 | "'curry'", 175 | "'cycle'", 176 | 
"'Data'", 177 | "'day'", 178 | "'debug_info'", 179 | "'Dec'", 180 | "'deep'", 181 | "'def'", 182 | "'default'", 183 | "'DEFAULT'", 184 | "'default_proc'", 185 | "'defined'", 186 | "'define_method'", 187 | "'define_singleton_method'", 188 | "'__delete'", 189 | "'delete'", 190 | "'delete_at'", 191 | "'delete_if'", 192 | "'delete_prefix'", 193 | "'delete_suffix'", 194 | "'Deleting'", 195 | "'depth'", 196 | "'detect'", 197 | "'detected'", 198 | "'developers'", 199 | "'differs'", 200 | "'digit'", 201 | "'digits'", 202 | "'disable'", 203 | "'disabled'", 204 | "'discarding'", 205 | "'div'", 206 | "'divmod'", 207 | "'do'", 208 | "'do_block'", 209 | "'DomainError'", 210 | "'dot'", 211 | "'dot_or_colon'", 212 | "'downcase'", 213 | "'downto'", 214 | "'drop'", 215 | "'dropped'", 216 | "'dropping'", 217 | "'drop_while'", 218 | "'dump'", 219 | "'dup'", 220 | "'each'", 221 | "'each_byte'", 222 | "'each_char'", 223 | "'each_codepoint'", 224 | "'each_cons'", 225 | "'each_index'", 226 | "'each_key'", 227 | "'each_line'", 228 | "'each_object'", 229 | "'each_pair'", 230 | "'each_slice'", 231 | "'each_value'", 232 | "'each_with_index'", 233 | "'each_with_object'", 234 | "'ecall'", 235 | "'elem'", 236 | "'else'", 237 | "'elsif'", 238 | "'en'", 239 | "'enable'", 240 | "'__ENCODING__'", 241 | "'end'", 242 | "'__END__'", 243 | "'END'", 244 | "'ensure'", 245 | "'entries'", 246 | "'Enumerable'", 247 | "'enumerator'", 248 | "'Enumerator'", 249 | "'enumerator_block_call'", 250 | "'enum_for'", 251 | "'enums'", 252 | "'env'", 253 | "'erf'", 254 | "'erfc'", 255 | "'__errno_location'", 256 | "'error'", 257 | "'escape'", 258 | "'ETIR'", 259 | "'ETIR0004Ci'", 260 | "'exception'", 261 | "'Exception'", 262 | "'exc_list'", 263 | "'exc_var'", 264 | "'exhausted'", 265 | "'exp'", 266 | "'expected'", 267 | "'expr'", 268 | "'expression'", 269 | "'expr_value'", 270 | "'extend'", 271 | "'extended'", 272 | "'extend_object'", 273 | "'fail'", 274 | "'failed'", 275 | "'failure'", 276 | "'false'", 277 | 
"'FalseClass'", 278 | "'f_arg'", 279 | "'f_arg_item'", 280 | "'f_arglist'", 281 | "'f_args'", 282 | "'f_bad_arg'", 283 | "'f_block_arg'", 284 | "'f_block_opt'", 285 | "'f_block_optarg'", 286 | "'fclose'", 287 | "'Feb'", 288 | "'feed'", 289 | "'feedvalue'", 290 | "'feof'", 291 | "'fetch'", 292 | "'fetch_values'", 293 | "'fflush'", 294 | "'fgetc'", 295 | "'fib'", 296 | "'fiber'", 297 | "'Fiber'", 298 | "'fiber_check'", 299 | "'FiberError'", 300 | "'field'", 301 | "'file'", 302 | "'File'", 303 | "'__FILE__'", 304 | "'filename'", 305 | "'filenames_len'", 306 | "'fill'", 307 | "'final_marking_phase'", 308 | "'find'", 309 | "'find_all'", 310 | "'find_index'", 311 | "'first'", 312 | "'fish'", 313 | "'Fixnum'", 314 | "'flag'", 315 | "'f_larglist'", 316 | "'flat_map'", 317 | "'flatten'", 318 | "'Float'", 319 | "'FloatDomainError'", 320 | "'floor'", 321 | "'f_marg'", 322 | "'f_marg_list'", 323 | "'f_margs'", 324 | "'fmod'", 325 | "'fn'", 326 | "'Fn'", 327 | "'fname'", 328 | "'f_norm_arg'", 329 | "'fopen'", 330 | "'f_opt'", 331 | "'f_optarg'", 332 | "'f_opt_asgn'", 333 | "'for'", 334 | "'force'", 335 | "'format'", 336 | "'for_var'", 337 | "'found'", 338 | "'fprintf'", 339 | "'fputc'", 340 | "'fread'", 341 | "'free'", 342 | "'FREE'", 343 | "'freeze'", 344 | "'f_rest_arg'", 345 | "'frexp'", 346 | "'Fri'", 347 | "'FrozenError'", 348 | "'FsC'", 349 | "'fsym'", 350 | "'fwrite'", 351 | "'games'", 352 | "'GB'", 353 | "'GC'", 354 | "'gc_mark_children'", 355 | "'_gc_root_'", 356 | "'generational_mode'", 357 | "'Generator'", 358 | "'getbyte'", 359 | "'get_file'", 360 | "'getgm'", 361 | "'getlocal'", 362 | "'gettimeofday'", 363 | "'getutc'", 364 | "'given'", 365 | "'given_args'", 366 | "'global_variables'", 367 | "'__gmon_start__'", 368 | "'gmtime'", 369 | "'gmtime_r'", 370 | "'gn'", 371 | "'gnu'", 372 | "'GNU'", 373 | "'go'", 374 | "'grep'", 375 | "'group_by'", 376 | "'gsub'", 377 | "'h0'", 378 | "'h2'", 379 | "'H3'", 380 | "'h4'", 381 | "'h5'", 382 | "'H5'", 383 | "'h6'", 384 | 
"'H6'", 385 | "'h7'", 386 | "'h8'", 387 | "'hA'", 388 | "'hash'", 389 | "'Hash'", 390 | "'head'", 391 | "'heredoc'", 392 | "'heredoc_bodies'", 393 | "'heredoc_body'", 394 | "'heredoc_string_interp'", 395 | "'heredoc_string_rep'", 396 | "'heredoc_treat_nextline'", 397 | "'hex'", 398 | "'high'", 399 | "'hour'", 400 | "'hypot'", 401 | "'i2'", 402 | "'iClass'", 403 | "'__id__'", 404 | "'id2name'", 405 | "'identifier'", 406 | "'idx'", 407 | "'idx2'", 408 | "'if'", 409 | "'ifnone'", 410 | "'if_tail'", 411 | "'implemented'", 412 | "'in'", 413 | "'include'", 414 | "'included'", 415 | "'included_modules'", 416 | "'incremental_gc'", 417 | "'index'", 418 | "'IndexError'", 419 | "'inf'", 420 | "'Inf'", 421 | "'INF'", 422 | "'Infinity'", 423 | "'INFINITY'", 424 | "'inherited'", 425 | "'initialize'", 426 | "'initialize_copy'", 427 | "'inject'", 428 | "'in_lower_half'", 429 | "'input'", 430 | "'insert'", 431 | "'_inspect'", 432 | "'inspect'", 433 | "'instance_eval'", 434 | "'instance_exec'", 435 | "'instance_methods'", 436 | "'instance_variable_get'", 437 | "'instance_variables'", 438 | "'instance_variable_set'", 439 | "'int'", 440 | "'integer'", 441 | "'Integer'", 442 | "'Integral'", 443 | "'intern'", 444 | "'interval_ratio'", 445 | "'invert'", 446 | "'io'", 447 | "'Io'", 448 | "'_IO_putc'", 449 | "'ip'", 450 | "'Ip'", 451 | "'irep'", 452 | "'IREP'", 453 | "'isz'", 454 | "'iterate'", 455 | "'_ITM_deregisterTMCloneTable'", 456 | "'_ITM_registerTMCloneTable'", 457 | "'itself'", 458 | "'Jan'", 459 | "'join'", 460 | "'_Jv_RegisterClasses'", 461 | "'keep_if'", 462 | "'Kernel'", 463 | "'key'", 464 | "'KeyError'", 465 | "'keys'", 466 | "'keyword_alias'", 467 | "'keyword_and'", 468 | "'keyword_begin'", 469 | "'keyword_BEGIN'", 470 | "'keyword_break'", 471 | "'keyword_case'", 472 | "'keyword_class'", 473 | "'keyword_def'", 474 | "'keyword_do'", 475 | "'keyword_do_block'", 476 | "'keyword_do_cond'", 477 | "'keyword_do_LAMBDA'", 478 | "'keyword_else'", 479 | "'keyword_elsif'", 480 | 
"'keyword__ENCODING__'", 481 | "'keyword_end'", 482 | "'keyword_END'", 483 | "'keyword_ensure'", 484 | "'keyword_false'", 485 | "'keyword__FILE__'", 486 | "'keyword_for'", 487 | "'keyword_if'", 488 | "'keyword_in'", 489 | "'keyword__LINE__'", 490 | "'keyword_module'", 491 | "'keyword_next'", 492 | "'keyword_nil'", 493 | "'keyword_not'", 494 | "'keyword_or'", 495 | "'keyword_redo'", 496 | "'keyword_rescue'", 497 | "'keyword_retry'", 498 | "'keyword_return'", 499 | "'keyword_self'", 500 | "'keyword_super'", 501 | "'keyword_then'", 502 | "'keyword_true'", 503 | "'keyword_undef'", 504 | "'keyword_unless'", 505 | "'keyword_until'", 506 | "'keyword_when'", 507 | "'keyword_while'", 508 | "'keyword_yield'", 509 | "'kh_del_ht'", 510 | "'kh_del_iv'", 511 | "'kh_del_mt'", 512 | "'kh_del_n2s'", 513 | "'kh_del_st'", 514 | "'KLVAR'", 515 | "'lambda'", 516 | "'lambda_body'", 517 | "'last'", 518 | "'lazy'", 519 | "'Lazy'", 520 | "'LC'", 521 | "'ld'", 522 | "'LD'", 523 | "'ldexp'", 524 | "'left'", 525 | "'len'", 526 | "'length'", 527 | "'level'", 528 | "'lfD'", 529 | "'lhs'", 530 | "'__libc_start_main'", 531 | "'LII'", 532 | "'lIJ'", 533 | "'lim'", 534 | "'line'", 535 | "'__LINE__'", 536 | "'LINE'", 537 | "'lines'", 538 | "'literal'", 539 | "'literals'", 540 | "'live_after_mark'", 541 | "'ljust'", 542 | "'ln'", 543 | "'Ln'", 544 | "'lo'", 545 | "'local'", 546 | "'LOCAL'", 547 | "'LocalJumpError'", 548 | "'localtime'", 549 | "'localtime_r'", 550 | "'local_variables'", 551 | "'log'", 552 | "'log10'", 553 | "'log2'", 554 | "'long'", 555 | "'longjmp'", 556 | "'lookahead'", 557 | "'loop'", 558 | "'low'", 559 | "'lround'", 560 | "'LS'", 561 | "'lstrip'", 562 | "'LVAR'", 563 | "'machine'", 564 | "'main'", 565 | "'make_curry'", 566 | "'map'", 567 | "'match'", 568 | "'matched'", 569 | "'Math'", 570 | "'max'", 571 | "'max_by'", 572 | "'max_cmp'", 573 | "'May'", 574 | "'mday'", 575 | "'member'", 576 | "'__members__'", 577 | "'members'", 578 | "'memchr'", 579 | "'memcmp'", 580 | "'memcpy'", 
581 | "'memmove'", 582 | "'memory'", 583 | "'memset'", 584 | "'merge'", 585 | "'mesg'", 586 | "'message'", 587 | "'meth'", 588 | "'__method__'", 589 | "'method'", 590 | "'method_call'", 591 | "'method_missing'", 592 | "'method_removed'", 593 | "'methods'", 594 | "'mid'", 595 | "'min'", 596 | "'min_by'", 597 | "'min_cmp'", 598 | "'minmax'", 599 | "'minmax_by'", 600 | "'mktime'", 601 | "'mlhs_basic'", 602 | "'mlhs_inner'", 603 | "'mlhs_item'", 604 | "'mlhs_list'", 605 | "'mlhs_node'", 606 | "'mlhs_post'", 607 | "'mode'", 608 | "'modified'", 609 | "'modifier_if'", 610 | "'modifier_rescue'", 611 | "'modifier_unless'", 612 | "'modifier_until'", 613 | "'modifier_while'", 614 | "'module'", 615 | "'Module'", 616 | "'module_eval'", 617 | "'module_function'", 618 | "'modules'", 619 | "'mon'", 620 | "'Mon'", 621 | "'month'", 622 | "'mrb_ary_delete_at'", 623 | "'mrb_ary_new_from_values'", 624 | "'mrb_ary_plus'", 625 | "'mrb_ary_pop'", 626 | "'mrb_ary_push'", 627 | "'mrb_ary_push_m'", 628 | "'mrb_ary_resize'", 629 | "'mrb_ary_reverse'", 630 | "'mrb_ary_set'", 631 | "'mrb_ary_shift'", 632 | "'mrb_ary_splice'", 633 | "'mrb_ary_times'", 634 | "'mrb_ary_unshift'", 635 | "'mrb_ary_unshift_m'", 636 | "'mrb_assoc_new'", 637 | "'mrb_data_init'", 638 | "'mrb_debug_get_line'", 639 | "'mrb_debug_info_alloc'", 640 | "'mrb_debug_info_append_file'", 641 | "'mrb_debug_info_free'", 642 | "'mrb_field_write_barrier'", 643 | "'mrb_gc_mark'", 644 | "'MRB_GC_STATE_ROOT'", 645 | "'MRB_GC_STATE_SWEEP'", 646 | "'mrb_gc_unregister'", 647 | "'mrb_i_mt_state'", 648 | "'mrb_incremental_gc'", 649 | "'mrb_malloc'", 650 | "'mrb_mod_s_nesting'", 651 | "'mrb_obj_value'", 652 | "'mrb_random_init'", 653 | "'mrb_random_srand'", 654 | "'mrb_realloc'", 655 | "'mrb_str_format'", 656 | "'MRB_TT_DATA'", 657 | "'MRB_TT_FIBER'", 658 | "'MRB_TT_FREE'", 659 | "'mrb_vm_const_get'", 660 | "'mrb_vm_exec'", 661 | "'mrb_write_barrier'", 662 | "'mrhs'", 663 | "'mruby'", 664 | "'MRUBY_COPYRIGHT'", 665 | "'MRUBY_DESCRIPTION'", 
666 | "'MRUBY_RELEASE_DATE'", 667 | "'MRUBY_RELEASE_NO'", 668 | "'MRUBY_VERSION'", 669 | "'name'", 670 | "'named'", 671 | "'NameError'", 672 | "'names'", 673 | "'nan'", 674 | "'NaN'", 675 | "'NAN'", 676 | "'nesting'", 677 | "'new'", 678 | "'new_args'", 679 | "'new_key'", 680 | "'new_msym'", 681 | "'next'", 682 | "'next_values'", 683 | "'nil'", 684 | "'NilClass'", 685 | "'nl'", 686 | "'nlocals'", 687 | "'nLVAR'", 688 | "'nMATZ0000IREP'", 689 | "'NODE_DREGX'", 690 | "'NODE_DSTR'", 691 | "'NODE_DXSTR'", 692 | "'NODE_FALSE'", 693 | "'NODE_NEGATE'", 694 | "'NODE_NIL'", 695 | "'NODE_REDO'", 696 | "'NODE_RETRY'", 697 | "'NODE_SELF'", 698 | "'NODE_TRUE'", 699 | "'NODE_UNDEF'", 700 | "'NODE_ZSUPER'", 701 | "'NoMemoryError'", 702 | "'NoMethodError'", 703 | "'none'", 704 | "'NONE'", 705 | "'norm'", 706 | "'not'", 707 | "'NotImplementedError'", 708 | "'Nov'", 709 | "'now'", 710 | "'Np'", 711 | "'nregs'", 712 | "'num'", 713 | "'number'", 714 | "'numbered'", 715 | "'numeric'", 716 | "'Numeric'", 717 | "'obj'", 718 | "'object'", 719 | "'Object'", 720 | "'object_id'", 721 | "'ObjectSpace'", 722 | "'oct'", 723 | "'Oct'", 724 | "'offset'", 725 | "'on'", 726 | "'On'", 727 | "'only'", 728 | "'Oo'", 729 | "'op'", 730 | "'Op'", 731 | "'operation'", 732 | "'operation2'", 733 | "'operation3'", 734 | "'OP_NOP'", 735 | "'OP_STOP'", 736 | "'opt_block_arg'", 737 | "'opt_block_param'", 738 | "'opt_bv_decl'", 739 | "'opt_call_args'", 740 | "'opt_else'", 741 | "'opt_ensure'", 742 | "'opt_f_block_arg'", 743 | "'opt_nl'", 744 | "'opt_paren_args'", 745 | "'opt_rescue'", 746 | "'opt_terms'", 747 | "'or'", 748 | "'ord'", 749 | "'orig'", 750 | "'other'", 751 | "'__outer__'", 752 | "'P9o'", 753 | "'padding'", 754 | "'pad_repetitions'", 755 | "'padstr'", 756 | "'parameters'", 757 | "'paren_args'", 758 | "'partition'", 759 | "'pattern'", 760 | "'PC'", 761 | "'peek'", 762 | "'peek_values'", 763 | "'permutation'", 764 | "'plen'", 765 | "'point'", 766 | "'pop'", 767 | "'popping'", 768 | "'pos'", 769 | 
"'posnum'", 770 | "'post'", 771 | "'pow'", 772 | "'pp'", 773 | "'pproc'", 774 | "'pre'", 775 | "'precision'", 776 | "'prefix'", 777 | "'prepend'", 778 | "'prepended'", 779 | "'prepend_features'", 780 | "'primary'", 781 | "'primary_value'", 782 | "'print'", 783 | "'printf'", 784 | "'__printstr__'", 785 | "'private'", 786 | "'private_methods'", 787 | "'prl'", 788 | "'proc'", 789 | "'Proc'", 790 | "'program'", 791 | "'protected'", 792 | "'protected_methods'", 793 | "'ps'", 794 | "'public'", 795 | "'public_methods'", 796 | "'push'", 797 | "'putchar'", 798 | "'puts'", 799 | "'quo'", 800 | "'raise'", 801 | "'rand'", 802 | "'Random'", 803 | "'range'", 804 | "'Range'", 805 | "'RangeError'", 806 | "'rassoc'", 807 | "'rb'", 808 | "'RB'", 809 | "'rbracket'", 810 | "'RC'", 811 | "'read_debug_record'", 812 | "'readint_mrb_int'", 813 | "'read_irep_record_1'", 814 | "'read_lv_record'", 815 | "'read_section_debug'", 816 | "'read_section_lv'", 817 | "'realloc'", 818 | "'redo'", 819 | "'reduce'", 820 | "'reg'", 821 | "'regexp'", 822 | "'Regexp'", 823 | "'RegexpError'", 824 | "'rehash'", 825 | "'reject'", 826 | "'remove_class_variable'", 827 | "'remove_const'", 828 | "'remove_instance_variable'", 829 | "'remove_method'", 830 | "'replace'", 831 | "'req'", 832 | "'required'", 833 | "'res'", 834 | "'rescue'", 835 | "'resize_capa'", 836 | "'rest'", 837 | "'restarg_mark'", 838 | "'result'", 839 | "'resume'", 840 | "'reswords'", 841 | "'ret'", 842 | "'retry'", 843 | "'return'", 844 | "'reverse'", 845 | "'reverse_each'", 846 | "'rewind'", 847 | "'right'", 848 | "'rindex'", 849 | "'rjust'", 850 | "'rotate'", 851 | "'round'", 852 | "'row'", 853 | "'rparen'", 854 | "'rpartition'", 855 | "'rs_len'", 856 | "'rstrip'", 857 | "'RUBY_ENGINE'", 858 | "'RUBY_ENGINE_VERSION'", 859 | "'RUBY_VERSION'", 860 | "'RuntimeError'", 861 | "'sample'", 862 | "'Sat'", 863 | "'satisfied'", 864 | "'scan'", 865 | "'SClass'", 866 | "'scope'", 867 | "'scope_new'", 868 | "'script'", 869 | "'ScriptError'", 870 | 
"'sec'", 871 | "'select'", 872 | "'self'", 873 | "'self_arity'", 874 | "'__send__'", 875 | "'send'", 876 | "'sep'", 877 | "'Sep'", 878 | "'sequence'", 879 | "'set'", 880 | "'set_backtrace'", 881 | "'setbyte'", 882 | "'_setjmp'", 883 | "'shift'", 884 | "'shuffle'", 885 | "'sin'", 886 | "'singleton'", 887 | "'singleton_class'", 888 | "'singleton_methods'", 889 | "'sinh'", 890 | "'size'", 891 | "'sl'", 892 | "'slice'", 893 | "'snprintf'", 894 | "'so'", 895 | "'So'", 896 | "'sort'", 897 | "'sort_by'", 898 | "'__sort_sub__'", 899 | "'source_location'", 900 | "'Sp'", 901 | "'spaces'", 902 | "'specifier'", 903 | "'splice'", 904 | "'split'", 905 | "'sprintf'", 906 | "'sqrt'", 907 | "'srand'", 908 | "'__stack_chk_fail'", 909 | "'StandardError'", 910 | "'start'", 911 | "'state'", 912 | "'stderr'", 913 | "'stdin'", 914 | "'stdout'", 915 | "'step'", 916 | "'step_ratio'", 917 | "'stmt'", 918 | "'stmts'", 919 | "'stop_exc'", 920 | "'StopIteration'", 921 | "'store'", 922 | "'str'", 923 | "'str2'", 924 | "'strchr'", 925 | "'strcmp'", 926 | "'str_each'", 927 | "'string'", 928 | "'String'", 929 | "'string_interp'", 930 | "'string_rep'", 931 | "'strip'", 932 | "'strlen'", 933 | "'str_make_shared'", 934 | "'strncmp'", 935 | "'strncpy'", 936 | "'strtoul'", 937 | "'struct'", 938 | "'Struct'", 939 | "'sub'", 940 | "'__sub_replace'", 941 | "'succ'", 942 | "'Sun'", 943 | "'super'", 944 | "'superclass'", 945 | "'supported'", 946 | "'__svalue'", 947 | "'SVD'", 948 | "'swapcase'", 949 | "'sym'", 950 | "'symbol'", 951 | "'Symbol'", 952 | "'symbols'", 953 | "'sym_inspect'", 954 | "'syntax'", 955 | "'SyntaxError'", 956 | "'_sys_fail'", 957 | "'SystemCallError'", 958 | "'SystemStackError'", 959 | "'TA'", 960 | "'tail'", 961 | "'take'", 962 | "'taken'", 963 | "'take_while'", 964 | "'tAMPER'", 965 | "'tan'", 966 | "'tANDDOT'", 967 | "'tANDOP'", 968 | "'tanh'", 969 | "'tap'", 970 | "'tAREF'", 971 | "'T_ARRAY'", 972 | "'tASET'", 973 | "'tASSOC'", 974 | "'TB'", 975 | "'tBACK_REF'", 976 | "'TbG'", 977 
| "'T_CLASS'", 978 | "'tCMP'", 979 | "'tCOLON2'", 980 | "'tCOLON3'", 981 | "'tCONSTANT'", 982 | "'T_CPTR'", 983 | "'tCVAR'", 984 | "'T_DATA'", 985 | "'tDOT2'", 986 | "'tDOT3'", 987 | "'TeD'", 988 | "'T_ENV'", 989 | "'tEQ'", 990 | "'tEQQ'", 991 | "'term'", 992 | "'terms'", 993 | "'T_EXCEPTION'", 994 | "'T_FALSE'", 995 | "'T_FIBER'", 996 | "'tFID'", 997 | "'T_FILE'", 998 | "'T_FIXNUM'", 999 | "'tFLOAT'", 1000 | "'T_FLOAT'", 1001 | "'T_FREE'", 1002 | "'tGEQ'", 1003 | "'tGVAR'", 1004 | "'T_HASH'", 1005 | "'tHD_LITERAL_DELIM'", 1006 | "'tHD_STRING_MID'", 1007 | "'tHD_STRING_PART'", 1008 | "'then'", 1009 | "'tHEREDOC_BEG'", 1010 | "'tHEREDOC_END'", 1011 | "'this'", 1012 | "'T_ICLASS'", 1013 | "'tIDENTIFIER'", 1014 | "'time'", 1015 | "'Time'", 1016 | "'times'", 1017 | "'tINTEGER'", 1018 | "'tIVAR'", 1019 | "'tLABEL'", 1020 | "'tLABEL_END'", 1021 | "'tLAMBDA'", 1022 | "'tLAMBEG'", 1023 | "'tLAST_TOKEN'", 1024 | "'tLBRACE'", 1025 | "'tLBRACE_ARG'", 1026 | "'tLBRACK'", 1027 | "'tLEQ'", 1028 | "'tLITERAL_DELIM'", 1029 | "'tLOWEST'", 1030 | "'tLPAREN'", 1031 | "'tLPAREN_ARG'", 1032 | "'tLSHFT'", 1033 | "'tMATCH'", 1034 | "'T_MODULE'", 1035 | "'tmp'", 1036 | "'tNEQ'", 1037 | "'tNMATCH'", 1038 | "'tNTH_REF'", 1039 | "'to_ary'", 1040 | "'T_OBJECT'", 1041 | "'to_enum'", 1042 | "'to_h'", 1043 | "'to_hash'", 1044 | "'to_i'", 1045 | "'to_int'", 1046 | "'TOJ'", 1047 | "'TOLERANCE'", 1048 | "'tolower'", 1049 | "'tOP_ASGN'", 1050 | "'top_compstmt'", 1051 | "'to_proc'", 1052 | "'top_stmt'", 1053 | "'top_stmts'", 1054 | "'tOROP'", 1055 | "'to_s'", 1056 | "'to_str'", 1057 | "'to_sym'", 1058 | "'TOTAL'", 1059 | "'toupper'", 1060 | "'tPOW'", 1061 | "'T_PROC'", 1062 | "'trailer'", 1063 | "'T_RANGE'", 1064 | "'transfer'", 1065 | "'transform_keys'", 1066 | "'transform_values'", 1067 | "'transpose'", 1068 | "'tREGEXP'", 1069 | "'tREGEXP_BEG'", 1070 | "'tREGEXP_END'", 1071 | "'tRPAREN'", 1072 | "'tRSHFT'", 1073 | "'true'", 1074 | "'TrueClass'", 1075 | "'truncate'", 1076 | "'try_convert'", 1077 | 
"'T_SCLASS'", 1078 | "'tSTAR'", 1079 | "'tSTRING'", 1080 | "'T_STRING'", 1081 | "'tSTRING_BEG'", 1082 | "'tSTRING_DVAR'", 1083 | "'tSTRING_MID'", 1084 | "'tSTRING_PART'", 1085 | "'tSYMBEG'", 1086 | "'T_SYMBOL'", 1087 | "'tSYMBOLS_BEG'", 1088 | "'tt'", 1089 | "'T_TRUE'", 1090 | "'Tue'", 1091 | "'tUMINUS'", 1092 | "'tUMINUS_NUM'", 1093 | "'T_UNDEF'", 1094 | "'tUPLUS'", 1095 | "'twice'", 1096 | "'tWORDS_BEG'", 1097 | "'tXSTRING'", 1098 | "'tXSTRING_BEG'", 1099 | "'type'", 1100 | "'TypeError'", 1101 | "'umrb_obj_value'", 1102 | "'undef'", 1103 | "'undefined'", 1104 | "'undef_list'", 1105 | "'undef_method'", 1106 | "'uniq'", 1107 | "'unless'", 1108 | "'unshift'", 1109 | "'until'", 1110 | "'upcase'", 1111 | "'__update'", 1112 | "'update'", 1113 | "'upto'", 1114 | "'usec'", 1115 | "'useless'", 1116 | "'utc'", 1117 | "'v0000'", 1118 | "'val'", 1119 | "'validated'", 1120 | "'vals'", 1121 | "'value'", 1122 | "'values'", 1123 | "'values_at'", 1124 | "'variable'", 1125 | "'var_lhs'", 1126 | "'var_ref'", 1127 | "'verbose'", 1128 | "'version'", 1129 | "'vm'", 1130 | "'Vm'", 1131 | "'warn'", 1132 | "'wday'", 1133 | "'Wed'", 1134 | "'when'", 1135 | "'while'", 1136 | "'width'", 1137 | "'with_index'", 1138 | "'with_object'", 1139 | "'words'", 1140 | "'x86_64'", 1141 | "'xstring'", 1142 | "'yday'", 1143 | "'year'", 1144 | "'yield'", 1145 | "'yielder'", 1146 | "'Yielder'", 1147 | "'yield_self'", 1148 | "'zip'", 1149 | "'zone'" 1150 | ], 1151 | "SP": [ 1152 | "' '" 1153 | ], 1154 | "ENTRYPOINT": [ 1155 | "RUBYBLOCK" 1156 | ], 1157 | "RUBYBLOCK": [ 1158 | "STATEMENT NEWLINE RUBYBLOCK", 1159 | "' '" 1160 | ], 1161 | "NEWLINE": [ 1162 | "'\\n'" 1163 | ], 1164 | "STATEMENT": [ 1165 | "VAR '=' VAR '.' IDENTIFIER '(' ARGS ')'", 1166 | "VAR '=' IDENTIFIER '.' IDENTIFIER '(' ARGS ')'", 1167 | "VAR '=' VAL '.' 
IDENTIFIER '(' ARGS ')'", 1168 | "VAR '=' VAL", 1169 | "'return' SP VAR", 1170 | "'yield' SP VAR", 1171 | "'continue' SP VAR", 1172 | "'break' SP VAR", 1173 | "'next' SP VAR" 1174 | ], 1175 | "VAL": [ 1176 | "'1'", 1177 | "'0'", 1178 | "'0.0'", 1179 | "'\"foo\"'", 1180 | "'\"asdfasdf\"'", 1181 | "'\"o\"'", 1182 | "'nil'", 1183 | "'true'", 1184 | "'false'", 1185 | "'/foo/'", 1186 | "'[]'", 1187 | "'[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,nil]'" 1188 | ], 1189 | "VAR": [ 1190 | "'a'", 1191 | "'b'", 1192 | "'c'", 1193 | "'d'" 1194 | ] 1195 | } 1196 | -------------------------------------------------------------------------------- /test-data/benchmarks/grammar.postcard: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z2-2z/peacock/f2d053cf0e198be03220b6d2e8be4e1fd26be86a/test-data/benchmarks/grammar.postcard -------------------------------------------------------------------------------- /test-data/benchmarks/nop.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main (void) { 5 | #ifdef __AFL_INIT 6 | __AFL_INIT(); 7 | #endif 8 | _Exit(0); 9 | } 10 | -------------------------------------------------------------------------------- /test-data/benchmarks/patch-libafl: -------------------------------------------------------------------------------- 1 | diff --git a/fuzzers/baby_fuzzer_gramatron/src/main.rs b/fuzzers/baby_fuzzer_gramatron/src/main.rs 2 | index ada8c1df..6a9600cf 100644 3 | --- a/fuzzers/baby_fuzzer_gramatron/src/main.rs 4 | +++ b/fuzzers/baby_fuzzer_gramatron/src/main.rs 5 | @@ -106,11 +106,10 @@ pub fn main() { 6 | ) 7 | .expect("Failed to create the Executor"); 8 | 9 | - let automaton = read_automaton_from_file(PathBuf::from("auto.postcard")); 10 | + let automaton = read_automaton_from_file(PathBuf::from("grammar.postcard")); 11 | let mut generator = GramatronGenerator::new(&automaton); 12 | 13 | // Use this code to 
profile the generator performance 14 | - /* 15 | use libafl::generators::Generator; 16 | use std::collections::HashSet; 17 | use std::collections::hash_map::DefaultHasher; 18 | @@ -122,21 +121,18 @@ pub fn main() { 19 | s.finish() 20 | } 21 | 22 | - let mut set = HashSet::new(); 23 | - let st = libafl_bolts::current_milliseconds(); 24 | - let mut b = vec![]; 25 | - let mut c = 0; 26 | - for _ in 0..100000 { 27 | + let mut b = Vec::with_capacity(128 * 1024 * 1024); 28 | + let mut generated = 0_usize; 29 | + let start = std::time::Instant::now(); 30 | + while generated < 1024 * 1024 * 1024 { 31 | let i = generator.generate(&mut state).unwrap(); 32 | i.unparse(&mut b); 33 | - set.insert(calculate_hash(&b)); 34 | - c += b.len(); 35 | + generated += b.len(); 36 | } 37 | - println!("{} / {}", c, libafl_bolts::current_milliseconds() - st); 38 | - println!("{} / 100000", set.len()); 39 | + let delta = std::time::Instant::now() - start; 40 | + println!("secs={} nsecs={}", delta.as_secs(), delta.as_nanos()); 41 | 42 | return; 43 | - */ 44 | 45 | // Generate 8 initial inputs 46 | state 47 | -------------------------------------------------------------------------------- /test-data/benchmarks/throughput.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "generator.h" 6 | 7 | #define SEQ_LEN 4096 8 | #define BUF_SIZE (128 * 1024 * 1024) 9 | 10 | void bench_generation(size_t* sequence, unsigned char* output) { 11 | struct timespec start, end; 12 | size_t generated = 0; 13 | 14 | clock_gettime(CLOCK_MONOTONIC, &start); 15 | 16 | while (generated < 1 * 1024 * 1024 * 1024) { 17 | size_t seq_len = mutate_sequence(sequence, 0, SEQ_LEN); 18 | size_t out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE); 19 | generated += out_len; 20 | } 21 | 22 | clock_gettime(CLOCK_MONOTONIC, &end); 23 | 24 | time_t secs = end.tv_sec - start.tv_sec; 25 | long nsecs = end.tv_nsec - start.tv_nsec; 26 | 
27 | if (nsecs < 0) { 28 | secs -= 1; 29 | nsecs += 1000000000; 30 | } 31 | 32 | printf("Generation: secs=%lu nsecs=%ld\n", secs, nsecs); 33 | } 34 | 35 | void bench_mutation(size_t* sequence, unsigned char* output) { 36 | struct timespec start, end; 37 | size_t generated = 0; 38 | size_t seq_len = mutate_sequence(sequence, 0, SEQ_LEN); 39 | 40 | clock_gettime(CLOCK_MONOTONIC, &start); 41 | 42 | while (generated < 1 * 1024 * 1024 * 1024) { 43 | seq_len = mutate_sequence(sequence, seq_len / 2, SEQ_LEN); 44 | size_t out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE); 45 | generated += out_len; 46 | } 47 | 48 | clock_gettime(CLOCK_MONOTONIC, &end); 49 | 50 | time_t secs = end.tv_sec - start.tv_sec; 51 | long nsecs = end.tv_nsec - start.tv_nsec; 52 | 53 | if (nsecs < 0) { 54 | secs -= 1; 55 | nsecs += 1000000000; 56 | } 57 | 58 | printf("Mutation: secs=%lu nsecs=%ld\n", secs, nsecs); 59 | } 60 | 61 | int main (void) { 62 | size_t* sequence = calloc(SEQ_LEN, sizeof(size_t)); 63 | unsigned char* output = malloc(BUF_SIZE); 64 | bench_generation(sequence, output); 65 | bench_mutation(sequence, output); 66 | } 67 | -------------------------------------------------------------------------------- /test-data/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !*.c 3 | !.gitignore 4 | -------------------------------------------------------------------------------- /test-data/fuzz/main.c: -------------------------------------------------------------------------------- 1 | // 2 | 3 | #include 4 | #include 5 | 6 | void print_loop (FILE* input) { 7 | char buf[128]; 8 | 9 | while (!feof(input) && !ferror(input)) { 10 | size_t num = fread(buf, 1, sizeof(buf), input); 11 | 12 | if (!num) { 13 | break; 14 | } 15 | 16 | fwrite(buf, 1, num, stdout); 17 | } 18 | 19 | fprintf(stdout, "\n"); 20 | fflush(stdout); 21 | } 22 | 23 | int main (int argc, char** argv) { 24 | __AFL_INIT(); 25 | 26 | FILE* input = NULL; 27 | 28 | 
if (argc == 1) { 29 | input = stdin; 30 | } else if (argc == 2) { 31 | input = fopen(argv[1], "rb"); 32 | } else { 33 | fprintf(stderr, "Invalid test invocation\n"); 34 | return 1; 35 | } 36 | 37 | print_loop(input); 38 | 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /test-data/grammars/duplicate_rules.json: -------------------------------------------------------------------------------- 1 | { 2 | "": [ 3 | ["a", "b"], 4 | ["ab"], 5 | ["b", "a"], 6 | ["a", "b"], 7 | ["ba"] 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /test-data/grammars/gramatron.json: -------------------------------------------------------------------------------- 1 | { 2 | "ARGLIST": [ 3 | "EXPR ',' ARGLIST", 4 | "EXPR", 5 | "EXPR ',' ARGLIST", 6 | "EXPR" 7 | ], 8 | "ARGS": [ 9 | "'()'", 10 | "'(' ARGLIST ')'", 11 | "'()'", 12 | "'(' ARGLIST ')'" 13 | ], 14 | "ARITHMETICOPERATION": [ 15 | "EXPR '/' EXPR", 16 | "EXPR '*' EXPR", 17 | "EXPR '+' EXPR", 18 | "EXPR '-' EXPR", 19 | "EXPR '%' EXPR", 20 | "EXPR '**' EXPR", 21 | "EXPR '++'" 22 | ], 23 | "ARRAY": [ 24 | "'[' ARRAYCONTENT ']'", 25 | "'[]'" 26 | ], 27 | "ARRAYCONTENT": [ 28 | "EXPR ',' ARRAYCONTENT", 29 | "EXPR" 30 | ], 31 | "BOOLEAN": [ 32 | "'true'", 33 | "'false'" 34 | ], 35 | "BYTEWISEOPERATION": [ 36 | "EXPR '&' EXPR", 37 | "EXPR '|' EXPR" 38 | ], 39 | "COMPARISONOPERATION": [ 40 | "EXPR '<' EXPR" 41 | ], 42 | "DECIMALDIGITS": [ 43 | "'20'", 44 | "'1234'", 45 | "'66'", 46 | "'234_9'", 47 | "'99999999999999999999'" 48 | ], 49 | "DECIMALNUMBER": [ 50 | "DECIMALDIGITS" 51 | ], 52 | "EXPR": [ 53 | "'(' EXPR ')'", 54 | "VAR", 55 | "'delete' SP EXPR", 56 | "'new' SP IDENTIFIER ARGS", 57 | "LITERAL", 58 | "IDENTIFIER", 59 | "METHODCALL", 60 | "'(' ARITHMETICOPERATION ')'", 61 | "'(' COMPARISONOPERATION ')'", 62 | "'(' BYTEWISEOPERATION ')'", 63 | "'(' LOGICALOPERATION ')'" 64 | ], 65 | "IDENTIFIER": [ 66 | "'Object'", 67 | "VAR", 
68 | "'Function'", 69 | "'main'", 70 | "'opt'", 71 | "'Boolean'", 72 | "'Symbol'", 73 | "'JSON'", 74 | "'Error'", 75 | "'EvalError'", 76 | "'RangeError'", 77 | "'ReferenceError'", 78 | "'SyntaxError'", 79 | "'TypeError'", 80 | "'URIError'", 81 | "'this'", 82 | "'Number'", 83 | "'Math'", 84 | "'Date'", 85 | "'String'", 86 | "'RegExp'", 87 | "'Array'", 88 | "'Int8Array'", 89 | "'Uint8Array'", 90 | "'Uint8ClampedArray'", 91 | "'Int16Array'", 92 | "'Uint16Array'", 93 | "'Int32Array'", 94 | "'Uint32Array'", 95 | "'Float32Array'", 96 | "'Float64Array'", 97 | "'DataView'", 98 | "'ArrayBuffer'", 99 | "'Map'", 100 | "'Set'", 101 | "'WeakMap'", 102 | "'WeakSet'", 103 | "'Promise'", 104 | "'AsyncFunction'", 105 | "'asyncGenerator'", 106 | "'Reflect'", 107 | "'Proxy'", 108 | "'Intl'", 109 | "'Intl.Collator'", 110 | "'Intl.DateTimeFormat'", 111 | "'Intl.NumberFormat'", 112 | "'Intl.PluralRules'", 113 | "'WebAssembly'", 114 | "'WebAssembly.Module'", 115 | "'WebAssembly.Instance'", 116 | "'WebAssembly.Memory'", 117 | "'WebAssembly.Table'", 118 | "'WebAssembly.CompileError'", 119 | "'WebAssembly.LinkError'", 120 | "'WebAssembly.RuntimeError'", 121 | "'arguments'", 122 | "'Infinity'", 123 | "'NaN'", 124 | "'undefined'", 125 | "'null'", 126 | "'console'", 127 | "' '" 128 | ], 129 | "IDENTIFIERLIST": [ 130 | "IDENTIFIER ',' IDENTIFIERLIST", 131 | "'(' IDENTIFIERLIST '),' IDENTIFIERLIST", 132 | "IDENTIFIER" 133 | ], 134 | "JSBLOCK": [ 135 | "JSSTATEMENT", 136 | "JSSTATEMENT JSBLOCK" 137 | ], 138 | "JSSTATEMENT": [ 139 | "STATEMENT NEWLINE" 140 | ], 141 | "LITERAL": [ 142 | "'null'", 143 | "BOOLEAN", 144 | "NUMBER", 145 | "ARRAY" 146 | ], 147 | "LOGICALOPERATION": [ 148 | "EXPR '&&' EXPR", 149 | "EXPR '||' EXPR" 150 | ], 151 | "METHODCALL": [ 152 | "OBJECT PROPERTY METHODCALL1" 153 | ], 154 | "METHODCALL1": [ 155 | "'.' 
METHOD_NAME ARGS METHODCALL1", 156 | "' '" 157 | ], 158 | "METHOD_NAME": [ 159 | "IDENTIFIER", 160 | "'print'", 161 | "'eval'", 162 | "'uneval'", 163 | "'isFinite'", 164 | "'isNaN'", 165 | "'parseFloat'", 166 | "'parseInt'", 167 | "'decodeURI'", 168 | "'decodeURIComponent'", 169 | "'encodeURI'", 170 | "'encodeURIComponent'", 171 | "'escape'", 172 | "'unescape'", 173 | "'assign'", 174 | "'create'", 175 | "'defineProperty'", 176 | "'defineProperties'", 177 | "'entries'", 178 | "'freeze'", 179 | "'getOwnPropertyDescriptor'", 180 | "'getOwnPropertyDescriptors'", 181 | "'getOwnPropertyNames'", 182 | "'getOwnPropertySymbols'", 183 | "'getPrototypeOf'", 184 | "'is'", 185 | "'isExtensible'", 186 | "'isFrozen'", 187 | "'isSealed'", 188 | "'keys'", 189 | "'preventExtensions'", 190 | "'seal'", 191 | "'setPrototypeOf'", 192 | "'values'", 193 | "'__defineGetter__'", 194 | "'__defineSetter__'", 195 | "'__lookupGetter__'", 196 | "'__lookupSetter__'", 197 | "'hasOwnProperty'", 198 | "'isPrototypeOf'", 199 | "'propertyIsEnumerable'", 200 | "'toSource'", 201 | "'toLocaleString'", 202 | "'toString'", 203 | "'unwatch'", 204 | "'valueOf'", 205 | "'watch'", 206 | "'apply'", 207 | "'bind'", 208 | "'call'", 209 | "'isGenerator'", 210 | "'valueOf'", 211 | "'for'", 212 | "'keyFor'", 213 | "'stringify'", 214 | "'isInteger'", 215 | "'isSafeInteger'", 216 | "'toInteger'", 217 | "'toExponential'", 218 | "'toFixed'", 219 | "'toLocaleString'", 220 | "'toPrecision'", 221 | "'abs'", 222 | "'acos'", 223 | "'acosh'", 224 | "'asin'", 225 | "'asinh'", 226 | "'atan'", 227 | "'atanh'", 228 | "'atan2'", 229 | "'cbrt'", 230 | "'ceil'", 231 | "'clz32'", 232 | "'cos'", 233 | "'cosh'", 234 | "'exp'", 235 | "'expm1'", 236 | "'floor'", 237 | "'fround'", 238 | "'hypot'", 239 | "'imul'", 240 | "'log'", 241 | "'log1p'", 242 | "'log10'", 243 | "'log2'", 244 | "'max'", 245 | "'min'", 246 | "'pow'", 247 | "'random'", 248 | "'round'", 249 | "'sign'", 250 | "'sin'", 251 | "'sinh'", 252 | "'sqrt'", 253 | "'tan'", 254 | 
"'tanh'", 255 | "'trunc'", 256 | "'now'", 257 | "'parse'", 258 | "'UTC'", 259 | "'getDate'", 260 | "'getDay'", 261 | "'getFullYear'", 262 | "'getHours'", 263 | "'getMilliseconds'", 264 | "'getMinutes'", 265 | "'getMonth'", 266 | "'getSeconds'", 267 | "'getTime'", 268 | "'getTimezoneOffset'", 269 | "'getUTCDate'", 270 | "'getUTCDay'", 271 | "'getUTCFullYear'", 272 | "'getUTCHours'", 273 | "'getUTCMilliseconds'", 274 | "'getUTCMinutes'", 275 | "'getUTCMonth'", 276 | "'getUTCSeconds'", 277 | "'getYear'", 278 | "'setDate'", 279 | "'setFullYear'", 280 | "'setHours'", 281 | "'setMilliseconds'", 282 | "'setMinutes'", 283 | "'setMonth'", 284 | "'setSeconds'", 285 | "'setTime'", 286 | "'setUTCDate'", 287 | "'setUTCFullYear'", 288 | "'setUTCHours'", 289 | "'setUTCMilliseconds'", 290 | "'setUTCMinutes'", 291 | "'setUTCMonth'", 292 | "'setUTCSeconds'", 293 | "'setYear'", 294 | "'toDateString'", 295 | "'toISOString'", 296 | "'toJSON'", 297 | "'toGMTString'", 298 | "'toLocaleDateString'", 299 | "'toLocaleFormat'", 300 | "'toLocaleString'", 301 | "'toLocaleTimeString'", 302 | "'toTimeString'", 303 | "'toUTCString'", 304 | "'indexOf'", 305 | "'substring'", 306 | "'charAt'", 307 | "'strcmp'", 308 | "'fromCharCode'", 309 | "'fromCodePoint'", 310 | "'raw'", 311 | "'charCodeAt'", 312 | "'slice'", 313 | "'codePointAt'", 314 | "'concat'", 315 | "'includes'", 316 | "'endsWith'", 317 | "'lastIndexOf'", 318 | "'localeCompare'", 319 | "'match'", 320 | "'normalize'", 321 | "'padEnd'", 322 | "'padStart'", 323 | "'quote'", 324 | "'repeat'", 325 | "'replace'", 326 | "'search'", 327 | "'split'", 328 | "'startsWith'", 329 | "'substr'", 330 | "'toLocaleLowerCase'", 331 | "'toLocaleUpperCase'", 332 | "'toLowerCase'", 333 | "'toUpperCase'", 334 | "'trim'", 335 | "'trimleft'", 336 | "'trimright'", 337 | "'anchor'", 338 | "'big'", 339 | "'blink'", 340 | "'bold'", 341 | "'fixed'", 342 | "'fontcolor'", 343 | "'fontsize'", 344 | "'italics'", 345 | "'link'", 346 | "'small'", 347 | "'strike'", 348 | 
"'sub'", 349 | "'sup'", 350 | "'compile'", 351 | "'exec'", 352 | "'test'", 353 | "'from'", 354 | "'isArray'", 355 | "'of'", 356 | "'copyWithin'", 357 | "'fill'", 358 | "'pop'", 359 | "'push'", 360 | "'reverse'", 361 | "'shift'", 362 | "'sort'", 363 | "'splice'", 364 | "'unshift'", 365 | "'concat'", 366 | "'join'", 367 | "'every'", 368 | "'filter'", 369 | "'findIndex'", 370 | "'forEach'", 371 | "'map'", 372 | "'reduce'", 373 | "'reduceRight'", 374 | "'some'", 375 | "'move'", 376 | "'getInt8'", 377 | "'getUint8'", 378 | "'getInt16'", 379 | "'getUint16'", 380 | "'getInt32'", 381 | "'getUint32'", 382 | "'getFloat32'", 383 | "'getFloat64'", 384 | "'setInt8'", 385 | "'setUint8'", 386 | "'setInt16'", 387 | "'setUint16'", 388 | "'setInt32'", 389 | "'setUint32'", 390 | "'setFloat32'", 391 | "'setFloat64'", 392 | "'isView'", 393 | "'transfer'", 394 | "'clear'", 395 | "'get'", 396 | "'has'", 397 | "'set'", 398 | "'add'", 399 | "'splat'", 400 | "'check'", 401 | "'extractLane'", 402 | "'replaceLane'", 403 | "'load'", 404 | "'load1'", 405 | "'load2'", 406 | "'load3'", 407 | "'store'", 408 | "'store1'", 409 | "'store2'", 410 | "'store3'", 411 | "'addSaturate'", 412 | "'div'", 413 | "'mul'", 414 | "'neg'", 415 | "'reciprocalApproximation'", 416 | "'reciprocalSqrtApproximation'", 417 | "'subSaturate'", 418 | "'shuffle'", 419 | "'swizzle'", 420 | "'maxNum'", 421 | "'minNum'", 422 | "'select'", 423 | "'equal'", 424 | "'notEqual'", 425 | "'lessThan'", 426 | "'lessThanOrEqual'", 427 | "'greaterThan'", 428 | "'greaterThanOrEqual'", 429 | "'and'", 430 | "'or'", 431 | "'xor'", 432 | "'not'", 433 | "'shiftLeftByScalar'", 434 | "'shiftRightByScalar'", 435 | "'allTrue'", 436 | "'anyTrue'", 437 | "'fromFloat32x4'", 438 | "'fromFloat32x4Bits'", 439 | "'fromFloat64x2Bits'", 440 | "'fromInt32x4'", 441 | "'fromInt32x4Bits'", 442 | "'fromInt16x8Bits'", 443 | "'fromInt8x16Bits'", 444 | "'fromUint32x4'", 445 | "'fromUint32x4Bits'", 446 | "'fromUint16x8Bits'", 447 | "'fromUint8x16Bits'", 448 | 
"'neg'", 449 | "'compareExchange'", 450 | "'exchange'", 451 | "'wait'", 452 | "'wake'", 453 | "'isLockFree'", 454 | "'all'", 455 | "'race'", 456 | "'reject'", 457 | "'resolve'", 458 | "'catch'", 459 | "'then'", 460 | "'finally'", 461 | "'next'", 462 | "'throw'", 463 | "'close'", 464 | "'send'", 465 | "'apply'", 466 | "'construct'", 467 | "'deleteProperty'", 468 | "'ownKeys'", 469 | "'getCanonicalLocales'", 470 | "'supportedLocalesOf'", 471 | "'resolvedOptions'", 472 | "'formatToParts'", 473 | "'resolvedOptions'", 474 | "'instantiate'", 475 | "'instantiateStreaming'", 476 | "'compileStreaming'", 477 | "'validate'", 478 | "'customSections'", 479 | "'exports'", 480 | "'imports'", 481 | "'grow'", 482 | "'super'", 483 | "'in'", 484 | "'instanceof'", 485 | "' '" 486 | ], 487 | "NEWLINE": [ 488 | "'\\n'" 489 | ], 490 | "NUMBER": [ 491 | "'1/2'", 492 | "'1E2'", 493 | "'1E02'", 494 | "'1E+02'", 495 | "'-1'", 496 | "'-1.00'", 497 | "'-1/2'", 498 | "'-1E2'", 499 | "'-1E02'", 500 | "'-1E+02'", 501 | "'1/0'", 502 | "'0/0'", 503 | "'-2147483648/-1'", 504 | "'-9223372036854775808/-1'", 505 | "'-0'", 506 | "'-0.0'", 507 | "'+0'" 508 | ], 509 | "OBJECT": [ 510 | "IDENTIFIER" 511 | ], 512 | "ENTRYPOINT": [ 513 | "JSBLOCK" 514 | ], 515 | "PROPERTY": [ 516 | "'.length' PROPERTY", 517 | "'.prototype' PROPERTY", 518 | "'.constructor' PROPERTY", 519 | "'.__proto__' PROPERTY", 520 | "'.__noSuchMethod__' PROPERTY", 521 | "'.__count__' PROPERTY", 522 | "'.__parent__' PROPERTY", 523 | "'.arguments' PROPERTY", 524 | "'.arity' PROPERTY", 525 | "'.caller' PROPERTY", 526 | "'.name' PROPERTY", 527 | "'.displayName' PROPERTY", 528 | "'.iterator' PROPERTY", 529 | "'.asyncIterator' PROPERTY", 530 | "'.match' PROPERTY", 531 | "'.replace' PROPERTY", 532 | "'.search' PROPERTY", 533 | "'.split' PROPERTY", 534 | "'.hasInstance' PROPERTY", 535 | "'.isConcatSpreadable' PROPERTY", 536 | "'.unscopables' PROPERTY", 537 | "'.species' PROPERTY", 538 | "'.toPrimitive' PROPERTY", 539 | "'.toStringTag' PROPERTY", 
540 | "'.fileName' PROPERTY", 541 | "'.lineNumber' PROPERTY", 542 | "'.columnNumber' PROPERTY", 543 | "'.message' PROPERTY", 544 | "'.name' PROPERTY", 545 | "'.EPSILON' PROPERTY", 546 | "'.MAX_SAFE_INTEGER' PROPERTY", 547 | "'.MAX_VALUE' PROPERTY", 548 | "'.MIN_SAFE_INTEGER' PROPERTY", 549 | "'.MIN_VALUE' PROPERTY", 550 | "'.NaN' PROPERTY", 551 | "'.NEGATIVE_INFINITY' PROPERTY", 552 | "'.POSITIVE_INFINITY' PROPERTY", 553 | "'.E' PROPERTY", 554 | "'.LN2' PROPERTY", 555 | "'.LN10' PROPERTY", 556 | "'.LOG2E' PROPERTY", 557 | "'.LOG10E' PROPERTY", 558 | "'.PI' PROPERTY", 559 | "'.SQRT1_2' PROPERTY", 560 | "'.SQRT2' PROPERTY", 561 | "'.flags' PROPERTY", 562 | "'.global' PROPERTY", 563 | "'.ignoreCase' PROPERTY", 564 | "'.multiline' PROPERTY", 565 | "'.source' PROPERTY", 566 | "'.sticky' PROPERTY", 567 | "'.unicode' PROPERTY", 568 | "'.buffer' PROPERTY", 569 | "'.byteLength' PROPERTY", 570 | "'.byteOffset' PROPERTY", 571 | "'.BYTES_PER_ELEMENT' PROPERTY", 572 | "'.compare' PROPERTY", 573 | "'.format' PROPERTY", 574 | "'.callee' PROPERTY", 575 | "'.caller' PROPERTY", 576 | "'.memory' PROPERTY", 577 | "'.exports' PROPERTY", 578 | "' '" 579 | ], 580 | "SP": [ 581 | "' '" 582 | ], 583 | "STATEMENT": [ 584 | "EXPR ';'", 585 | "'var' SP VAR '=' EXPR ';'", 586 | "'let' SP VAR '=' EXPR ';'", 587 | "VAR '=' EXPR ';'", 588 | "VAR PROPERTY '=' EXPR ';'", 589 | "VAR '[' DECIMALNUMBER ']' '=' EXPR ';'", 590 | "'const' SP VAR '=' EXPR ';'", 591 | "'typeof' SP EXPR ';'", 592 | "'void' SP EXPR ';'", 593 | "'return' SP EXPR ';'", 594 | "VAR ':'" 595 | ], 596 | "VAR": [ 597 | "'a'", 598 | "'b'", 599 | "'c'", 600 | "'d'", 601 | "'e'", 602 | "'f'", 603 | "'g'", 604 | "'h'" 605 | ] 606 | } 607 | -------------------------------------------------------------------------------- /test-data/grammars/invalid-refs.json: -------------------------------------------------------------------------------- 1 | { 2 | "": [ 3 | [""] 4 | ] 5 | } 6 | 
-------------------------------------------------------------------------------- /test-data/grammars/mixed_rules.json: -------------------------------------------------------------------------------- 1 | { 2 | "": [ 3 | ["e", "", "f", ""] 4 | ], 5 | "": [ 6 | ["a"] 7 | ], 8 | "": [ 9 | ["b"] 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /test-data/grammars/recursion.json: -------------------------------------------------------------------------------- 1 | { 2 | "": [ 3 | [""] 4 | ], 5 | "": [ 6 | [""] 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /test-data/grammars/test-peacock.json: -------------------------------------------------------------------------------- 1 | { 2 | "": [ 3 | [""] 4 | ], 5 | 6 | "": [ 7 | ["<", "''", "", ">"], 8 | ["'''"] 9 | ] 10 | } -------------------------------------------------------------------------------- /test-data/grammars/unit_rules.json: -------------------------------------------------------------------------------- 1 | { 2 | "": [ 3 | [""], 4 | ["e"] 5 | ], 6 | "": [ 7 | [""], 8 | ["a"] 9 | ], 10 | "": [ 11 | ["b1"], 12 | ["b2"] 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /test-data/grammars/unused_rules.json: -------------------------------------------------------------------------------- 1 | { 2 | // COMPONENT 1 3 | "": [ 4 | ["", ""] 5 | ], 6 | "": [ 7 | ["a", ""], 8 | ["a"] 9 | ], 10 | "": [ 11 | ["b", ""], 12 | ["b"] 13 | ], 14 | 15 | // COMPONENT 2 16 | "": [ 17 | [""] 18 | ], 19 | "": [ 20 | [""] 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /test-data/libfuzzer/.gitignore: -------------------------------------------------------------------------------- 1 | harness 2 | corpus/ 3 | generator.* 4 | -------------------------------------------------------------------------------- /test-data/libfuzzer/Makefile: 
-------------------------------------------------------------------------------- 1 | 2 | harness: harness.c generator.c 3 | clang -O3 -flto -fsanitize=fuzzer -o $@ -I. $^ 4 | -------------------------------------------------------------------------------- /test-data/libfuzzer/harness.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "generator.h" 9 | 10 | #define OUT_LEN (128 * 1024 * 1024) 11 | 12 | size_t LLVMFuzzerCustomMutator (uint8_t* data, size_t size, size_t max_size, unsigned int seed) { 13 | //printf("LLVMFuzzerCustomMutator(%p, %lu, %lu, %u)\n", data, size, max_size, seed); 14 | 15 | if ((size % sizeof(size_t)) != 0) { 16 | size = 0; 17 | } 18 | 19 | size /= sizeof(size_t); 20 | 21 | if (size) { 22 | size = rand() % size; 23 | } 24 | 25 | max_size /= sizeof(size_t); 26 | 27 | seed_generator(seed); 28 | 29 | size_t new_len = mutate_sequence( 30 | (size_t*) data, 31 | size, 32 | max_size 33 | ); 34 | 35 | return new_len * sizeof(size_t); 36 | } 37 | 38 | int LLVMFuzzerTestOneInput (const uint8_t* data, size_t size) { 39 | static unsigned char* output = NULL; 40 | 41 | //printf("LLVMFuzzerTestOneInput(%p, %lu)\n", data, size); 42 | 43 | if ((size % sizeof(size_t)) != 0) { 44 | return -1; 45 | } 46 | 47 | if (!output) { 48 | output = malloc(OUT_LEN + 1); 49 | } 50 | 51 | size /= sizeof(size_t); 52 | 53 | size_t new_len = serialize_sequence( 54 | (size_t*) data, 55 | size, 56 | output, 57 | OUT_LEN 58 | ); 59 | output[new_len] = 0; 60 | 61 | //printf("%s\n", output); 62 | 63 | return 0; 64 | } 65 | 66 | void print_file (char* filename) { 67 | FILE* file = fopen(filename, "rb"); 68 | fseek(file, 0, SEEK_END); 69 | size_t file_size = ftell(file); 70 | 71 | if ((file_size % sizeof(size_t)) != 0) { 72 | exit(1); 73 | } 74 | 75 | fseek(file, 0, SEEK_SET); 76 | size_t* buffer = malloc(file_size); 77 | fread(buffer, 1, file_size, file); 78 
| 79 | unsigned char* output = malloc(OUT_LEN + 1); 80 | 81 | size_t out_len = serialize_sequence( 82 | buffer, 83 | file_size / sizeof(size_t), 84 | output, 85 | OUT_LEN 86 | ); 87 | output[out_len] = 0; 88 | 89 | printf("%s\n", output); 90 | 91 | fclose(file); 92 | } 93 | 94 | int LLVMFuzzerInitialize (int* argcp, char*** argvp) { 95 | int argc = *argcp; 96 | char** argv = *argvp; 97 | 98 | if (argc == 2 && !strncmp(argv[1], "--print=", 8)) { 99 | print_file(argv[1] + 8); 100 | exit(0); 101 | } 102 | 103 | return 0; 104 | } 105 | -------------------------------------------------------------------------------- /test-data/static_loading/.gitignore: -------------------------------------------------------------------------------- 1 | generator.* 2 | output/ 3 | .cur_input_* 4 | -------------------------------------------------------------------------------- /test-data/static_loading/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "static_loading" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [profile.release] 7 | lto = true 8 | codegen-units = 1 9 | 10 | [dependencies] 11 | peacock-fuzz = { path = "../..", features = ["static-loading"] } 12 | libafl = "0.13.0" 13 | libafl_bolts = "0.13.0" 14 | nix = "0.29" 15 | 16 | [build-dependencies] 17 | peacock-fuzz = { path = "../.." 
} 18 | cc = "1.0" 19 | -------------------------------------------------------------------------------- /test-data/static_loading/build.rs: -------------------------------------------------------------------------------- 1 | use peacock_fuzz::{ 2 | grammar::ContextFreeGrammar, 3 | backends::C::CGenerator, 4 | }; 5 | use cc; 6 | 7 | const GRAMMAR_FILE: &str = "php.json"; 8 | const GENERATOR_FILE: &str = "generator.c"; 9 | 10 | fn main() { 11 | let cfg = ContextFreeGrammar::builder() 12 | .gramatron_grammar(GRAMMAR_FILE).unwrap() 13 | .entrypoint("PROGRAM") 14 | .build().unwrap(); 15 | 16 | CGenerator::new().generate(GENERATOR_FILE, &cfg); 17 | 18 | cc::Build::new() 19 | .file(GENERATOR_FILE) 20 | .flag("-O3") 21 | .flag("-flto") 22 | .compile("generator"); 23 | 24 | println!("cargo:rerun-if-changed={}", GRAMMAR_FILE); 25 | } 26 | -------------------------------------------------------------------------------- /test-data/static_loading/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | use std::time::Duration; 3 | use nix::sys::signal::Signal; 4 | use libafl::prelude::{ 5 | Error, 6 | HitcountsMapObserver, StdMapObserver, 7 | TimeObserver, MaxMapFeedback, CalibrationStage, feedback_or, 8 | TimeFeedback, CrashFeedback, StdState, CachedOnDiskCorpus, 9 | OnDiskCorpus, 10 | StdMutationalStage, IndexesLenTimeMinimizerScheduler, 11 | StdWeightedScheduler, powersched::PowerSchedule, 12 | StdFuzzer, ForkserverExecutor, 13 | Fuzzer, 14 | TimeoutFeedback, HasCorpus, Corpus, 15 | Launcher, EventConfig, 16 | LlmpRestartingEventManager, CanTrack, 17 | }; 18 | use libafl_bolts::prelude::{ 19 | UnixShMemProvider, ShMemProvider, ShMem, AsSliceMut, 20 | current_nanos, StdRand, tuple_list, 21 | Cores, 22 | }; 23 | use peacock_fuzz::components::{ 24 | load_generator, 25 | PeacockInput, 26 | PeacockMutator, 27 | PeacockGenerator, 28 | seed_generator, 29 | }; 30 | 31 | fn main() -> Result<(), Error> { 32 | let args: 
Vec = std::env::args().skip(1).collect(); 33 | 34 | load_generator(); 35 | 36 | let mut run_client = |state: Option<_>, mut mgr: LlmpRestartingEventManager<_, _, _>, _core_id| { 37 | let output_dir = Path::new("output"); 38 | let queue_dir = output_dir.join("queue"); 39 | let crashes_dir = output_dir.join("crashes"); 40 | const MAP_SIZE: usize = 2_621_440; 41 | let seed = current_nanos(); 42 | let powerschedule = PowerSchedule::EXPLORE; 43 | let timeout = Duration::from_secs(10); 44 | let signal = str::parse::("SIGKILL").unwrap(); 45 | let debug_child = cfg!(debug_assertions); 46 | 47 | let mut shmem_provider = UnixShMemProvider::new()?; 48 | let mut shmem = shmem_provider.new_shmem(MAP_SIZE)?; 49 | shmem.write_to_env("__AFL_SHM_ID")?; 50 | let shmem_buf = shmem.as_slice_mut(); 51 | std::env::set_var("AFL_MAP_SIZE", format!("{}", MAP_SIZE)); 52 | 53 | let edges_observer = unsafe { HitcountsMapObserver::new(StdMapObserver::new("shared_mem", shmem_buf)).track_indices() }; 54 | 55 | let time_observer = TimeObserver::new("time"); 56 | 57 | let map_feedback = MaxMapFeedback::new(&edges_observer); 58 | 59 | let calibration = CalibrationStage::new(&map_feedback); 60 | 61 | let mut feedback = feedback_or!( 62 | map_feedback, 63 | TimeFeedback::new(&time_observer) 64 | ); 65 | 66 | let mut objective = feedback_or!( 67 | CrashFeedback::new(), 68 | TimeoutFeedback::new() 69 | ); 70 | 71 | seed_generator(seed as usize); 72 | 73 | let mut state = if let Some(state) = state { 74 | state 75 | } else { 76 | StdState::new( 77 | StdRand::with_seed(seed), 78 | CachedOnDiskCorpus::::new(&queue_dir, 128)?, 79 | OnDiskCorpus::new(crashes_dir)?, 80 | &mut feedback, 81 | &mut objective, 82 | )? 
83 | }; 84 | 85 | let mutator = PeacockMutator::new(); 86 | 87 | let mutational = StdMutationalStage::with_max_iterations(mutator, 1); 88 | 89 | let scheduler = IndexesLenTimeMinimizerScheduler::new( 90 | &edges_observer, 91 | StdWeightedScheduler::with_schedule( 92 | &mut state, 93 | &edges_observer, 94 | Some(powerschedule), 95 | ) 96 | ); 97 | 98 | let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); 99 | 100 | let mut executor = ForkserverExecutor::builder() 101 | .program(&args[0]) 102 | .debug_child(debug_child) 103 | .parse_afl_cmdline(args.get(1..).unwrap_or(&[])) 104 | .coverage_map_size(MAP_SIZE) 105 | .is_persistent(false) 106 | .timeout(timeout) 107 | .kill_signal(signal) 108 | .build_dynamic_map(edges_observer, tuple_list!(time_observer))?; 109 | 110 | state.load_initial_inputs( 111 | &mut fuzzer, 112 | &mut executor, 113 | &mut mgr, 114 | &[ 115 | queue_dir, 116 | ] 117 | )?; 118 | 119 | if state.corpus().count() == 0 { 120 | let mut generator = PeacockGenerator::new(); 121 | state.generate_initial_inputs_forced( 122 | &mut fuzzer, 123 | &mut executor, 124 | &mut generator, 125 | &mut mgr, 126 | 16, 127 | )?; 128 | } 129 | 130 | let mut stages = tuple_list!(calibration, mutational); 131 | 132 | fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; 133 | Ok(()) 134 | }; 135 | 136 | let shmem_provider = UnixShMemProvider::new()?; 137 | 138 | let monitor = libafl::prelude::SimplePrintingMonitor::new(); 139 | 140 | let cores = Cores::from_cmdline("0").expect("Invalid core specification"); 141 | 142 | match Launcher::builder() 143 | .shmem_provider(shmem_provider) 144 | .configuration(EventConfig::AlwaysUnique) 145 | .monitor(monitor) 146 | .run_client(&mut run_client) 147 | .cores(&cores) 148 | .build() 149 | .launch() 150 | { 151 | Err(Error::ShuttingDown) | Ok(()) => Ok(()), 152 | e => e, 153 | } 154 | } 155 | --------------------------------------------------------------------------------