├── .gitignore
├── COPYING
├── Cargo.lock
├── Cargo.toml
├── README.md
├── TODO
├── logo.png
├── rustfmt.toml
├── src
    ├── backends
    │   ├── C
    │   │   ├── codegen.rs
    │   │   ├── formatter.rs
    │   │   ├── grammar.rs
    │   │   └── mod.rs
    │   ├── interpreter
    │   │   └── mod.rs
    │   ├── json
    │   │   ├── generator.rs
    │   │   └── mod.rs
    │   └── mod.rs
    ├── bin
    │   ├── compile.rs
    │   ├── dump.rs
    │   ├── fuzz.rs
    │   ├── gen.rs
    │   └── merge.rs
    ├── components
    │   ├── ffi.rs
    │   ├── generator.rs
    │   ├── input.rs
    │   ├── mod.rs
    │   └── mutator.rs
    ├── error.rs
    ├── grammar
    │   ├── builder.rs
    │   ├── cfg.rs
    │   └── mod.rs
    ├── lib.rs
    └── parser
    │   ├── gramatron.rs
    │   ├── mod.rs
    │   └── peacock.rs
├── template.c
└── test-data
    ├── C
        ├── .gitignore
        ├── bench_generation.c
        ├── fuzz_mutate.c
        ├── fuzz_unparse.c
        ├── test_generation.c
        ├── test_mutate.c
        └── test_unparse.c
    ├── benchmarks
        ├── .gitignore
        ├── Makefile
        ├── README.md
        ├── baseline.c
        ├── gramatron-patch
        ├── grammar.json
        ├── grammar.postcard
        ├── nop.c
        ├── patch-libafl
        ├── source_automata.json
        └── throughput.c
    ├── fuzz
        ├── .gitignore
        └── main.c
    ├── grammars
        ├── duplicate_rules.json
        ├── gramatron.json
        ├── invalid-refs.json
        ├── mixed_rules.json
        ├── recursion.json
        ├── test-peacock.json
        ├── unit_rules.json
        └── unused_rules.json
    ├── libfuzzer
        ├── .gitignore
        ├── Makefile
        └── harness.c
    └── static_loading
        ├── .gitignore
        ├── Cargo.lock
        ├── Cargo.toml
        ├── build.rs
        ├── php.json
        └── src
            └── main.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "peacock-fuzz"
 3 | description = "Library to parse context-free grammars and create grammar-based fuzzing tools"
 4 | version = "0.2.4"
 5 | edition = "2021"
 6 | license = "GPL-3.0-only"
 7 | readme = "README.md"
 8 | repository = "https://github.com/z2-2z/peacock"
 9 | keywords = ["fuzzing", "grammars"]
10 | include = [
11 |     "/src",
12 |     "/COPYING",
13 |     "/README.md",
14 | ]
15 | 
16 | [profile.release]
17 | lto = true
18 | codegen-units = 1
19 | debug = true
20 | panic = "abort"
21 | 
22 | [dependencies]
23 | serde_json = { version = "1.0", features = ["default", "preserve_order"] }
24 | json_comments = "0.2"
25 | thiserror = "1.0"
26 | ahash = { version = "0.8", default-features = false, features = ["std", "compile-time-rng"] }
27 | petgraph = "0.6"
28 | itertools = "0.12"
29 | libloading = "0.8"
30 | clap = { version = "4.4", features = ["derive"] }
31 | serde = "1.0"
32 | libafl = "0.13"
33 | postcard = "1.0"
34 | libafl_bolts = "0.13"
35 | nix = "0.29"
36 | 
37 | [[bin]]
38 | name = "peacock-dump"
39 | path = "src/bin/dump.rs"
40 | 
41 | [[bin]]
42 | name = "peacock-fuzz"
43 | path = "src/bin/fuzz.rs"
44 | 
45 | [[bin]]
46 | name = "peacock-compile"
47 | path = "src/bin/compile.rs"
48 | 
49 | [[bin]]
50 | name = "peacock-merge"
51 | path = "src/bin/merge.rs"
52 | 
53 | [[bin]]
54 | name = "peacock-gen"
55 | path = "src/bin/gen.rs"
56 | 
57 | [features]
58 | default = ["components"]
59 | 
60 | # Enables debugging of generated code by the C backend by inserting printf() statements
61 | # at the beginning of each function call.
62 | debug-codegen = []
63 | 
64 | docs-rs = ["libafl/document-features"]
65 | 
66 | # For the LibAFL components: Activate this when a generator is statically compiled into the fuzzer
67 | static-loading = ["components"]
68 | 
69 | # Include LibAFL components in library
70 | components = []
71 | 
72 | [package.metadata.docs.rs]
73 | features = ["docs-rs"]
74 | rustc-args = ["--cfg", "docsrs"]
75 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 |     <a href="https://crates.io/crates/peacock-fuzz"><img align="center" src="logo.png"></a>
  3 |     <b>~~~ fuzzing with grammar-based mutations ~~~</b>
  4 | </div>
  5 | 
  6 | <br/>
  7 | 
  8 | This project is a reimplementation of [Gramatron](https://github.com/HexHive/Gramatron) that is
  9 | 
 10 | - __performant__: 4x higher throughput than LibAFL's Gramatron implementation
 11 | - __versatile__: usable with LibAFL, libfuzzer, in a custom AFL++ mutator or standalone
 12 | - __easy to use__: no more orchestration of different scripts to get the fuzzing campaign running, everything is batteries-included
 13 | - __extendable__: at its core, peacock is a library that you can use at your leisure to customize every step of the grammar fuzzing process
 14 | - __backwards compatible__: it works with grammars that you have already written for other tools
 15 | 
 16 | ## What's inside
 17 | 1. A __standalone fuzzer__ similar to afl-fuzz that employs grammar-based mutations
 18 | 2. __LibAFL components__ to build your own grammar-based fuzzer
 19 | 3. Grammar mutation procedure generated as __C code__ that you can employ in other contexts
 20 | 
 21 | ## How to use it
 22 | Clone the repo and execute
 23 | ```
 24 | cargo build --release
 25 | ```
 26 | This creates 5 ready-to-use tools:
 27 | 
 28 | 1. `peacock-fuzz`: A coverage-guided fuzzer that can fuzz any binary compiled with AFL++'s compilers or anything that speaks AFL's forkserver protocol
 29 | 2. `peacock-dump`: peacock-fuzz saves crashes and queue items in a raw, binary format to disk. Use this tool to get a human readable output from any such file. All these binary files have the prefix `peacock-raw-`
 30 | 3. `peacock-compile`: Takes a grammar and compiles it to C code
 31 | 4. `peacock-merge`: Merge multiple grammar files into one or convert a grammar file from one format into another
 32 | 5. `peacock-gen`: Generate individual inputs from a grammar
 33 | 
 34 | If you want more fine-grained control you can use the crate `peacock_fuzz`, which is the backbone of all the tools from above.
 35 | See the documentation at [docs.rs](https://docs.rs/peacock-fuzz) in order to get started with peacock as a library.
 36 | 
 37 | ## How it works
 38 | Peacock is a fuzzer that implements so-called "grammar-based mutations". This means that it will mutate its inputs in such a way that they will always adhere to a given [grammar](https://en.wikipedia.org/wiki/Formal_grammar).     
 39 | 
 40 | The way mutations work is the same as in Gramatron. A grammar is converted to a [PDA](https://en.wikipedia.org/wiki/Pushdown_automaton) such that an input can be represented as a walk through the automaton. Then, a mutation of an input is simply a modification of an automaton walk. We cut off the walk at a random point and let it find a new random path through the automaton from there.
 41 | 
 42 | While Gramatron and LibAFL realize the automaton as an adjacency matrix,
 43 | peacock generates C code that encodes the automaton in its control flow. This saves us a lot of memory accesses and makes the mutation procedure faster.
 44 | 
 45 | The generated C code exposes a certain API that can be used by any application, e.g. a libfuzzer harness, an AFL++ custom mutator or even Rust code.
 46 | 
 47 | ## How to write grammars
 48 | Peacock accepts its context-free grammars in JSON format.
 49 | A context-free grammar has production rules of the form:
 50 | ```
 51 | A -> X Y Z ...
 52 | ```
 53 | where `A` _must_ be a non-terminal and `X`,`Y`,`Z` can be non-terminals or terminals. The right-hand-side must contain at least one symbol.
 54 | 
 55 | Non-terminals are enclosed in `<>`, so the non-terminal `A` would be represented as `<A>`. Terminals are enclosed in `''`.
 56 | 
 57 | The set of rules 
 58 | ```
 59 | A -> a B
 60 | A -> a
 61 | B -> b B
 62 | B -> Ɛ
 63 | ```
 64 | would be written as
 65 | ```jsonc
 66 | {
 67 |     // Comments are also possible :)
 68 |     "<A>": [
 69 |         ["'a'", "<B>"],
 70 |         ["'a'"]
 71 |     ],
 72 |     "<B>": [
 73 |         ["'b'", "<B>"],
 74 |         ["''"] // Ɛ = ''
 75 |     ]
 76 | }
 77 | ```
 78 | and corresponds to the regular expression `a(b*)`.
 79 | 
 80 | Peacock also supports the Gramatron format, which is a bit different and does not allow for comments.
 81 | 
 82 | The non-terminal `<ENTRYPOINT>` is the entrypoint of the grammar.
 83 | 
 84 | ## C API Documentation
 85 | - `void seed_generator (size_t new_seed)`   
 86 |   Supply a seed for the RNG of the mutator.
 87 | - `size_t unparse_sequence (size_t* seq_buf, size_t seq_capacity, unsigned char* input, size_t input_len)`   
 88 |   Given an input that adheres to the grammar, find the corresponding automaton walk. _This function may be slow, use outside of hot loop._
 89 |   - `seq_buf`: Automaton walk will be written into this buffer
 90 |   - `seq_capacity`: Maximum number of elements that `seq_buf` can hold (not number of bytes)
 91 |   - `input`: User input adhering to grammar
 92 |   - `input_len`: Length of `input`
 93 |   
 94 |   Returns the number of elements written to `seq_buf` or 0 if input does not adhere to grammar.
 95 | - `size_t mutate_sequence (size_t* buf, size_t len, size_t capacity)`   
 96 |   Given an automaton walk, create a random mutant of the walk.
 97 |   - `buf`: Pointer to array that holds automaton walk
 98 |   - `len`: Number of items in `buf` (not number of bytes)
 99 |   - `capacity`: Maximum number of items that `buf` can hold (not number of bytes)
100 |   
101 |   Returns the length of the new walk.
102 | - `size_t serialize_sequence (size_t* seq, size_t seq_len, unsigned char* out, size_t out_len)`    
103 |   Given an automaton walk, create the corresponding output.
104 |   - `seq`: Pointer to automaton walk
105 |   - `seq_len`: Number of items in `seq` (not number of bytes)
106 |   - `out`: Output will be written into that buffer
107 |   - `out_len`: Number of bytes in `out`
108 |   
109 |   Returns how many bytes have been written to `out`.
110 |   
111 |   
112 | Macros:
113 | - `MAKE_THREAD_SAFE`: Define this to make the mutator completely thread-safe
114 | - `MAKE_VISIBLE`: Define this to explicitly set the visibility of the functions from above to "default"
115 | - `STATIC_SEED=<your seed>`: Compile-time seed for the RNG
116 | - `DISABLE_rand`: Don't include the internal `rand` function and use an external one with the signature `size_t rand (void)`
117 | - `DISABLE_seed_generator`: Don't include the function `seed_generator`
118 | 


--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
1 | - comment functions in CGenerator: headline, annotation
2 | - peacock-gen take input on stdin if stdin is pipe, parse it and print mutation
3 | - reproducible benchmarks
4 | - rename peacock-raw filename to peacock-seq
5 | - better handling of static-loading in components/ffi.rs
6 | 


--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z2-2z/peacock/f2d053cf0e198be03220b6d2e8be4e1fd26be86a/logo.png


--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | max_width = 120
2 | use_small_heuristics = "Max"
3 | edition = "2021"
4 | match_block_trailing_comma = true
5 | use_field_init_shorthand = true
6 | struct_lit_width = 0
7 | imports_granularity = "Crate"
8 | imports_layout = "Vertical"
9 | 


--------------------------------------------------------------------------------
/src/backends/C/codegen.rs:
--------------------------------------------------------------------------------
  1 | use itertools::Itertools;
  2 | use std::{
  3 |     fs::File,
  4 |     io::Write,
  5 |     path::Path,
  6 | };
  7 | 
  8 | use crate::{
  9 |     backends::C::{
 10 |         formatter::CFormatter,
 11 |         grammar::{
 12 |             LLSymbol,
 13 |             LowLevelGrammar,
 14 |         },
 15 |     },
 16 |     grammar::ContextFreeGrammar,
 17 | };
 18 | 
 19 | fn rule_has_nonterminals(rule: &[LLSymbol]) -> bool {
 20 |     for symbol in rule {
 21 |         if matches!(symbol, LLSymbol::NonTerminal(_)) {
 22 |             return true;
 23 |         }
 24 |     }
 25 | 
 26 |     false
 27 | }
 28 | 
 29 | fn rules_have_nonterminals(rules: &[Vec<LLSymbol>]) -> bool {
 30 |     for rule in rules {
 31 |         if rule_has_nonterminals(rule) {
 32 |             return true;
 33 |         }
 34 |     }
 35 | 
 36 |     false
 37 | }
 38 | 
 39 | fn rule_has_terminals(rule: &[LLSymbol]) -> bool {
 40 |     for symbol in rule {
 41 |         if matches!(symbol, LLSymbol::Terminal(_)) {
 42 |             return true;
 43 |         }
 44 |     }
 45 | 
 46 |     false
 47 | }
 48 | 
 49 | fn rules_have_terminals(rules: &[Vec<LLSymbol>]) -> bool {
 50 |     for rule in rules {
 51 |         if rule_has_terminals(rule) {
 52 |             return true;
 53 |         }
 54 |     }
 55 | 
 56 |     false
 57 | }
 58 | 
 59 | fn emit_includes(fmt: &mut CFormatter<File>) {
 60 |     #[cfg(feature = "debug-codegen")]
 61 |     fmt.write("#include <stdio.h>");
 62 | 
 63 |     fmt.write("#include <stddef.h>");
 64 |     fmt.blankline();
 65 | }
 66 | 
 67 | fn emit_macros(fmt: &mut CFormatter<File>) {
 68 |     fmt.write("/* Helper Macros */");
 69 | 
 70 |     fmt.write("#undef THREAD_LOCAL");
 71 |     fmt.write("#ifdef MAKE_THREAD_SAFE");
 72 |     fmt.write("#define THREAD_LOCAL __thread");
 73 |     fmt.write("#else");
 74 |     fmt.write("#define THREAD_LOCAL");
 75 |     fmt.write("#endif");
 76 |     fmt.blankline();
 77 | 
 78 |     fmt.write("#undef UNLIKELY");
 79 |     fmt.write("#define UNLIKELY(x) __builtin_expect(!!(x), 0)");
 80 |     fmt.write("#undef LIKELY");
 81 |     fmt.write("#define LIKELY(x) __builtin_expect(!!(x), 1)");
 82 |     fmt.blankline();
 83 | 
 84 |     fmt.write("#ifndef __clang__");
 85 |     fmt.write("#undef __builtin_memcpy_inline");
 86 |     fmt.write("#define __builtin_memcpy_inline __builtin_memcpy");
 87 |     fmt.write("#endif");
 88 |     fmt.blankline();
 89 | 
 90 |     fmt.write("#undef EXPORT_FUNCTION");
 91 |     fmt.write("#ifdef MAKE_VISIBLE");
 92 |     fmt.write("#define EXPORT_FUNCTION __attribute__((visibility (\"default\")))");
 93 |     fmt.write("#else");
 94 |     fmt.write("#define EXPORT_FUNCTION");
 95 |     fmt.write("#endif");
 96 |     fmt.blankline();
 97 | }
 98 | 
 99 | fn emit_rand(fmt: &mut CFormatter<File>) {
100 |     fmt.write("/* RNG */");
101 | 
102 |     fmt.write("#ifndef STATIC_SEED");
103 |     fmt.write(" #define STATIC_SEED 0x35c6be9ba2548264");
104 |     fmt.write("#endif");
105 |     fmt.blankline();
106 | 
107 |     fmt.write("static THREAD_LOCAL size_t rand_state = STATIC_SEED;");
108 |     fmt.blankline();
109 | 
110 |     fmt.write("#ifndef DISABLE_rand");
111 |     fmt.write("static inline size_t rand (void) {");
112 |     fmt.indent();
113 |     fmt.write("size_t x = rand_state;");
114 |     fmt.write("x ^= x << 13;");
115 |     fmt.write("x ^= x >> 7;");
116 |     fmt.write("x ^= x << 17;");
117 |     fmt.write("return rand_state = x;");
118 |     fmt.unindent();
119 |     fmt.write("}");
120 |     fmt.write("#else");
121 |     fmt.write("size_t rand (void);");
122 |     fmt.write("#endif");
123 |     fmt.blankline();
124 | 
125 |     fmt.write("#ifndef DISABLE_seed_generator");
126 |     fmt.write("EXPORT_FUNCTION");
127 |     fmt.write("void seed_generator (size_t new_seed) {");
128 |     fmt.indent();
129 |     fmt.write("if (!new_seed) {");
130 |     fmt.indent();
131 |     fmt.write("new_seed = 0xDEADBEEF;");
132 |     fmt.unindent();
133 |     fmt.write("}");
134 |     fmt.blankline();
135 |     fmt.write("rand_state = new_seed;");
136 |     fmt.unindent();
137 |     fmt.write("}");
138 |     fmt.write("#else");
139 |     fmt.write("void seed_generator (size_t);");
140 |     fmt.write("#endif");
141 |     fmt.blankline();
142 | }
143 | 
144 | fn emit_types(fmt: &mut CFormatter<File>) {
145 |     fmt.write("// Used to represent a sequence of rules");
146 |     fmt.write("typedef struct {");
147 |     fmt.indent();
148 |     fmt.write("size_t* buf;");
149 |     fmt.write("size_t len;");
150 |     fmt.write("size_t capacity;");
151 |     fmt.unindent();
152 |     fmt.write("} Sequence;");
153 |     fmt.blankline();
154 | }
155 | 
156 | fn emit_mutation_declarations(grammar: &LowLevelGrammar, fmt: &mut CFormatter<File>) {
157 |     fmt.write("/* Forward declarations for sequence mutation functions */");
158 | 
159 |     for nonterm in grammar.rules().keys() {
160 |         fmt.write(format!(
161 |             "static int mutate_seq_nonterm{} (size_t* const, size_t* const, const size_t, size_t* const);",
162 |             *nonterm
163 |         ));
164 |     }
165 | 
166 |     fmt.blankline();
167 | }
168 | 
169 | fn emit_mutation_function_rule(rule: &[LLSymbol], fmt: &mut CFormatter<File>) {
170 |     for symbol in rule {
171 |         if let LLSymbol::NonTerminal(dst) = symbol {
172 |             fmt.write(format!("if (UNLIKELY(!mutate_seq_nonterm{}(buf, len, capacity, step))) {{", dst.id()));
173 |             fmt.indent();
174 |             fmt.write("return 0;");
175 |             fmt.unindent();
176 |             fmt.write("}");
177 |             fmt.blankline();
178 |         }
179 |     }
180 | }
181 | 
182 | fn emit_mutation_function_single(rule: &[LLSymbol], fmt: &mut CFormatter<File>) {
183 |     fmt.write("size_t idx = *len;");
184 |     fmt.blankline();
185 |     fmt.write("if (*step >= idx) {");
186 |     fmt.indent();
187 |     fmt.write("if (UNLIKELY(idx >= capacity)) {");
188 |     fmt.indent();
189 |     fmt.write("return 0;");
190 |     fmt.unindent();
191 |     fmt.write("}");
192 |     fmt.blankline();
193 |     fmt.write("buf[idx] = 0;");
194 |     fmt.write("*len = idx + 1;");
195 |     fmt.unindent();
196 |     fmt.write("}");
197 |     fmt.blankline();
198 | 
199 |     fmt.write("*step += 1;");
200 |     fmt.blankline();
201 | 
202 |     emit_mutation_function_rule(rule, fmt);
203 | 
204 |     fmt.write("return 1;");
205 | }
206 | 
207 | fn emit_mutation_function_multiple(rules: &[Vec<LLSymbol>], fmt: &mut CFormatter<File>) {
208 |     let have_nonterminals = rules_have_nonterminals(rules);
209 | 
210 |     fmt.write("size_t idx = *len;");
211 |     fmt.write("size_t target;");
212 |     fmt.blankline();
213 | 
214 |     if have_nonterminals {
215 |         fmt.write("if (*step < idx) {");
216 |         fmt.indent();
217 |         fmt.write("target = buf[*step];");
218 |         fmt.unindent();
219 |         fmt.write("} else {");
220 |     } else {
221 |         fmt.write("if (*step >= idx) {");
222 |     }
223 | 
224 |     fmt.indent();
225 |     fmt.write("if (UNLIKELY(idx >= capacity)) {");
226 |     fmt.indent();
227 |     fmt.write("return 0;");
228 |     fmt.unindent();
229 |     fmt.write("}");
230 |     fmt.blankline();
231 |     fmt.write(format!("target = rand() % {};", rules.len()));
232 |     fmt.write("buf[idx] = target;");
233 |     fmt.write("*len = idx + 1;");
234 |     fmt.unindent();
235 |     fmt.write("}");
236 |     fmt.blankline();
237 | 
238 |     fmt.write("*step += 1;");
239 |     fmt.blankline();
240 | 
241 |     if have_nonterminals {
242 |         fmt.write("switch (target) {");
243 |         fmt.indent();
244 | 
245 |         for (i, rule) in rules.iter().enumerate() {
246 |             fmt.write(format!("case {}: {{", i));
247 |             fmt.indent();
248 | 
249 |             emit_mutation_function_rule(rule, fmt);
250 | 
251 |             fmt.write("break;");
252 |             fmt.unindent();
253 |             fmt.write("}");
254 |         }
255 | 
256 |         fmt.write("default: {");
257 |         fmt.indent();
258 |         fmt.write("__builtin_unreachable();");
259 |         fmt.unindent();
260 |         fmt.write("}");
261 | 
262 |         fmt.unindent();
263 |         fmt.write("}");
264 |         fmt.blankline();
265 |     }
266 | 
267 |     fmt.write("return 1;");
268 | }
269 | 
270 | fn emit_mutation_function(
271 |     nonterm: usize,
272 |     rules: &[Vec<LLSymbol>],
273 |     grammar: &LowLevelGrammar,
274 |     fmt: &mut CFormatter<File>,
275 | ) {
276 |     fmt.write(format!(
277 |         "// This is the sequence mutation function for non-terminal {:?}",
278 |         grammar.nonterminals()[nonterm]
279 |     ));
280 |     fmt.write(format!("static int mutate_seq_nonterm{} (size_t* const buf, size_t* const len, const size_t capacity, size_t* const step) {{", nonterm));
281 |     fmt.indent();
282 | 
283 |     if rules.is_empty() {
284 |         unreachable!()
285 |     } else if rules.len() == 1 {
286 |         emit_mutation_function_single(&rules[0], fmt);
287 |     } else {
288 |         emit_mutation_function_multiple(rules, fmt);
289 |     }
290 | 
291 |     fmt.unindent();
292 |     fmt.write("}");
293 |     fmt.blankline();
294 | }
295 | 
296 | fn emit_mutation_entrypoint(grammar: &LowLevelGrammar, fmt: &mut CFormatter<File>) {
297 |     fmt.write("EXPORT_FUNCTION");
298 |     fmt.write("size_t mutate_sequence (size_t* buf, size_t len, const size_t capacity) {");
299 |     fmt.indent();
300 | 
301 |     #[cfg(feature = "debug-codegen")]
302 |     fmt.write("printf(\"Calling mutate_sequence(%p, %lu, %lu)\\n\", buf, len, capacity);");
303 | 
304 |     fmt.write("if (UNLIKELY(!buf | !capacity)) {");
305 |     fmt.indent();
306 |     fmt.write("return 0;");
307 |     fmt.unindent();
308 |     fmt.write("}");
309 | 
310 |     fmt.write("size_t step = 0;");
311 |     fmt.write(format!("mutate_seq_nonterm{}(buf, &len, capacity, &step);", grammar.entrypoint().id()));
312 |     fmt.write("return len;");
313 | 
314 |     fmt.unindent();
315 |     fmt.write("}");
316 |     fmt.blankline();
317 | }
318 | 
319 | fn emit_mutation_code(grammar: &LowLevelGrammar, fmt: &mut CFormatter<File>) {
320 |     emit_mutation_declarations(grammar, fmt);
321 | 
322 |     for (nonterm, rules) in grammar.rules() {
323 |         emit_mutation_function(*nonterm, rules, grammar, fmt);
324 |     }
325 | 
326 |     emit_mutation_entrypoint(grammar, fmt);
327 | }
328 | 
329 | fn emit_terminals(grammar: &LowLevelGrammar, fmt: &mut CFormatter<File>) {
330 |     fmt.write("/* Terminals */");
331 | 
332 |     for (i, term) in grammar.terminals().iter().enumerate() {
333 |         let term = term.as_bytes();
334 | 
335 |         fmt.write(format!("static const unsigned char TERM{}[{}] = {{", i, term.len()));
336 |         fmt.indent();
337 | 
338 |         for chunk in term.chunks(8) {
339 |             let x: Vec<String> = chunk.iter().map(|x| format!("{:#02X},", *x)).collect();
340 |             fmt.write(x.join(" "));
341 |         }
342 | 
343 |         fmt.unindent();
344 |         fmt.write("};");
345 |     }
346 | 
347 |     fmt.blankline();
348 | }
349 | 
350 | fn emit_serialization_declarations(grammar: &LowLevelGrammar, fmt: &mut CFormatter<File>) {
351 |     fmt.write("/* Forward declarations for serialization functions */");
352 | 
353 |     for nonterm in grammar.rules().keys() {
354 |         fmt.write(format!("static size_t serialize_seq_nonterm{} (const size_t* const, const size_t, unsigned char*, size_t, size_t* const);", *nonterm));
355 |     }
356 | 
357 |     fmt.blankline();
358 | }
359 | 
360 | fn emit_serialization_function_rule(rule: &[LLSymbol], fmt: &mut CFormatter<File>) {
361 |     for symbol in rule {
362 |         match symbol {
363 |             LLSymbol::NonTerminal(nonterm) => {
364 |                 fmt.write(format!("len = serialize_seq_nonterm{}(seq, seq_len, out, out_len, step);", nonterm.id()));
365 |                 fmt.write("out += len; out_len -= len;");
366 |                 fmt.blankline();
367 |             },
368 |             LLSymbol::Terminal(term) => {
369 |                 fmt.write(format!("if (UNLIKELY(out_len < sizeof(TERM{}))) {{", term.id()));
370 |                 fmt.indent();
371 |                 fmt.write("goto end;");
372 |                 fmt.unindent();
373 |                 fmt.write("}");
374 |                 fmt.write(format!("__builtin_memcpy_inline(out, TERM{0}, sizeof(TERM{0}));", term.id()));
375 |                 fmt.write(format!("out += sizeof(TERM{0}); out_len -= sizeof(TERM{0});", term.id()));
376 |                 fmt.blankline();
377 |             },
378 |         }
379 |     }
380 | }
381 | 
382 | fn emit_serialization_function_single(rule: &[LLSymbol], fmt: &mut CFormatter<File>) {
383 |     let has_nonterminals = rule_has_nonterminals(rule);
384 | 
385 |     if !has_nonterminals {
386 |         fmt.write("(void) seq;");
387 |         fmt.blankline();
388 |     }
389 | 
390 |     fmt.write("if (UNLIKELY(*step >= seq_len)) {");
391 |     fmt.indent();
392 |     fmt.write("return 0;");
393 |     fmt.unindent();
394 |     fmt.write("}");
395 |     fmt.blankline();
396 | 
397 |     if has_nonterminals {
398 |         fmt.write("size_t len;");
399 |     }
400 | 
401 |     fmt.write("unsigned char* original_out = out;");
402 |     fmt.write("*step += 1;");
403 |     fmt.blankline();
404 | 
405 |     emit_serialization_function_rule(rule, fmt);
406 | 
407 |     if rule_has_terminals(rule) {
408 |         fmt.write("end:");
409 |     }
410 |     fmt.write("return (size_t) (out - original_out);");
411 | }
412 | 
413 | fn emit_serialization_function_multiple(rules: &[Vec<LLSymbol>], fmt: &mut CFormatter<File>) {
414 |     fmt.write("if (UNLIKELY(*step >= seq_len)) {");
415 |     fmt.indent();
416 |     fmt.write("return 0;");
417 |     fmt.unindent();
418 |     fmt.write("}");
419 |     fmt.blankline();
420 | 
421 |     if rules_have_nonterminals(rules) {
422 |         fmt.write("size_t len;");
423 |     }
424 | 
425 |     fmt.write("unsigned char* original_out = out;");
426 |     fmt.write("size_t target = seq[*step];");
427 |     fmt.write("*step += 1;");
428 |     fmt.blankline();
429 | 
430 |     fmt.write("switch (target) {");
431 |     fmt.indent();
432 | 
433 |     for (i, rule) in rules.iter().enumerate() {
434 |         fmt.write(format!("case {}: {{", i));
435 |         fmt.indent();
436 | 
437 |         emit_serialization_function_rule(rule, fmt);
438 | 
439 |         fmt.write("break;");
440 |         fmt.unindent();
441 |         fmt.write("}");
442 |     }
443 | 
444 |     fmt.write("default: {");
445 |     fmt.indent();
446 |     fmt.write("__builtin_unreachable();");
447 |     fmt.unindent();
448 |     fmt.write("}");
449 | 
450 |     fmt.unindent();
451 |     fmt.write("}");
452 |     fmt.blankline();
453 | 
454 |     if rules_have_terminals(rules) {
455 |         fmt.write("end:");
456 |     }
457 |     fmt.write("return (size_t) (out - original_out);");
458 | }
459 | 
460 | fn emit_serialization_function(
461 |     nonterm: usize,
462 |     rules: &[Vec<LLSymbol>],
463 |     grammar: &LowLevelGrammar,
464 |     fmt: &mut CFormatter<File>,
465 | ) {
466 |     fmt.write(format!("// This is the serialization function for non-terminal {:?}", grammar.nonterminals()[nonterm]));
467 |     fmt.write(format!("static size_t serialize_seq_nonterm{} (const size_t* const seq, const size_t seq_len, unsigned char* out, size_t out_len, size_t* const step) {{", nonterm));
468 |     fmt.indent();
469 | 
470 |     #[cfg(feature = "debug-codegen")]
471 |     fmt.write(format!(
472 |         "printf(\"Serializing %s (%lu/%lu)\\n\", {:?}, *step + 1, seq_len);",
473 |         grammar.nonterminals()[nonterm]
474 |     ));
475 | 
476 |     if rules.is_empty() {
477 |         unreachable!()
478 |     } else if rules.len() == 1 {
479 |         emit_serialization_function_single(&rules[0], fmt);
480 |     } else {
481 |         emit_serialization_function_multiple(rules, fmt);
482 |     }
483 | 
484 |     fmt.unindent();
485 |     fmt.write("}");
486 |     fmt.blankline();
487 | }
488 | 
489 | fn emit_serialization_entrypoint(grammar: &LowLevelGrammar, fmt: &mut CFormatter<File>) {
490 |     fmt.write("EXPORT_FUNCTION");
491 |     fmt.write("size_t serialize_sequence (const size_t* seq, const size_t seq_len, unsigned char* out, const size_t out_len) {");
492 |     fmt.indent();
493 | 
494 |     fmt.write("if (UNLIKELY(!seq || !seq_len || !out || !out_len)) {");
495 |     fmt.indent();
496 |     fmt.write("return 0;");
497 |     fmt.unindent();
498 |     fmt.write("}");
499 | 
500 |     fmt.write("size_t step = 0;");
501 |     fmt.write(format!("return serialize_seq_nonterm{}(seq, seq_len, out, out_len, &step);", grammar.entrypoint().id()));
502 |     fmt.unindent();
503 |     fmt.write("}");
504 |     fmt.blankline();
505 | }
506 | 
507 | fn emit_serialization_code(grammar: &LowLevelGrammar, fmt: &mut CFormatter<File>) {
508 |     emit_terminals(grammar, fmt);
509 |     emit_serialization_declarations(grammar, fmt);
510 | 
511 |     for (nonterm, rules) in grammar.rules() {
512 |         emit_serialization_function(*nonterm, rules, grammar, fmt);
513 |     }
514 | 
515 |     emit_serialization_entrypoint(grammar, fmt);
516 | }
517 | 
518 | fn emit_header(
519 |     mut outfile: File,
520 |     mutations: bool,
521 |     serializations: bool,
522 |     unparsing: bool,
523 | ) -> Result<(), std::io::Error> {
524 |     write!(
525 |         &mut outfile,
526 |         "
527 | #ifndef __PEACOCK_GENERATOR_H
528 | #define __PEACOCK_GENERATOR_H
529 | 
530 | #include <stddef.h>
531 | "
532 |     )?;
533 | 
534 |     if mutations {
535 |         writeln!(&mut outfile, "size_t mutate_sequence (size_t* buf, size_t len, const size_t capacity);")?;
536 |     }
537 | 
538 |     if serializations {
539 |         writeln!(&mut outfile, "size_t serialize_sequence (const size_t* seq, const size_t seq_len, unsigned char* out, const size_t out_len);")?;
540 |     }
541 | 
542 |     if unparsing {
543 |         writeln!(&mut outfile, "size_t unparse_sequence (size_t* seq_buf, const size_t seq_capacity, const unsigned char* input, const size_t input_len);")?;
544 |     }
545 | 
546 |     write!(
547 |         &mut outfile,
548 |         "
549 | void seed_generator (size_t new_seed);
550 | 
551 | 
552 | #endif /* __PEACOCK_GENERATOR_H */
553 | "
554 |     )?;
555 | 
556 |     Ok(())
557 | }
558 | 
559 | fn emit_unparsing_declarations(grammar: &LowLevelGrammar, fmt: &mut CFormatter<File>) {
560 |     fmt.write("/* Forward declarations for unparsing functions */");
561 | 
562 |     for nonterm in grammar.rules().keys() {
563 |         fmt.write(format!("static int unparse_seq_nonterm{} (Sequence* const, const unsigned char* const, const size_t, size_t* const);", *nonterm));
564 |     }
565 | 
566 |     fmt.blankline();
567 | }
568 | 
569 | fn emit_unparsing_function(
570 |     nonterm: usize,
571 |     rules: &[Vec<LLSymbol>],
572 |     grammar: &LowLevelGrammar,
573 |     fmt: &mut CFormatter<File>,
574 | ) {
575 |     fmt.write(format!("// This is the unparsing function for non-terminal {:?}", grammar.nonterminals()[nonterm]));
576 |     fmt.write(format!("static int unparse_seq_nonterm{} (Sequence* const seq, const unsigned char* const input, const size_t input_len, size_t* const cursor) {{", nonterm));
577 |     fmt.indent();
578 | 
579 |     fmt.write("size_t seq_idx = seq->len;");
580 |     fmt.blankline();
581 |     fmt.write("if (UNLIKELY(seq_idx >= seq->capacity)) {");
582 |     fmt.indent();
583 |     fmt.write("return 0;");
584 |     fmt.unindent();
585 |     fmt.write("}");
586 |     fmt.blankline();
587 | 
588 |     fmt.write("size_t target_cursor = 0;");
589 |     fmt.write("size_t target_id = (size_t) -1LL;");
590 |     fmt.write("size_t target_seq_len = seq_idx;");
591 |     fmt.blankline();
592 | 
593 |     for (i, rule) in rules.iter().enumerate().sorted_by(|(_, a), (_, b)| b.len().cmp(&a.len())) {
594 |         fmt.write(format!("// Rule #{}", i));
595 |         fmt.write("do {");
596 |         fmt.indent();
597 |         fmt.write("seq->len = seq_idx + 1;");
598 |         fmt.write("size_t tmp_cursor = *cursor;");
599 |         fmt.blankline();
600 | 
601 |         for symbol in rule {
602 |             match symbol {
603 |                 LLSymbol::Terminal(term) => {
604 |                     fmt.write(format!(
605 |                         "if (UNLIKELY(input_len - tmp_cursor < sizeof(TERM{0})) || __builtin_memcmp(&input[tmp_cursor], TERM{0}, sizeof(TERM{0})) != 0) {{",
606 |                         term.id()
607 |                     ));
608 |                     fmt.indent();
609 |                     fmt.write("break;");
610 |                     fmt.unindent();
611 |                     fmt.write("}");
612 |                     fmt.write(format!("tmp_cursor += sizeof(TERM{0});", term.id()));
613 |                     fmt.blankline();
614 |                 },
615 |                 LLSymbol::NonTerminal(nonterm) => {
616 |                     fmt.write(format!(
617 |                         "if (!unparse_seq_nonterm{}(seq, input, input_len, &tmp_cursor)) {{",
618 |                         nonterm.id()
619 |                     ));
620 |                     fmt.indent();
621 |                     fmt.write("break;");
622 |                     fmt.unindent();
623 |                     fmt.write("}");
624 |                     fmt.blankline();
625 |                 },
626 |             }
627 |         }
628 | 
629 |         fmt.write("if (tmp_cursor > target_cursor) {");
630 |         fmt.indent();
631 |         fmt.write(format!("target_id = {};", i));
632 |         fmt.write("target_cursor = tmp_cursor;");
633 |         fmt.write("target_seq_len = seq->len;");
634 |         fmt.unindent();
635 |         fmt.write("}");
636 | 
637 |         fmt.unindent();
638 |         fmt.write("} while (0);");
639 |         fmt.blankline();
640 |     }
641 | 
642 |     fmt.write("seq->len = target_seq_len;");
643 |     fmt.blankline();
644 | 
645 |     fmt.write(format!("if (target_id < {}) {{", rules.len()));
646 |     fmt.indent();
647 |     fmt.write("*cursor = target_cursor;");
648 |     fmt.write("seq->buf[seq_idx] = target_id;");
649 |     fmt.write("return 1;");
650 |     fmt.unindent();
651 |     fmt.write("} else {");
652 |     fmt.indent();
653 |     fmt.write("return 0;");
654 |     fmt.unindent();
655 |     fmt.write("}");
656 | 
657 |     fmt.unindent();
658 |     fmt.write("}");
659 |     fmt.blankline();
660 | }
661 | 
662 | fn emit_unparsing_entrypoint(grammar: &LowLevelGrammar, fmt: &mut CFormatter<File>) {
663 |     fmt.write("EXPORT_FUNCTION");
664 |     fmt.write("size_t unparse_sequence (size_t* seq_buf, const size_t seq_capacity, const unsigned char* input, const size_t input_len) {");
665 |     fmt.indent();
666 | 
667 |     fmt.write("if (UNLIKELY(!seq_buf || !seq_capacity || !input || !input_len)) {");
668 |     fmt.indent();
669 |     fmt.write("return 0;");
670 |     fmt.unindent();
671 |     fmt.write("}");
672 | 
673 |     fmt.write("Sequence seq = {");
674 |     fmt.indent();
675 |     fmt.write(".buf = seq_buf,");
676 |     fmt.write(".len = 0,");
677 |     fmt.write(".capacity = seq_capacity,");
678 |     fmt.unindent();
679 |     fmt.write("};");
680 |     fmt.write("size_t cursor = 0;");
681 |     fmt.write(format!("if (!unparse_seq_nonterm{}(&seq, input, input_len, &cursor)) {{", grammar.entrypoint().id()));
682 |     fmt.indent();
683 |     fmt.write("return 0;");
684 |     fmt.unindent();
685 |     fmt.write("} else { ");
686 |     fmt.indent();
687 |     fmt.write("return seq.len;");
688 |     fmt.unindent();
689 |     fmt.write("}");
690 |     fmt.unindent();
691 |     fmt.write("}");
692 |     fmt.blankline();
693 | }
694 | 
695 | fn emit_unparsing_code(grammar: &LowLevelGrammar, fmt: &mut CFormatter<File>) {
696 |     emit_unparsing_declarations(grammar, fmt);
697 | 
698 |     for (nonterm, rules) in grammar.rules() {
699 |         emit_unparsing_function(*nonterm, rules, grammar, fmt);
700 |     }
701 | 
702 |     emit_unparsing_entrypoint(grammar, fmt);
703 | }
704 | 
705 | /// This is the main struct of the [`C`](crate::backends::C) backend that does all the heavy lifting and generates the code.
706 | ///
707 | /// For documentation of the generated C code see the [README](https://github.com/z2-2z/peacock#c-api-documentation) of this project.
708 | pub struct CGenerator {
709 |     header: bool,
710 |     mutations: bool,
711 |     serializations: bool,
712 |     unparsing: bool,
713 | }
714 | 
715 | impl CGenerator {
716 |     /// Create a new CGenerator.
717 |     #[allow(clippy::new_without_default)]
718 |     pub fn new() -> Self {
719 |         Self {
720 |             header: true,
721 |             mutations: true,
722 |             serializations: true,
723 |             unparsing: true,
724 |         }
725 |     }
726 | 
727 |     /// Also generate a .h file with all the definitions of the public C API of the generated code.
728 |     ///
729 |     /// Default: `true`
730 |     pub fn generate_header(mut self, flag: bool) -> Self {
731 |         self.header = flag;
732 |         self
733 |     }
734 | 
735 |     /// Emit code that realizes the mutation of an automaton walk.
736 |     ///
737 |     /// Default: `true`
738 |     pub fn emit_mutation_procedure(mut self, flag: bool) -> Self {
739 |         self.mutations = flag;
740 |         self
741 |     }
742 | 
743 |     /// Emit code that realizes the serialization of automaton walks into human-readable output.
744 |     ///
745 |     /// Default: `true`
746 |     pub fn emit_serialization_procedure(mut self, flag: bool) -> Self {
747 |         self.serializations = flag;
748 |         self
749 |     }
750 | 
751 |     /// Emit code that realizes the unparsing of user inputs into automaton walks.
752 |     ///
753 |     /// Default: `true`
754 |     pub fn emit_unparsing_procedure(mut self, flag: bool) -> Self {
755 |         self.unparsing = flag;
756 |         self
757 |     }
758 | 
759 |     /// Generate the C code for the given grammar `grammar` and write it to `output`.
760 |     pub fn generate<P: AsRef<Path>>(self, output: P, grammar: &ContextFreeGrammar) {
761 |         let grammar = LowLevelGrammar::from_high_level_grammar(grammar);
762 |         let outfile = File::create(output.as_ref()).expect("Could not create source file");
763 |         let mut formatter = CFormatter::new(outfile);
764 | 
765 |         emit_includes(&mut formatter);
766 |         emit_macros(&mut formatter);
767 |         emit_types(&mut formatter);
768 |         emit_rand(&mut formatter);
769 | 
770 |         if self.mutations {
771 |             emit_mutation_code(&grammar, &mut formatter);
772 |         }
773 | 
774 |         if self.serializations {
775 |             emit_serialization_code(&grammar, &mut formatter);
776 |         }
777 | 
778 |         if self.unparsing {
779 |             emit_unparsing_code(&grammar, &mut formatter);
780 |         }
781 | 
782 |         if self.header {
783 |             let mut outfile = output.as_ref().to_path_buf();
784 |             outfile.set_extension("h");
785 |             let outfile = File::create(outfile).expect("Could not create header file");
786 |             emit_header(outfile, self.mutations, self.serializations, self.unparsing)
787 |                 .expect("Could not write to header file");
788 |         }
789 |     }
790 | }
791 | 
792 | #[cfg(test)]
793 | mod tests {
794 |     use super::*;
795 | 
796 |     #[test]
797 |     fn test_generator() {
798 |         let cfg = ContextFreeGrammar::builder()
799 |             .gramatron_grammar("test-data/grammars/gramatron.json")
800 |             .unwrap()
801 |             .build()
802 |             .unwrap();
803 |         CGenerator::new().generate("/tmp/out.c", &cfg);
804 |     }
805 | }
806 | 


--------------------------------------------------------------------------------
/src/backends/C/formatter.rs:
--------------------------------------------------------------------------------
 1 | use std::io::{
 2 |     BufWriter,
 3 |     Write,
 4 | };
 5 | 
 6 | pub struct CFormatter<T: Write> {
 7 |     stream: BufWriter<T>,
 8 |     indentation: usize,
 9 | }
10 | 
11 | impl<T> CFormatter<T>
12 | where
13 |     T: Write,
14 | {
15 |     pub fn new(writer: T) -> Self {
16 |         Self {
17 |             stream: BufWriter::new(writer),
18 |             indentation: 0,
19 |         }
20 |     }
21 | 
22 |     pub fn indent(&mut self) {
23 |         self.indentation += 4;
24 |     }
25 | 
26 |     pub fn unindent(&mut self) {
27 |         if self.indentation > 0 {
28 |             self.indentation -= 4;
29 |         }
30 |     }
31 | 
32 |     pub fn write<S: AsRef<str>>(&mut self, line: S) {
33 |         writeln!(&mut self.stream, "{:width$}{}", "", line.as_ref(), width = self.indentation)
34 |             .expect("Could not write to outfile");
35 |     }
36 | 
37 |     pub fn blankline(&mut self) {
38 |         writeln!(&mut self.stream).expect("Could not write to outfile");
39 |     }
40 | }
41 | 
42 | #[cfg(test)]
43 | mod tests {
44 |     use super::*;
45 |     use std::io::stdout;
46 | 
47 |     #[test]
48 |     fn test_formatter() {
49 |         let mut fmt = CFormatter::new(stdout());
50 |         fmt.write("asdf {");
51 |         fmt.indent();
52 |         fmt.blankline();
53 |         fmt.write("yeehaw");
54 |         fmt.blankline();
55 |         fmt.unindent();
56 |         fmt.write("}");
57 |     }
58 | }
59 | 


--------------------------------------------------------------------------------
/src/backends/C/grammar.rs:
--------------------------------------------------------------------------------
  1 | use std::collections::HashMap;
  2 | 
  3 | use crate::grammar::{
  4 |     ContextFreeGrammar,
  5 |     Symbol,
  6 | };
  7 | 
  8 | #[derive(Copy, Clone, Debug)]
  9 | pub struct LLTerminal(usize);
 10 | 
 11 | impl LLTerminal {
 12 |     pub fn id(&self) -> usize {
 13 |         self.0
 14 |     }
 15 | }
 16 | 
 17 | #[derive(Copy, Clone, Debug)]
 18 | pub struct LLNonTerminal(usize);
 19 | 
 20 | impl LLNonTerminal {
 21 |     pub fn id(&self) -> usize {
 22 |         self.0
 23 |     }
 24 | }
 25 | 
 26 | #[derive(Clone, Debug)]
 27 | pub enum LLSymbol {
 28 |     Terminal(LLTerminal),
 29 |     NonTerminal(LLNonTerminal),
 30 | }
 31 | 
 32 | pub struct LowLevelGrammar {
 33 |     rules: HashMap<usize, Vec<Vec<LLSymbol>>>,
 34 |     terminals: Vec<String>,
 35 |     nonterminals: Vec<String>,
 36 |     entrypoint: LLNonTerminal,
 37 | }
 38 | 
 39 | impl LowLevelGrammar {
 40 |     pub fn from_high_level_grammar(grammar: &ContextFreeGrammar) -> Self {
 41 |         let mut rules = HashMap::new();
 42 |         let mut nonterm_map = HashMap::new();
 43 |         let mut nonterminals = Vec::new();
 44 |         let mut term_map = HashMap::new();
 45 |         let mut terminals = Vec::new();
 46 | 
 47 |         for rule in grammar.rules() {
 48 |             let lhs_id = *nonterm_map.entry(rule.lhs().id()).or_insert_with(|| {
 49 |                 let ret = nonterminals.len();
 50 |                 nonterminals.push(rule.lhs().id().to_string());
 51 |                 ret
 52 |             });
 53 |             let mut ll_symbols = Vec::new();
 54 | 
 55 |             for symbol in rule.rhs() {
 56 |                 match symbol {
 57 |                     Symbol::Terminal(term) => {
 58 |                         let id = *term_map.entry(term.content()).or_insert_with(|| {
 59 |                             let ret = terminals.len();
 60 |                             terminals.push(term.content().to_string());
 61 |                             ret
 62 |                         });
 63 |                         ll_symbols.push(LLSymbol::Terminal(LLTerminal(id)));
 64 |                     },
 65 |                     Symbol::NonTerminal(nonterm) => {
 66 |                         let id = *nonterm_map.entry(nonterm.id()).or_insert_with(|| {
 67 |                             let ret = nonterminals.len();
 68 |                             nonterminals.push(nonterm.id().to_string());
 69 |                             ret
 70 |                         });
 71 |                         ll_symbols.push(LLSymbol::NonTerminal(LLNonTerminal(id)));
 72 |                     },
 73 |                 }
 74 |             }
 75 | 
 76 |             rules.entry(lhs_id).or_insert_with(Vec::new).push(ll_symbols);
 77 |         }
 78 | 
 79 |         Self {
 80 |             rules,
 81 |             terminals,
 82 |             nonterminals,
 83 |             entrypoint: LLNonTerminal(*nonterm_map.get(grammar.entrypoint().id()).unwrap()),
 84 |         }
 85 |     }
 86 | 
 87 |     pub fn rules(&self) -> &HashMap<usize, Vec<Vec<LLSymbol>>> {
 88 |         &self.rules
 89 |     }
 90 | 
 91 |     pub fn terminals(&self) -> &[String] {
 92 |         &self.terminals
 93 |     }
 94 | 
 95 |     pub fn nonterminals(&self) -> &[String] {
 96 |         &self.nonterminals
 97 |     }
 98 | 
 99 |     pub fn entrypoint(&self) -> &LLNonTerminal {
100 |         &self.entrypoint
101 |     }
102 | }
103 | 
104 | #[cfg(test)]
105 | mod tests {
106 |     use super::*;
107 | 
108 |     #[test]
109 |     fn test_ll() {
110 |         let cfg = ContextFreeGrammar::builder()
111 |             .peacock_grammar("test-data/grammars/unit_rules.json")
112 |             .unwrap()
113 |             .build()
114 |             .unwrap();
115 |         let ll = LowLevelGrammar::from_high_level_grammar(&cfg);
116 |         println!("{:#?}", ll.rules());
117 |         println!("terminals = {:?}", ll.terminals());
118 |         println!("nonterminals = {:?}", ll.nonterminals());
119 |     }
120 | }
121 | 


--------------------------------------------------------------------------------
/src/backends/C/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Generate a grammar-based mutator in C.
 2 | //!
 3 | //! Use it like so:
 4 | //! ```
 5 | //! // First, load a grammar from disk
 6 | //! let grammar = ContextFreeGrammar::builder()
 7 | //!     .peacock_grammar("my-grammar.json").unwrap()
 8 | //!     .build().unwrap();
 9 | //!
10 | //! // Then, generate grammar-based mutator code and write it into mutator.c
11 | //! CGenerator::new().generate("mutator.c", &grammar);
12 | //! ```
13 | //!
14 | //! The API is documented in the [README](https://github.com/z2-2z/peacock#c-api-documentation) of this project.
15 | 
16 | mod codegen;
17 | mod formatter;
18 | mod grammar;
19 | 
20 | pub use codegen::CGenerator;
21 | pub(crate) use grammar::*;
22 | 


--------------------------------------------------------------------------------
/src/backends/interpreter/mod.rs:
--------------------------------------------------------------------------------
  1 | //! Generate inputs by interpreting the rules of the grammar.
  2 | //!
  3 | //! Use it like so:
  4 | //! ```
  5 | //! // First, load a grammar from disk
  6 | //! let grammar = ContextFreeGrammar::builder()
  7 | //!     .peacock_grammar("my-grammar.json").unwrap()
  8 | //!     .build().unwrap();
  9 | //!
 10 | //! // Then, generate one input and write it to a specified stream.
 11 | //! let mut stream = std::io::stdout();
 12 | //! GrammarInterpreter::new(&grammar).interpret(&mut stream).unwrap();
 13 | //! ```
 14 | 
 15 | use std::io::Write;
 16 | 
 17 | use crate::{
 18 |     backends::C::{
 19 |         LLSymbol,
 20 |         LowLevelGrammar,
 21 |     },
 22 |     grammar::ContextFreeGrammar,
 23 | };
 24 | 
 25 | /// The GrammarInterpreter interprets the rules of a grammar to generate inputs.
 26 | pub struct GrammarInterpreter {
 27 |     grammar: LowLevelGrammar,
 28 |     seed: usize,
 29 |     stack: Vec<LLSymbol>,
 30 | }
 31 | 
 32 | impl GrammarInterpreter {
 33 |     /// Create a new GrammarInterpreter.
 34 |     #[allow(clippy::new_without_default)]
 35 |     pub fn new(grammar: &ContextFreeGrammar) -> Self {
 36 |         Self {
 37 |             grammar: LowLevelGrammar::from_high_level_grammar(grammar),
 38 |             seed: 0xDEADBEEF,
 39 |             stack: Vec::with_capacity(4096),
 40 |         }
 41 |     }
 42 | 
 43 |     /// Seed the RNG of the GrammarInterpreter.
 44 |     pub fn seed(&mut self, seed: usize) {
 45 |         if seed == 0 {
 46 |             self.seed = 0xDEADBEEF;
 47 |         } else {
 48 |             self.seed = seed;
 49 |         }
 50 |     }
 51 | 
 52 |     /// Generate one input and write it to the given output stream `stream`.
 53 |     /// Returns the number of bytes written to `stream`.
 54 |     pub fn interpret<S: Write>(&mut self, stream: &mut S) -> std::io::Result<usize> {
 55 |         let mut generated = 0;
 56 | 
 57 |         assert!(self.stack.is_empty());
 58 |         self.stack.push(LLSymbol::NonTerminal(*self.grammar.entrypoint()));
 59 | 
 60 |         while let Some(symbol) = self.stack.pop() {
 61 |             match symbol {
 62 |                 LLSymbol::Terminal(term) => {
 63 |                     let term = &self.grammar.terminals()[term.id()].as_bytes();
 64 |                     generated += term.len();
 65 |                     stream.write_all(term)?;
 66 |                 },
 67 |                 LLSymbol::NonTerminal(nonterm) => {
 68 |                     let rules = self.grammar.rules().get(&nonterm.id()).unwrap();
 69 | 
 70 |                     // Inline RNG because of borrow problems
 71 |                     let rand = {
 72 |                         let mut x = self.seed;
 73 |                         x ^= x << 13;
 74 |                         x ^= x >> 7;
 75 |                         x ^= x << 17;
 76 |                         self.seed = x;
 77 |                         x
 78 |                     };
 79 | 
 80 |                     let rule = &rules[rand % rules.len()];
 81 | 
 82 |                     for symbol in rule.iter().rev() {
 83 |                         self.stack.push(symbol.clone());
 84 |                     }
 85 |                 },
 86 |             }
 87 |         }
 88 | 
 89 |         Ok(generated)
 90 |     }
 91 | }
 92 | 
 93 | #[cfg(test)]
 94 | mod tests {
 95 |     use super::*;
 96 | 
 97 |     #[test]
 98 |     fn test_interpreter() {
 99 |         let cfg = ContextFreeGrammar::builder()
100 |             .gramatron_grammar("test-data/grammars/gramatron.json")
101 |             .unwrap()
102 |             .build()
103 |             .unwrap();
104 |         let mut stdout = std::io::stdout();
105 |         let mut interpreter = GrammarInterpreter::new(&cfg);
106 |         interpreter.seed(1238);
107 |         let len = interpreter.interpret(&mut stdout).unwrap();
108 |         println!();
109 |         println!("Generated {} bytes", len);
110 |     }
111 | }
112 | 


--------------------------------------------------------------------------------
/src/backends/json/generator.rs:
--------------------------------------------------------------------------------
 1 | use serde::ser::Serialize;
 2 | use serde_json::{
 3 |     json,
 4 |     ser::PrettyFormatter,
 5 |     Serializer,
 6 |     Value,
 7 | };
 8 | use std::{
 9 |     fs::File,
10 |     io::Write,
11 |     path::Path,
12 | };
13 | 
14 | use crate::grammar::{
15 |     ContextFreeGrammar,
16 |     Symbol,
17 | };
18 | 
19 | fn enclosed_in(s: &str, start: char, end: char) -> bool {
20 |     s.len() >= 2 && s.starts_with(start) && s.ends_with(end)
21 | }
22 | 
23 | fn terminal_string(content: &str) -> String {
24 |     if enclosed_in(content, '<', '>') || enclosed_in(content, '\'', '\'') {
25 |         return format!("'{}'", content);
26 |     }
27 | 
28 |     content.to_string()
29 | }
30 | 
31 | /// This is the main struct of the [`json`](crate::backends::json) backend that does all the heavy lifting and generates the grammar.
32 | pub struct JsonGenerator {}
33 | 
34 | impl JsonGenerator {
35 |     /// Create a new JsonGenerator.
36 |     #[allow(clippy::new_without_default)]
37 |     pub fn new() -> Self {
38 |         Self {}
39 |     }
40 | 
41 |     /// Write the production rules of the supplied `grammar` into the output file `path` in peacock format.
42 |     pub fn generate<P: AsRef<Path>>(self, path: P, grammar: &ContextFreeGrammar) {
43 |         let mut json = json!({});
44 |         let object = json.as_object_mut().unwrap();
45 | 
46 |         for rule in grammar.rules() {
47 |             let array = object.entry(format!("<{}>", rule.lhs().id())).or_insert_with(|| Value::Array(Vec::new()));
48 |             let array = array.as_array_mut().unwrap();
49 | 
50 |             let mut insert = Vec::new();
51 |             for symbol in rule.rhs() {
52 |                 match symbol {
53 |                     Symbol::Terminal(term) => {
54 |                         insert.push(Value::String(terminal_string(term.content())));
55 |                     },
56 |                     Symbol::NonTerminal(nonterm) => {
57 |                         insert.push(Value::String(format!("<{}>", nonterm.id())));
58 |                     },
59 |                 }
60 |             }
61 | 
62 |             array.push(Value::Array(insert));
63 |         }
64 | 
65 |         let mut buf = Vec::new();
66 |         let formatter = PrettyFormatter::with_indent(b"    ");
67 |         let mut ser = Serializer::with_formatter(&mut buf, formatter);
68 |         json.serialize(&mut ser).unwrap();
69 | 
70 |         let mut file = File::create(path).expect("Could not open output file");
71 |         file.write_all(&buf).expect("Could not write to output file");
72 |     }
73 | }
74 | 
75 | #[cfg(test)]
76 | mod tests {
77 |     use super::*;
78 | 
79 |     #[test]
80 |     fn test_generator() {
81 |         let cfg = ContextFreeGrammar::builder()
82 |             .gramatron_grammar("test-data/grammars/gramatron.json")
83 |             .unwrap()
84 |             .optimize(false)
85 |             .build()
86 |             .unwrap();
87 |         JsonGenerator::new().generate("/tmp/new.json", &cfg);
88 | 
89 |         ContextFreeGrammar::builder().peacock_grammar("/tmp/new.json").unwrap().build().unwrap();
90 |     }
91 | }
92 | 


--------------------------------------------------------------------------------
/src/backends/json/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Generate a grammar in peacock format.
 2 | //!
 3 | //! Use it like so:
 4 | //! ```
 5 | //! // First, load multiple grammars from disk. This will merge all the rules.
 6 | //! let grammar = ContextFreeGrammar::builder()
 7 | //!     .peacock_grammar("my-grammar.json").unwrap()
 8 | //!     .peacock_grammar("common-definitions.json").unwrap()
 9 | //!     .gramatron_grammar("my-old-grammar.json").unwrap()
10 | //!     .build().unwrap();
11 | //!
12 | //! // Then, create a single new grammar in peacock format.
13 | //! JsonGenerator::new().generate("merged-grammar.json", &grammar);
14 | //! ```
15 | 
16 | mod generator;
17 | 
18 | pub use generator::JsonGenerator;
19 | 


--------------------------------------------------------------------------------
/src/backends/mod.rs:
--------------------------------------------------------------------------------
1 | //! This module has all the available backends.
2 | 
3 | #[allow(non_snake_case)]
4 | pub mod C;
5 | 
6 | pub mod json;
7 | 
8 | pub mod interpreter;
9 | 


--------------------------------------------------------------------------------
/src/bin/compile.rs:
--------------------------------------------------------------------------------
 1 | use clap::Parser;
 2 | use peacock_fuzz::{
 3 |     backends::C::CGenerator,
 4 |     grammar::ContextFreeGrammar,
 5 | };
 6 | 
 7 | pub mod fuzz;
 8 | use fuzz::GrammarFormat;
 9 | 
10 | #[derive(Parser, Debug)]
11 | #[command(author, version, about, long_about = None)]
12 | struct Args {
13 |     #[arg(long, value_name = "GRAMMAR")]
14 |     grammar: String,
15 | 
16 |     #[arg(long)]
17 |     output: String,
18 | 
19 |     #[arg(long, default_value_t = GrammarFormat::Peacock)]
20 |     format: GrammarFormat,
21 | 
22 |     #[arg(short, long)]
23 |     entrypoint: Option<String>,
24 | }
25 | 
26 | fn main() {
27 |     let args = Args::parse();
28 | 
29 |     let mut cfg = ContextFreeGrammar::builder();
30 | 
31 |     match args.format {
32 |         GrammarFormat::Peacock => cfg = cfg.peacock_grammar(&args.grammar).unwrap(),
33 |         GrammarFormat::Gramatron => cfg = cfg.gramatron_grammar(&args.grammar).unwrap(),
34 |     }
35 | 
36 |     if let Some(entrypoint) = args.entrypoint {
37 |         cfg = cfg.entrypoint(entrypoint);
38 |     }
39 | 
40 |     let cfg = cfg.build().unwrap();
41 | 
42 |     CGenerator::new().generate(&args.output, &cfg);
43 | }
44 | 


--------------------------------------------------------------------------------
/src/bin/dump.rs:
--------------------------------------------------------------------------------
 1 | use clap::Parser;
 2 | use libafl::prelude::{
 3 |     HasTargetBytes,
 4 |     Input,
 5 | };
 6 | use libafl_bolts::prelude::AsSlice;
 7 | use peacock_fuzz::components::{
 8 |     load_generator,
 9 |     PeacockInput,
10 | };
11 | use std::io::Write;
12 | 
13 | #[derive(Parser, Debug)]
14 | #[command(author, version, about, long_about = None)]
15 | struct Args {
16 |     #[arg(short, long)]
17 |     generator: String,
18 | 
19 |     file: String,
20 | }
21 | 
22 | fn main() {
23 |     let args = Args::parse();
24 |     load_generator(&args.generator);
25 |     let input = PeacockInput::from_file(&args.file).expect("Could not load specified input file");
26 |     let input = input.target_bytes();
27 |     std::io::stdout().write_all(input.as_slice()).expect("Could not write to stdout");
28 | }
29 | 


--------------------------------------------------------------------------------
/src/bin/fuzz.rs:
--------------------------------------------------------------------------------
  1 | use clap::Parser;
  2 | use libafl::prelude::{
  3 |     feedback_or,
  4 |     powersched::PowerSchedule,
  5 |     CachedOnDiskCorpus,
  6 |     CalibrationStage,
  7 |     CanTrack,
  8 |     Corpus,
  9 |     CrashFeedback,
 10 |     Error,
 11 |     EventConfig,
 12 |     ForkserverExecutor,
 13 |     Fuzzer,
 14 |     HasCorpus,
 15 |     HitcountsMapObserver,
 16 |     IndexesLenTimeMinimizerScheduler,
 17 |     Launcher,
 18 |     LlmpRestartingEventManager,
 19 |     MaxMapFeedback,
 20 |     OnDiskCorpus,
 21 |     StdFuzzer,
 22 |     StdMapObserver,
 23 |     StdMutationalStage,
 24 |     StdState,
 25 |     StdWeightedScheduler,
 26 |     TimeFeedback,
 27 |     TimeObserver,
 28 |     TimeoutFeedback,
 29 | };
 30 | #[cfg(not(debug_assertions))]
 31 | use libafl::prelude::{
 32 |     tui::ui::TuiUI,
 33 |     tui::TuiMonitor,
 34 | };
 35 | use libafl_bolts::prelude::{
 36 |     current_nanos,
 37 |     tuple_list,
 38 |     AsSliceMut,
 39 |     CoreId,
 40 |     Cores,
 41 |     ShMem,
 42 |     ShMemProvider,
 43 |     StdRand,
 44 |     UnixShMemProvider,
 45 | };
 46 | use nix::sys::signal::Signal;
 47 | use peacock_fuzz::{
 48 |     backends::C::CGenerator,
 49 |     components::{
 50 |         load_generator,
 51 |         seed_generator,
 52 |         PeacockGenerator,
 53 |         PeacockInput,
 54 |         PeacockMutator,
 55 |     },
 56 |     grammar::ContextFreeGrammar,
 57 | };
 58 | use std::{
 59 |     path::{
 60 |         Path,
 61 |         PathBuf,
 62 |     },
 63 |     process::Command,
 64 |     time::Duration,
 65 | };
 66 | 
 67 | const PRELOAD_ENV: &str = "PEACOCK_PRELOAD";
 68 | const CC_ENV: &str = "CC";
 69 | const MAP_SIZE_ENV: &str = "PEACOCK_MAP_SIZE";
 70 | 
 71 | const DEFAULT_MAP_SIZE: usize = 2_621_440;
 72 | const DEFAULT_CC: &str = "cc";
 73 | 
 74 | #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum)]
 75 | pub enum GrammarFormat {
 76 |     Peacock,
 77 |     Gramatron,
 78 | }
 79 | 
 80 | impl std::fmt::Display for GrammarFormat {
 81 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 82 |         match self {
 83 |             GrammarFormat::Peacock => write!(f, "peacock"),
 84 |             GrammarFormat::Gramatron => write!(f, "gramatron"),
 85 |         }
 86 |     }
 87 | }
 88 | 
 89 | #[derive(Parser, Debug)]
 90 | #[command(author, version, about, long_about = None)]
 91 | struct Args {
 92 |     #[arg(long, value_name = "CORES")]
 93 |     cores: String,
 94 | 
 95 |     #[arg(long, value_name = "GRAMMAR")]
 96 |     grammar: String,
 97 | 
 98 |     #[arg(short)]
 99 |     output: String,
100 | 
101 |     #[arg(long, default_value_t = GrammarFormat::Peacock)]
102 |     format: GrammarFormat,
103 | 
104 |     #[arg(short, long)]
105 |     entrypoint: Option<String>,
106 | 
107 |     #[arg(short, long)]
108 |     corpus: Option<String>,
109 | 
110 |     #[arg(trailing_var_arg = true, allow_hyphen_values = true, required = true)]
111 |     cmdline: Vec<String>,
112 | }
113 | 
114 | fn mkdir(dir: &str) {
115 |     match std::fs::create_dir(dir) {
116 |         Ok(()) => {},
117 |         Err(err) => {
118 |             if err.kind() != std::io::ErrorKind::AlreadyExists {
119 |                 panic!("Could not create directory {}", dir);
120 |             }
121 |         },
122 |     }
123 | }
124 | 
125 | /// Return true if a is newer than b
126 | fn is_newer<P1: AsRef<Path>, P2: AsRef<Path>>(a: P1, b: P2) -> bool {
127 |     let a = std::fs::metadata(a).unwrap().modified().unwrap();
128 |     let b = std::fs::metadata(b).unwrap().modified().unwrap();
129 |     a > b
130 | }
131 | 
132 | fn compile_source(output: &Path, input: &Path) {
133 |     let cc = if let Ok(var) = std::env::var(CC_ENV) { var } else { DEFAULT_CC.to_string() };
134 | 
135 |     let output = Command::new(cc)
136 |         .args([
137 |             "-o",
138 |             &output.to_string_lossy(),
139 |             "-flto",
140 |             "-s",
141 |             "-fvisibility=hidden",
142 |             "-DMAKE_VISIBLE",
143 |             "-Ofast",
144 |             "-march=native",
145 |             "-fomit-frame-pointer",
146 |             "-fno-stack-protector",
147 |             "-fPIC",
148 |             "-shared",
149 |             &input.to_string_lossy(),
150 |             "-nostdlib",
151 |         ])
152 |         .output()
153 |         .expect("Could not launch C compiler");
154 | 
155 |     if !output.status.success() {
156 |         panic!("Compiling grammar failed");
157 |     }
158 | }
159 | 
160 | fn generate_source(args: &Args, c_file: &Path) {
161 |     let mut cfg = ContextFreeGrammar::builder();
162 | 
163 |     match &args.format {
164 |         GrammarFormat::Peacock => cfg = cfg.peacock_grammar(&args.grammar).unwrap(),
165 |         GrammarFormat::Gramatron => cfg = cfg.gramatron_grammar(&args.grammar).unwrap(),
166 |     }
167 | 
168 |     if let Some(entrypoint) = &args.entrypoint {
169 |         cfg = cfg.entrypoint(entrypoint);
170 |     }
171 | 
172 |     let cfg = cfg.build().unwrap();
173 | 
174 |     CGenerator::new().generate(c_file, &cfg);
175 | }
176 | 
177 | fn load_grammar(args: &Args) {
178 |     let generator_so = PathBuf::from(format!("{}/generator.so", &args.output));
179 |     let c_file = PathBuf::from(format!("{}/generator.c", &args.output));
180 | 
181 |     mkdir(&args.output);
182 |     if !generator_so.exists() || is_newer(&args.grammar, &generator_so) {
183 |         println!("Compiling generator.so ...");
184 |         generate_source(args, &c_file);
185 |         compile_source(&generator_so, &c_file);
186 |     }
187 | 
188 |     load_generator(generator_so);
189 | }
190 | 
191 | /* Harness */
192 | fn fuzz(args: Args) -> Result<(), Error> {
193 |     let mut map_size = if let Ok(value) = std::env::var(MAP_SIZE_ENV) {
194 |         std::env::remove_var(MAP_SIZE_ENV);
195 |         value.parse().expect("Invalid map size speficiation")
196 |     } else {
197 |         DEFAULT_MAP_SIZE
198 |     };
199 | 
200 |     if map_size % 64 != 0 {
201 |         map_size = ((map_size + 63) >> 6) << 6;
202 |     }
203 | 
204 |     let mut run_client = |state: Option<_>, mut mgr: LlmpRestartingEventManager<_, _, _>, core_id: CoreId| {
205 |         let output_dir = Path::new(&args.output);
206 |         let queue_dir = output_dir.join("queue");
207 |         let crashes_dir = output_dir.join("crashes");
208 |         let seed = current_nanos().rotate_left(core_id.0 as u32);
209 |         let powerschedule = PowerSchedule::EXPLORE;
210 |         let timeout = Duration::from_secs(10);
211 |         let signal = str::parse::<Signal>("SIGKILL").unwrap();
212 |         let debug_child = cfg!(debug_assertions);
213 | 
214 |         if let Ok(value) = std::env::var(PRELOAD_ENV) {
215 |             std::env::set_var("LD_PRELOAD", value);
216 |             std::env::remove_var(PRELOAD_ENV);
217 |         }
218 | 
219 |         let mut shmem_provider = UnixShMemProvider::new()?;
220 |         let mut shmem = shmem_provider.new_shmem(map_size)?;
221 |         shmem.write_to_env("__AFL_SHM_ID")?;
222 |         let shmem_buf = shmem.as_slice_mut();
223 |         std::env::set_var("AFL_MAP_SIZE", format!("{}", map_size));
224 | 
225 |         let edges_observer =
226 |             unsafe { HitcountsMapObserver::new(StdMapObserver::new("shared_mem", shmem_buf)).track_indices() };
227 | 
228 |         let time_observer = TimeObserver::new("time");
229 | 
230 |         let map_feedback = MaxMapFeedback::new(&edges_observer);
231 | 
232 |         let calibration = CalibrationStage::new(&map_feedback);
233 | 
234 |         let mut feedback = feedback_or!(map_feedback, TimeFeedback::new(&time_observer));
235 | 
236 |         let mut objective = feedback_or!(CrashFeedback::new(), TimeoutFeedback::new());
237 | 
238 |         seed_generator(seed as usize);
239 | 
240 |         let mut state = if let Some(state) = state {
241 |             state
242 |         } else {
243 |             StdState::new(
244 |                 StdRand::with_seed(seed),
245 |                 CachedOnDiskCorpus::<PeacockInput>::new(&queue_dir, 128)?,
246 |                 OnDiskCorpus::new(crashes_dir)?,
247 |                 &mut feedback,
248 |                 &mut objective,
249 |             )?
250 |         };
251 | 
252 |         let mutator = PeacockMutator::new();
253 | 
254 |         let mutational = StdMutationalStage::with_max_iterations(mutator, 1);
255 | 
256 |         let scheduler = IndexesLenTimeMinimizerScheduler::new(
257 |             &edges_observer,
258 |             StdWeightedScheduler::with_schedule(&mut state, &edges_observer, Some(powerschedule)),
259 |         );
260 | 
261 |         let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
262 | 
263 |         let mut executor = ForkserverExecutor::builder()
264 |             .program(&args.cmdline[0])
265 |             .debug_child(debug_child)
266 |             .parse_afl_cmdline(args.cmdline.get(1..).unwrap_or(&[]))
267 |             .coverage_map_size(map_size)
268 |             .is_persistent(false)
269 |             .timeout(timeout)
270 |             .kill_signal(signal)
271 |             .build_dynamic_map(edges_observer, tuple_list!(time_observer))?;
272 | 
273 |         if state.corpus().count() == 0 {
274 |             if let Some(corpus) = &args.corpus {
275 |                 state.load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[PathBuf::from(corpus)])?;
276 |             }
277 | 
278 |             state.load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[queue_dir])?;
279 | 
280 |             if state.corpus().count() == 0 {
281 |                 let mut generator = PeacockGenerator::new();
282 |                 state.generate_initial_inputs_forced(&mut fuzzer, &mut executor, &mut generator, &mut mgr, 16)?;
283 |             }
284 |         }
285 | 
286 |         let mut stages = tuple_list!(calibration, mutational);
287 | 
288 |         fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;
289 |         Ok(())
290 |     };
291 | 
292 |     let shmem_provider = UnixShMemProvider::new()?;
293 | 
294 |     #[cfg(not(debug_assertions))]
295 |     let monitor = {
296 |         let tui = TuiUI::new("peacock".to_string(), true);
297 |         TuiMonitor::new(tui)
298 |     };
299 | 
300 |     #[cfg(debug_assertions)]
301 |     let monitor = libafl::prelude::MultiMonitor::new(|s| println!("{}", s));
302 | 
303 |     let cores = Cores::from_cmdline(&args.cores).expect("Invalid core specification");
304 | 
305 |     match Launcher::builder()
306 |         .shmem_provider(shmem_provider)
307 |         .configuration(EventConfig::AlwaysUnique)
308 |         .monitor(monitor)
309 |         .run_client(&mut run_client)
310 |         .cores(&cores)
311 |         .build()
312 |         .launch()
313 |     {
314 |         Err(Error::ShuttingDown) | Ok(()) => Ok(()),
315 |         e => e,
316 |     }
317 | }
318 | 
319 | pub fn main() {
320 |     let args = Args::parse();
321 |     load_grammar(&args);
322 |     fuzz(args).expect("Could not launch fuzzer");
323 | }
324 | 


--------------------------------------------------------------------------------
/src/bin/gen.rs:
--------------------------------------------------------------------------------
 1 | use clap::Parser;
 2 | use peacock_fuzz::{
 3 |     backends::interpreter::GrammarInterpreter,
 4 |     grammar::ContextFreeGrammar,
 5 | };
 6 | use std::{
 7 |     io::{
 8 |         stdout,
 9 |         BufWriter,
10 |         Write,
11 |     },
12 |     time::SystemTime,
13 | };
14 | 
15 | pub mod fuzz;
16 | use fuzz::GrammarFormat;
17 | 
18 | #[derive(Parser, Debug)]
19 | #[command(author, version, about, long_about = None)]
20 | struct Args {
21 |     #[arg(short, long)]
22 |     grammar: String,
23 | 
24 |     #[arg(long, default_value_t = GrammarFormat::Peacock)]
25 |     format: GrammarFormat,
26 | 
27 |     #[arg(short, long)]
28 |     entrypoint: Option<String>,
29 | 
30 |     #[arg(long, default_value_t = false)]
31 |     dont_optimize: bool,
32 | 
33 |     #[arg(long, short, default_value_t = String::from("1"))]
34 |     count: String,
35 | 
36 |     #[arg(long, short)]
37 |     seed: Option<String>,
38 | }
39 | 
40 | fn main() {
41 |     let args = Args::parse();
42 | 
43 |     let count = args.count.parse::<usize>().unwrap();
44 | 
45 |     let mut builder = ContextFreeGrammar::builder();
46 | 
47 |     match args.format {
48 |         GrammarFormat::Peacock => builder = builder.peacock_grammar(args.grammar).unwrap(),
49 |         GrammarFormat::Gramatron => builder = builder.gramatron_grammar(args.grammar).unwrap(),
50 |     }
51 | 
52 |     if let Some(entrypoint) = args.entrypoint {
53 |         builder = builder.entrypoint(entrypoint);
54 |     }
55 | 
56 |     builder = builder.optimize(!args.dont_optimize);
57 | 
58 |     let cfg = builder.build().unwrap();
59 | 
60 |     let mut stream = BufWriter::new(stdout());
61 |     let mut interpreter = GrammarInterpreter::new(&cfg);
62 | 
63 |     if let Some(seed) = args.seed {
64 |         let seed = seed.parse::<usize>().unwrap();
65 |         interpreter.seed(seed);
66 |     } else {
67 |         let seed =
68 |             SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs() ^ std::process::id() as u64;
69 |         interpreter.seed(seed as usize);
70 |     }
71 | 
72 |     for _ in 0..count {
73 |         interpreter.interpret(&mut stream).unwrap();
74 |         writeln!(&mut stream).unwrap();
75 |     }
76 | 
77 |     stream.flush().unwrap();
78 | }
79 | 


--------------------------------------------------------------------------------
/src/bin/merge.rs:
--------------------------------------------------------------------------------
 1 | use clap::Parser;
 2 | 
 3 | use peacock_fuzz::{
 4 |     backends::json::JsonGenerator,
 5 |     grammar::ContextFreeGrammar,
 6 | };
 7 | 
 8 | #[derive(Parser, Debug)]
 9 | #[command(author, version, about, long_about = None)]
10 | struct Args {
11 |     #[arg(short, long, required = true)]
12 |     output: String,
13 | 
14 |     #[arg(long, required = false)]
15 |     peacock_grammar: Vec<String>,
16 | 
17 |     #[arg(long, required = false)]
18 |     gramatron_grammar: Vec<String>,
19 | 
20 |     #[arg(short, long)]
21 |     entrypoint: Option<String>,
22 | 
23 |     #[arg(long, default_value_t = false)]
24 |     optimize: bool,
25 | }
26 | 
27 | fn main() {
28 |     let args = Args::parse();
29 | 
30 |     if args.peacock_grammar.is_empty() && args.gramatron_grammar.is_empty() {
31 |         panic!("You need to supply at least one grammar");
32 |     }
33 | 
34 |     let mut builder = ContextFreeGrammar::builder();
35 | 
36 |     for path in &args.peacock_grammar {
37 |         builder = builder.peacock_grammar(path).unwrap();
38 |     }
39 | 
40 |     for path in &args.gramatron_grammar {
41 |         builder = builder.gramatron_grammar(path).unwrap();
42 |     }
43 | 
44 |     builder = builder.optimize(args.optimize);
45 | 
46 |     if let Some(entrypoint) = args.entrypoint {
47 |         builder = builder.entrypoint(entrypoint);
48 |     }
49 | 
50 |     let cfg = builder.build().unwrap();
51 |     JsonGenerator::new().generate(args.output, &cfg);
52 | }
53 | 


--------------------------------------------------------------------------------
/src/components/ffi.rs:
--------------------------------------------------------------------------------
  1 | #[cfg(not(feature = "static-loading"))]
  2 | use {
  3 |     std::ops::Deref,
  4 |     std::path::Path,
  5 | };
  6 | 
  7 | type GrammarMutationFunc = unsafe extern "C" fn(buf: *mut usize, len: usize, capacity: usize) -> usize;
  8 | type GrammarSerializationFunc =
  9 |     unsafe extern "C" fn(seq: *const usize, seq_len: usize, out: *mut u8, out_len: usize) -> usize;
 10 | type GrammarSeedFunc = unsafe extern "C" fn(seed: usize);
 11 | type GrammarUnparseFunc =
 12 |     unsafe extern "C" fn(seq: *mut usize, seq_capacity: usize, input: *const u8, input_len: usize) -> usize;
 13 | 
 14 | #[allow(non_upper_case_globals)]
 15 | static mut grammar_mutate: Option<GrammarMutationFunc> = None;
 16 | #[allow(non_upper_case_globals)]
 17 | static mut grammar_serialize: Option<GrammarSerializationFunc> = None;
 18 | #[allow(non_upper_case_globals)]
 19 | static mut grammar_seed: Option<GrammarSeedFunc> = None;
 20 | #[allow(non_upper_case_globals)]
 21 | static mut grammar_unparse: Option<GrammarUnparseFunc> = None;
 22 | 
 23 | #[cfg(feature = "static-loading")]
 24 | #[link(name = "generator")]
 25 | extern "C" {
 26 |     fn mutate_sequence(buf: *mut usize, len: usize, capacity: usize) -> usize;
 27 |     fn serialize_sequence(seq: *const usize, seq_len: usize, out: *mut u8, out_len: usize) -> usize;
 28 |     fn seed_generator(seed: usize);
 29 |     fn unparse_sequence(seq: *mut usize, seq_capacity: usize, input: *const u8, input_len: usize) -> usize;
 30 | }
 31 | 
 32 | /// This function initializes the generator. Must be called before anything else.
 33 | ///
 34 | /// This is the __static__ version of this function, meaning that it expects you to link the generator
 35 | /// functions statically into the binary. The generator must be an archive file called `libgenerator.a`
 36 | /// otherwise symbol resolution will fail.
 37 | #[cfg(feature = "static-loading")]
 38 | pub fn load_generator() {
 39 |     unsafe {
 40 |         grammar_mutate = Some(mutate_sequence);
 41 |         grammar_serialize = Some(serialize_sequence);
 42 |         grammar_seed = Some(seed_generator);
 43 |         grammar_unparse = Some(unparse_sequence);
 44 |     }
 45 | }
 46 | 
 47 | #[cfg(not(feature = "static-loading"))]
 48 | fn get_function<T: Copy>(lib: &libloading::Library, name: &[u8]) -> T {
 49 |     let f: libloading::Symbol<T> = unsafe { lib.get(name) }.expect("Could not find function in generator.so");
 50 |     let f = f.deref();
 51 |     *f
 52 | }
 53 | 
 54 | /// This function initializes the generator. Must be called before anything else.
 55 | ///
 56 | /// This is the __dynamic__ version of this function, which gets a path to a
 57 | /// shared object as an argument and loads that via dlopen().
 58 | #[cfg(not(feature = "static-loading"))]
 59 | pub fn load_generator<P: AsRef<Path>>(path: P) {
 60 |     let path = path.as_ref();
 61 | 
 62 |     unsafe {
 63 |         let lib = libloading::Library::new(path).expect("Could not load generator.so");
 64 |         grammar_mutate = Some(get_function::<GrammarMutationFunc>(&lib, b"mutate_sequence"));
 65 |         grammar_serialize = Some(get_function::<GrammarSerializationFunc>(&lib, b"serialize_sequence"));
 66 |         grammar_seed = Some(get_function::<GrammarSeedFunc>(&lib, b"seed_generator"));
 67 |         grammar_unparse = Some(get_function::<GrammarUnparseFunc>(&lib, b"unparse_sequence"));
 68 |         std::mem::forget(lib);
 69 |     }
 70 | }
 71 | 
 72 | pub(crate) fn generator_mutate(sequence: &mut Vec<usize>) {
 73 |     let len = sequence.len();
 74 |     let capacity = sequence.capacity();
 75 |     let buf = sequence.as_mut_ptr();
 76 | 
 77 |     let f = unsafe { grammar_mutate }.expect("load_generator() has not been called before fuzzing");
 78 | 
 79 |     unsafe {
 80 |         let new_len = f(buf, len, capacity);
 81 |         sequence.set_len(new_len);
 82 |     }
 83 | }
 84 | 
 85 | pub(crate) fn generator_serialize(sequence: &[usize], out: *mut u8, out_len: usize) -> usize {
 86 |     let seq = sequence.as_ptr();
 87 |     let seq_len = sequence.len();
 88 | 
 89 |     let f = unsafe { grammar_serialize }.expect("load_generator() has not been called before fuzzing");
 90 | 
 91 |     unsafe { f(seq, seq_len, out, out_len) }
 92 | }
 93 | 
 94 | /// Seed the RNG of the generator.
 95 | pub fn generator_seed(seed: usize) {
 96 |     let f = unsafe { grammar_seed }.expect("load_generator() has not been called before generator_seed()");
 97 | 
 98 |     unsafe {
 99 |         f(seed);
100 |     }
101 | }
102 | 
103 | pub(crate) fn generator_unparse(sequence: &mut Vec<usize>, input: &[u8]) -> bool {
104 |     let seq = sequence.as_mut_ptr();
105 |     let seq_capacity = sequence.capacity();
106 |     let input_len = input.len();
107 |     let input = input.as_ptr();
108 | 
109 |     let f = unsafe { grammar_unparse }.expect("load_generator() has not been called before fuzzing");
110 | 
111 |     let new_len = unsafe { f(seq, seq_capacity, input, input_len) };
112 | 
113 |     if new_len == 0 {
114 |         return false;
115 |     }
116 | 
117 |     unsafe {
118 |         sequence.set_len(new_len);
119 |     }
120 | 
121 |     true
122 | }
123 | 


--------------------------------------------------------------------------------
/src/components/generator.rs:
--------------------------------------------------------------------------------
 1 | use crate::components::{
 2 |     ffi::generator_mutate,
 3 |     PeacockInput,
 4 | };
 5 | use libafl::prelude::{
 6 |     Error,
 7 |     Generator,
 8 | };
 9 | 
10 | /// This component generates new inputs from scratch.
11 | pub struct PeacockGenerator;
12 | 
13 | impl PeacockGenerator {
14 |     /// Create a new generator.
15 |     #[allow(clippy::new_without_default)]
16 |     pub fn new() -> Self {
17 |         Self {}
18 |     }
19 | }
20 | 
21 | impl<S> Generator<PeacockInput, S> for PeacockGenerator {
22 |     fn generate(&mut self, _state: &mut S) -> Result<PeacockInput, Error> {
23 |         let mut input = PeacockInput::default();
24 |         generator_mutate(input.sequence_mut());
25 |         Ok(input)
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/src/components/input.rs:
--------------------------------------------------------------------------------
  1 | use ahash::RandomState;
  2 | use libafl::prelude::{
  3 |     CorpusId,
  4 |     Error,
  5 |     HasTargetBytes,
  6 |     Input,
  7 | };
  8 | use libafl_bolts::prelude::{
  9 |     HasLen,
 10 |     OwnedSlice,
 11 | };
 12 | use serde::{
 13 |     Deserialize,
 14 |     Serialize,
 15 | };
 16 | use std::{
 17 |     fs::File,
 18 |     io::Read,
 19 |     path::Path,
 20 | };
 21 | 
 22 | use crate::components::ffi::{
 23 |     generator_serialize,
 24 |     generator_unparse,
 25 | };
 26 | 
 27 | const BINARY_PREFIX: &str = "peacock-raw-";
 28 | static mut SERIALIZATION_BUFFER: [u8; 128 * 1024 * 1024] = [0; 128 * 1024 * 1024];
 29 | 
 30 | /// This component represents an Input during fuzzing.
 31 | #[derive(Serialize, Deserialize, Debug, Hash)]
 32 | pub struct PeacockInput {
 33 |     sequence: Vec<usize>,
 34 | }
 35 | 
 36 | impl PeacockInput {
 37 |     pub(crate) fn sequence(&self) -> &[usize] {
 38 |         &self.sequence
 39 |     }
 40 | 
 41 |     pub(crate) fn sequence_mut(&mut self) -> &mut Vec<usize> {
 42 |         &mut self.sequence
 43 |     }
 44 | }
 45 | 
 46 | impl Input for PeacockInput {
 47 |     fn generate_name(&self, _idx: Option<CorpusId>) -> String {
 48 |         let hash = RandomState::with_seeds(0, 0, 0, 0).hash_one(self);
 49 |         format!("{}{:016x}", BINARY_PREFIX, hash)
 50 |     }
 51 | 
 52 |     fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
 53 |         let path = path.as_ref();
 54 |         let mut file = File::open(path)?;
 55 |         let mut bytes: Vec<u8> = vec![];
 56 |         file.read_to_end(&mut bytes)?;
 57 | 
 58 |         let is_raw = if let Some(file_name) = path.file_name().and_then(|x| x.to_str()) {
 59 |             file_name.starts_with(BINARY_PREFIX)
 60 |         } else {
 61 |             false
 62 |         };
 63 | 
 64 |         if is_raw {
 65 |             Ok(postcard::from_bytes(&bytes)?)
 66 |         } else {
 67 |             let mut ret = Self::default();
 68 | 
 69 |             if !generator_unparse(&mut ret.sequence, &bytes) {
 70 |                 return Err(Error::serialize(format!("Could not unparse sequence from input file {}", path.display())));
 71 |             }
 72 | 
 73 |             Ok(ret)
 74 |         }
 75 |     }
 76 | }
 77 | 
 78 | impl HasLen for PeacockInput {
 79 |     fn len(&self) -> usize {
 80 |         self.sequence.len()
 81 |     }
 82 | }
 83 | 
 84 | impl HasTargetBytes for PeacockInput {
 85 |     fn target_bytes(&self) -> OwnedSlice<u8> {
 86 |         let len = generator_serialize(&self.sequence, unsafe { SERIALIZATION_BUFFER.as_mut_ptr() }, unsafe {
 87 |             SERIALIZATION_BUFFER.len()
 88 |         });
 89 | 
 90 |         unsafe { OwnedSlice::from_raw_parts(SERIALIZATION_BUFFER.as_ptr(), len) }
 91 |     }
 92 | }
 93 | 
 94 | impl Default for PeacockInput {
 95 |     fn default() -> Self {
 96 |         Self {
 97 |             sequence: Vec::with_capacity(4096 * 2),
 98 |         }
 99 |     }
100 | }
101 | 
102 | impl Clone for PeacockInput {
103 |     fn clone(&self) -> Self {
104 |         let mut clone = Self::default();
105 |         clone.sequence.extend_from_slice(&self.sequence);
106 |         clone
107 |     }
108 | }
109 | 


--------------------------------------------------------------------------------
/src/components/mod.rs:
--------------------------------------------------------------------------------
 1 | //! This module contains LibAFL components that can interact with the generated code of the C backend.
 2 | //!
 3 | //! The components can interact with the code in two ways:
 4 | //! - __dynamically__ (default): If the code is compiled into a shared object, it can be loaded via dlopen()
 5 | //!   in `load_generator("path/to/generator.so")`.
 6 | //! - __statically__: The code can also be directly compiled into the fuzzer via a build script.
 7 | //!   If you plan to do this, activate the feature `static-loading` and call `load_generator()` without an argument.
 8 | //!   One caveat of this is that the generated code must be compiled into a static archive that is called `libgenerator.a`.
 9 | //!   This name is hardcoded into this library.
10 | //!
11 | //! Either way, it is mandatory that [`load_generator`] is called before fuzzing starts.
12 | //!
13 | //! ## Examples
14 | //! For an example of dynamic loading see the binary `peacock-fuzz` in `src/bin/fuzz.rs`.    
15 | //! For an example of static loading see the fuzzer in `test-data/static_loading/src/main.rs`.
16 | 
17 | pub(crate) mod ffi;
18 | mod generator;
19 | mod input;
20 | mod mutator;
21 | 
22 | pub use ffi::{
23 |     generator_seed as seed_generator,
24 |     load_generator,
25 | };
26 | 
27 | pub use generator::PeacockGenerator;
28 | pub use input::PeacockInput;
29 | pub use mutator::PeacockMutator;
30 | 


--------------------------------------------------------------------------------
/src/components/mutator.rs:
--------------------------------------------------------------------------------
 1 | use libafl::prelude::{
 2 |     Error,
 3 |     HasRand,
 4 |     MutationResult,
 5 |     Mutator,
 6 | };
 7 | use libafl_bolts::prelude::{
 8 |     Named,
 9 |     Rand,
10 | };
11 | use std::borrow::Cow;
12 | 
13 | use crate::components::{
14 |     ffi::generator_mutate,
15 |     PeacockInput,
16 | };
17 | 
18 | /// This component implements grammar-based mutations.
19 | pub struct PeacockMutator;
20 | 
21 | impl PeacockMutator {
22 |     /// Create a new mutator.
23 |     #[allow(clippy::new_without_default)]
24 |     pub fn new() -> Self {
25 |         Self {}
26 |     }
27 | }
28 | 
29 | impl Named for PeacockMutator {
30 |     fn name(&self) -> &Cow<'static, str> {
31 |         static NAME: Cow<'static, str> = Cow::Borrowed("PeacockMutator");
32 |         &NAME
33 |     }
34 | }
35 | 
36 | impl<S> Mutator<PeacockInput, S> for PeacockMutator
37 | where
38 |     S: HasRand,
39 | {
40 |     fn mutate(&mut self, state: &mut S, input: &mut PeacockInput) -> Result<MutationResult, Error> {
41 |         let len = state.rand_mut().below(input.sequence().len());
42 |         input.sequence_mut().truncate(len);
43 |         generator_mutate(input.sequence_mut());
44 |         Ok(MutationResult::Mutated)
45 |     }
46 | }
47 | 


--------------------------------------------------------------------------------
/src/error.rs:
--------------------------------------------------------------------------------
 1 | //! This module contains various error types.
 2 | 
 3 | use std::path::PathBuf;
 4 | use thiserror::Error;
 5 | 
 6 | /// A ParsingError means that the syntax or format of the provided grammar is invalid.
 7 | #[derive(Debug, Error)]
 8 | pub struct ParsingError {
 9 |     path: PathBuf,
10 |     msg: String,
11 | }
12 | 
13 | impl ParsingError {
14 |     pub(crate) fn new<P: Into<PathBuf>, S: Into<String>>(path: P, msg: S) -> Self {
15 |         Self {
16 |             path: path.into(),
17 |             msg: msg.into(),
18 |         }
19 |     }
20 | }
21 | 
22 | impl std::fmt::Display for ParsingError {
23 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
24 |         write!(f, "ParsingError in {}: {}", self.path.display(), self.msg)
25 |     }
26 | }
27 | 
28 | /// A GrammarError represents an error with the content of a grammar.
29 | #[derive(Debug, Error)]
30 | pub enum GrammarError {
31 |     /// The grammar does not contain rules to expand the entrypoint
32 |     #[error("The grammar does not contain an explicit entrypoint: {0}")]
33 |     MissingEntrypoint(String),
34 | 
35 |     /// The grammar is referencing a non-terminal that has no rules to expand.
36 |     #[error("The non-terminal '{0}' is referenced but never defined")]
37 |     MissingNonTerminal(String),
38 | }
39 | 


--------------------------------------------------------------------------------
/src/grammar/builder.rs:
--------------------------------------------------------------------------------
  1 | use std::{
  2 |     collections::HashSet,
  3 |     path::Path,
  4 | };
  5 | 
  6 | use crate::{
  7 |     error::{
  8 |         GrammarError,
  9 |         ParsingError,
 10 |     },
 11 |     grammar::{
 12 |         ContextFreeGrammar,
 13 |         NonTerminal,
 14 |         ProductionRule,
 15 |         Symbol,
 16 |     },
 17 |     parser::{
 18 |         gramatron,
 19 |         peacock,
 20 |     },
 21 | };
 22 | 
 23 | /// The default non-terminal that is used as an entrypoint to the grammar
 24 | pub const DEFAULT_ENTRYPOINT: &str = "ENTRYPOINT";
 25 | 
 26 | /// The GrammarBuilder loads grammars from disk and returns a unified [`ContextFreeGrammar`].
 27 | ///    
 28 | /// Use it like so:
 29 | /// ```
 30 | /// // Load multiple grammars by joining their rules:
 31 | /// let grammar = ContextFreeGrammar::builder()
 32 | ///     // Load a grammar in peacock format
 33 | ///     .peacock_grammar("my-grammar.json").unwrap()
 34 | ///     // Or a grammar in gramatron format
 35 | ///     .gramatron_grammar("my-old-grammar.json").unwrap()
 36 | ///     // Set the entrypoint
 37 | ///     .entrypoint("MY-ENTRYPOINT")
 38 | ///     .build().unwrap();
 39 | /// ```
 40 | pub struct GrammarBuilder {
 41 |     rules: Vec<ProductionRule>,
 42 |     optimize: bool,
 43 |     entrypoint: String,
 44 | }
 45 | 
 46 | impl GrammarBuilder {
 47 |     pub(crate) fn new() -> Self {
 48 |         Self {
 49 |             rules: Vec::new(),
 50 |             optimize: true,
 51 |             entrypoint: DEFAULT_ENTRYPOINT.to_string(),
 52 |         }
 53 |     }
 54 | 
 55 |     fn check_entrypoint(&self) -> bool {
 56 |         for rule in &self.rules {
 57 |             if rule.lhs().id() == self.entrypoint {
 58 |                 return false;
 59 |             }
 60 |         }
 61 | 
 62 |         true
 63 |     }
 64 | 
 65 |     fn check_non_terminals(&self) -> Option<String> {
 66 |         let mut defined_non_terms = HashSet::new();
 67 | 
 68 |         for rule in &self.rules {
 69 |             defined_non_terms.insert(rule.lhs().id());
 70 |         }
 71 | 
 72 |         for rule in &self.rules {
 73 |             for symbol in rule.rhs() {
 74 |                 if let Symbol::NonTerminal(nonterm) = symbol {
 75 |                     if !defined_non_terms.contains(nonterm.id()) {
 76 |                         return Some(nonterm.id().to_string());
 77 |                     }
 78 |                 }
 79 |             }
 80 |         }
 81 | 
 82 |         None
 83 |     }
 84 | }
 85 | 
 86 | impl GrammarBuilder {
 87 |     /// Load a grammar from disk that is in Peacock format. How the peacock format looks like is explained
 88 |     /// in the [README](https://github.com/z2-2z/peacock#how-to-write-grammars) of this project.
 89 |     pub fn peacock_grammar<P: AsRef<Path>>(mut self, path: P) -> Result<Self, ParsingError> {
 90 |         let mut new_rules = peacock::parse_json(path.as_ref())?;
 91 |         self.rules.append(&mut new_rules);
 92 |         Ok(self)
 93 |     }
 94 | 
 95 |     /// Load a grammar from disk that is in [Gramatron](https://github.com/HexHive/Gramatron)'s format.
 96 |     pub fn gramatron_grammar<P: AsRef<Path>>(mut self, path: P) -> Result<Self, ParsingError> {
 97 |         let mut new_rules = gramatron::parse_json(path.as_ref())?;
 98 |         self.rules.append(&mut new_rules);
 99 |         Ok(self)
100 |     }
101 | 
102 |     /// Apply Gramatron-style optimizations to this grammar that enable better mutation quality.
103 |     ///
104 |     /// Default: `true`
105 |     pub fn optimize(mut self, optimize: bool) -> Self {
106 |         self.optimize = optimize;
107 |         self
108 |     }
109 | 
110 |     /// Set the entrypoint of all loaded grammars to be the given non-terminal `entrypoint`.
111 |     ///
112 |     /// Default: [`DEFAULT_ENTRYPOINT`]
113 |     pub fn entrypoint<S: Into<String>>(mut self, entrypoint: S) -> Self {
114 |         self.entrypoint = entrypoint.into();
115 |         self
116 |     }
117 | 
118 |     /// Create a [`ContextFreeGrammar`].
119 |     pub fn build(self) -> Result<ContextFreeGrammar, GrammarError> {
120 |         if self.check_entrypoint() {
121 |             return Err(GrammarError::MissingEntrypoint(self.entrypoint));
122 |         }
123 | 
124 |         if let Some(nonterm) = self.check_non_terminals() {
125 |             return Err(GrammarError::MissingNonTerminal(nonterm));
126 |         }
127 | 
128 |         let mut cfg = ContextFreeGrammar::new(self.rules, NonTerminal::new(self.entrypoint));
129 | 
130 |         if self.optimize {
131 |             cfg.concatenate_terminals();
132 |             cfg.remove_duplicate_rules();
133 |             cfg.remove_unit_rules();
134 |             cfg.remove_unused_rules();
135 | 
136 |             if !cfg.is_in_gnf() {
137 |                 cfg.remove_mixed_rules();
138 |                 cfg.break_rules();
139 |                 cfg.convert_to_gnf();
140 |                 cfg.remove_unused_rules();
141 |             }
142 |         }
143 | 
144 |         if cfg.count_entrypoint_rules() > 1 {
145 |             cfg.set_new_entrypoint();
146 |         }
147 | 
148 |         Ok(cfg)
149 |     }
150 | }
151 | 
152 | #[cfg(test)]
153 | mod tests {
154 |     use super::*;
155 | 
156 |     #[test]
157 |     #[should_panic]
158 |     fn test_missing_refs() {
159 |         ContextFreeGrammar::builder().peacock_grammar("test-data/grammars/invalid-refs.json").unwrap().build().unwrap();
160 |     }
161 | 
162 |     #[test]
163 |     fn test_gramatron_grammar() {
164 |         let cfg = ContextFreeGrammar::builder()
165 |             .gramatron_grammar("test-data/grammars/gramatron.json")
166 |             .unwrap()
167 |             .build()
168 |             .unwrap();
169 |         println!("{:#?}", cfg.rules());
170 |     }
171 | }
172 | 


--------------------------------------------------------------------------------
/src/grammar/cfg.rs:
--------------------------------------------------------------------------------
  1 | use ahash::RandomState;
  2 | use petgraph::{
  3 |     visit::Bfs,
  4 |     Graph,
  5 | };
  6 | use std::collections::{
  7 |     HashMap,
  8 |     HashSet,
  9 | };
 10 | 
 11 | use crate::grammar::builder::GrammarBuilder;
 12 | 
 13 | /// This type represents a [non-terminal](https://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols#Nonterminal_symbols) in a context-free grammar.
 14 | #[derive(Debug, Clone, Eq, Hash, PartialEq)]
 15 | pub struct NonTerminal(String);
 16 | 
 17 | impl NonTerminal {
 18 |     pub(crate) fn new<S: Into<String>>(s: S) -> Self {
 19 |         Self(s.into())
 20 |     }
 21 | 
 22 |     /// The id of a non-terminal is its name from the grammar files.
 23 |     pub fn id(&self) -> &str {
 24 |         &self.0
 25 |     }
 26 | }
 27 | 
 28 | /// This type represents a [terminal](https://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols#Terminal_symbols) in a context-free grammar.
 29 | #[derive(Debug, Clone, Eq, Hash, PartialEq)]
 30 | pub struct Terminal(String);
 31 | 
 32 | impl Terminal {
 33 |     pub(crate) fn new<S: Into<String>>(s: S) -> Self {
 34 |         Self(s.into())
 35 |     }
 36 | 
 37 |     /// The data of the terminal.
 38 |     pub fn content(&self) -> &str {
 39 |         &self.0
 40 |     }
 41 | }
 42 | 
 43 | /// A Symbol is either a terminal or a non-terminal and can be found on the right-hand-side
 44 | /// of a production rule.
 45 | #[derive(Debug, Clone, Eq, Hash, PartialEq)]
 46 | pub enum Symbol {
 47 |     /// A terminal is a chunk of data that will be inserted into the output / parsed from the input.
 48 |     Terminal(Terminal),
 49 | 
 50 |     /// A non-terminal is a placeholder that will be replaced by other symbols.
 51 |     NonTerminal(NonTerminal),
 52 | }
 53 | 
 54 | impl Symbol {
 55 |     /// Return whether the Symbol is a terminal
 56 |     #[inline]
 57 |     pub fn is_terminal(&self) -> bool {
 58 |         matches!(self, Symbol::Terminal(_))
 59 |     }
 60 | 
 61 |     /// Return whether the Symbol is a non-terminal
 62 |     #[inline]
 63 |     pub fn is_non_terminal(&self) -> bool {
 64 |         matches!(self, Symbol::NonTerminal(_))
 65 |     }
 66 | }
 67 | 
 68 | /// A ProductionRule states how to expand a non-terminal.  
 69 | ///   
 70 | /// The left-hand-side (lhs) of a rule is the non-terminal to expand.
 71 | /// The right-hand-side (rhs) of a rule is the sequence of Symbols that are replacing the lhs.
 72 | ///
 73 | /// Please note that if a grammar has multiple ways to expand a non-terminal like so:
 74 | /// ```json
 75 | /// {
 76 | ///     "<A-OR-B>": [
 77 | ///         ["'a'"],
 78 | ///         ["'b'"],
 79 | ///     ]
 80 | /// }
 81 | /// ```
 82 | /// then multiple `ProductionRules` will be generated, one for each variant.
 83 | #[derive(Debug, Clone, Eq, Hash, PartialEq)]
 84 | pub struct ProductionRule {
 85 |     lhs: NonTerminal,
 86 |     rhs: Vec<Symbol>,
 87 | }
 88 | 
 89 | impl ProductionRule {
 90 |     pub(crate) fn new(lhs: NonTerminal, rhs: Vec<Symbol>) -> Self {
 91 |         Self {
 92 |             lhs,
 93 |             rhs,
 94 |         }
 95 |     }
 96 | 
 97 |     /// The left-hand-side of a production rule or the non-terminal that is to be expanded.
 98 |     pub fn lhs(&self) -> &NonTerminal {
 99 |         &self.lhs
100 |     }
101 | 
102 |     /// The right-hand-side of a production rule or the sequence of Symbols that are replacing the left-hand-side.
103 |     pub fn rhs(&self) -> &[Symbol] {
104 |         &self.rhs
105 |     }
106 | 
107 |     pub(crate) fn fixed_hash(&self) -> u64 {
108 |         RandomState::with_seeds(0, 0, 0, 0).hash_one(self)
109 |     }
110 | }
111 | 
112 | fn is_mixed(rhs: &[Symbol]) -> bool {
113 |     let mut terms = false;
114 |     let mut non_terms = false;
115 | 
116 |     for symbol in rhs {
117 |         terms |= symbol.is_terminal();
118 |         non_terms |= symbol.is_non_terminal();
119 |     }
120 | 
121 |     terms & non_terms
122 | }
123 | 
124 | fn is_only_non_terminals(rhs: &[Symbol]) -> bool {
125 |     for symbol in rhs {
126 |         if symbol.is_terminal() {
127 |             return false;
128 |         }
129 |     }
130 | 
131 |     true
132 | }
133 | 
134 | /// A ContextFreeGrammar is a set of production rules that describe how to construct an input.
135 | ///
136 | /// Use the [`builder()`](ContextFreeGrammar::builder) method to actually create this struct.
137 | pub struct ContextFreeGrammar {
138 |     rules: Vec<ProductionRule>,
139 |     entrypoint: NonTerminal,
140 | }
141 | 
142 | impl ContextFreeGrammar {
143 |     /// Build a ContextFreeGrammar.
144 |     pub fn builder() -> GrammarBuilder {
145 |         GrammarBuilder::new()
146 |     }
147 | 
148 |     /// Access the production rules of this grammar.
149 |     pub fn rules(&self) -> &[ProductionRule] {
150 |         &self.rules
151 |     }
152 | 
153 |     /// Access the entrypoint non-terminal of this grammar.
154 |     pub fn entrypoint(&self) -> &NonTerminal {
155 |         &self.entrypoint
156 |     }
157 | 
158 |     pub(crate) fn new(rules: Vec<ProductionRule>, entrypoint: NonTerminal) -> Self {
159 |         Self {
160 |             rules,
161 |             entrypoint,
162 |         }
163 |     }
164 | }
165 | 
166 | impl ContextFreeGrammar {
167 |     pub(crate) fn concatenate_terminals(&mut self) {
168 |         for rule in &mut self.rules {
169 |             let mut i = 0;
170 | 
171 |             while i + 1 < rule.rhs.len() {
172 |                 if rule.rhs[i].is_terminal() && rule.rhs[i + 1].is_terminal() {
173 |                     let Symbol::Terminal(second) = rule.rhs.remove(i + 1) else { unreachable!() };
174 |                     let Symbol::Terminal(first) = &mut rule.rhs[i] else { unreachable!() };
175 |                     first.0.push_str(second.content());
176 |                 } else {
177 |                     i += 1;
178 |                 }
179 |             }
180 |         }
181 |     }
182 | 
183 |     pub(crate) fn remove_duplicate_rules(&mut self) {
184 |         let mut hashes = HashSet::with_capacity(self.rules.len());
185 |         let mut i = 0;
186 | 
187 |         while i < self.rules.len() {
188 |             let hash = self.rules[i].fixed_hash();
189 | 
190 |             if !hashes.insert(hash) {
191 |                 self.rules.remove(i);
192 |             } else {
193 |                 i += 1;
194 |             }
195 |         }
196 |     }
197 | 
198 |     pub(crate) fn remove_unused_rules(&mut self) {
199 |         let mut graph = Graph::<&str, ()>::new();
200 |         let mut nodes = HashMap::new();
201 | 
202 |         /* Construct directed graph of non-terminals */
203 |         for rule in &self.rules {
204 |             let src = rule.lhs().id();
205 |             let src = *nodes.entry(src).or_insert_with(|| graph.add_node(src));
206 | 
207 |             for symbol in rule.rhs() {
208 |                 if let Symbol::NonTerminal(nonterm) = symbol {
209 |                     let dst = nonterm.id();
210 |                     let dst = *nodes.entry(dst).or_insert_with(|| graph.add_node(dst));
211 | 
212 |                     graph.add_edge(src, dst, ());
213 |                 }
214 |             }
215 |         }
216 | 
217 |         /* Do a BFS from entrypoint */
218 |         let entrypoint = *nodes.get(self.entrypoint.id()).unwrap();
219 |         let mut bfs = Bfs::new(&graph, entrypoint);
220 | 
221 |         while let Some(idx) = bfs.next(&graph) {
222 |             let id = graph.node_weight(idx).unwrap();
223 |             nodes.remove(id);
224 |         }
225 | 
226 |         /* Now `nodes` contains all the non-terminals that are never used */
227 |         let nodes: HashSet<String> = nodes.into_keys().map(|x| x.to_string()).collect();
228 |         let mut i = 0;
229 | 
230 |         while i < self.rules.len() {
231 |             let lhs = self.rules[i].lhs().id();
232 | 
233 |             if nodes.contains(lhs) {
234 |                 self.rules.remove(i);
235 |             } else {
236 |                 i += 1;
237 |             }
238 |         }
239 |     }
240 | 
241 |     pub(crate) fn remove_unit_rules(&mut self) {
242 |         let mut i = 0;
243 | 
244 |         while i < self.rules.len() {
245 |             let rule = &self.rules[i];
246 | 
247 |             if rule.rhs().len() == 1 && rule.rhs()[0].is_non_terminal() {
248 |                 let old_rule = self.rules.remove(i);
249 |                 let Symbol::NonTerminal(to_expand) = &old_rule.rhs[0] else { unreachable!() };
250 |                 let mut new_rules = Vec::new();
251 | 
252 |                 for other_rule in &self.rules {
253 |                     if to_expand.id() == other_rule.lhs().id() {
254 |                         new_rules.push(ProductionRule::new(old_rule.lhs().clone(), other_rule.rhs.clone()));
255 |                     }
256 |                 }
257 | 
258 |                 self.rules.append(&mut new_rules);
259 |             } else {
260 |                 i += 1;
261 |             }
262 |         }
263 |     }
264 | 
265 |     pub(crate) fn remove_mixed_rules(&mut self) {
266 |         let mut terms = HashMap::new();
267 | 
268 |         for rule in &mut self.rules {
269 |             if is_mixed(rule.rhs()) {
270 |                 for j in 0..rule.rhs().len() {
271 |                     if let Symbol::Terminal(term) = &rule.rhs()[j] {
272 |                         let non_term = terms
273 |                             .entry(term.clone())
274 |                             .or_insert_with(|| NonTerminal(format!("(term:{})", term.content())))
275 |                             .clone();
276 |                         rule.rhs[j] = Symbol::NonTerminal(non_term);
277 |                     }
278 |                 }
279 |             }
280 |         }
281 | 
282 |         for (term, nonterm) in terms {
283 |             self.rules.push(ProductionRule::new(nonterm, vec![Symbol::Terminal(term)]));
284 |         }
285 |     }
286 | 
287 |     pub(crate) fn break_rules(&mut self) {
288 |         let mut nonterm_cursor = 0;
289 |         let mut i = 0;
290 | 
291 |         while i < self.rules.len() {
292 |             let rule = &mut self.rules[i];
293 | 
294 |             if rule.rhs().len() > 2 && is_only_non_terminals(rule.rhs()) {
295 |                 let len = rule.rhs().len() - 1;
296 |                 let symbols: Vec<Symbol> = rule.rhs.drain(0..len).collect();
297 | 
298 |                 let nonterm = NonTerminal(format!("(break_rules:{})", nonterm_cursor));
299 |                 nonterm_cursor += 1;
300 | 
301 |                 rule.rhs.insert(0, Symbol::NonTerminal(nonterm.clone()));
302 | 
303 |                 self.rules.push(ProductionRule::new(nonterm, symbols));
304 |             }
305 | 
306 |             i += 1;
307 |         }
308 |     }
309 | 
310 |     pub(crate) fn convert_to_gnf(&mut self) {
311 |         let mut i = 0;
312 | 
313 |         while i < self.rules.len() {
314 |             if self.rules[i].rhs()[0].is_non_terminal() {
315 |                 let mut new_rules = Vec::new();
316 |                 let mut old_rule = self.rules.remove(i);
317 |                 let Symbol::NonTerminal(nonterm) = old_rule.rhs.remove(0) else { unreachable!() };
318 | 
319 |                 for other_rule in &self.rules {
320 |                     if other_rule.lhs().id() == nonterm.id() {
321 |                         let mut new_symbols = other_rule.rhs.clone();
322 |                         new_symbols.extend_from_slice(old_rule.rhs());
323 |                         new_rules.push(ProductionRule::new(old_rule.lhs().clone(), new_symbols));
324 |                     }
325 |                 }
326 | 
327 |                 self.rules.append(&mut new_rules);
328 |             } else {
329 |                 i += 1;
330 |             }
331 |         }
332 |     }
333 | 
334 |     pub(crate) fn set_new_entrypoint(&mut self) {
335 |         let nonterm = NonTerminal("(real_entrypoint)".to_string());
336 | 
337 |         self.rules.push(ProductionRule::new(nonterm.clone(), vec![Symbol::NonTerminal(self.entrypoint.clone())]));
338 | 
339 |         self.entrypoint = nonterm;
340 |     }
341 | 
342 |     pub(crate) fn count_entrypoint_rules(&self) -> usize {
343 |         let mut count = 0;
344 | 
345 |         for rule in &self.rules {
346 |             if rule.lhs().id() == self.entrypoint.id() {
347 |                 count += 1;
348 |             }
349 |         }
350 | 
351 |         count
352 |     }
353 | 
354 |     pub(crate) fn is_in_gnf(&self) -> bool {
355 |         for rule in &self.rules {
356 |             let rhs = rule.rhs();
357 | 
358 |             if rhs[0].is_non_terminal() {
359 |                 return false;
360 |             }
361 | 
362 |             if let Some(symbols) = rhs.get(1..) {
363 |                 for symbol in symbols {
364 |                     if symbol.is_terminal() {
365 |                         return false;
366 |                     }
367 |                 }
368 |             }
369 |         }
370 | 
371 |         true
372 |     }
373 | }
374 | 
375 | #[cfg(test)]
376 | mod tests {
377 |     use super::*;
378 | 
379 |     #[test]
380 |     fn test_unused_rules() {
381 |         let cfg = ContextFreeGrammar::builder()
382 |             .peacock_grammar("test-data/grammars/unused_rules.json")
383 |             .unwrap()
384 |             .build()
385 |             .unwrap();
386 | 
387 |         println!("{:#?}", cfg.rules());
388 |     }
389 | 
390 |     #[test]
391 |     fn test_duplicate_rules() {
392 |         let cfg = ContextFreeGrammar::builder()
393 |             .peacock_grammar("test-data/grammars/duplicate_rules.json")
394 |             .unwrap()
395 |             .build()
396 |             .unwrap();
397 | 
398 |         println!("{:#?}", cfg.rules());
399 |     }
400 | 
401 |     #[test]
402 |     fn test_unit_rules() {
403 |         let cfg = ContextFreeGrammar::builder()
404 |             .peacock_grammar("test-data/grammars/unit_rules.json")
405 |             .unwrap()
406 |             .build()
407 |             .unwrap();
408 | 
409 |         println!("{:#?}", cfg.rules());
410 |     }
411 | 
412 |     #[test]
413 |     #[should_panic]
414 |     fn test_recursion() {
415 |         let cfg = ContextFreeGrammar::builder()
416 |             .peacock_grammar("test-data/grammars/recursion.json")
417 |             .unwrap()
418 |             .build()
419 |             .unwrap();
420 | 
421 |         println!("{:#?}", cfg.rules());
422 |     }
423 | 
424 |     #[test]
425 |     #[ignore]
426 |     fn test_mixed_rules() {
427 |         let cfg = ContextFreeGrammar::builder()
428 |             .peacock_grammar("test-data/grammars/mixed_rules.json")
429 |             .unwrap()
430 |             .build()
431 |             .unwrap();
432 | 
433 |         println!("{:#?}", cfg.rules());
434 |     }
435 | }
436 | 


--------------------------------------------------------------------------------
/src/grammar/mod.rs:
--------------------------------------------------------------------------------
 1 | //! This is the frontend that loads grammars.
 2 | //!
 3 | //! Use it like so:
 4 | //! ```
 5 | //! // Load multiple grammars by joining their rules:
 6 | //! let grammar = ContextFreeGrammar::builder()
 7 | //!     // Load a grammar in peacock format
 8 | //!     .peacock_grammar("my-grammar.json").unwrap()
 9 | //!     // Or a grammar in gramatron format
10 | //!     .gramatron_grammar("my-old-grammar.json").unwrap()
11 | //!     // Set the entrypoint
12 | //!     .entrypoint("MY-ENTRYPOINT")
13 | //!     .build().unwrap();
14 | //! ```
15 | //! You can inspect the grammar contents like this:
16 | //! ```
17 | //! // Since a grammar is nothing but a set of rules, traverse the rules
18 | //! for rule in grammar.rules() {
19 | //!     // The left-hand-side (lhs) of a rule is a single non-terminal
20 | //!     println!("lhs = {:?}", rule.lhs());
21 | //!
22 | //!     // The right-hand-side (rhs) of a rule is a sequence of terminals and non-terminals.
23 | //!     // This is captured in the enum "Symbol".
24 | //!     for symbol in rule.rhs() {
25 | //!         match symbol {
26 | //!             Symbol::Terminal(terminal) => println!("terminal: {}", terminal.content()),
27 | //!             Symbol::NonTerminal(nonterminal) => println!("non-terminal {}", nonterminal.id()),
28 | //!         }
29 | //!     }
30 | //! }
31 | //! ```
32 | 
33 | mod builder;
34 | mod cfg;
35 | 
36 | pub use builder::*;
37 | pub use cfg::*;
38 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! This library contains everything you need to setup a grammar-based fuzzer.
 2 | //!
 3 | //! It consists of
 4 | //! - __frontend__: Load grammars of different formats. Currently, the Gramatron and Peacock format are supported.
 5 | //! - __backend__: Use the loaded grammar to do whatever you want.
 6 | //!   Current backends are
 7 | //!   - `C`: Generate a grammar-based mutator in C
 8 | //!   - `json`: Convert loaded grammar(s) into peacock format
 9 | //!   - `interpreter`: Generate inputs by interpreting a grammar
10 | //!
11 | //!   but you can easily write your own.
12 | //! - __runtime__: LibAFL components that you can use in your fuzzer to realize grammar-based mutations.
13 | //!
14 | //! ## Grammars
15 | //! This library supports grammar files in two formats:
16 | //! 1. [Gramatron](https://github.com/HexHive/Gramatron) format for backwards compatibility
17 | //! 2. Its own "peacock format", which is documented in the [README](https://github.com/z2-2z/peacock#how-to-write-grammars) of this project
18 | //!
19 | //! ## Getting Started
20 | //! The first step always is to load grammars. To do this use the [`ContextFreeGrammar::builder()`](grammar::ContextFreeGrammar::builder) method
21 | //! that will give you access to a [`GrammarBuilder`](grammar::GrammarBuilder) like this:
22 | //! ```
23 | //! // Load multiple grammars by joining their rules:
24 | //! let grammar = ContextFreeGrammar::builder()
25 | //!     // Load a grammar in peacock format
26 | //!     .peacock_grammar("my-grammar.json").unwrap()
27 | //!     // Or a grammar in gramatron format
28 | //!     .gramatron_grammar("my-old-grammar.json").unwrap()
29 | //!     // Set the entrypoint
30 | //!     .entrypoint("MY-ENTRYPOINT")
31 | //!     .build().unwrap();
32 | //! ```
33 | //! Then, you can plug the grammar into one of the provided backends:
34 | //! ```
35 | //! backends::C::CGenerator::new().generate("output-file.c", &grammar);
36 | //! // or
37 | //! backends::json::JsonGenerator::new().generate("output-file.json", &grammar);
38 | //! // or
39 | //! backends::interpreter::GrammarInterpreter::new(&grammar).interpret(&mut io::stdout());
40 | //! ```
41 | //! And that's it.
42 | //!
43 | //! ## Feature Flags
44 | //! - `components`: Include LibAFL components in this library. On by default.
45 | //! - `static-loading`: Activate this if you want to compile the generated C code into the fuzzer. For more details see the
46 | //!   documentation of the `components`.
47 | //! - `debug-codegen`: This affects the C backend and inserts calls to printf() at the beginning of every generated function to
48 | //!    help troubleshooting.
49 | 
50 | #![deny(missing_docs)]
51 | 
52 | pub(crate) mod parser;
53 | 
54 | pub mod backends;
55 | pub mod error;
56 | pub mod grammar;
57 | 
58 | #[cfg(feature = "components")]
59 | pub mod components;
60 | 


--------------------------------------------------------------------------------
/src/parser/gramatron.rs:
--------------------------------------------------------------------------------
  1 | use serde_json as json;
  2 | use std::{
  3 |     fs::File,
  4 |     io::BufReader,
  5 |     path::Path,
  6 | };
  7 | 
  8 | use crate::{
  9 |     error::ParsingError,
 10 |     grammar::{
 11 |         NonTerminal,
 12 |         ProductionRule,
 13 |         Symbol,
 14 |         Terminal,
 15 |     },
 16 | };
 17 | 
 18 | #[inline]
 19 | fn is_whitespace(c: u8) -> bool {
 20 |     matches!(c, b' ' | b'\t' | b'\n' | b'\r' | 12 | 11)
 21 | }
 22 | 
 23 | fn parse_until<F: FnMut(u8) -> bool>(buf: &[u8], mut delim: F) -> &[u8] {
 24 |     let mut cursor = 0;
 25 | 
 26 |     while cursor < buf.len() {
 27 |         if delim(buf[cursor]) {
 28 |             break;
 29 |         } else {
 30 |             cursor += 1;
 31 |         }
 32 |     }
 33 | 
 34 |     &buf[..cursor]
 35 | }
 36 | 
 37 | fn parse_grammar(value: json::Value) -> Result<Vec<ProductionRule>, String> {
 38 |     let mut rules = Vec::new();
 39 | 
 40 |     let object = match value {
 41 |         json::Value::Object(object) => object,
 42 |         _ => return Err("Gramatron grammar must be specified as an object".to_string()),
 43 |     };
 44 | 
 45 |     for (key, value) in &object {
 46 |         let rhs = match value {
 47 |             json::Value::Array(rhs) => rhs,
 48 |             _ => return Err(format!("Right-hand-side of '{}' must be an array", key)),
 49 |         };
 50 | 
 51 |         if rhs.is_empty() {
 52 |             return Err(format!("Invalid production rule '{}': Must not be empty", key));
 53 |         }
 54 | 
 55 |         for rule in rhs {
 56 |             let rule = match rule.as_str() {
 57 |                 Some(rule) => rule,
 58 |                 _ => return Err(format!("Right-hand-side of '{}' must be an array of strings", key)),
 59 |             };
 60 |             let mut symbols = Vec::new();
 61 |             let rule = rule.as_bytes();
 62 |             let mut cursor = 0;
 63 | 
 64 |             while cursor < rule.len() {
 65 |                 match &rule[cursor] {
 66 |                     b'\'' => {
 67 |                         cursor += 1;
 68 |                         let content = parse_until(&rule[cursor..], |x| x == b'\'');
 69 |                         cursor += content.len() + 1;
 70 |                         let content = String::from_utf8(content.to_vec()).unwrap();
 71 |                         symbols.push(Symbol::Terminal(Terminal::new(content)));
 72 |                     },
 73 |                     b'"' => {
 74 |                         cursor += 1;
 75 |                         let content = parse_until(&rule[cursor..], |x| x == b'"');
 76 |                         cursor += content.len() + 1;
 77 |                         let content = String::from_utf8(content.to_vec()).unwrap();
 78 |                         symbols.push(Symbol::Terminal(Terminal::new(content)));
 79 |                     },
 80 |                     c => {
 81 |                         if is_whitespace(*c) {
 82 |                             cursor += 1;
 83 |                         } else {
 84 |                             let content = parse_until(&rule[cursor..], |x| is_whitespace(x) || x == b'"' || x == b'\'');
 85 |                             cursor += content.len();
 86 |                             let content = String::from_utf8(content.to_vec()).unwrap();
 87 |                             symbols.push(Symbol::NonTerminal(NonTerminal::new(content)));
 88 |                         }
 89 |                     },
 90 |                 }
 91 |             }
 92 | 
 93 |             if symbols.is_empty() {
 94 |                 return Err(format!("Right-hand-side of '{}' must not contain a string with no tokens", key));
 95 |             }
 96 | 
 97 |             rules.push(ProductionRule::new(NonTerminal::new(key.clone()), symbols));
 98 |         }
 99 |     }
100 | 
101 |     Ok(rules)
102 | }
103 | 
104 | pub fn parse_json(path: &Path) -> Result<Vec<ProductionRule>, ParsingError> {
105 |     let file = File::open(path).unwrap();
106 |     let reader = BufReader::new(file);
107 | 
108 |     let value: json::Value = match json::from_reader(reader) {
109 |         Ok(value) => value,
110 |         Err(_) => {
111 |             return Err(ParsingError::new(path, "Invalid JSON syntax"));
112 |         },
113 |     };
114 | 
115 |     parse_grammar(value).map_err(|e| ParsingError::new(path, e))
116 | }
117 | 


--------------------------------------------------------------------------------
/src/parser/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod gramatron;
2 | pub mod peacock;
3 | 


--------------------------------------------------------------------------------
/src/parser/peacock.rs:
--------------------------------------------------------------------------------
  1 | use json_comments::{
  2 |     CommentSettings,
  3 |     StripComments,
  4 | };
  5 | use serde_json as json;
  6 | use std::{
  7 |     fs::File,
  8 |     io::BufReader,
  9 |     path::Path,
 10 | };
 11 | 
 12 | use crate::{
 13 |     error::ParsingError,
 14 |     grammar::{
 15 |         NonTerminal,
 16 |         ProductionRule,
 17 |         Symbol,
 18 |         Terminal,
 19 |     },
 20 | };
 21 | 
 22 | fn parse_non_terminal(keyword: &str) -> Option<&str> {
 23 |     if keyword.len() > 2 && keyword.starts_with('<') && keyword.ends_with('>') {
 24 |         Some(&keyword[1..keyword.len() - 1])
 25 |     } else {
 26 |         None
 27 |     }
 28 | }
 29 | 
 30 | fn parse_terminal(keyword: &str) -> &str {
 31 |     if keyword.len() >= 2 && keyword.starts_with('\'') && keyword.ends_with('\'') {
 32 |         &keyword[1..keyword.len() - 1]
 33 |     } else {
 34 |         keyword
 35 |     }
 36 | }
 37 | 
 38 | fn parse_grammar(value: json::Value) -> Result<Vec<ProductionRule>, String> {
 39 |     let mut rules = Vec::new();
 40 | 
 41 |     let object = match value {
 42 |         json::Value::Object(object) => object,
 43 |         _ => return Err("Peacock grammar must be specified as an object".to_string()),
 44 |     };
 45 | 
 46 |     for (key, value) in &object {
 47 |         // LHS must be a non-terminal
 48 |         let lhs = match parse_non_terminal(key) {
 49 |             Some(lhs) => lhs,
 50 |             None => return Err(format!("'{}' is not a valid non-terminal", key)),
 51 |         };
 52 | 
 53 |         // RHS must be an array of an array of strings that are either terminals or non-terminals
 54 |         let rhs = match value {
 55 |             json::Value::Array(rhs) => rhs,
 56 |             _ => return Err(format!("Right-hand-side of '{}' must be an array", key)),
 57 |         };
 58 | 
 59 |         if rhs.is_empty() {
 60 |             return Err(format!("Invalid production rule '{}': Must not be empty", key));
 61 |         }
 62 | 
 63 |         for rule in rhs {
 64 |             let tokens = match rule {
 65 |                 json::Value::Array(tokens) => tokens,
 66 |                 _ => return Err(format!("Right-hand-side of '{}' must be an array of arrays", key)),
 67 |             };
 68 | 
 69 |             if tokens.is_empty() {
 70 |                 return Err(format!("Invalid production rule '{}': One of its variants is empty", key));
 71 |             }
 72 | 
 73 |             let mut symbols = Vec::new();
 74 | 
 75 |             for token in tokens {
 76 |                 let token = match token.as_str() {
 77 |                     Some(token) => token,
 78 |                     _ => return Err(format!("Right-hand-side of '{}' must be an array of arrays of strings", key)),
 79 |                 };
 80 | 
 81 |                 if let Some(nonterm) = parse_non_terminal(token) {
 82 |                     symbols.push(Symbol::NonTerminal(NonTerminal::new(nonterm)));
 83 |                 } else {
 84 |                     let term = parse_terminal(token);
 85 |                     symbols.push(Symbol::Terminal(Terminal::new(term)));
 86 |                 }
 87 |             }
 88 | 
 89 |             rules.push(ProductionRule::new(NonTerminal::new(lhs), symbols));
 90 |         }
 91 |     }
 92 | 
 93 |     Ok(rules)
 94 | }
 95 | 
 96 | pub fn parse_json(path: &Path) -> Result<Vec<ProductionRule>, ParsingError> {
 97 |     let file = File::open(path).unwrap();
 98 |     let reader = BufReader::new(file);
 99 |     let reader = StripComments::with_settings(CommentSettings::c_style(), reader);
100 | 
101 |     let value: json::Value = match json::from_reader(reader) {
102 |         Ok(value) => value,
103 |         Err(_) => {
104 |             return Err(ParsingError::new(path, "Invalid JSON syntax"));
105 |         },
106 |     };
107 | 
108 |     parse_grammar(value).map_err(|e| ParsingError::new(path, e))
109 | }
110 | 
111 | #[cfg(test)]
112 | mod tests {
113 |     use super::*;
114 | 
115 |     #[test]
116 |     fn test_peacock() {
117 |         println!("{:#?}", parse_json(Path::new("test-data/grammars/test-peacock.json")).unwrap());
118 |     }
119 | }
120 | 


--------------------------------------------------------------------------------
/template.c:
--------------------------------------------------------------------------------
  1 | #error "This is just for brainstorming"
  2 | 
  3 | typedef struct {
  4 |     size_t* buf;
  5 |     size_t len;
  6 |     size_t capacity;
  7 | } Sequence;
  8 | 
  9 | static int generate_seq_SNGLE (Sequence* seq, size_t* step) {
 10 |     size_t idx = seq->len;
 11 |     
 12 |     if (*step >= idx) {
 13 |         if (idx >= seq->capacity) {
 14 |             return 0;
 15 |         }
 16 |         
 17 |         seq->buf[idx] = 0;
 18 |         seq->len = idx + 1;
 19 |     }
 20 |     
 21 |      *step += 1;
 22 |     
 23 |     // code inside case
 24 |     
 25 |     return 1;
 26 | }
 27 | 
 28 | static int generate_seq_ENTRYPOINT (Sequence* seq, size_t* step) {
 29 |     size_t idx = seq->len;
 30 |     size_t target;
 31 |     
 32 |     if (*step < idx) {
 33 |         target = seq->buf[*step];
 34 |     } else {
 35 |         if (idx >= seq->capacity) {
 36 |             return 0;
 37 |         }
 38 |         
 39 |         target = rand() % 2;
 40 |         seq->buf[idx] = target;
 41 |         seq->len = idx + 1;
 42 |     }
 43 |     
 44 |     *step += 1;
 45 |     
 46 |     switch (target) {
 47 |         case 0: {
 48 |             if (!generate_seq_A(seq, step)) {
 49 |                 return 0;
 50 |             }
 51 |             
 52 |             // repeat for all other non-terminals in rule
 53 |             
 54 |             break;
 55 |         }
 56 |         
 57 |         case 1: {
 58 |             // no non-terminals to explore
 59 |             
 60 |             break;
 61 |         }
 62 |         
 63 |         default: {
 64 |             __builtin_unreachable();
 65 |         },
 66 |     }
 67 |     
 68 |     return 1;
 69 | }
 70 | 
 71 | // In rust: Vec<usize>
 72 | size_t generate_sequence (void* buf, size_t len, size_t capacity) {
 73 |     if (UNLIKELY(!buf || !capacity)) {
 74 |         return 0;
 75 |     }
 76 |     
 77 |     Sequence seq = {
 78 |         .buf = (size_t*) buf,
 79 |         .len = len,
 80 |         .capacity = capacity,
 81 |     };
 82 |     size_t step = 0;
 83 |     
 84 |     generate_seq_ENTRYPOINT(&seq, &step);
 85 |     
 86 |     return seq.len;
 87 | }
 88 | 
 89 | static const unsigned char term0[] = {...};
 90 | 
 91 | static size_t serialize_seq_ENTRYPOINT (size_t* seq, size_t seq_len, unsigned char* out, size_t out_len, size_t* step) {
 92 |     if (UNLIKELY(*step >= seq_len)) {
 93 |         return 0;
 94 |     }
 95 |     
 96 |     unsigned char* original_out = out;
 97 |     size_t target = seq[*step];
 98 |     *step += 1;
 99 |     
100 |     switch (target) {
101 |         case 0: {
102 |             // non-terminal
103 |             size_t len = serialize_seq_NONTERM(seq, seq_len, out, out_len, step);
104 |             out += len; out_len -= len;
105 |             
106 |             // terminal
107 |             if (UNLIKELY(out_len < sizeof(term0))) {
108 |                 goto end;
109 |             }
110 |             __builtin_memcpy_inline(out, term0, sizeof(term0));
111 |             out += sizeof(term0); out_len -= sizeof(term0);
112 |             //TODO: optimize for 1, 2, 4, 8
113 |             
114 |             break;
115 |         }
116 |         
117 |         default: {
118 |             __builtin_unreachable();
119 |         }
120 |     }
121 |     
122 |   end:
123 |     return (size_t) (out - original_out);
124 | }
125 | 
126 | size_t serialize_sequence (size_t* seq, size_t seq_len, unsigned char* out, size_t out_len) {
127 |     if (UNLIKELY(!seq || !seq_len || !out || !out_len)) {
128 |         return 0;
129 |     }
130 |     
131 |     size_t step = 0;
132 |     
133 |     return serialize_seq_ENTRYPOINT(seq, seq_len, out, out_len, &step);
134 | }
135 | 
136 | 
137 | static int unparse_sequence_nontermXYZ (Sequence* seq, unsigned char* input, size_t input_len, size_t* cursor) {
138 |     size_t seq_idx = seq->len;
139 |     
140 |     if (UNLIKELY(seq_idx >= seq->capacity)) {
141 |         return 0;
142 |     }
143 |     
144 |     size_t target_cursor = 0;
145 |     size_t target_id = (size_t) -1LL;
146 |     size_t target_seq_len = seq_idx;
147 |     
148 |     // Single rule
149 |     do {
150 |         seq->len = seq_idx + 1;
151 |         size_t tmp_cursor = *cursor;
152 |         
153 |         // try item 1: terminal
154 |         if (UNLIKELY(input_len - tmp_cursor < sizeof(TERMX)) || __builtin_memcmp(&input[tmp_cursor], TERMX, sizeof(TERMX)) != 0) {
155 |             break;
156 |         }
157 |         tmp_cursor += sizeof(TERMX);
158 |         
159 |         // try item 2: non-terminal
160 |         if (!unparse_sequence_nontermABC(seq, input, input_len, &tmp_cursor)) {
161 |             break;
162 |         }
163 |         
164 |         if (tmp_cursor > target_cursor) {
165 |             target_id = 0; // index of rule
166 |             target_cursor = tmp_cursor;
167 |             target_seq_len = seq->len;
168 |         }
169 |     } while(0);
170 |     
171 |     seq->len = target_seq_len;
172 |     
173 |     if (LIKELY(target_id < NUM_RULES)) {
174 |         *cursor = target_cursor;
175 |         seq->buf[seq_idx] = target_id;
176 |         return 1;
177 |     }
178 |     
179 |     return 0;
180 | }
181 | 
182 | size_t unparse_sequence (size_t* seq_buf, size_t seq_capacity, unsigned char* input, size_t input_len) {
183 |     if (UNLIKELY(!seq_buf || !seq_capacity || !input || !input_len)) {
184 |         return 0;
185 |     }
186 |     
187 |     Sequence seq = {
188 |         .buf = seq_buf,
189 |         .len = 0,
190 |         .capacity = seq_capacity,
191 |     };
192 |     size_t cursor = 0;
193 |     unparse_sequence_nontermXYZ(&seq, input, input_len, &cursor);
194 |     return seq.len;
195 | }
196 | 


--------------------------------------------------------------------------------
/test-data/C/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !*.c
3 | !.gitignore
4 | 


--------------------------------------------------------------------------------
/test-data/C/bench_generation.c:
--------------------------------------------------------------------------------
 1 | // gcc -o bench_generation -Wall -Wextra -Wpedantic -Werror -O3 bench_generation.c /tmp/out.c
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <time.h>
 6 | 
 7 | size_t mutate_sequence (void* buf, size_t len, size_t capacity);
 8 | size_t serialize_sequence (size_t* seq, size_t seq_len, unsigned char* out, size_t out_len);
 9 | 
10 | #define BUF_SIZE (1 * 1024 * 1024)
11 | 
12 | int main (void) {
13 |     size_t* sequence = calloc(4096, sizeof(size_t));
14 |     unsigned char* output = malloc(BUF_SIZE);
15 |     size_t generated = 0;
16 |     struct timespec start;
17 |     struct timespec now;
18 |     size_t trials = 0;
19 |     
20 |     clock_gettime(CLOCK_MONOTONIC, &start);
21 |     
22 |     while (1) {
23 |         size_t seq_len = mutate_sequence(sequence, 0, 4096);
24 |         size_t out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE);
25 |         generated += out_len;
26 |         trials++;
27 |         
28 |         if ((generated % 1048576) == 0) {
29 |             clock_gettime(CLOCK_MONOTONIC, &now);
30 |             
31 |             double secs = (double) (now.tv_sec - start.tv_sec);
32 |             double amount = (double) (generated / 1048576);
33 |             printf("Generated >= %.02lf MiB/s | Avg. size: %lu\n", amount / secs, generated / trials);
34 |         }
35 |     }
36 | }
37 | 


--------------------------------------------------------------------------------
/test-data/C/fuzz_mutate.c:
--------------------------------------------------------------------------------
 1 | // clang -o fuzz_mutate -Wall -Wextra -Wpedantic -Werror -O0 -g -fsanitize=address,undefined fuzz_mutate.c /tmp/out.c
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | 
 6 | size_t mutate_sequence (void* buf, size_t len, size_t capacity);
 7 | 
 8 | int main (void) {
 9 |     while (1) {
10 |         size_t capacity = rand() % 256;
11 |         void* buf = calloc(capacity, sizeof(size_t));
12 |         
13 |         size_t len = 0;
14 |         
15 |         if (capacity > 0 && (rand() % 4) == 3) {
16 |             len = rand() % (capacity + 1);
17 |         }
18 |         
19 |         size_t new_len = mutate_sequence(buf, len, capacity);
20 |         
21 |         printf("capacity=%lu old_len=%lu new_len=%lu\n", capacity, len, new_len);
22 |         
23 |         free(buf);
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test-data/C/fuzz_unparse.c:
--------------------------------------------------------------------------------
 1 | // clang -o fuzz_unparse -Wall -Wextra -Wpedantic -Werror -O0 -g -fsanitize=address,undefined -I/tmp fuzz_unparse.c /tmp/out.c
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <string.h>
 6 | #include <assert.h>
 7 | #include <time.h>
 8 | 
 9 | #include "out.h"
10 | 
11 | #define SEQ_LEN 4096
12 | #define BUF_LEN (128 * 1024 * 1024)
13 | 
14 | int main (void) {
15 |     seed_generator(time(NULL));
16 |     size_t* generated = calloc(SEQ_LEN, sizeof(size_t));
17 |     size_t* unparsed = calloc(SEQ_LEN, sizeof(size_t));
18 |     unsigned char* output = malloc(BUF_LEN + 1);
19 |     unsigned char* output2 = malloc(BUF_LEN + 1);
20 |     size_t i = 0;
21 |     
22 |     while (1) {
23 |         printf("Iter %lu\n", i++);
24 |         
25 |         size_t gen_len = mutate_sequence(generated, 0, SEQ_LEN);
26 |         size_t out_len = serialize_sequence(generated, gen_len, output, BUF_LEN);
27 |         size_t unp_len = unparse_sequence(unparsed, SEQ_LEN, output, out_len);
28 |         size_t out2_len = serialize_sequence(unparsed, unp_len, output2, BUF_LEN);
29 |         
30 |         output[out_len] = 0;
31 |         output2[out2_len] = 0;
32 |         
33 |         if (out_len != out2_len || strcmp((const char*) output, (const char*) output2)) {
34 |             printf("out_len = %lu\n", out_len);
35 |             printf("out2_len = %lu\n", out2_len);
36 |             
37 |             printf("--- GENERATED ---\n");
38 |             printf("%s\n", output);
39 |             
40 |             printf("--- UNPARSED ---\n");
41 |             printf("%s\n", output2);
42 |             
43 |             break;
44 |         }
45 |     }
46 | }
47 | 


--------------------------------------------------------------------------------
/test-data/C/test_generation.c:
--------------------------------------------------------------------------------
 1 | // clang -o test_generation -Wall -Wextra -Wpedantic -Werror -O0 -g -fsanitize=address,undefined test_generation.c /tmp/out.c
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <assert.h>
 6 | 
 7 | size_t mutate_sequence (void* buf, size_t len, size_t capacity);
 8 | size_t serialize_sequence (size_t* seq, size_t seq_len, unsigned char* out, size_t out_len);
 9 | 
10 | #define BUF_SIZE (16 * 1024 * 1024)
11 | 
12 | int main (void) {
13 |     size_t* sequence = calloc(4096, sizeof(size_t));
14 |     unsigned char* output = malloc(BUF_SIZE);
15 |     char buf[2];
16 |     
17 |     while (1) {
18 |         size_t seq_len = mutate_sequence(sequence, 0, 4096);
19 |         size_t out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE - 1);
20 |         assert(out_len < BUF_SIZE);
21 |         
22 |         output[out_len] = 0;
23 |         printf("%s\n", output);
24 |         
25 |         fgets(buf, 2, stdin);
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/test-data/C/test_mutate.c:
--------------------------------------------------------------------------------
 1 | // clang -o test_mutate -Wall -Wextra -Wpedantic -Werror -O0 -g -fsanitize=address,undefined test_mutate.c /tmp/out.c
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <assert.h>
 6 | 
 7 | size_t mutate_sequence (void* buf, size_t len, size_t capacity);
 8 | size_t serialize_sequence (size_t* seq, size_t seq_len, unsigned char* out, size_t out_len);
 9 | 
10 | #define BUF_SIZE (16 * 1024 * 1024)
11 | 
12 | int main (void) {
13 |     size_t* sequence = calloc(4096, sizeof(size_t));
14 |     unsigned char* output = malloc(BUF_SIZE);
15 |     
16 |     // initial sequence
17 |     size_t seq_len = mutate_sequence(sequence, 0, 4096);
18 |     
19 |     size_t out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE - 1);
20 |     output[out_len] = 0;    
21 |     printf("Initial: %s\n\n", output);
22 |     
23 |     // Mutate
24 |     seq_len = mutate_sequence(sequence, seq_len / 2, 4096);
25 |     out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE - 1);
26 |     output[out_len] = 0;    
27 |     printf("Mutation #1: %s\n\n", output);
28 |     
29 |     // Mutate
30 |     seq_len = mutate_sequence(sequence, seq_len / 2, 4096);
31 |     out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE - 1);
32 |     output[out_len] = 0;    
33 |     printf("Mutation #2: %s\n\n", output);
34 |     
35 |     // Mutate
36 |     seq_len = mutate_sequence(sequence, seq_len / 2, 4096);
37 |     out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE - 1);
38 |     output[out_len] = 0;    
39 |     printf("Mutation #3: %s\n\n", output);
40 |     
41 |     free(output);
42 |     free(sequence);
43 | }
44 | 


--------------------------------------------------------------------------------
/test-data/C/test_unparse.c:
--------------------------------------------------------------------------------
 1 | // gcc -O0 -g -o test_unparse -I/tmp test_unparse.c /tmp/out.c
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <string.h>
 6 | #include <assert.h>
 7 | 
 8 | #include "out.h"
 9 | 
10 | #define SEQ_LEN 4096
11 | 
12 | unsigned char* input = "var a=((((-9223372036854775808/-1++))));\\n";
13 | 
14 | int main (void) {
15 |     size_t input_len = strlen(input);
16 |     size_t* sequence = calloc(SEQ_LEN, sizeof(size_t));
17 |     
18 |     size_t seq_len = unparse_sequence(sequence, SEQ_LEN, input, input_len);
19 |     assert(seq_len > 0);
20 |     
21 |     for (size_t i = 0; i < seq_len; ++i) {
22 |         printf("  seq[%lu] = %lu\n", i, sequence[i]);
23 |     }
24 |     
25 |     unsigned char* output = malloc(input_len);
26 |     
27 |     size_t out_len = serialize_sequence(sequence, seq_len, output, input_len);
28 |     printf("input_len=%d out_len=%d\n", input_len, out_len);
29 |     assert(out_len == input_len);
30 |     
31 |     output[out_len] = 0;
32 |     printf("%s\n", output);
33 | }
34 | 


--------------------------------------------------------------------------------
/test-data/benchmarks/.gitignore:
--------------------------------------------------------------------------------
1 | output/
2 | .cur_input*
3 | baseline
4 | nop
5 | nop-gramatron
6 | throughput
7 | perf.data
8 | 


--------------------------------------------------------------------------------
/test-data/benchmarks/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | all: baseline nop throughput
 3 | 
 4 | baseline: baseline.c
 5 | 	gcc -o $@ -O3 $<
 6 | 	
 7 | nop: nop.c
 8 | 	$(AFL_PATH)/afl-clang-lto -o $@ -O3 $<
 9 | 	
10 | throughput: throughput.c output/generator.c
11 | 	clang -g -Ofast -march=native -fomit-frame-pointer -fno-stack-protector -o $@ $^ -Ioutput
12 | 


--------------------------------------------------------------------------------
/test-data/benchmarks/README.md:
--------------------------------------------------------------------------------
 1 | # Benchmarks
 2 | 
 3 | ## Execution speed
 4 | - Baseline: 12k exec/s
 5 | - Peacock: ~8.6k exec/s (29% overhead)
 6 | - Original Gramatron: ~8.5k exec/s (30% overhead)
 7 | - LibAFL Gramatron: N/A because GramatronInput does not implement HasTargetBytes, so it cannot be used with ForkServerExecutor
 8 | 
 9 | ## Raw throughput
10 | Time to 1 GiB:
11 | - Peacock: secs=4 nsecs=763573725 => ~205 MiB/s
12 | - Original Gramatron: secs=17 nsecs=340090343 => ~60 MiB/s
13 | - LibAFL Gramatron: secs=26 nsecs=26327460838 => ~40 MiB/s
14 | 


--------------------------------------------------------------------------------
/test-data/benchmarks/baseline.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <sys/wait.h>
 4 | #include <unistd.h>
 5 | #include <time.h>
 6 | 
 7 | #define TRIALS 50000
 8 | 
 9 | int main (void) {
10 |     struct timespec start, end;
11 |     
12 |     clock_gettime(CLOCK_MONOTONIC, &start);
13 |     
14 |     for (int i = 0; i < TRIALS; ++i) {
15 |         pid_t child;
16 |         switch (child = fork()) {
17 |             case -1: return 1;
18 |             case 0: _Exit(0);
19 |             default: {
20 |                 if (waitpid(child, NULL, 0) == -1) {
21 |                     return 1;
22 |                 }
23 |             }
24 |         }
25 |     }
26 |     
27 |     clock_gettime(CLOCK_MONOTONIC, &end);
28 |     
29 |     time_t diff_sec = end.tv_sec - start.tv_sec;
30 |     
31 |     printf("exec/s: %.02lf\n", (double)TRIALS / (double)diff_sec);
32 | }
33 | 


--------------------------------------------------------------------------------
/test-data/benchmarks/gramatron-patch:
--------------------------------------------------------------------------------
 1 | diff --git a/src/gramfuzz-mutator/test.c b/src/gramfuzz-mutator/test.c
 2 | index de00f9a..ed1fb7b 100644
 3 | --- a/src/gramfuzz-mutator/test.c
 4 | +++ b/src/gramfuzz-mutator/test.c
 5 | @@ -1,5 +1,6 @@
 6 |  /* This is the testing module for Gramatron
 7 |   */
 8 | +#include <time.h>
 9 |  #include "afl-fuzz.h"
10 |  #define NUMINPUTS 50 
11 |  
12 | @@ -99,6 +100,34 @@ void SanityCheck(char *automaton_path) {
13 |      }
14 |  }
15 |  
16 | +void Benchmark(char* automaton_path) {
17 | +    state* pda = create_pda(NULL, automaton_path);
18 | +    size_t generated = 0;
19 | +    struct timespec start, end;
20 | +
21 | +    clock_gettime(CLOCK_MONOTONIC, &start);
22 | +
23 | +    while (generated < 1 * 1024 * 1024 * 1024) {
24 | +	Array* walk = gen_input(pda, NULL);
25 | +	char* input = unparse_walk(walk);
26 | +	generated += walk->inputlen;
27 | +	free(walk->start);
28 | +	free(walk);
29 | +	free(input);
30 | +    }
31 | +
32 | +    clock_gettime(CLOCK_MONOTONIC, &end);
33 | +
34 | +    time_t secs = end.tv_sec - start.tv_sec;
35 | +    long  nsecs = end.tv_nsec - start.tv_nsec;
36 | +    
37 | +    if (nsecs < 0) {
38 | +        secs -= 1;
39 | +        nsecs += 1000000000;
40 | +    }
41 | +    
42 | +    printf("secs=%lu nsecs=%ld\n", secs, nsecs);
43 | +}
44 |  
45 |  int main(int argc, char*argv[]) {
46 |      char *mode;
47 | @@ -121,6 +150,9 @@ int main(int argc, char*argv[]) {
48 |      if (! strcmp(mode, "SanityCheck")) {
49 |          SanityCheck(automaton_path);
50 |      }
51 | +    else if (! strcmp(mode, "Benchmark")) {
52 | +    	Benchmark(automaton_path);
53 | +    }	
54 |      else {
55 |          printf("\nUnrecognized mode");
56 |          return -1;
57 | 


--------------------------------------------------------------------------------
/test-data/benchmarks/grammar.json:
--------------------------------------------------------------------------------
   1 | {
   2 |     "ARGS": [
   3 |         "VAR",
   4 |         "VAR ',' ARGS",
   5 |         "' '"
   6 |     ],
   7 |     "IDENTIFIER": [
   8 |         "'abcdef0123456789ABCDEF'",
   9 |         "'abcdefghijklmnopqrstuvwxyz'",
  10 |         "'abort'",
  11 |         "'abs'",
  12 |         "'accept'",
  13 |         "'acos'",
  14 |         "'acosh'",
  15 |         "'address'",
  16 |         "'alias'",
  17 |         "'alias_method'",
  18 |         "'allocation'",
  19 |         "'all_symbols'",
  20 |         "'ancestors'",
  21 |         "'and'",
  22 |         "'anum'",
  23 |         "'append'",
  24 |         "'append_features'",
  25 |         "'Apr'",
  26 |         "'aref_args'",
  27 |         "'arg'",
  28 |         "'arg0'",
  29 |         "'arg1'",
  30 |         "'arg2'",
  31 |         "'arg_rhs'",
  32 |         "'args'",
  33 |         "'argument'",
  34 |         "'ArgumentError'",
  35 |         "'arguments'",
  36 |         "'argv'",
  37 |         "'ARGV'",
  38 |         "'arity'",
  39 |         "'array'",
  40 |         "'Array'",
  41 |         "'ary'",
  42 |         "'__ary_cmp'",
  43 |         "'ary_concat'",
  44 |         "'__ary_eq'",
  45 |         "'ary_F'",
  46 |         "'__ary_index'",
  47 |         "'ary_replace'",
  48 |         "'ary_T'",
  49 |         "'asctime'",
  50 |         "'asin'",
  51 |         "'asinh'",
  52 |         "'__assert_fail'",
  53 |         "'assignment'",
  54 |         "'assoc'",
  55 |         "'assoc_list'",
  56 |         "'assocs'",
  57 |         "'assumed'",
  58 |         "'at'",
  59 |         "'atan'",
  60 |         "'atan2'",
  61 |         "'atanh'",
  62 |         "'__attached__'",
  63 |         "'attr'",
  64 |         "'attr_accessor'",
  65 |         "'attr_reader'",
  66 |         "'attrsym'",
  67 |         "'attr_writer'",
  68 |         "'available'",
  69 |         "'backref'",
  70 |         "'backtrace'",
  71 |         "'Backtrace'",
  72 |         "'BasicObject'",
  73 |         "'basic_symbol'",
  74 |         "'beg'",
  75 |         "'begin'",
  76 |         "'BEGIN'",
  77 |         "'big'",
  78 |         "'BIT'",
  79 |         "'blkarg_mark'",
  80 |         "'block'",
  81 |         "'block_arg'",
  82 |         "'block_call'",
  83 |         "'block_command'",
  84 |         "'block_param'",
  85 |         "'block_param_def'",
  86 |         "'BMATZ0000IREP'",
  87 |         "'body'",
  88 |         "'bodystmt'",
  89 |         "'boundary'",
  90 |         "'brace_block'",
  91 |         "'break'",
  92 |         "'bsearch'",
  93 |         "'bsearch_index'",
  94 |         "'buf'",
  95 |         "'bvar'",
  96 |         "'bv_decls'",
  97 |         "'byte'",
  98 |         "'bytes'",
  99 |         "'bytesize'",
 100 |         "'byteslice'",
 101 |         "'call'",
 102 |         "'call_args'",
 103 |         "'caller'",
 104 |         "'call_op'",
 105 |         "'call_op2'",
 106 |         "'capitalize'",
 107 |         "'case'",
 108 |         "'case_body'",
 109 |         "'casecmp'",
 110 |         "'__case_eqq'",
 111 |         "'cases'",
 112 |         "'cbrt'",
 113 |         "'cdr'",
 114 |         "'ceil'",
 115 |         "'change_gen_gc_mode'",
 116 |         "'character'",
 117 |         "'chars'",
 118 |         "'chomp'",
 119 |         "'chop'",
 120 |         "'chr'",
 121 |         "'clamp'",
 122 |         "'Class'",
 123 |         "'class_eval'",
 124 |         "'__classname__'",
 125 |         "'class_variable_get'",
 126 |         "'class_variables'",
 127 |         "'class_variable_set'",
 128 |         "'clause'",
 129 |         "'clear_all_old'",
 130 |         "'clone'",
 131 |         "'closure'",
 132 |         "'cLVAR'",
 133 |         "'cmd_brace_block'",
 134 |         "'cmp'",
 135 |         "'cname'",
 136 |         "'codegen'",
 137 |         "'codepoints'",
 138 |         "'collect'",
 139 |         "'collect_concat'",
 140 |         "'color'",
 141 |         "'column_count'",
 142 |         "'column_index'",
 143 |         "'combination'",
 144 |         "'comma'",
 145 |         "'command'",
 146 |         "'command_args'",
 147 |         "'command_asgn'",
 148 |         "'command_call'",
 149 |         "'command_rhs'",
 150 |         "'compact'",
 151 |         "'Comparable'",
 152 |         "'compile'",
 153 |         "'compstmt'",
 154 |         "'concat'",
 155 |         "'constant'",
 156 |         "'CONSTANT'",
 157 |         "'constants'",
 158 |         "'const_get'",
 159 |         "'const_missing'",
 160 |         "'const_set'",
 161 |         "'cont'",
 162 |         "'context'",
 163 |         "'copyright'",
 164 |         "'corrupted'",
 165 |         "'cos'",
 166 |         "'cosh'",
 167 |         "'count'",
 168 |         "'count_objects'",
 169 |         "'cpath'",
 170 |         "'ctime'",
 171 |         "'__ctype_b_loc'",
 172 |         "'curr'",
 173 |         "'current'",
 174 |         "'curry'",
 175 |         "'cycle'",
 176 |         "'Data'",
 177 |         "'day'",
 178 |         "'debug_info'",
 179 |         "'Dec'",
 180 |         "'deep'",
 181 |         "'def'",
 182 |         "'default'",
 183 |         "'DEFAULT'",
 184 |         "'default_proc'",
 185 |         "'defined'",
 186 |         "'define_method'",
 187 |         "'define_singleton_method'",
 188 |         "'__delete'",
 189 |         "'delete'",
 190 |         "'delete_at'",
 191 |         "'delete_if'",
 192 |         "'delete_prefix'",
 193 |         "'delete_suffix'",
 194 |         "'Deleting'",
 195 |         "'depth'",
 196 |         "'detect'",
 197 |         "'detected'",
 198 |         "'developers'",
 199 |         "'differs'",
 200 |         "'digit'",
 201 |         "'digits'",
 202 |         "'disable'",
 203 |         "'disabled'",
 204 |         "'discarding'",
 205 |         "'div'",
 206 |         "'divmod'",
 207 |         "'do'",
 208 |         "'do_block'",
 209 |         "'DomainError'",
 210 |         "'dot'",
 211 |         "'dot_or_colon'",
 212 |         "'downcase'",
 213 |         "'downto'",
 214 |         "'drop'",
 215 |         "'dropped'",
 216 |         "'dropping'",
 217 |         "'drop_while'",
 218 |         "'dump'",
 219 |         "'dup'",
 220 |         "'each'",
 221 |         "'each_byte'",
 222 |         "'each_char'",
 223 |         "'each_codepoint'",
 224 |         "'each_cons'",
 225 |         "'each_index'",
 226 |         "'each_key'",
 227 |         "'each_line'",
 228 |         "'each_object'",
 229 |         "'each_pair'",
 230 |         "'each_slice'",
 231 |         "'each_value'",
 232 |         "'each_with_index'",
 233 |         "'each_with_object'",
 234 |         "'ecall'",
 235 |         "'elem'",
 236 |         "'else'",
 237 |         "'elsif'",
 238 |         "'en'",
 239 |         "'enable'",
 240 |         "'__ENCODING__'",
 241 |         "'end'",
 242 |         "'__END__'",
 243 |         "'END'",
 244 |         "'ensure'",
 245 |         "'entries'",
 246 |         "'Enumerable'",
 247 |         "'enumerator'",
 248 |         "'Enumerator'",
 249 |         "'enumerator_block_call'",
 250 |         "'enum_for'",
 251 |         "'enums'",
 252 |         "'env'",
 253 |         "'erf'",
 254 |         "'erfc'",
 255 |         "'__errno_location'",
 256 |         "'error'",
 257 |         "'escape'",
 258 |         "'ETIR'",
 259 |         "'ETIR0004Ci'",
 260 |         "'exception'",
 261 |         "'Exception'",
 262 |         "'exc_list'",
 263 |         "'exc_var'",
 264 |         "'exhausted'",
 265 |         "'exp'",
 266 |         "'expected'",
 267 |         "'expr'",
 268 |         "'expression'",
 269 |         "'expr_value'",
 270 |         "'extend'",
 271 |         "'extended'",
 272 |         "'extend_object'",
 273 |         "'fail'",
 274 |         "'failed'",
 275 |         "'failure'",
 276 |         "'false'",
 277 |         "'FalseClass'",
 278 |         "'f_arg'",
 279 |         "'f_arg_item'",
 280 |         "'f_arglist'",
 281 |         "'f_args'",
 282 |         "'f_bad_arg'",
 283 |         "'f_block_arg'",
 284 |         "'f_block_opt'",
 285 |         "'f_block_optarg'",
 286 |         "'fclose'",
 287 |         "'Feb'",
 288 |         "'feed'",
 289 |         "'feedvalue'",
 290 |         "'feof'",
 291 |         "'fetch'",
 292 |         "'fetch_values'",
 293 |         "'fflush'",
 294 |         "'fgetc'",
 295 |         "'fib'",
 296 |         "'fiber'",
 297 |         "'Fiber'",
 298 |         "'fiber_check'",
 299 |         "'FiberError'",
 300 |         "'field'",
 301 |         "'file'",
 302 |         "'File'",
 303 |         "'__FILE__'",
 304 |         "'filename'",
 305 |         "'filenames_len'",
 306 |         "'fill'",
 307 |         "'final_marking_phase'",
 308 |         "'find'",
 309 |         "'find_all'",
 310 |         "'find_index'",
 311 |         "'first'",
 312 |         "'fish'",
 313 |         "'Fixnum'",
 314 |         "'flag'",
 315 |         "'f_larglist'",
 316 |         "'flat_map'",
 317 |         "'flatten'",
 318 |         "'Float'",
 319 |         "'FloatDomainError'",
 320 |         "'floor'",
 321 |         "'f_marg'",
 322 |         "'f_marg_list'",
 323 |         "'f_margs'",
 324 |         "'fmod'",
 325 |         "'fn'",
 326 |         "'Fn'",
 327 |         "'fname'",
 328 |         "'f_norm_arg'",
 329 |         "'fopen'",
 330 |         "'f_opt'",
 331 |         "'f_optarg'",
 332 |         "'f_opt_asgn'",
 333 |         "'for'",
 334 |         "'force'",
 335 |         "'format'",
 336 |         "'for_var'",
 337 |         "'found'",
 338 |         "'fprintf'",
 339 |         "'fputc'",
 340 |         "'fread'",
 341 |         "'free'",
 342 |         "'FREE'",
 343 |         "'freeze'",
 344 |         "'f_rest_arg'",
 345 |         "'frexp'",
 346 |         "'Fri'",
 347 |         "'FrozenError'",
 348 |         "'FsC'",
 349 |         "'fsym'",
 350 |         "'fwrite'",
 351 |         "'games'",
 352 |         "'GB'",
 353 |         "'GC'",
 354 |         "'gc_mark_children'",
 355 |         "'_gc_root_'",
 356 |         "'generational_mode'",
 357 |         "'Generator'",
 358 |         "'getbyte'",
 359 |         "'get_file'",
 360 |         "'getgm'",
 361 |         "'getlocal'",
 362 |         "'gettimeofday'",
 363 |         "'getutc'",
 364 |         "'given'",
 365 |         "'given_args'",
 366 |         "'global_variables'",
 367 |         "'__gmon_start__'",
 368 |         "'gmtime'",
 369 |         "'gmtime_r'",
 370 |         "'gn'",
 371 |         "'gnu'",
 372 |         "'GNU'",
 373 |         "'go'",
 374 |         "'grep'",
 375 |         "'group_by'",
 376 |         "'gsub'",
 377 |         "'h0'",
 378 |         "'h2'",
 379 |         "'H3'",
 380 |         "'h4'",
 381 |         "'h5'",
 382 |         "'H5'",
 383 |         "'h6'",
 384 |         "'H6'",
 385 |         "'h7'",
 386 |         "'h8'",
 387 |         "'hA'",
 388 |         "'hash'",
 389 |         "'Hash'",
 390 |         "'head'",
 391 |         "'heredoc'",
 392 |         "'heredoc_bodies'",
 393 |         "'heredoc_body'",
 394 |         "'heredoc_string_interp'",
 395 |         "'heredoc_string_rep'",
 396 |         "'heredoc_treat_nextline'",
 397 |         "'hex'",
 398 |         "'high'",
 399 |         "'hour'",
 400 |         "'hypot'",
 401 |         "'i2'",
 402 |         "'iClass'",
 403 |         "'__id__'",
 404 |         "'id2name'",
 405 |         "'identifier'",
 406 |         "'idx'",
 407 |         "'idx2'",
 408 |         "'if'",
 409 |         "'ifnone'",
 410 |         "'if_tail'",
 411 |         "'implemented'",
 412 |         "'in'",
 413 |         "'include'",
 414 |         "'included'",
 415 |         "'included_modules'",
 416 |         "'incremental_gc'",
 417 |         "'index'",
 418 |         "'IndexError'",
 419 |         "'inf'",
 420 |         "'Inf'",
 421 |         "'INF'",
 422 |         "'Infinity'",
 423 |         "'INFINITY'",
 424 |         "'inherited'",
 425 |         "'initialize'",
 426 |         "'initialize_copy'",
 427 |         "'inject'",
 428 |         "'in_lower_half'",
 429 |         "'input'",
 430 |         "'insert'",
 431 |         "'_inspect'",
 432 |         "'inspect'",
 433 |         "'instance_eval'",
 434 |         "'instance_exec'",
 435 |         "'instance_methods'",
 436 |         "'instance_variable_get'",
 437 |         "'instance_variables'",
 438 |         "'instance_variable_set'",
 439 |         "'int'",
 440 |         "'integer'",
 441 |         "'Integer'",
 442 |         "'Integral'",
 443 |         "'intern'",
 444 |         "'interval_ratio'",
 445 |         "'invert'",
 446 |         "'io'",
 447 |         "'Io'",
 448 |         "'_IO_putc'",
 449 |         "'ip'",
 450 |         "'Ip'",
 451 |         "'irep'",
 452 |         "'IREP'",
 453 |         "'isz'",
 454 |         "'iterate'",
 455 |         "'_ITM_deregisterTMCloneTable'",
 456 |         "'_ITM_registerTMCloneTable'",
 457 |         "'itself'",
 458 |         "'Jan'",
 459 |         "'join'",
 460 |         "'_Jv_RegisterClasses'",
 461 |         "'keep_if'",
 462 |         "'Kernel'",
 463 |         "'key'",
 464 |         "'KeyError'",
 465 |         "'keys'",
 466 |         "'keyword_alias'",
 467 |         "'keyword_and'",
 468 |         "'keyword_begin'",
 469 |         "'keyword_BEGIN'",
 470 |         "'keyword_break'",
 471 |         "'keyword_case'",
 472 |         "'keyword_class'",
 473 |         "'keyword_def'",
 474 |         "'keyword_do'",
 475 |         "'keyword_do_block'",
 476 |         "'keyword_do_cond'",
 477 |         "'keyword_do_LAMBDA'",
 478 |         "'keyword_else'",
 479 |         "'keyword_elsif'",
 480 |         "'keyword__ENCODING__'",
 481 |         "'keyword_end'",
 482 |         "'keyword_END'",
 483 |         "'keyword_ensure'",
 484 |         "'keyword_false'",
 485 |         "'keyword__FILE__'",
 486 |         "'keyword_for'",
 487 |         "'keyword_if'",
 488 |         "'keyword_in'",
 489 |         "'keyword__LINE__'",
 490 |         "'keyword_module'",
 491 |         "'keyword_next'",
 492 |         "'keyword_nil'",
 493 |         "'keyword_not'",
 494 |         "'keyword_or'",
 495 |         "'keyword_redo'",
 496 |         "'keyword_rescue'",
 497 |         "'keyword_retry'",
 498 |         "'keyword_return'",
 499 |         "'keyword_self'",
 500 |         "'keyword_super'",
 501 |         "'keyword_then'",
 502 |         "'keyword_true'",
 503 |         "'keyword_undef'",
 504 |         "'keyword_unless'",
 505 |         "'keyword_until'",
 506 |         "'keyword_when'",
 507 |         "'keyword_while'",
 508 |         "'keyword_yield'",
 509 |         "'kh_del_ht'",
 510 |         "'kh_del_iv'",
 511 |         "'kh_del_mt'",
 512 |         "'kh_del_n2s'",
 513 |         "'kh_del_st'",
 514 |         "'KLVAR'",
 515 |         "'lambda'",
 516 |         "'lambda_body'",
 517 |         "'last'",
 518 |         "'lazy'",
 519 |         "'Lazy'",
 520 |         "'LC'",
 521 |         "'ld'",
 522 |         "'LD'",
 523 |         "'ldexp'",
 524 |         "'left'",
 525 |         "'len'",
 526 |         "'length'",
 527 |         "'level'",
 528 |         "'lfD'",
 529 |         "'lhs'",
 530 |         "'__libc_start_main'",
 531 |         "'LII'",
 532 |         "'lIJ'",
 533 |         "'lim'",
 534 |         "'line'",
 535 |         "'__LINE__'",
 536 |         "'LINE'",
 537 |         "'lines'",
 538 |         "'literal'",
 539 |         "'literals'",
 540 |         "'live_after_mark'",
 541 |         "'ljust'",
 542 |         "'ln'",
 543 |         "'Ln'",
 544 |         "'lo'",
 545 |         "'local'",
 546 |         "'LOCAL'",
 547 |         "'LocalJumpError'",
 548 |         "'localtime'",
 549 |         "'localtime_r'",
 550 |         "'local_variables'",
 551 |         "'log'",
 552 |         "'log10'",
 553 |         "'log2'",
 554 |         "'long'",
 555 |         "'longjmp'",
 556 |         "'lookahead'",
 557 |         "'loop'",
 558 |         "'low'",
 559 |         "'lround'",
 560 |         "'LS'",
 561 |         "'lstrip'",
 562 |         "'LVAR'",
 563 |         "'machine'",
 564 |         "'main'",
 565 |         "'make_curry'",
 566 |         "'map'",
 567 |         "'match'",
 568 |         "'matched'",
 569 |         "'Math'",
 570 |         "'max'",
 571 |         "'max_by'",
 572 |         "'max_cmp'",
 573 |         "'May'",
 574 |         "'mday'",
 575 |         "'member'",
 576 |         "'__members__'",
 577 |         "'members'",
 578 |         "'memchr'",
 579 |         "'memcmp'",
 580 |         "'memcpy'",
 581 |         "'memmove'",
 582 |         "'memory'",
 583 |         "'memset'",
 584 |         "'merge'",
 585 |         "'mesg'",
 586 |         "'message'",
 587 |         "'meth'",
 588 |         "'__method__'",
 589 |         "'method'",
 590 |         "'method_call'",
 591 |         "'method_missing'",
 592 |         "'method_removed'",
 593 |         "'methods'",
 594 |         "'mid'",
 595 |         "'min'",
 596 |         "'min_by'",
 597 |         "'min_cmp'",
 598 |         "'minmax'",
 599 |         "'minmax_by'",
 600 |         "'mktime'",
 601 |         "'mlhs_basic'",
 602 |         "'mlhs_inner'",
 603 |         "'mlhs_item'",
 604 |         "'mlhs_list'",
 605 |         "'mlhs_node'",
 606 |         "'mlhs_post'",
 607 |         "'mode'",
 608 |         "'modified'",
 609 |         "'modifier_if'",
 610 |         "'modifier_rescue'",
 611 |         "'modifier_unless'",
 612 |         "'modifier_until'",
 613 |         "'modifier_while'",
 614 |         "'module'",
 615 |         "'Module'",
 616 |         "'module_eval'",
 617 |         "'module_function'",
 618 |         "'modules'",
 619 |         "'mon'",
 620 |         "'Mon'",
 621 |         "'month'",
 622 |         "'mrb_ary_delete_at'",
 623 |         "'mrb_ary_new_from_values'",
 624 |         "'mrb_ary_plus'",
 625 |         "'mrb_ary_pop'",
 626 |         "'mrb_ary_push'",
 627 |         "'mrb_ary_push_m'",
 628 |         "'mrb_ary_resize'",
 629 |         "'mrb_ary_reverse'",
 630 |         "'mrb_ary_set'",
 631 |         "'mrb_ary_shift'",
 632 |         "'mrb_ary_splice'",
 633 |         "'mrb_ary_times'",
 634 |         "'mrb_ary_unshift'",
 635 |         "'mrb_ary_unshift_m'",
 636 |         "'mrb_assoc_new'",
 637 |         "'mrb_data_init'",
 638 |         "'mrb_debug_get_line'",
 639 |         "'mrb_debug_info_alloc'",
 640 |         "'mrb_debug_info_append_file'",
 641 |         "'mrb_debug_info_free'",
 642 |         "'mrb_field_write_barrier'",
 643 |         "'mrb_gc_mark'",
 644 |         "'MRB_GC_STATE_ROOT'",
 645 |         "'MRB_GC_STATE_SWEEP'",
 646 |         "'mrb_gc_unregister'",
 647 |         "'mrb_i_mt_state'",
 648 |         "'mrb_incremental_gc'",
 649 |         "'mrb_malloc'",
 650 |         "'mrb_mod_s_nesting'",
 651 |         "'mrb_obj_value'",
 652 |         "'mrb_random_init'",
 653 |         "'mrb_random_srand'",
 654 |         "'mrb_realloc'",
 655 |         "'mrb_str_format'",
 656 |         "'MRB_TT_DATA'",
 657 |         "'MRB_TT_FIBER'",
 658 |         "'MRB_TT_FREE'",
 659 |         "'mrb_vm_const_get'",
 660 |         "'mrb_vm_exec'",
 661 |         "'mrb_write_barrier'",
 662 |         "'mrhs'",
 663 |         "'mruby'",
 664 |         "'MRUBY_COPYRIGHT'",
 665 |         "'MRUBY_DESCRIPTION'",
 666 |         "'MRUBY_RELEASE_DATE'",
 667 |         "'MRUBY_RELEASE_NO'",
 668 |         "'MRUBY_VERSION'",
 669 |         "'name'",
 670 |         "'named'",
 671 |         "'NameError'",
 672 |         "'names'",
 673 |         "'nan'",
 674 |         "'NaN'",
 675 |         "'NAN'",
 676 |         "'nesting'",
 677 |         "'new'",
 678 |         "'new_args'",
 679 |         "'new_key'",
 680 |         "'new_msym'",
 681 |         "'next'",
 682 |         "'next_values'",
 683 |         "'nil'",
 684 |         "'NilClass'",
 685 |         "'nl'",
 686 |         "'nlocals'",
 687 |         "'nLVAR'",
 688 |         "'nMATZ0000IREP'",
 689 |         "'NODE_DREGX'",
 690 |         "'NODE_DSTR'",
 691 |         "'NODE_DXSTR'",
 692 |         "'NODE_FALSE'",
 693 |         "'NODE_NEGATE'",
 694 |         "'NODE_NIL'",
 695 |         "'NODE_REDO'",
 696 |         "'NODE_RETRY'",
 697 |         "'NODE_SELF'",
 698 |         "'NODE_TRUE'",
 699 |         "'NODE_UNDEF'",
 700 |         "'NODE_ZSUPER'",
 701 |         "'NoMemoryError'",
 702 |         "'NoMethodError'",
 703 |         "'none'",
 704 |         "'NONE'",
 705 |         "'norm'",
 706 |         "'not'",
 707 |         "'NotImplementedError'",
 708 |         "'Nov'",
 709 |         "'now'",
 710 |         "'Np'",
 711 |         "'nregs'",
 712 |         "'num'",
 713 |         "'number'",
 714 |         "'numbered'",
 715 |         "'numeric'",
 716 |         "'Numeric'",
 717 |         "'obj'",
 718 |         "'object'",
 719 |         "'Object'",
 720 |         "'object_id'",
 721 |         "'ObjectSpace'",
 722 |         "'oct'",
 723 |         "'Oct'",
 724 |         "'offset'",
 725 |         "'on'",
 726 |         "'On'",
 727 |         "'only'",
 728 |         "'Oo'",
 729 |         "'op'",
 730 |         "'Op'",
 731 |         "'operation'",
 732 |         "'operation2'",
 733 |         "'operation3'",
 734 |         "'OP_NOP'",
 735 |         "'OP_STOP'",
 736 |         "'opt_block_arg'",
 737 |         "'opt_block_param'",
 738 |         "'opt_bv_decl'",
 739 |         "'opt_call_args'",
 740 |         "'opt_else'",
 741 |         "'opt_ensure'",
 742 |         "'opt_f_block_arg'",
 743 |         "'opt_nl'",
 744 |         "'opt_paren_args'",
 745 |         "'opt_rescue'",
 746 |         "'opt_terms'",
 747 |         "'or'",
 748 |         "'ord'",
 749 |         "'orig'",
 750 |         "'other'",
 751 |         "'__outer__'",
 752 |         "'P9o'",
 753 |         "'padding'",
 754 |         "'pad_repetitions'",
 755 |         "'padstr'",
 756 |         "'parameters'",
 757 |         "'paren_args'",
 758 |         "'partition'",
 759 |         "'pattern'",
 760 |         "'PC'",
 761 |         "'peek'",
 762 |         "'peek_values'",
 763 |         "'permutation'",
 764 |         "'plen'",
 765 |         "'point'",
 766 |         "'pop'",
 767 |         "'popping'",
 768 |         "'pos'",
 769 |         "'posnum'",
 770 |         "'post'",
 771 |         "'pow'",
 772 |         "'pp'",
 773 |         "'pproc'",
 774 |         "'pre'",
 775 |         "'precision'",
 776 |         "'prefix'",
 777 |         "'prepend'",
 778 |         "'prepended'",
 779 |         "'prepend_features'",
 780 |         "'primary'",
 781 |         "'primary_value'",
 782 |         "'print'",
 783 |         "'printf'",
 784 |         "'__printstr__'",
 785 |         "'private'",
 786 |         "'private_methods'",
 787 |         "'prl'",
 788 |         "'proc'",
 789 |         "'Proc'",
 790 |         "'program'",
 791 |         "'protected'",
 792 |         "'protected_methods'",
 793 |         "'ps'",
 794 |         "'public'",
 795 |         "'public_methods'",
 796 |         "'push'",
 797 |         "'putchar'",
 798 |         "'puts'",
 799 |         "'quo'",
 800 |         "'raise'",
 801 |         "'rand'",
 802 |         "'Random'",
 803 |         "'range'",
 804 |         "'Range'",
 805 |         "'RangeError'",
 806 |         "'rassoc'",
 807 |         "'rb'",
 808 |         "'RB'",
 809 |         "'rbracket'",
 810 |         "'RC'",
 811 |         "'read_debug_record'",
 812 |         "'readint_mrb_int'",
 813 |         "'read_irep_record_1'",
 814 |         "'read_lv_record'",
 815 |         "'read_section_debug'",
 816 |         "'read_section_lv'",
 817 |         "'realloc'",
 818 |         "'redo'",
 819 |         "'reduce'",
 820 |         "'reg'",
 821 |         "'regexp'",
 822 |         "'Regexp'",
 823 |         "'RegexpError'",
 824 |         "'rehash'",
 825 |         "'reject'",
 826 |         "'remove_class_variable'",
 827 |         "'remove_const'",
 828 |         "'remove_instance_variable'",
 829 |         "'remove_method'",
 830 |         "'replace'",
 831 |         "'req'",
 832 |         "'required'",
 833 |         "'res'",
 834 |         "'rescue'",
 835 |         "'resize_capa'",
 836 |         "'rest'",
 837 |         "'restarg_mark'",
 838 |         "'result'",
 839 |         "'resume'",
 840 |         "'reswords'",
 841 |         "'ret'",
 842 |         "'retry'",
 843 |         "'return'",
 844 |         "'reverse'",
 845 |         "'reverse_each'",
 846 |         "'rewind'",
 847 |         "'right'",
 848 |         "'rindex'",
 849 |         "'rjust'",
 850 |         "'rotate'",
 851 |         "'round'",
 852 |         "'row'",
 853 |         "'rparen'",
 854 |         "'rpartition'",
 855 |         "'rs_len'",
 856 |         "'rstrip'",
 857 |         "'RUBY_ENGINE'",
 858 |         "'RUBY_ENGINE_VERSION'",
 859 |         "'RUBY_VERSION'",
 860 |         "'RuntimeError'",
 861 |         "'sample'",
 862 |         "'Sat'",
 863 |         "'satisfied'",
 864 |         "'scan'",
 865 |         "'SClass'",
 866 |         "'scope'",
 867 |         "'scope_new'",
 868 |         "'script'",
 869 |         "'ScriptError'",
 870 |         "'sec'",
 871 |         "'select'",
 872 |         "'self'",
 873 |         "'self_arity'",
 874 |         "'__send__'",
 875 |         "'send'",
 876 |         "'sep'",
 877 |         "'Sep'",
 878 |         "'sequence'",
 879 |         "'set'",
 880 |         "'set_backtrace'",
 881 |         "'setbyte'",
 882 |         "'_setjmp'",
 883 |         "'shift'",
 884 |         "'shuffle'",
 885 |         "'sin'",
 886 |         "'singleton'",
 887 |         "'singleton_class'",
 888 |         "'singleton_methods'",
 889 |         "'sinh'",
 890 |         "'size'",
 891 |         "'sl'",
 892 |         "'slice'",
 893 |         "'snprintf'",
 894 |         "'so'",
 895 |         "'So'",
 896 |         "'sort'",
 897 |         "'sort_by'",
 898 |         "'__sort_sub__'",
 899 |         "'source_location'",
 900 |         "'Sp'",
 901 |         "'spaces'",
 902 |         "'specifier'",
 903 |         "'splice'",
 904 |         "'split'",
 905 |         "'sprintf'",
 906 |         "'sqrt'",
 907 |         "'srand'",
 908 |         "'__stack_chk_fail'",
 909 |         "'StandardError'",
 910 |         "'start'",
 911 |         "'state'",
 912 |         "'stderr'",
 913 |         "'stdin'",
 914 |         "'stdout'",
 915 |         "'step'",
 916 |         "'step_ratio'",
 917 |         "'stmt'",
 918 |         "'stmts'",
 919 |         "'stop_exc'",
 920 |         "'StopIteration'",
 921 |         "'store'",
 922 |         "'str'",
 923 |         "'str2'",
 924 |         "'strchr'",
 925 |         "'strcmp'",
 926 |         "'str_each'",
 927 |         "'string'",
 928 |         "'String'",
 929 |         "'string_interp'",
 930 |         "'string_rep'",
 931 |         "'strip'",
 932 |         "'strlen'",
 933 |         "'str_make_shared'",
 934 |         "'strncmp'",
 935 |         "'strncpy'",
 936 |         "'strtoul'",
 937 |         "'struct'",
 938 |         "'Struct'",
 939 |         "'sub'",
 940 |         "'__sub_replace'",
 941 |         "'succ'",
 942 |         "'Sun'",
 943 |         "'super'",
 944 |         "'superclass'",
 945 |         "'supported'",
 946 |         "'__svalue'",
 947 |         "'SVD'",
 948 |         "'swapcase'",
 949 |         "'sym'",
 950 |         "'symbol'",
 951 |         "'Symbol'",
 952 |         "'symbols'",
 953 |         "'sym_inspect'",
 954 |         "'syntax'",
 955 |         "'SyntaxError'",
 956 |         "'_sys_fail'",
 957 |         "'SystemCallError'",
 958 |         "'SystemStackError'",
 959 |         "'TA'",
 960 |         "'tail'",
 961 |         "'take'",
 962 |         "'taken'",
 963 |         "'take_while'",
 964 |         "'tAMPER'",
 965 |         "'tan'",
 966 |         "'tANDDOT'",
 967 |         "'tANDOP'",
 968 |         "'tanh'",
 969 |         "'tap'",
 970 |         "'tAREF'",
 971 |         "'T_ARRAY'",
 972 |         "'tASET'",
 973 |         "'tASSOC'",
 974 |         "'TB'",
 975 |         "'tBACK_REF'",
 976 |         "'TbG'",
 977 |         "'T_CLASS'",
 978 |         "'tCMP'",
 979 |         "'tCOLON2'",
 980 |         "'tCOLON3'",
 981 |         "'tCONSTANT'",
 982 |         "'T_CPTR'",
 983 |         "'tCVAR'",
 984 |         "'T_DATA'",
 985 |         "'tDOT2'",
 986 |         "'tDOT3'",
 987 |         "'TeD'",
 988 |         "'T_ENV'",
 989 |         "'tEQ'",
 990 |         "'tEQQ'",
 991 |         "'term'",
 992 |         "'terms'",
 993 |         "'T_EXCEPTION'",
 994 |         "'T_FALSE'",
 995 |         "'T_FIBER'",
 996 |         "'tFID'",
 997 |         "'T_FILE'",
 998 |         "'T_FIXNUM'",
 999 |         "'tFLOAT'",
1000 |         "'T_FLOAT'",
1001 |         "'T_FREE'",
1002 |         "'tGEQ'",
1003 |         "'tGVAR'",
1004 |         "'T_HASH'",
1005 |         "'tHD_LITERAL_DELIM'",
1006 |         "'tHD_STRING_MID'",
1007 |         "'tHD_STRING_PART'",
1008 |         "'then'",
1009 |         "'tHEREDOC_BEG'",
1010 |         "'tHEREDOC_END'",
1011 |         "'this'",
1012 |         "'T_ICLASS'",
1013 |         "'tIDENTIFIER'",
1014 |         "'time'",
1015 |         "'Time'",
1016 |         "'times'",
1017 |         "'tINTEGER'",
1018 |         "'tIVAR'",
1019 |         "'tLABEL'",
1020 |         "'tLABEL_END'",
1021 |         "'tLAMBDA'",
1022 |         "'tLAMBEG'",
1023 |         "'tLAST_TOKEN'",
1024 |         "'tLBRACE'",
1025 |         "'tLBRACE_ARG'",
1026 |         "'tLBRACK'",
1027 |         "'tLEQ'",
1028 |         "'tLITERAL_DELIM'",
1029 |         "'tLOWEST'",
1030 |         "'tLPAREN'",
1031 |         "'tLPAREN_ARG'",
1032 |         "'tLSHFT'",
1033 |         "'tMATCH'",
1034 |         "'T_MODULE'",
1035 |         "'tmp'",
1036 |         "'tNEQ'",
1037 |         "'tNMATCH'",
1038 |         "'tNTH_REF'",
1039 |         "'to_ary'",
1040 |         "'T_OBJECT'",
1041 |         "'to_enum'",
1042 |         "'to_h'",
1043 |         "'to_hash'",
1044 |         "'to_i'",
1045 |         "'to_int'",
1046 |         "'TOJ'",
1047 |         "'TOLERANCE'",
1048 |         "'tolower'",
1049 |         "'tOP_ASGN'",
1050 |         "'top_compstmt'",
1051 |         "'to_proc'",
1052 |         "'top_stmt'",
1053 |         "'top_stmts'",
1054 |         "'tOROP'",
1055 |         "'to_s'",
1056 |         "'to_str'",
1057 |         "'to_sym'",
1058 |         "'TOTAL'",
1059 |         "'toupper'",
1060 |         "'tPOW'",
1061 |         "'T_PROC'",
1062 |         "'trailer'",
1063 |         "'T_RANGE'",
1064 |         "'transfer'",
1065 |         "'transform_keys'",
1066 |         "'transform_values'",
1067 |         "'transpose'",
1068 |         "'tREGEXP'",
1069 |         "'tREGEXP_BEG'",
1070 |         "'tREGEXP_END'",
1071 |         "'tRPAREN'",
1072 |         "'tRSHFT'",
1073 |         "'true'",
1074 |         "'TrueClass'",
1075 |         "'truncate'",
1076 |         "'try_convert'",
1077 |         "'T_SCLASS'",
1078 |         "'tSTAR'",
1079 |         "'tSTRING'",
1080 |         "'T_STRING'",
1081 |         "'tSTRING_BEG'",
1082 |         "'tSTRING_DVAR'",
1083 |         "'tSTRING_MID'",
1084 |         "'tSTRING_PART'",
1085 |         "'tSYMBEG'",
1086 |         "'T_SYMBOL'",
1087 |         "'tSYMBOLS_BEG'",
1088 |         "'tt'",
1089 |         "'T_TRUE'",
1090 |         "'Tue'",
1091 |         "'tUMINUS'",
1092 |         "'tUMINUS_NUM'",
1093 |         "'T_UNDEF'",
1094 |         "'tUPLUS'",
1095 |         "'twice'",
1096 |         "'tWORDS_BEG'",
1097 |         "'tXSTRING'",
1098 |         "'tXSTRING_BEG'",
1099 |         "'type'",
1100 |         "'TypeError'",
1101 |         "'umrb_obj_value'",
1102 |         "'undef'",
1103 |         "'undefined'",
1104 |         "'undef_list'",
1105 |         "'undef_method'",
1106 |         "'uniq'",
1107 |         "'unless'",
1108 |         "'unshift'",
1109 |         "'until'",
1110 |         "'upcase'",
1111 |         "'__update'",
1112 |         "'update'",
1113 |         "'upto'",
1114 |         "'usec'",
1115 |         "'useless'",
1116 |         "'utc'",
1117 |         "'v0000'",
1118 |         "'val'",
1119 |         "'validated'",
1120 |         "'vals'",
1121 |         "'value'",
1122 |         "'values'",
1123 |         "'values_at'",
1124 |         "'variable'",
1125 |         "'var_lhs'",
1126 |         "'var_ref'",
1127 |         "'verbose'",
1128 |         "'version'",
1129 |         "'vm'",
1130 |         "'Vm'",
1131 |         "'warn'",
1132 |         "'wday'",
1133 |         "'Wed'",
1134 |         "'when'",
1135 |         "'while'",
1136 |         "'width'",
1137 |         "'with_index'",
1138 |         "'with_object'",
1139 |         "'words'",
1140 |         "'x86_64'",
1141 |         "'xstring'",
1142 |         "'yday'",
1143 |         "'year'",
1144 |         "'yield'",
1145 |         "'yielder'",
1146 |         "'Yielder'",
1147 |         "'yield_self'",
1148 |         "'zip'",
1149 |         "'zone'"
1150 |     ],
1151 |     "SP": [
1152 |         "' '"
1153 |     ],
1154 |     "ENTRYPOINT": [
1155 |         "RUBYBLOCK"
1156 |     ],
1157 |     "RUBYBLOCK": [
1158 |         "STATEMENT NEWLINE RUBYBLOCK",
1159 |         "' '"
1160 |     ],
1161 |     "NEWLINE": [
1162 |         "'\\n'"
1163 |     ],
1164 |     "STATEMENT": [
1165 |         "VAR '=' VAR '.' IDENTIFIER '(' ARGS ')'",
1166 |         "VAR '=' IDENTIFIER '.' IDENTIFIER '(' ARGS ')'",
1167 |         "VAR '=' VAL '.' IDENTIFIER '(' ARGS ')'",
1168 |         "VAR '=' VAL",
1169 |         "'return' SP VAR",
1170 |         "'yield' SP VAR",
1171 |         "'continue' SP VAR",
1172 |         "'break' SP VAR",
1173 |         "'next' SP VAR"
1174 |     ],
1175 |     "VAL": [
1176 |         "'1'",
1177 |         "'0'",
1178 |         "'0.0'",
1179 |         "'\"foo\"'",
1180 |         "'\"asdfasdf\"'",
1181 |         "'\"o\"'",
1182 |         "'nil'",
1183 |         "'true'",
1184 |         "'false'",
1185 |         "'/foo/'",
1186 |         "'[]'",
1187 |         "'[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,nil]'"
1188 |     ],
1189 |     "VAR": [
1190 |         "'a'",
1191 |         "'b'",
1192 |         "'c'",
1193 |         "'d'"
1194 |     ]
1195 | }
1196 | 


--------------------------------------------------------------------------------
/test-data/benchmarks/grammar.postcard:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/z2-2z/peacock/f2d053cf0e198be03220b6d2e8be4e1fd26be86a/test-data/benchmarks/grammar.postcard


--------------------------------------------------------------------------------
/test-data/benchmarks/nop.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <stdio.h>
 3 | 
 4 | int main (void) {
 5 | #ifdef __AFL_INIT
 6 |     __AFL_INIT();
 7 | #endif
 8 |     _Exit(0);
 9 | }
10 | 


--------------------------------------------------------------------------------
/test-data/benchmarks/patch-libafl:
--------------------------------------------------------------------------------
 1 | diff --git a/fuzzers/baby_fuzzer_gramatron/src/main.rs b/fuzzers/baby_fuzzer_gramatron/src/main.rs
 2 | index ada8c1df..6a9600cf 100644
 3 | --- a/fuzzers/baby_fuzzer_gramatron/src/main.rs
 4 | +++ b/fuzzers/baby_fuzzer_gramatron/src/main.rs
 5 | @@ -106,11 +106,10 @@ pub fn main() {
 6 |      )
 7 |      .expect("Failed to create the Executor");
 8 |  
 9 | -    let automaton = read_automaton_from_file(PathBuf::from("auto.postcard"));
10 | +    let automaton = read_automaton_from_file(PathBuf::from("grammar.postcard"));
11 |      let mut generator = GramatronGenerator::new(&automaton);
12 |  
13 |      // Use this code to profile the generator performance
14 | -    /*
15 |      use libafl::generators::Generator;
16 |      use std::collections::HashSet;
17 |      use std::collections::hash_map::DefaultHasher;
18 | @@ -122,21 +121,18 @@ pub fn main() {
19 |          s.finish()
20 |      }
21 |  
22 | -    let mut set = HashSet::new();
23 | -    let st = libafl_bolts::current_milliseconds();
24 | -    let mut b = vec![];
25 | -    let mut c = 0;
26 | -    for _ in 0..100000 {
27 | +    let mut b = Vec::with_capacity(128 * 1024 * 1024);
28 | +    let mut generated = 0_usize;
29 | +    let start = std::time::Instant::now();
30 | +    while generated < 1024 * 1024 * 1024 {
31 |          let i = generator.generate(&mut state).unwrap();
32 |          i.unparse(&mut b);
33 | -        set.insert(calculate_hash(&b));
34 | -        c += b.len();
35 | +        generated += b.len();
36 |      }
37 | -    println!("{} / {}", c, libafl_bolts::current_milliseconds() - st);
38 | -    println!("{} / 100000", set.len());
39 | +    let delta = std::time::Instant::now() - start;
40 | +    println!("secs={} nsecs={}", delta.as_secs(), delta.as_nanos());
41 |  
42 |      return;
43 | -    */
44 |  
45 |      // Generate 8 initial inputs
46 |      state
47 | 


--------------------------------------------------------------------------------
/test-data/benchmarks/throughput.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <time.h>
 4 | 
 5 | #include "generator.h"
 6 | 
 7 | #define SEQ_LEN  4096
 8 | #define BUF_SIZE (128 * 1024 * 1024)
 9 | 
10 | void bench_generation(size_t* sequence, unsigned char* output) {
11 |     struct timespec start, end;
12 |     size_t generated = 0;
13 |     
14 |     clock_gettime(CLOCK_MONOTONIC, &start);
15 |     
16 |     while (generated < 1 * 1024 * 1024 * 1024) {
17 |         size_t seq_len = mutate_sequence(sequence, 0, SEQ_LEN);
18 |         size_t out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE);
19 |         generated += out_len;
20 |     }
21 |     
22 |     clock_gettime(CLOCK_MONOTONIC, &end);
23 |     
24 |     time_t secs = end.tv_sec - start.tv_sec;
25 |     long  nsecs = end.tv_nsec - start.tv_nsec;
26 |     
27 |     if (nsecs < 0) {
28 |         secs -= 1;
29 |         nsecs += 1000000000;
30 |     }
31 |     
32 |     printf("Generation: secs=%lu nsecs=%ld\n", secs, nsecs);
33 | }
34 | 
35 | void bench_mutation(size_t* sequence, unsigned char* output) {
36 |     struct timespec start, end;
37 |     size_t generated = 0;
38 |     size_t seq_len = mutate_sequence(sequence, 0, SEQ_LEN);
39 |     
40 |     clock_gettime(CLOCK_MONOTONIC, &start);
41 |     
42 |     while (generated < 1 * 1024 * 1024 * 1024) {
43 |         seq_len = mutate_sequence(sequence, seq_len / 2, SEQ_LEN);
44 |         size_t out_len = serialize_sequence(sequence, seq_len, output, BUF_SIZE);
45 |         generated += out_len;
46 |     }
47 |     
48 |     clock_gettime(CLOCK_MONOTONIC, &end);
49 |     
50 |     time_t secs = end.tv_sec - start.tv_sec;
51 |     long  nsecs = end.tv_nsec - start.tv_nsec;
52 |     
53 |     if (nsecs < 0) {
54 |         secs -= 1;
55 |         nsecs += 1000000000;
56 |     }
57 |     
58 |     printf("Mutation: secs=%lu nsecs=%ld\n", secs, nsecs);
59 | }
60 | 
61 | int main (void) {
62 |     size_t* sequence = calloc(SEQ_LEN, sizeof(size_t));
63 |     unsigned char* output = malloc(BUF_SIZE);
64 |     bench_generation(sequence, output);
65 |     bench_mutation(sequence, output);
66 | }
67 | 


--------------------------------------------------------------------------------
/test-data/fuzz/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !*.c
3 | !.gitignore
4 | 


--------------------------------------------------------------------------------
/test-data/fuzz/main.c:
--------------------------------------------------------------------------------
 1 | // 
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | 
 6 | void print_loop (FILE* input) {
 7 |     char buf[128];
 8 |     
 9 |     while (!feof(input) && !ferror(input)) {
10 |         size_t num = fread(buf, 1, sizeof(buf), input);
11 |         
12 |         if (!num) {
13 |             break;
14 |         }
15 |         
16 |         fwrite(buf, 1, num, stdout);
17 |     }
18 |     
19 |     fprintf(stdout, "\n");
20 |     fflush(stdout);
21 | }
22 | 
23 | int main (int argc, char** argv) {
24 |     __AFL_INIT();
25 |     
26 |     FILE* input = NULL;
27 |     
28 |     if (argc == 1) {
29 |         input = stdin;
30 |     } else if (argc == 2) {
31 |         input = fopen(argv[1], "rb");
32 |     } else {
33 |         fprintf(stderr, "Invalid test invocation\n");
34 |         return 1;
35 |     }
36 |     
37 |     print_loop(input);
38 |     
39 |     return 0;
40 | }
41 | 


--------------------------------------------------------------------------------
/test-data/grammars/duplicate_rules.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "<ENTRYPOINT>": [
 3 |         ["a", "b"],
 4 |         ["ab"],
 5 |         ["b", "a"],
 6 |         ["a", "b"],
 7 |         ["ba"]
 8 |     ]
 9 | }
10 | 


--------------------------------------------------------------------------------
/test-data/grammars/gramatron.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "ARGLIST": [
  3 |         "EXPR ',' ARGLIST",
  4 |         "EXPR",
  5 |         "EXPR ',' ARGLIST",
  6 |         "EXPR"
  7 |     ],
  8 |     "ARGS": [
  9 |         "'()'",
 10 |         "'(' ARGLIST ')'",
 11 |         "'()'",
 12 |         "'(' ARGLIST ')'"
 13 |     ],
 14 |     "ARITHMETICOPERATION": [
 15 |         "EXPR '/' EXPR",
 16 |         "EXPR '*' EXPR",
 17 |         "EXPR '+' EXPR",
 18 |         "EXPR '-' EXPR",
 19 |         "EXPR '%' EXPR",
 20 |         "EXPR '**' EXPR",
 21 |         "EXPR '++'"
 22 |     ],
 23 |     "ARRAY": [
 24 |         "'[' ARRAYCONTENT ']'",
 25 |         "'[]'"
 26 |     ],
 27 |     "ARRAYCONTENT": [
 28 |         "EXPR ',' ARRAYCONTENT",
 29 |         "EXPR"
 30 |     ],
 31 |     "BOOLEAN": [
 32 |         "'true'",
 33 |         "'false'"
 34 |     ],
 35 |     "BYTEWISEOPERATION": [
 36 |         "EXPR '&' EXPR",
 37 |         "EXPR '|' EXPR"
 38 |     ],
 39 |     "COMPARISONOPERATION": [
 40 |         "EXPR '<' EXPR"
 41 |     ],
 42 |     "DECIMALDIGITS": [
 43 |         "'20'",
 44 |         "'1234'",
 45 |         "'66'",
 46 |         "'234_9'",
 47 |         "'99999999999999999999'"
 48 |     ],
 49 |     "DECIMALNUMBER": [
 50 |         "DECIMALDIGITS"
 51 |     ],
 52 |     "EXPR": [
 53 |         "'(' EXPR ')'",
 54 |         "VAR",
 55 |         "'delete' SP EXPR",
 56 |         "'new' SP IDENTIFIER ARGS",
 57 |         "LITERAL",
 58 |         "IDENTIFIER",
 59 |         "METHODCALL",
 60 |         "'(' ARITHMETICOPERATION ')'",
 61 |         "'(' COMPARISONOPERATION ')'",
 62 |         "'(' BYTEWISEOPERATION ')'",
 63 |         "'(' LOGICALOPERATION ')'"
 64 |     ],
 65 |     "IDENTIFIER": [
 66 |         "'Object'",
 67 |         "VAR",
 68 |         "'Function'",
 69 |         "'main'",
 70 |         "'opt'",
 71 |         "'Boolean'",
 72 |         "'Symbol'",
 73 |         "'JSON'",
 74 |         "'Error'",
 75 |         "'EvalError'",
 76 |         "'RangeError'",
 77 |         "'ReferenceError'",
 78 |         "'SyntaxError'",
 79 |         "'TypeError'",
 80 |         "'URIError'",
 81 |         "'this'",
 82 |         "'Number'",
 83 |         "'Math'",
 84 |         "'Date'",
 85 |         "'String'",
 86 |         "'RegExp'",
 87 |         "'Array'",
 88 |         "'Int8Array'",
 89 |         "'Uint8Array'",
 90 |         "'Uint8ClampedArray'",
 91 |         "'Int16Array'",
 92 |         "'Uint16Array'",
 93 |         "'Int32Array'",
 94 |         "'Uint32Array'",
 95 |         "'Float32Array'",
 96 |         "'Float64Array'",
 97 |         "'DataView'",
 98 |         "'ArrayBuffer'",
 99 |         "'Map'",
100 |         "'Set'",
101 |         "'WeakMap'",
102 |         "'WeakSet'",
103 |         "'Promise'",
104 |         "'AsyncFunction'",
105 |         "'asyncGenerator'",
106 |         "'Reflect'",
107 |         "'Proxy'",
108 |         "'Intl'",
109 |         "'Intl.Collator'",
110 |         "'Intl.DateTimeFormat'",
111 |         "'Intl.NumberFormat'",
112 |         "'Intl.PluralRules'",
113 |         "'WebAssembly'",
114 |         "'WebAssembly.Module'",
115 |         "'WebAssembly.Instance'",
116 |         "'WebAssembly.Memory'",
117 |         "'WebAssembly.Table'",
118 |         "'WebAssembly.CompileError'",
119 |         "'WebAssembly.LinkError'",
120 |         "'WebAssembly.RuntimeError'",
121 |         "'arguments'",
122 |         "'Infinity'",
123 |         "'NaN'",
124 |         "'undefined'",
125 |         "'null'",
126 |         "'console'",
127 |         "' '"
128 |     ],
129 |     "IDENTIFIERLIST": [
130 |         "IDENTIFIER ',' IDENTIFIERLIST",
131 |         "'(' IDENTIFIERLIST '),' IDENTIFIERLIST",
132 |         "IDENTIFIER"
133 |     ],
134 |     "JSBLOCK": [
135 |         "JSSTATEMENT",
136 |         "JSSTATEMENT JSBLOCK"
137 |     ],
138 |     "JSSTATEMENT": [
139 |         "STATEMENT NEWLINE"
140 |     ],
141 |     "LITERAL": [
142 |         "'null'",
143 |         "BOOLEAN",
144 |         "NUMBER",
145 |         "ARRAY"
146 |     ],
147 |     "LOGICALOPERATION": [
148 |         "EXPR '&&' EXPR",
149 |         "EXPR '||' EXPR"
150 |     ],
151 |     "METHODCALL": [
152 |         "OBJECT PROPERTY METHODCALL1"
153 |     ],
154 |     "METHODCALL1": [
155 |         "'.' METHOD_NAME ARGS METHODCALL1",
156 |         "' '"
157 |     ],
158 |     "METHOD_NAME": [
159 |         "IDENTIFIER",
160 |         "'print'",
161 |         "'eval'",
162 |         "'uneval'",
163 |         "'isFinite'",
164 |         "'isNaN'",
165 |         "'parseFloat'",
166 |         "'parseInt'",
167 |         "'decodeURI'",
168 |         "'decodeURIComponent'",
169 |         "'encodeURI'",
170 |         "'encodeURIComponent'",
171 |         "'escape'",
172 |         "'unescape'",
173 |         "'assign'",
174 |         "'create'",
175 |         "'defineProperty'",
176 |         "'defineProperties'",
177 |         "'entries'",
178 |         "'freeze'",
179 |         "'getOwnPropertyDescriptor'",
180 |         "'getOwnPropertyDescriptors'",
181 |         "'getOwnPropertyNames'",
182 |         "'getOwnPropertySymbols'",
183 |         "'getPrototypeOf'",
184 |         "'is'",
185 |         "'isExtensible'",
186 |         "'isFrozen'",
187 |         "'isSealed'",
188 |         "'keys'",
189 |         "'preventExtensions'",
190 |         "'seal'",
191 |         "'setPrototypeOf'",
192 |         "'values'",
193 |         "'__defineGetter__'",
194 |         "'__defineSetter__'",
195 |         "'__lookupGetter__'",
196 |         "'__lookupSetter__'",
197 |         "'hasOwnProperty'",
198 |         "'isPrototypeOf'",
199 |         "'propertyIsEnumerable'",
200 |         "'toSource'",
201 |         "'toLocaleString'",
202 |         "'toString'",
203 |         "'unwatch'",
204 |         "'valueOf'",
205 |         "'watch'",
206 |         "'apply'",
207 |         "'bind'",
208 |         "'call'",
209 |         "'isGenerator'",
210 |         "'valueOf'",
211 |         "'for'",
212 |         "'keyFor'",
213 |         "'stringify'",
214 |         "'isInteger'",
215 |         "'isSafeInteger'",
216 |         "'toInteger'",
217 |         "'toExponential'",
218 |         "'toFixed'",
219 |         "'toLocaleString'",
220 |         "'toPrecision'",
221 |         "'abs'",
222 |         "'acos'",
223 |         "'acosh'",
224 |         "'asin'",
225 |         "'asinh'",
226 |         "'atan'",
227 |         "'atanh'",
228 |         "'atan2'",
229 |         "'cbrt'",
230 |         "'ceil'",
231 |         "'clz32'",
232 |         "'cos'",
233 |         "'cosh'",
234 |         "'exp'",
235 |         "'expm1'",
236 |         "'floor'",
237 |         "'fround'",
238 |         "'hypot'",
239 |         "'imul'",
240 |         "'log'",
241 |         "'log1p'",
242 |         "'log10'",
243 |         "'log2'",
244 |         "'max'",
245 |         "'min'",
246 |         "'pow'",
247 |         "'random'",
248 |         "'round'",
249 |         "'sign'",
250 |         "'sin'",
251 |         "'sinh'",
252 |         "'sqrt'",
253 |         "'tan'",
254 |         "'tanh'",
255 |         "'trunc'",
256 |         "'now'",
257 |         "'parse'",
258 |         "'UTC'",
259 |         "'getDate'",
260 |         "'getDay'",
261 |         "'getFullYear'",
262 |         "'getHours'",
263 |         "'getMilliseconds'",
264 |         "'getMinutes'",
265 |         "'getMonth'",
266 |         "'getSeconds'",
267 |         "'getTime'",
268 |         "'getTimezoneOffset'",
269 |         "'getUTCDate'",
270 |         "'getUTCDay'",
271 |         "'getUTCFullYear'",
272 |         "'getUTCHours'",
273 |         "'getUTCMilliseconds'",
274 |         "'getUTCMinutes'",
275 |         "'getUTCMonth'",
276 |         "'getUTCSeconds'",
277 |         "'getYear'",
278 |         "'setDate'",
279 |         "'setFullYear'",
280 |         "'setHours'",
281 |         "'setMilliseconds'",
282 |         "'setMinutes'",
283 |         "'setMonth'",
284 |         "'setSeconds'",
285 |         "'setTime'",
286 |         "'setUTCDate'",
287 |         "'setUTCFullYear'",
288 |         "'setUTCHours'",
289 |         "'setUTCMilliseconds'",
290 |         "'setUTCMinutes'",
291 |         "'setUTCMonth'",
292 |         "'setUTCSeconds'",
293 |         "'setYear'",
294 |         "'toDateString'",
295 |         "'toISOString'",
296 |         "'toJSON'",
297 |         "'toGMTString'",
298 |         "'toLocaleDateString'",
299 |         "'toLocaleFormat'",
300 |         "'toLocaleString'",
301 |         "'toLocaleTimeString'",
302 |         "'toTimeString'",
303 |         "'toUTCString'",
304 |         "'indexOf'",
305 |         "'substring'",
306 |         "'charAt'",
307 |         "'strcmp'",
308 |         "'fromCharCode'",
309 |         "'fromCodePoint'",
310 |         "'raw'",
311 |         "'charCodeAt'",
312 |         "'slice'",
313 |         "'codePointAt'",
314 |         "'concat'",
315 |         "'includes'",
316 |         "'endsWith'",
317 |         "'lastIndexOf'",
318 |         "'localeCompare'",
319 |         "'match'",
320 |         "'normalize'",
321 |         "'padEnd'",
322 |         "'padStart'",
323 |         "'quote'",
324 |         "'repeat'",
325 |         "'replace'",
326 |         "'search'",
327 |         "'split'",
328 |         "'startsWith'",
329 |         "'substr'",
330 |         "'toLocaleLowerCase'",
331 |         "'toLocaleUpperCase'",
332 |         "'toLowerCase'",
333 |         "'toUpperCase'",
334 |         "'trim'",
335 |         "'trimleft'",
336 |         "'trimright'",
337 |         "'anchor'",
338 |         "'big'",
339 |         "'blink'",
340 |         "'bold'",
341 |         "'fixed'",
342 |         "'fontcolor'",
343 |         "'fontsize'",
344 |         "'italics'",
345 |         "'link'",
346 |         "'small'",
347 |         "'strike'",
348 |         "'sub'",
349 |         "'sup'",
350 |         "'compile'",
351 |         "'exec'",
352 |         "'test'",
353 |         "'from'",
354 |         "'isArray'",
355 |         "'of'",
356 |         "'copyWithin'",
357 |         "'fill'",
358 |         "'pop'",
359 |         "'push'",
360 |         "'reverse'",
361 |         "'shift'",
362 |         "'sort'",
363 |         "'splice'",
364 |         "'unshift'",
365 |         "'concat'",
366 |         "'join'",
367 |         "'every'",
368 |         "'filter'",
369 |         "'findIndex'",
370 |         "'forEach'",
371 |         "'map'",
372 |         "'reduce'",
373 |         "'reduceRight'",
374 |         "'some'",
375 |         "'move'",
376 |         "'getInt8'",
377 |         "'getUint8'",
378 |         "'getInt16'",
379 |         "'getUint16'",
380 |         "'getInt32'",
381 |         "'getUint32'",
382 |         "'getFloat32'",
383 |         "'getFloat64'",
384 |         "'setInt8'",
385 |         "'setUint8'",
386 |         "'setInt16'",
387 |         "'setUint16'",
388 |         "'setInt32'",
389 |         "'setUint32'",
390 |         "'setFloat32'",
391 |         "'setFloat64'",
392 |         "'isView'",
393 |         "'transfer'",
394 |         "'clear'",
395 |         "'get'",
396 |         "'has'",
397 |         "'set'",
398 |         "'add'",
399 |         "'splat'",
400 |         "'check'",
401 |         "'extractLane'",
402 |         "'replaceLane'",
403 |         "'load'",
404 |         "'load1'",
405 |         "'load2'",
406 |         "'load3'",
407 |         "'store'",
408 |         "'store1'",
409 |         "'store2'",
410 |         "'store3'",
411 |         "'addSaturate'",
412 |         "'div'",
413 |         "'mul'",
414 |         "'neg'",
415 |         "'reciprocalApproximation'",
416 |         "'reciprocalSqrtApproximation'",
417 |         "'subSaturate'",
418 |         "'shuffle'",
419 |         "'swizzle'",
420 |         "'maxNum'",
421 |         "'minNum'",
422 |         "'select'",
423 |         "'equal'",
424 |         "'notEqual'",
425 |         "'lessThan'",
426 |         "'lessThanOrEqual'",
427 |         "'greaterThan'",
428 |         "'greaterThanOrEqual'",
429 |         "'and'",
430 |         "'or'",
431 |         "'xor'",
432 |         "'not'",
433 |         "'shiftLeftByScalar'",
434 |         "'shiftRightByScalar'",
435 |         "'allTrue'",
436 |         "'anyTrue'",
437 |         "'fromFloat32x4'",
438 |         "'fromFloat32x4Bits'",
439 |         "'fromFloat64x2Bits'",
440 |         "'fromInt32x4'",
441 |         "'fromInt32x4Bits'",
442 |         "'fromInt16x8Bits'",
443 |         "'fromInt8x16Bits'",
444 |         "'fromUint32x4'",
445 |         "'fromUint32x4Bits'",
446 |         "'fromUint16x8Bits'",
447 |         "'fromUint8x16Bits'",
448 |         "'neg'",
449 |         "'compareExchange'",
450 |         "'exchange'",
451 |         "'wait'",
452 |         "'wake'",
453 |         "'isLockFree'",
454 |         "'all'",
455 |         "'race'",
456 |         "'reject'",
457 |         "'resolve'",
458 |         "'catch'",
459 |         "'then'",
460 |         "'finally'",
461 |         "'next'",
462 |         "'throw'",
463 |         "'close'",
464 |         "'send'",
465 |         "'apply'",
466 |         "'construct'",
467 |         "'deleteProperty'",
468 |         "'ownKeys'",
469 |         "'getCanonicalLocales'",
470 |         "'supportedLocalesOf'",
471 |         "'resolvedOptions'",
472 |         "'formatToParts'",
473 |         "'resolvedOptions'",
474 |         "'instantiate'",
475 |         "'instantiateStreaming'",
476 |         "'compileStreaming'",
477 |         "'validate'",
478 |         "'customSections'",
479 |         "'exports'",
480 |         "'imports'",
481 |         "'grow'",
482 |         "'super'",
483 |         "'in'",
484 |         "'instanceof'",
485 |         "' '"
486 |     ],
487 |     "NEWLINE": [
488 |         "'\\n'"
489 |     ],
490 |     "NUMBER": [
491 |         "'1/2'",
492 |         "'1E2'",
493 |         "'1E02'",
494 |         "'1E+02'",
495 |         "'-1'",
496 |         "'-1.00'",
497 |         "'-1/2'",
498 |         "'-1E2'",
499 |         "'-1E02'",
500 |         "'-1E+02'",
501 |         "'1/0'",
502 |         "'0/0'",
503 |         "'-2147483648/-1'",
504 |         "'-9223372036854775808/-1'",
505 |         "'-0'",
506 |         "'-0.0'",
507 |         "'+0'"
508 |     ],
509 |     "OBJECT": [
510 |         "IDENTIFIER"
511 |     ],
512 |     "ENTRYPOINT": [
513 |         "JSBLOCK"
514 |     ],
515 |     "PROPERTY": [
516 |         "'.length' PROPERTY",
517 |         "'.prototype' PROPERTY",
518 |         "'.constructor' PROPERTY",
519 |         "'.__proto__' PROPERTY",
520 |         "'.__noSuchMethod__' PROPERTY",
521 |         "'.__count__' PROPERTY",
522 |         "'.__parent__' PROPERTY",
523 |         "'.arguments' PROPERTY",
524 |         "'.arity' PROPERTY",
525 |         "'.caller' PROPERTY",
526 |         "'.name' PROPERTY",
527 |         "'.displayName' PROPERTY",
528 |         "'.iterator' PROPERTY",
529 |         "'.asyncIterator' PROPERTY",
530 |         "'.match' PROPERTY",
531 |         "'.replace' PROPERTY",
532 |         "'.search' PROPERTY",
533 |         "'.split' PROPERTY",
534 |         "'.hasInstance' PROPERTY",
535 |         "'.isConcatSpreadable' PROPERTY",
536 |         "'.unscopables' PROPERTY",
537 |         "'.species' PROPERTY",
538 |         "'.toPrimitive' PROPERTY",
539 |         "'.toStringTag' PROPERTY",
540 |         "'.fileName' PROPERTY",
541 |         "'.lineNumber' PROPERTY",
542 |         "'.columnNumber' PROPERTY",
543 |         "'.message' PROPERTY",
544 |         "'.name' PROPERTY",
545 |         "'.EPSILON' PROPERTY",
546 |         "'.MAX_SAFE_INTEGER' PROPERTY",
547 |         "'.MAX_VALUE' PROPERTY",
548 |         "'.MIN_SAFE_INTEGER' PROPERTY",
549 |         "'.MIN_VALUE' PROPERTY",
550 |         "'.NaN' PROPERTY",
551 |         "'.NEGATIVE_INFINITY' PROPERTY",
552 |         "'.POSITIVE_INFINITY' PROPERTY",
553 |         "'.E' PROPERTY",
554 |         "'.LN2' PROPERTY",
555 |         "'.LN10' PROPERTY",
556 |         "'.LOG2E' PROPERTY",
557 |         "'.LOG10E' PROPERTY",
558 |         "'.PI' PROPERTY",
559 |         "'.SQRT1_2' PROPERTY",
560 |         "'.SQRT2' PROPERTY",
561 |         "'.flags' PROPERTY",
562 |         "'.global' PROPERTY",
563 |         "'.ignoreCase' PROPERTY",
564 |         "'.multiline' PROPERTY",
565 |         "'.source' PROPERTY",
566 |         "'.sticky' PROPERTY",
567 |         "'.unicode' PROPERTY",
568 |         "'.buffer' PROPERTY",
569 |         "'.byteLength' PROPERTY",
570 |         "'.byteOffset' PROPERTY",
571 |         "'.BYTES_PER_ELEMENT' PROPERTY",
572 |         "'.compare' PROPERTY",
573 |         "'.format' PROPERTY",
574 |         "'.callee' PROPERTY",
575 |         "'.caller' PROPERTY",
576 |         "'.memory' PROPERTY",
577 |         "'.exports' PROPERTY",
578 |         "' '"
579 |     ],
580 |     "SP": [
581 |         "' '"
582 |     ],
583 |     "STATEMENT": [
584 |         "EXPR ';'",
585 |         "'var' SP VAR '=' EXPR ';'",
586 |         "'let' SP VAR '=' EXPR ';'",
587 |         "VAR '=' EXPR ';'",
588 |         "VAR PROPERTY '=' EXPR ';'",
589 |         "VAR '[' DECIMALNUMBER ']' '=' EXPR ';'",
590 |         "'const' SP VAR '=' EXPR ';'",
591 |         "'typeof' SP EXPR ';'",
592 |         "'void' SP EXPR ';'",
593 |         "'return' SP EXPR ';'",
594 |         "VAR ':'"
595 |     ],
596 |     "VAR": [
597 |         "'a'",
598 |         "'b'",
599 |         "'c'",
600 |         "'d'",
601 |         "'e'",
602 |         "'f'",
603 |         "'g'",
604 |         "'h'"
605 |     ]
606 | }
607 | 


--------------------------------------------------------------------------------
/test-data/grammars/invalid-refs.json:
--------------------------------------------------------------------------------
1 | {
2 |     "<ENTRYPOINT>": [
3 |         ["<missing>"]
4 |     ]
5 | }
6 | 


--------------------------------------------------------------------------------
/test-data/grammars/mixed_rules.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "<ENTRYPOINT>": [
 3 |         ["e", "<A>", "f", "<B>"]
 4 |     ],
 5 |     "<A>": [
 6 |         ["a"]
 7 |     ],
 8 |     "<B>": [
 9 |         ["b"]
10 |     ]
11 | }
12 | 


--------------------------------------------------------------------------------
/test-data/grammars/recursion.json:
--------------------------------------------------------------------------------
1 | {
2 |     "<ENTRYPOINT>": [
3 |         ["<X>"]
4 |     ],
5 |     "<X>": [
6 |         ["<ENTRYPOINT>"]
7 |     ]
8 | }
9 | 


--------------------------------------------------------------------------------
/test-data/grammars/test-peacock.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "<epsilon>": [
 3 |         [""]
 4 |     ],
 5 |     
 6 |     "<asdf>": [
 7 |         ["<", "'<asdf>'", "<asdf>", ">"],
 8 |         ["'''"]
 9 |     ]
10 | }


--------------------------------------------------------------------------------
/test-data/grammars/unit_rules.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "<ENTRYPOINT>": [
 3 |         ["<A>"],
 4 |         ["e"]
 5 |     ],
 6 |     "<A>": [
 7 |         ["<B>"],
 8 |         ["a"]
 9 |     ],
10 |     "<B>": [
11 |         ["b1"],
12 |         ["b2"]
13 |     ]
14 | }
15 | 


--------------------------------------------------------------------------------
/test-data/grammars/unused_rules.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     // COMPONENT 1
 3 |     "<ENTRYPOINT>": [
 4 |         ["<A>", "<B>"]
 5 |     ],
 6 |     "<A>": [
 7 |         ["a", "<ENTRYPOINT>"],
 8 |         ["a"]
 9 |     ],
10 |     "<B>": [
11 |         ["b", "<ENTRYPOINT>"],
12 |         ["b"]
13 |     ],
14 |     
15 |     // COMPONENT 2
16 |     "<C>": [
17 |         ["<D>"]
18 |     ],
19 |     "<D>": [
20 |         ["<C>"]
21 |     ]
22 | }
23 | 


--------------------------------------------------------------------------------
/test-data/libfuzzer/.gitignore:
--------------------------------------------------------------------------------
1 | harness
2 | corpus/
3 | generator.*
4 | 


--------------------------------------------------------------------------------
/test-data/libfuzzer/Makefile:
--------------------------------------------------------------------------------
1 | 
2 | harness: harness.c generator.c
3 | 	clang -O3 -flto -fsanitize=fuzzer -o $@ -I. $^
4 | 


--------------------------------------------------------------------------------
/test-data/libfuzzer/harness.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdint.h>
  3 | #include <stddef.h>
  4 | #include <stdlib.h>
  5 | #include <sys/mman.h>
  6 | #include <string.h>
  7 | 
  8 | #include "generator.h"
  9 | 
 10 | #define OUT_LEN (128 * 1024 * 1024)
 11 | 
 12 | size_t LLVMFuzzerCustomMutator (uint8_t* data, size_t size, size_t max_size, unsigned int seed) {
 13 |     //printf("LLVMFuzzerCustomMutator(%p, %lu, %lu, %u)\n", data, size, max_size, seed);
 14 |     
 15 |     if ((size % sizeof(size_t)) != 0) {
 16 |         size = 0;
 17 |     }
 18 |     
 19 |     size /= sizeof(size_t);
 20 |     
 21 |     if (size) {
 22 |         size = rand() % size;
 23 |     }
 24 |     
 25 |     max_size /= sizeof(size_t);
 26 |     
 27 |     seed_generator(seed);
 28 |     
 29 |     size_t new_len = mutate_sequence(
 30 |         (size_t*) data,
 31 |         size,
 32 |         max_size
 33 |     );
 34 |     
 35 |     return new_len * sizeof(size_t);
 36 | }
 37 | 
 38 | int LLVMFuzzerTestOneInput (const uint8_t* data, size_t size) {
 39 |     static unsigned char* output = NULL;
 40 |     
 41 |     //printf("LLVMFuzzerTestOneInput(%p, %lu)\n", data, size);
 42 |     
 43 |     if ((size % sizeof(size_t)) != 0) {
 44 |         return -1;
 45 |     }
 46 |     
 47 |     if (!output) {
 48 |         output = malloc(OUT_LEN + 1);
 49 |     }
 50 |     
 51 |     size /= sizeof(size_t);
 52 |     
 53 |     size_t new_len = serialize_sequence(
 54 |         (size_t*) data,
 55 |         size,
 56 |         output,
 57 |         OUT_LEN
 58 |     );
 59 |     output[new_len] = 0;
 60 |     
 61 |     //printf("%s\n", output);
 62 |     
 63 |     return 0;
 64 | }
 65 | 
 66 | void print_file (char* filename) {
 67 |     FILE* file = fopen(filename, "rb");
 68 |     fseek(file, 0, SEEK_END);
 69 |     size_t file_size = ftell(file);
 70 |     
 71 |     if ((file_size % sizeof(size_t)) != 0) {
 72 |         exit(1);
 73 |     }
 74 |     
 75 |     fseek(file, 0, SEEK_SET);
 76 |     size_t* buffer = malloc(file_size);
 77 |     fread(buffer, 1, file_size, file);
 78 |     
 79 |     unsigned char* output = malloc(OUT_LEN + 1);
 80 |     
 81 |     size_t out_len = serialize_sequence(
 82 |         buffer,
 83 |         file_size / sizeof(size_t),
 84 |         output,
 85 |         OUT_LEN
 86 |     );
 87 |     output[out_len] = 0;
 88 |     
 89 |     printf("%s\n", output);
 90 |     
 91 |     fclose(file);
 92 | }
 93 | 
 94 | int LLVMFuzzerInitialize (int* argcp, char*** argvp) {
 95 |     int argc = *argcp;
 96 |     char** argv = *argvp;
 97 |     
 98 |     if (argc == 2 && !strncmp(argv[1], "--print=", 8)) {
 99 |         print_file(argv[1] + 8);
100 |         exit(0);
101 |     }
102 |     
103 |     return 0;
104 | }
105 | 


--------------------------------------------------------------------------------
/test-data/static_loading/.gitignore:
--------------------------------------------------------------------------------
1 | generator.*
2 | output/
3 | .cur_input_*
4 | 


--------------------------------------------------------------------------------
/test-data/static_loading/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "static_loading"
 3 | version = "0.0.0"
 4 | edition = "2021"
 5 | 
 6 | [profile.release]
 7 | lto = true
 8 | codegen-units = 1
 9 | 
10 | [dependencies]
11 | peacock-fuzz = { path = "../..", features = ["static-loading"] }
12 | libafl = "0.13.0"
13 | libafl_bolts = "0.13.0"
14 | nix = "0.29"
15 | 
16 | [build-dependencies]
17 | peacock-fuzz = { path = "../.." }
18 | cc = "1.0"
19 | 


--------------------------------------------------------------------------------
/test-data/static_loading/build.rs:
--------------------------------------------------------------------------------
 1 | use peacock_fuzz::{
 2 |     grammar::ContextFreeGrammar,
 3 |     backends::C::CGenerator,
 4 | };
 5 | use cc;
 6 | 
 7 | const GRAMMAR_FILE: &str = "php.json";
 8 | const GENERATOR_FILE: &str = "generator.c";
 9 | 
10 | fn main() {
11 |     let cfg = ContextFreeGrammar::builder()
12 |         .gramatron_grammar(GRAMMAR_FILE).unwrap()
13 |         .entrypoint("PROGRAM")
14 |         .build().unwrap();
15 |     
16 |     CGenerator::new().generate(GENERATOR_FILE, &cfg);
17 |     
18 |     cc::Build::new()
19 |         .file(GENERATOR_FILE)
20 |         .flag("-O3")
21 |         .flag("-flto")
22 |         .compile("generator");
23 |     
24 |     println!("cargo:rerun-if-changed={}", GRAMMAR_FILE);
25 | }
26 | 


--------------------------------------------------------------------------------
/test-data/static_loading/src/main.rs:
--------------------------------------------------------------------------------
  1 | use std::path::Path;
  2 | use std::time::Duration;
  3 | use nix::sys::signal::Signal;
  4 | use libafl::prelude::{
  5 |     Error,
  6 |     HitcountsMapObserver, StdMapObserver,
  7 |     TimeObserver, MaxMapFeedback, CalibrationStage, feedback_or,
  8 |     TimeFeedback, CrashFeedback, StdState, CachedOnDiskCorpus,
  9 |     OnDiskCorpus,
 10 |     StdMutationalStage, IndexesLenTimeMinimizerScheduler,
 11 |     StdWeightedScheduler, powersched::PowerSchedule,
 12 |     StdFuzzer, ForkserverExecutor, 
 13 |     Fuzzer,
 14 |      TimeoutFeedback, HasCorpus, Corpus,
 15 |     Launcher, EventConfig,
 16 |     LlmpRestartingEventManager, CanTrack,
 17 | };
 18 | use libafl_bolts::prelude::{
 19 |     UnixShMemProvider, ShMemProvider, ShMem, AsSliceMut,
 20 |     current_nanos, StdRand, tuple_list,
 21 |     Cores,
 22 | };
 23 | use peacock_fuzz::components::{
 24 |     load_generator,
 25 |     PeacockInput,
 26 |     PeacockMutator,
 27 |     PeacockGenerator,
 28 |     seed_generator,
 29 | };
 30 | 
 31 | fn main() -> Result<(), Error> {
 32 |     let args: Vec<String> = std::env::args().skip(1).collect();
 33 |     
 34 |     load_generator();
 35 |     
 36 |     let mut run_client = |state: Option<_>, mut mgr: LlmpRestartingEventManager<_, _, _>, _core_id| {
 37 |         let output_dir = Path::new("output");
 38 |         let queue_dir = output_dir.join("queue");
 39 |         let crashes_dir = output_dir.join("crashes");
 40 |         const MAP_SIZE: usize = 2_621_440;
 41 |         let seed = current_nanos();
 42 |         let powerschedule = PowerSchedule::EXPLORE;
 43 |         let timeout = Duration::from_secs(10);
 44 |         let signal = str::parse::<Signal>("SIGKILL").unwrap();
 45 |         let debug_child = cfg!(debug_assertions);
 46 |         
 47 |         let mut shmem_provider = UnixShMemProvider::new()?;
 48 |         let mut shmem = shmem_provider.new_shmem(MAP_SIZE)?;
 49 |         shmem.write_to_env("__AFL_SHM_ID")?;
 50 |         let shmem_buf = shmem.as_slice_mut();
 51 |         std::env::set_var("AFL_MAP_SIZE", format!("{}", MAP_SIZE));
 52 |         
 53 |         let edges_observer = unsafe { HitcountsMapObserver::new(StdMapObserver::new("shared_mem", shmem_buf)).track_indices() };
 54 |         
 55 |         let time_observer = TimeObserver::new("time");
 56 |         
 57 |         let map_feedback = MaxMapFeedback::new(&edges_observer);
 58 |         
 59 |         let calibration = CalibrationStage::new(&map_feedback);
 60 |         
 61 |         let mut feedback = feedback_or!(
 62 |             map_feedback,
 63 |             TimeFeedback::new(&time_observer)
 64 |         );
 65 |         
 66 |         let mut objective = feedback_or!(
 67 |             CrashFeedback::new(),
 68 |             TimeoutFeedback::new()
 69 |         );
 70 |         
 71 |         seed_generator(seed as usize);
 72 |         
 73 |         let mut state = if let Some(state) = state {
 74 |             state
 75 |         } else {
 76 |             StdState::new(
 77 |                 StdRand::with_seed(seed),
 78 |                 CachedOnDiskCorpus::<PeacockInput>::new(&queue_dir, 128)?,
 79 |                 OnDiskCorpus::new(crashes_dir)?,
 80 |                 &mut feedback,
 81 |                 &mut objective,
 82 |             )?
 83 |         };
 84 | 
 85 |         let mutator = PeacockMutator::new();
 86 |         
 87 |         let mutational = StdMutationalStage::with_max_iterations(mutator, 1);
 88 |         
 89 |         let scheduler = IndexesLenTimeMinimizerScheduler::new(
 90 |             &edges_observer,
 91 |             StdWeightedScheduler::with_schedule(
 92 |                 &mut state,
 93 |                 &edges_observer,
 94 |                 Some(powerschedule),
 95 |             )
 96 |         );
 97 |         
 98 |         let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
 99 |         
100 |         let mut executor = ForkserverExecutor::builder()
101 |             .program(&args[0])
102 |             .debug_child(debug_child)
103 |             .parse_afl_cmdline(args.get(1..).unwrap_or(&[]))
104 |             .coverage_map_size(MAP_SIZE)
105 |             .is_persistent(false)
106 |             .timeout(timeout)
107 |             .kill_signal(signal)
108 |             .build_dynamic_map(edges_observer, tuple_list!(time_observer))?;
109 |         
110 |         state.load_initial_inputs(
111 |             &mut fuzzer,
112 |             &mut executor,
113 |             &mut mgr,
114 |             &[
115 |                 queue_dir,
116 |             ]
117 |         )?;
118 |         
119 |         if state.corpus().count() == 0 {
120 |             let mut generator = PeacockGenerator::new();
121 |             state.generate_initial_inputs_forced(
122 |                 &mut fuzzer,
123 |                 &mut executor,
124 |                 &mut generator,
125 |                 &mut mgr,
126 |                 16,
127 |             )?;
128 |         }
129 |         
130 |         let mut stages = tuple_list!(calibration, mutational);
131 | 
132 |         fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;
133 |         Ok(())
134 |     };
135 |     
136 |     let shmem_provider = UnixShMemProvider::new()?;
137 |     
138 |     let monitor = libafl::prelude::SimplePrintingMonitor::new();
139 |     
140 |     let cores = Cores::from_cmdline("0").expect("Invalid core specification");
141 |     
142 |     match Launcher::builder()
143 |         .shmem_provider(shmem_provider)
144 |         .configuration(EventConfig::AlwaysUnique)
145 |         .monitor(monitor)
146 |         .run_client(&mut run_client)
147 |         .cores(&cores)
148 |         .build()
149 |         .launch()
150 |     {
151 |         Err(Error::ShuttingDown) | Ok(()) => Ok(()),
152 |         e => e,
153 |     }
154 | }
155 | 


--------------------------------------------------------------------------------