├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── autarkie ├── Cargo.toml └── src │ ├── fuzzer │ ├── afl.rs │ ├── context.rs │ ├── feedback │ │ ├── mod.rs │ │ └── register.rs │ ├── libfuzzer.rs │ ├── mod.rs │ ├── mutators │ │ ├── commons.rs │ │ ├── mod.rs │ │ ├── recurse_mutate.rs │ │ ├── splice.rs │ │ └── splice_append.rs │ └── stages │ │ ├── binary_mutator.rs │ │ ├── cmp.rs │ │ ├── generate.rs │ │ ├── libfuzzer_cmp.rs │ │ ├── minimization.rs │ │ ├── mod.rs │ │ ├── mutating.rs │ │ ├── mutational.rs │ │ ├── novelty_minimization.rs │ │ ├── recursive_minimization.rs │ │ └── stats.rs │ ├── graph.rs │ ├── lib.rs │ ├── scale.rs │ ├── serde.rs │ ├── tree.rs │ └── visitor.rs ├── autarkie_derive ├── Cargo.toml └── src │ ├── lib.rs │ ├── trait_bounds.rs │ └── utils.rs ├── autarkie_test ├── Cargo.toml └── src │ └── lib.rs ├── guides ├── rbpf.md └── sql.md ├── libafl_libfuzzer ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── build.rs ├── runtime │ ├── Cargo.toml.template │ ├── build.rs │ └── src │ │ ├── fuzz.rs │ │ ├── harness_wrap.cpp │ │ ├── harness_wrap.h │ │ └── lib.rs └── src │ └── lib.rs └── libafl_libfuzzer_runtime ├── Cargo.lock ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── build.rs ├── build.sh └── src /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | *.pdf 3 | *.aux 4 | *.run.xml 5 | *.toc 6 | *.out 7 | *.lot 8 | *.blg 9 | *.aux 10 | *.lof 11 | kitchensink 12 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | edition = "2021" 3 | 4 | members = [ 5 | "autarkie", 6 | "autarkie_derive", 7 | "autarkie_test", 8 | "libafl_libfuzzer", 9 | "libafl_libfuzzer_runtime", 10 | ] 11 | 12 | [workspace.dependencies] 13 | libafl = { git = "https://github.com/AFLplusplus/LibAFL", rev = 
"dd0bcba103d076a15ee5231007f909980261cffc" } 14 | libafl_bolts = { git = "https://github.com/AFLplusplus/LibAFL", rev = "dd0bcba103d076a15ee5231007f909980261cffc"} 15 | libafl_targets = { git = "https://github.com/AFLplusplus/LibAFL", rev = "dd0bcba103d076a15ee5231007f909980261cffc", default-features = false } 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Autarkie - Instant Grammar Fuzzing Using Rust Macros 2 | Autarkie is a native grammar fuzzer built in Rust. Using procedural macros, it (almost completely) automatically creates a grammar fuzzer. 3 | Autarkie is heavily inspired by [nautilus](https://github.com/nautilus-fuzz/nautilus). 4 | 5 | # Features 6 | - Essentially a drop-in replacement for [arbitrary](https://github.com/rust-fuzz/arbitrary) 7 | - Actual grammar fuzzing - not "structure aware" 8 | - Supports both AFL++ (Forkserver) and cargo-fuzz (Inprocess). 9 | - As long as the grammar is defined using Rust, you can fuzz C/C++ too (using AFL++ forkserver) 10 | - Really easy to use, complexity is abstracted for you. 11 | - Trivial to integrate with other fuzzers. 
12 | 13 | # Niche features 14 | Autarkie has several features that other grammar fuzzers do not have: 15 | - No grammar maintenance; because the grammar is part of the code, if the project is updated, the grammar updates too. 16 | - Grammar is completely exhaustive; the compiler will make sure that every necessary type is included. No more guesswork. 17 | - Corpus is re-usable. If you stop the fuzzer, you can re-start it and it will be able to re-use the corpus! 18 | - Can learn from other fuzzers! (TODO: almost implemented) 19 | - Has native [cmplog](https://www.ndss-symposium.org/ndss-paper/redqueen-fuzzing-with-input-to-state-correspondence/) support (TODO: almost implemented) 20 | 21 | # How to Use 22 | There are two main walkthroughs: 23 | 1. Fuzz AFL++ instrumented C/C++ project 24 | 25 | This example fuzzes ``sqlite3`` by using grammar defined in [datafusion-sqlparser-rs](https://github.com/apache/datafusion-sqlparser-rs). 26 | Personal favourite as it shows Autarkie's magic: you can build a highly sophisticated grammar fuzzer covering a language as complex as SQL in under 5 minutes. 27 | This example also shows how you can render the internal structure into a different format for the harness. 28 | 29 | [Go to the walkthrough](guides/sql.md) 30 | 31 | 32 | 2. Fuzz a Rust project using cargo-fuzz 33 | 34 | This example fuzzes Solana's ``sbpf`` interpreter which is implemented in Rust. Autarkie has ``cargo-fuzz`` integration, so it is trivial to fuzz native Rust projects. 35 | 36 | [Go to the walkthrough](guides/rbpf.md) 37 | 38 | 39 | # Limitations and Caveats 40 | ### Beta 41 | Autarkie is in beta - expect issues, tread carefully. 42 | 43 | ### Static Lifetimes 44 | The type MUST own all its data; it cannot use lifetimes. This is due to the use of ``std::intrinsics::type_id`` which requires types to have a ``'static`` lifetime.
45 | 46 | Note: that you can simply write a wrapper type that owns all the data and converts it to the native type 47 | ### Nightly only 48 | Limited to ``nightly`` due to the usage of the ``#![feature(compiler_intrinsics)]`` feature. 49 | 50 | # Contributions 51 | Contributions, questions and feedback welcome. 52 | Please engage! 53 | -------------------------------------------------------------------------------- /autarkie/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "autarkie" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | # different encodings 8 | bincode = { version = "1.3.3", optional = true} 9 | serde = { version = "1.0.0", features = ["derive"] } 10 | 11 | autarkie_derive = {path = "../autarkie_derive", optional = true} 12 | parity-scale-codec = { version = "3.6.12", features = ["derive"], optional = true } 13 | borsh = { version = "1.5.3", features = ["derive"], optional = true } 14 | 15 | libafl = {workspace = true} 16 | libafl_bolts = {workspace = true} 17 | libafl_targets = {workspace = true} 18 | blake3 = "1.5.5" 19 | colored = "3.0.0" 20 | petgraph = "0.7.1" 21 | clap = { version = "4.5.20", features = ["derive"] } 22 | 23 | regex = "1.11.1" 24 | num-traits = "0.2.19" 25 | serde_json = "1.0.140" 26 | 27 | [features] 28 | derive = ["autarkie_derive"] 29 | bincode = ["dep:bincode"] 30 | scale = ["dep:parity-scale-codec"] 31 | borsh = ["dep:borsh"] 32 | introspection = ["libafl/introspection"] 33 | libfuzzer = [] 34 | afl = [] 35 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/afl.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! 
impl_converter { 3 | ($t:ty) => { 4 | #[derive(Clone)] 5 | pub struct FuzzDataTargetBytesConverter; 6 | 7 | impl FuzzDataTargetBytesConverter { 8 | pub fn new() -> Self { 9 | Self {} 10 | } 11 | } 12 | 13 | impl autarkie::TargetBytesConverter<$t> for FuzzDataTargetBytesConverter { 14 | fn to_target_bytes<'a>(&mut self, input: &'a $t) -> autarkie::OwnedSlice<'a, u8> { 15 | let bytes = autarkie::serialize(input); 16 | let bytes = if bytes.len() == 0 { 17 | vec![0, 0, 0, 0] 18 | } else { 19 | bytes 20 | }; 21 | autarkie::OwnedSlice::from(bytes) 22 | } 23 | } 24 | }; 25 | // We may want to render to bytes manually (eg: to_string) so we offer the possibility of a closure too. 26 | ($t:ty, $closure:expr) => { 27 | #[derive(Clone)] 28 | pub struct FuzzDataTargetBytesConverter; 29 | 30 | impl FuzzDataTargetBytesConverter { 31 | pub fn new() -> Self { 32 | Self 33 | } 34 | } 35 | 36 | impl autarkie::TargetBytesConverter<$t> for FuzzDataTargetBytesConverter { 37 | fn to_target_bytes<'a>(&mut self, input: &'a $t) -> autarkie::OwnedSlice<'a, u8> { 38 | let bytes = $closure(input); 39 | let bytes = if bytes.len() == 0 { 40 | vec![0, 0, 0, 0] 41 | } else { 42 | bytes 43 | }; 44 | autarkie::OwnedSlice::from(bytes) 45 | } 46 | } 47 | }; 48 | } 49 | 50 | #[macro_export] 51 | macro_rules! impl_input { 52 | ($t:ty) => { 53 | impl autarkie::Input for $t { 54 | fn to_file

(&self, path: P) -> Result<(), autarkie::LibAFLError> 55 | where 56 | P: AsRef, 57 | { 58 | let bytes = autarkie::serialize(self); 59 | std::fs::write(path, bytes)?; 60 | Ok(()) 61 | } 62 | 63 | // TODO: don't serialize here 64 | fn generate_name(&self, id: Option) -> String { 65 | let bytes = autarkie::serialize(self); 66 | std::format!("{}", autarkie::hash(bytes.as_slice())) 67 | } 68 | 69 | fn from_file

(path: P) -> Result 70 | where 71 | P: AsRef, 72 | { 73 | let data = std::fs::read(path)?; 74 | let res = autarkie::deserialize::<$t>(&mut data.as_slice()); 75 | Ok(res) 76 | } 77 | } 78 | }; 79 | } 80 | 81 | #[macro_export] 82 | macro_rules! fuzz_afl_inner { 83 | ($t: ty) => { 84 | fn main() { 85 | let harness: Option autarkie::LibAFLExitKind> = None; 86 | $crate::fuzzer::run_fuzzer(FuzzDataTargetBytesConverter::new(), harness); 87 | } 88 | }; 89 | } 90 | 91 | #[macro_export] 92 | macro_rules! fuzz_afl { 93 | ($t:ty) => { 94 | $crate::impl_input!($t); 95 | $crate::impl_converter!($t); 96 | $crate::fuzz_afl_inner!($t); 97 | $crate::impl_hash!($t); 98 | }; 99 | ($t:ty, $closure:expr) => { 100 | $crate::impl_input!($t); 101 | $crate::impl_converter!($t, $closure); 102 | $crate::fuzz_afl_inner!($t); 103 | $crate::impl_hash!($t); 104 | }; 105 | } 106 | 107 | #[macro_export] 108 | macro_rules! impl_hash { 109 | ($t:ty) => { 110 | impl std::hash::Hash for $t { 111 | fn hash(&self, state: &mut H) { 112 | autarkie::serialize(&self).hash(state) 113 | } 114 | } 115 | }; 116 | } 117 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/context.rs: -------------------------------------------------------------------------------- 1 | use crate::{FieldLocation, Id, Node, Visitor}; 2 | use libafl::{corpus::CorpusId, SerdeAny}; 3 | use libafl_bolts::current_time; 4 | use serde::{Deserialize, Serialize}; 5 | use std::{ 6 | collections::{HashMap, HashSet, VecDeque}, 7 | io::ErrorKind, 8 | path::{Path, PathBuf}, 9 | time::Duration, 10 | u128, 11 | }; 12 | 13 | #[derive(Debug, Clone, Serialize, Deserialize)] 14 | pub enum InputCause { 15 | Default, 16 | Generated, 17 | } 18 | #[derive(Debug, Clone, SerdeAny, Serialize, Deserialize)] 19 | pub struct Context { 20 | mutations: HashSet, 21 | out_dir: PathBuf, 22 | type_input_map: HashMap>, 23 | input_cause: InputCause, 24 | } 25 | 26 | // TODO: chunk & cmp reloading 27 | impl Context { 28 | 
pub fn register_input(&mut self, input: &I, visitor: &mut Visitor) 29 | where 30 | I: Node, 31 | { 32 | let generated_fields = match &self.input_cause { 33 | InputCause::Default => visitor.serialized(), 34 | InputCause::Generated => { 35 | input.__autarkie_serialized(visitor); 36 | visitor.serialized() 37 | } 38 | }; 39 | for field in generated_fields { 40 | let (data, ty) = field; 41 | // todo: optimize this 42 | let path = self.out_dir.join("chunks").join(ty.to_string()); 43 | match std::fs::create_dir(&path) { 44 | Ok(_) => {} 45 | Err(e) => { 46 | if !matches!(e.kind(), ErrorKind::AlreadyExists) { 47 | panic!("{:?}", e) 48 | } 49 | } 50 | }; 51 | let hash = blake3::hash(&data); 52 | let path = path.join(hash.to_string()); 53 | if !std::fs::exists(&path).unwrap() { 54 | std::fs::write(&path, data).unwrap(); 55 | if let Some(e) = self.type_input_map.get_mut(&ty) { 56 | e.push(path); 57 | } else { 58 | self.type_input_map.insert(ty, vec![path]); 59 | } 60 | } 61 | } 62 | self.input_cause = InputCause::Default; 63 | } 64 | 65 | pub fn generated_input(&mut self) { 66 | self.input_cause = InputCause::Generated; 67 | } 68 | 69 | pub fn default_input(&mut self) { 70 | self.input_cause = InputCause::Default; 71 | } 72 | 73 | pub fn add_existing_chunk(&mut self, path: PathBuf) { 74 | let ty = path 75 | .parent() 76 | .unwrap() 77 | .file_name() 78 | .unwrap() 79 | .to_str() 80 | .unwrap() 81 | .parse::() 82 | .expect("corrupt chunk ID!"); 83 | if let Some(e) = self.type_input_map.get_mut(&ty) { 84 | e.push(path); 85 | } else { 86 | self.type_input_map.insert(ty, vec![path]); 87 | } 88 | } 89 | 90 | pub fn get_inputs_for_type(&self, t: &Id) -> Option<&Vec> { 91 | self.type_input_map.get(t) 92 | } 93 | } 94 | 95 | impl Context { 96 | pub fn new(out_dir: PathBuf) -> Self { 97 | let type_input_map = HashMap::default(); 98 | Self { 99 | mutations: HashSet::new(), 100 | input_cause: InputCause::Default, 101 | out_dir, 102 | type_input_map, 103 | } 104 | } 105 | 106 | pub fn 
add_mutation(&mut self, m: MutationMetadata) { 107 | self.mutations.insert(m); 108 | } 109 | 110 | pub fn clear_mutations(&mut self) -> HashSet { 111 | let cloned = self.mutations.clone(); 112 | self.mutations = HashSet::new(); 113 | cloned 114 | } 115 | } 116 | 117 | /// Track why a testcase was added to the corpus. 118 | #[derive( 119 | Debug, 120 | Clone, 121 | serde::Serialize, 122 | serde::Deserialize, 123 | SerdeAny, 124 | PartialEq, 125 | Eq, 126 | Hash, 127 | PartialOrd, 128 | Ord, 129 | )] 130 | pub enum MutationMetadata { 131 | /// Splice Full Iterable 132 | SpliceFull, 133 | /// Splice Single Node (never an iterable) 134 | SpliceSingle, 135 | /// Splice Partial Iterable 136 | SpliceSubSplice, 137 | /// Splice Append 138 | SpliceAppend, 139 | /// Splice Single Node (never an iterable) 140 | RecurseMutateSingle, 141 | /// Random Generate Partial Iterable 142 | RecurseMutateSubsplice, 143 | /// RecursiveMinimization 144 | RecursiveMinimization, 145 | /// Iterable Minimization 146 | IterableMinimization, 147 | /// Novelty Minimization 148 | NoveltyMinimization, 149 | /// Afl 150 | Afl, 151 | /// Generate 152 | Generate, 153 | /// Cmplog 154 | Cmplog, 155 | /// I2S 156 | I2S, 157 | } 158 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/feedback/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod register; 2 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/feedback/register.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | borrow::Cow, 3 | cell::RefCell, 4 | collections::{BTreeMap, HashSet}, 5 | marker::PhantomData, 6 | rc::Rc, 7 | }; 8 | 9 | use libafl::{ 10 | corpus::{Corpus, Testcase}, 11 | executors::ExitKind, 12 | feedbacks::{Feedback, StateInitializer}, 13 | state::{HasCorpus, HasCurrentTestcase}, 14 | Error, HasMetadata, 15 | }; 16 | 17 | use 
crate::{ 18 | fuzzer::{context::MutationMetadata, stages::stats::AutarkieStats}, 19 | Node, Visitor, 20 | }; 21 | use libafl_bolts::Named; 22 | 23 | use crate::fuzzer::Context; 24 | 25 | pub struct RegisterFeedback { 26 | visitor: Rc>, 27 | phantom: PhantomData, 28 | } 29 | 30 | impl RegisterFeedback { 31 | pub fn new(visitor: Rc>) -> Self { 32 | Self { 33 | visitor, 34 | phantom: PhantomData, 35 | } 36 | } 37 | } 38 | 39 | impl Feedback for RegisterFeedback 40 | where 41 | I: Node, 42 | S: HasCurrentTestcase + HasCorpus + HasMetadata, 43 | { 44 | fn is_interesting( 45 | &mut self, 46 | _state: &mut S, 47 | _manager: &mut EM, 48 | _input: &I, 49 | _observers: &OT, 50 | _exit_kind: &ExitKind, 51 | ) -> Result { 52 | Ok(false) 53 | } 54 | 55 | fn append_metadata( 56 | &mut self, 57 | state: &mut S, 58 | _manager: &mut EM, 59 | _observers: &OT, 60 | testcase: &mut Testcase, 61 | ) -> Result<(), Error> { 62 | let metadata = state 63 | .metadata_mut::() 64 | .expect("we must have context!"); 65 | metadata.register_input( 66 | testcase.input().as_ref().expect("we must have input!"), 67 | &mut self.visitor.borrow_mut(), 68 | ); 69 | let done_mutations = metadata.clear_mutations(); 70 | let metadata = state 71 | .metadata_mut::() 72 | .unwrap() 73 | .add_new_input_mutations(done_mutations); 74 | Ok(()) 75 | } 76 | } 77 | 78 | impl StateInitializer for RegisterFeedback {} 79 | 80 | impl Named for RegisterFeedback { 81 | fn name(&self) -> &std::borrow::Cow<'static, str> { 82 | &Cow::Borrowed("RegisterFeedback") 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/libfuzzer.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! 
fuzz_libfuzzer { 3 | ($t:ty) => { 4 | $crate::impl_input!($t); 5 | $crate::impl_converter!($t); 6 | $crate::impl_hash!($t); 7 | }; 8 | ($t:ty, $closure:expr) => { 9 | $crate::impl_input!($t); 10 | $crate::impl_converter!($t, $closure); 11 | $crate::impl_hash!($t); 12 | }; 13 | } 14 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/mod.rs: -------------------------------------------------------------------------------- 1 | #![allow(warnings)] 2 | #![feature(core_intrinsics)] 3 | 4 | pub mod afl; 5 | mod context; 6 | mod feedback; 7 | pub mod libfuzzer; 8 | mod mutators; 9 | mod stages; 10 | 11 | use crate::{DepthInfo, Node, Visitor}; 12 | use clap::Parser; 13 | use context::{Context, MutationMetadata}; 14 | use feedback::register::RegisterFeedback; 15 | #[cfg(feature = "libfuzzer")] 16 | use libafl::mutators::I2SRandReplace; 17 | use libafl::{ 18 | corpus::{CachedOnDiskCorpus, Corpus, OnDiskCorpus}, 19 | events::{ClientDescription, EventConfig, Launcher, SimpleEventManager}, 20 | executors::{ 21 | ExitKind, ForkserverExecutor, InProcessExecutor, InProcessForkExecutor, ShadowExecutor, 22 | }, 23 | feedback_or, feedback_or_fast, 24 | feedbacks::{ 25 | CrashFeedback, MaxMapFeedback, MaxMapOneOrFilledFeedback, MaxMapPow2Feedback, TimeFeedback, 26 | TimeoutFeedback, 27 | }, 28 | inputs::{BytesInput, HasTargetBytes, Input, TargetBytesConverter}, 29 | monitors::{MultiMonitor, SimpleMonitor}, 30 | mutators::{ 31 | havoc_mutations, havoc_mutations_no_crossover, tokens_mutations, HavocScheduledMutator, 32 | }, 33 | observers::{CanTrack, HitcountsMapObserver, StdMapObserver, TimeObserver}, 34 | schedulers::{powersched::PowerSchedule, QueueScheduler, StdWeightedScheduler}, 35 | stages::{IfStage, StdMutationalStage, StdPowerMutationalStage}, 36 | state::{HasCorpus, HasCurrentTestcase, StdState}, 37 | BloomInputFilter, Evaluator, Fuzzer, HasMetadata, StdFuzzer, 38 | }; 39 | pub use libafl_bolts::current_nanos; 40 | use 
libafl_bolts::tuples::Merge; 41 | use libafl_bolts::AsSlice; 42 | use libafl_bolts::TargetArgs; 43 | use libafl_bolts::{ 44 | core_affinity::{CoreId, Cores}, 45 | fs::get_unique_std_input_file, 46 | ownedref::OwnedRefMut, 47 | rands::{RomuDuoJrRand, StdRand}, 48 | shmem::{ShMem, ShMemProvider, StdShMemProvider, UnixShMemProvider}, 49 | tuples::{tuple_list, Handled}, 50 | AsSliceMut, Error, 51 | }; 52 | #[cfg(feature = "libfuzzer")] 53 | use libafl::stages::ShadowTracingStage; 54 | #[cfg(feature = "libfuzzer")] 55 | use libafl_targets::{extra_counters, CmpLogObserver}; 56 | #[cfg(feature = "afl")] 57 | use libafl_targets::{AFLppCmpLogMap, AFLppCmpLogObserver}; 58 | use mutators::{ 59 | recurse_mutate::{AutarkieRecurseMutator, RECURSE_STACK}, 60 | splice::{AutarkieSpliceMutator, SPLICE_STACK}, 61 | splice_append::{AutarkieSpliceAppendMutator, SPLICE_APPEND_STACK}, 62 | }; 63 | use regex::Regex; 64 | use stages::{ 65 | binary_mutator::AutarkieBinaryMutatorStage, 66 | generate::GenerateStage, 67 | minimization::MinimizationStage, 68 | mutating::MutatingStageWrapper, 69 | mutational::AutarkieMutationalStage, 70 | novelty_minimization::NoveltyMinimizationStage, 71 | recursive_minimization::RecursiveMinimizationStage, 72 | stats::{AutarkieStats, StatsStage}, 73 | }; 74 | 75 | #[cfg(feature = "afl")] 76 | use stages::cmp::CmpLogStage; 77 | use std::io::{stderr, stdout, Write}; 78 | use std::os::fd::AsRawFd; 79 | use std::str::FromStr; 80 | use std::{cell::RefCell, io::ErrorKind, path::PathBuf, process::Command, rc::Rc, time::Duration}; 81 | use std::{env::args, ffi::c_int}; 82 | 83 | use stages::generate; 84 | 85 | #[cfg(feature = "afl")] 86 | const SHMEM_ENV_VAR: &str = "__AFL_SHM_ID"; 87 | 88 | #[cfg(any(feature = "libfuzzer", feature = "afl"))] 89 | pub fn run_fuzzer(bytes_converter: TC, harness: Option) 90 | where 91 | I: Node + Input, 92 | TC: TargetBytesConverter + Clone, 93 | F: Fn(&I) -> ExitKind, 94 | { 95 | #[cfg(feature = "afl")] 96 | let monitor = 
MultiMonitor::new(|s| println!("{s}")); 97 | // TODO: -close_fd_mask from libfuzzer 98 | #[cfg(feature = "libfuzzer")] 99 | let monitor = MultiMonitor::new(create_monitor_closure()); 100 | let shmem_provider = StdShMemProvider::new().expect("Failed to init shared memory"); 101 | #[cfg(feature = "afl")] 102 | let opt = Opt::parse(); 103 | #[cfg(feature = "libfuzzer")] 104 | let opt = { 105 | let mut opt = args().collect::>(); 106 | opt.remove(1); 107 | opt.remove(opt.len() - 1); 108 | Opt::parse_from(opt) 109 | }; 110 | 111 | let run_client = |mut state: Option<_>, 112 | mut mgr: _, 113 | core: ClientDescription| 114 | -> Result<(), libafl_bolts::Error> { 115 | if !opt.output_dir.exists() { 116 | std::fs::create_dir(&opt.output_dir).unwrap(); 117 | } 118 | #[cfg(feature = "afl")] 119 | let map_size = { 120 | let map_size = Command::new(opt.executable.clone()) 121 | .env("AFL_DUMP_MAP_SIZE", "1") 122 | .output() 123 | .expect("target gave no output"); 124 | let map_size = String::from_utf8(map_size.stdout) 125 | .expect("target returned illegal mapsize") 126 | .replace("\n", ""); 127 | map_size.parse::().expect("illegal mapsize output") + opt.map_bias 128 | }; 129 | 130 | let fuzzer_dir = opt.output_dir.join(format!("{}", core.core_id().0)); 131 | match std::fs::create_dir(&fuzzer_dir) { 132 | Ok(_) => {} 133 | Err(e) => { 134 | if !matches!(e.kind(), ErrorKind::AlreadyExists) { 135 | panic!("{:?}", e) 136 | } 137 | } 138 | }; 139 | #[cfg(feature = "libfuzzer")] 140 | let cmplog_observer = CmpLogObserver::new("cmplog", true); 141 | // Create the shared memory map for comms with the forkserver 142 | #[cfg(feature = "afl")] 143 | let mut shmem_provider = UnixShMemProvider::new().unwrap(); 144 | #[cfg(feature = "afl")] 145 | let mut shmem = shmem_provider.new_shmem(map_size).unwrap(); 146 | #[cfg(feature = "afl")] 147 | unsafe { 148 | shmem.write_to_env(SHMEM_ENV_VAR).unwrap(); 149 | } 150 | #[cfg(feature = "afl")] 151 | let shmem_buf = shmem.as_slice_mut(); 152 | 153 | 
// Create an observation channel to keep track of edges hit. 154 | #[cfg(feature = "afl")] 155 | let edges_observer = unsafe { 156 | HitcountsMapObserver::new(StdMapObserver::new("edges", shmem_buf)) 157 | .track_indices() 158 | .track_novelties() 159 | }; 160 | #[cfg(feature = "libfuzzer")] 161 | let edges = unsafe { extra_counters() }; 162 | #[cfg(feature = "libfuzzer")] 163 | let edges_observer = 164 | StdMapObserver::from_mut_slice("edges", edges.into_iter().next().unwrap()) 165 | .track_indices() 166 | .track_novelties(); 167 | 168 | let seed = opt.rng_seed.unwrap_or(current_nanos()); 169 | 170 | // Initialize Autarkie's visitor 171 | let mut visitor = Visitor::new( 172 | seed, 173 | DepthInfo { 174 | generate: opt.generate_depth, 175 | iterate: opt.iterate_depth, 176 | }, 177 | ); 178 | I::__autarkie_register(&mut visitor, None, 0); 179 | visitor.calculate_recursion(); 180 | let visitor = Rc::new(RefCell::new(visitor)); 181 | 182 | // Create a MapFeedback for coverage guided fuzzin' 183 | let map_feedback = MaxMapFeedback::new(&edges_observer); 184 | 185 | let time_observer = TimeObserver::new("time"); 186 | let cb = |_fuzzer: &mut _, 187 | _executor: &mut _, 188 | _state: &mut StdState, I, StdRand, OnDiskCorpus>, 189 | _event_manager: &mut _| 190 | -> Result { Ok(true) }; 191 | let minimization_stage = IfStage::new( 192 | cb, 193 | tuple_list!( 194 | MinimizationStage::new(Rc::clone(&visitor), &map_feedback), 195 | RecursiveMinimizationStage::new(Rc::clone(&visitor), &map_feedback) 196 | ), 197 | ); 198 | let mut feedback = feedback_or!( 199 | map_feedback, 200 | TimeFeedback::new(&time_observer), 201 | RegisterFeedback::new(Rc::clone(&visitor)), 202 | ); 203 | 204 | let mut objective = feedback_or_fast!(CrashFeedback::new(), TimeoutFeedback::new(),); 205 | 206 | // Initialize our State if necessary 207 | let mut state = state.unwrap_or( 208 | StdState::new( 209 | RomuDuoJrRand::with_seed(seed), 210 | // TODO: configure testcache size 211 | 
CachedOnDiskCorpus::::new(fuzzer_dir.join("queue"), 4096).unwrap(), 212 | OnDiskCorpus::::new(fuzzer_dir.join("crash")).unwrap(), 213 | &mut feedback, 214 | &mut objective, 215 | ) 216 | .unwrap(), 217 | ); 218 | 219 | if !fuzzer_dir.join("chunks").exists() { 220 | std::fs::create_dir(fuzzer_dir.join("chunks")).unwrap(); 221 | } 222 | if !fuzzer_dir.join("cmps").exists() { 223 | std::fs::create_dir(fuzzer_dir.join("cmps")).unwrap(); 224 | } 225 | 226 | let mut context = Context::new(fuzzer_dir.clone()); 227 | 228 | let scheduler = StdWeightedScheduler::with_schedule( 229 | &mut state, 230 | &edges_observer, 231 | Some(PowerSchedule::explore()), 232 | ); 233 | let observers = tuple_list!(edges_observer, time_observer); 234 | let scheduler = scheduler.cycling_scheduler(); 235 | // Create our Fuzzer 236 | let mut fuzzer = 237 | StdFuzzer::with_bloom_input_filter(scheduler, feedback, objective, 10_000, 0.0001); 238 | 239 | // Create our Executor 240 | #[cfg(feature = "afl")] 241 | let mut executor = ForkserverExecutor::builder() 242 | .program(opt.executable.clone()) 243 | .coverage_map_size(map_size) 244 | .debug_child(opt.debug_child) 245 | .is_persistent(true) 246 | .is_deferred_frksrv(true) 247 | .timeout(Duration::from_millis(opt.hang_timeout)) 248 | .shmem_provider(&mut shmem_provider) 249 | .target_bytes_converter(bytes_converter.clone()) 250 | .build(observers) 251 | .unwrap(); 252 | #[cfg(feature = "libfuzzer")] 253 | let mut harness = harness.unwrap(); 254 | #[cfg(feature = "libfuzzer")] 255 | let mut executor = InProcessExecutor::with_timeout( 256 | &mut harness, 257 | observers, 258 | &mut fuzzer, 259 | &mut state, 260 | &mut mgr, 261 | Duration::from_millis(opt.hang_timeout), 262 | )?; 263 | #[cfg(feature = "libfuzzer")] 264 | let mut executor = ShadowExecutor::new(executor, tuple_list!(cmplog_observer)); 265 | // Setup a tracing stage in which we log comparisons 266 | #[cfg(feature = "libfuzzer")] 267 | let tracing = ShadowTracingStage::new(); 268 | 269 | 
if let Some(dict_file) = &opt.dict_file { 270 | let file = std::fs::read_to_string(dict_file).expect("cannot read dict file"); 271 | for entry in file.split("\n") { 272 | visitor.borrow_mut().register_string(entry.to_string()); 273 | } 274 | } 275 | 276 | // Read strings from the target if configured 277 | #[cfg(feature = "afl")] 278 | if opt.get_strings { 279 | let string_regex = Regex::new("^[a-zA-Z0-9_]+$").unwrap(); 280 | let strings = Command::new("strings") 281 | .arg(opt.executable.clone()) 282 | .output() 283 | .expect("strings gave no output!"); 284 | let strings = String::from_utf8_lossy(&strings.stdout); 285 | for string in strings.lines().into_iter() { 286 | if string_regex.is_match(string) { 287 | visitor.borrow_mut().register_string(string.to_string()); 288 | } 289 | } 290 | } 291 | 292 | // Reload corpus chunks if they exist 293 | for chunk_dir in std::fs::read_dir(fuzzer_dir.join("chunks"))? { 294 | let dir = chunk_dir?.path(); 295 | for chunk in std::fs::read_dir(dir)? { 296 | let path = chunk?.path(); 297 | context.add_existing_chunk(path); 298 | } 299 | } 300 | state.add_metadata(context); 301 | state.add_metadata(AutarkieStats::default()); 302 | 303 | // Reload corpus 304 | if state.must_load_initial_inputs() { 305 | state.load_initial_inputs( 306 | &mut fuzzer, 307 | &mut executor, 308 | &mut mgr, 309 | &[fuzzer_dir.join("queue").clone()], 310 | )?; 311 | for _ in 0..opt.initial_generated_inputs { 312 | let mut metadata = state.metadata_mut::().expect("fxeZamEw____"); 313 | metadata.generated_input(); 314 | let mut generated = crate::fuzzer::generate::generate(&mut visitor.borrow_mut()); 315 | while generated.is_none() { 316 | generated = crate::fuzzer::generate::generate(&mut visitor.borrow_mut()); 317 | } 318 | fuzzer 319 | .evaluate_input( 320 | &mut state, 321 | &mut executor, 322 | &mut mgr, 323 | generated.as_ref().expect("dVoSuGRU____"), 324 | ) 325 | .unwrap(); 326 | } 327 | let mut metadata = 
state.metadata_mut::().expect("fxeZamEw____"); 328 | metadata.default_input(); 329 | println!("We imported {} inputs from disk.", state.corpus().count()); 330 | } 331 | 332 | let splice_mutator = AutarkieSpliceMutator::new(Rc::clone(&visitor), opt.max_subslice_size); 333 | let recursion_mutator = 334 | AutarkieRecurseMutator::new(Rc::clone(&visitor), opt.max_subslice_size); 335 | let append_mutator = AutarkieSpliceAppendMutator::new(Rc::clone(&visitor)); 336 | /* #[cfg(feature = "afl")] 337 | let cmplog = { 338 | // The CmpLog map shared between the CmpLog observer and CmpLog executor 339 | let mut cmplog_shmem = shmem_provider.uninit_on_shmem::().unwrap(); 340 | 341 | // Let the Forkserver know the CmpLog shared memory map ID. 342 | unsafe { 343 | cmplog_shmem.write_to_env("__AFL_CMPLOG_SHM_ID").unwrap(); 344 | } 345 | let cmpmap = unsafe { OwnedRefMut::from_shmem(&mut cmplog_shmem) }; 346 | // Create the CmpLog observer. 347 | let cmplog_observer = AFLppCmpLogObserver::new("cmplog", cmpmap, true); 348 | let cmplog_ref = cmplog_observer.handle(); 349 | // Create the CmpLog executor. 
350 | // Cmplog has 25% execution overhead so we give it double the timeout 351 | let cmplog_executor = ForkserverExecutor::builder() 352 | .program(opt.executable.clone()) 353 | .coverage_map_size(map_size) 354 | .debug_child(opt.debug_child) 355 | .is_persistent(true) 356 | .is_deferred_frksrv(true) 357 | .timeout(Duration::from_millis(opt.hang_timeout * 2)) 358 | .shmem_provider(&mut shmem_provider) 359 | .target_bytes_converter(bytes_converter.clone()) 360 | .build(tuple_list!(cmplog_observer)) 361 | .unwrap(); 362 | 363 | let cb = |_fuzzer: &mut _, 364 | _executor: &mut _, 365 | state: &mut StdState, I, StdRand, OnDiskCorpus>, 366 | _event_manager: &mut _| 367 | -> Result { 368 | if !opt.cmplog || core.core_id() != *opt.cores.ids.first().unwrap() { 369 | return Ok(false); 370 | } 371 | let testcase = state.current_testcase()?; 372 | if testcase.scheduled_count() > 1 { 373 | return Ok(false); 374 | } 375 | Ok(true) 376 | }; 377 | IfStage::new( 378 | cb, 379 | tuple_list!(stages::cmp::CmpLogStage::new( 380 | Rc::clone(&visitor), 381 | cmplog_executor, 382 | cmplog_ref 383 | )), 384 | ) 385 | }; */ 386 | let generate_stage = GenerateStage::new(Rc::clone(&visitor)); 387 | let afl_stage = AutarkieBinaryMutatorStage::new( 388 | havoc_mutations_no_crossover(), 389 | 7, 390 | MutationMetadata::Afl, 391 | ); 392 | #[cfg(feature = "libfuzzer")] 393 | let i2s = AutarkieBinaryMutatorStage::new( 394 | tuple_list!(I2SRandReplace::new()), 395 | 7, 396 | MutationMetadata::I2S, 397 | ); 398 | // TODO: I2S for AFL 399 | #[cfg(feature = "afl")] 400 | let mut stages = tuple_list!( 401 | minimization_stage, 402 | MutatingStageWrapper::new( 403 | AutarkieMutationalStage::new(append_mutator, SPLICE_APPEND_STACK), 404 | Rc::clone(&visitor) 405 | ), 406 | MutatingStageWrapper::new( 407 | AutarkieMutationalStage::new(recursion_mutator, RECURSE_STACK), 408 | Rc::clone(&visitor) 409 | ), 410 | MutatingStageWrapper::new( 411 | AutarkieMutationalStage::new(splice_mutator, SPLICE_STACK), 
412 | Rc::clone(&visitor) 413 | ), 414 | MutatingStageWrapper::new(afl_stage, Rc::clone(&visitor)), 415 | MutatingStageWrapper::new(generate_stage, Rc::clone(&visitor)), 416 | StatsStage::new(fuzzer_dir), 417 | ); 418 | #[cfg(feature = "libfuzzer")] 419 | let mut stages = tuple_list!( 420 | minimization_stage, 421 | tracing, 422 | MutatingStageWrapper::new(i2s, Rc::clone(&visitor)), 423 | MutatingStageWrapper::new( 424 | AutarkieMutationalStage::new(append_mutator, SPLICE_APPEND_STACK), 425 | Rc::clone(&visitor) 426 | ), 427 | MutatingStageWrapper::new( 428 | AutarkieMutationalStage::new(recursion_mutator, RECURSE_STACK), 429 | Rc::clone(&visitor) 430 | ), 431 | MutatingStageWrapper::new( 432 | AutarkieMutationalStage::new(splice_mutator, SPLICE_STACK), 433 | Rc::clone(&visitor) 434 | ), 435 | MutatingStageWrapper::new(generate_stage, Rc::clone(&visitor)), 436 | MutatingStageWrapper::new(afl_stage, Rc::clone(&visitor)), 437 | StatsStage::new(fuzzer_dir), 438 | ); 439 | fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?; 440 | Err(Error::shutting_down()) 441 | }; 442 | 443 | Launcher::builder() 444 | .cores(&opt.cores) 445 | .monitor(monitor) 446 | .run_client(run_client) 447 | .broker_port(4444) 448 | .shmem_provider(shmem_provider) 449 | .configuration(EventConfig::from_name("default")) 450 | .build() 451 | .launch(); 452 | } 453 | 454 | #[derive(Debug, Parser, Clone)] 455 | #[command( 456 | name = "autarkie", 457 | about = "autarkie", 458 | author = "aarnav " 459 | )] 460 | struct Opt { 461 | /// What we wanna fuzz 462 | #[cfg(feature = "afl")] 463 | executable: PathBuf, 464 | /// Fuzzer output dir; will also load inputs from there 465 | #[arg(short = 'o')] 466 | output_dir: PathBuf, 467 | 468 | /// Timeout in milliseconds 469 | #[arg(short = 't', default_value_t = 1000)] 470 | hang_timeout: u64, 471 | 472 | /// seed for rng 473 | #[arg(short = 's')] 474 | rng_seed: Option, 475 | 476 | /// debug the child 477 | #[arg(short = 'd')] 478 | 
debug_child: bool, 479 | 480 | #[cfg(feature = "afl")] 481 | /// AFL_DUMP_MAP_SIZE + x where x = map bias 482 | #[arg(short = 'm')] 483 | map_bias: usize, 484 | 485 | /// Amount of initial inputs to generate 486 | #[arg(short = 'i', default_value_t = 100)] 487 | initial_generated_inputs: usize, 488 | 489 | /// Include a generate input stage (advanced) 490 | #[arg(short = 'g')] 491 | generate: bool, 492 | 493 | #[arg(short = 'c', value_parser=Cores::from_cmdline)] 494 | cores: Cores, 495 | 496 | /// Max iterate depth when generating iterable nodes (advanced) 497 | #[arg(short = 'I', default_value_t = 5)] 498 | iterate_depth: usize, 499 | 500 | /// Max subslice length when doing partial iterable splicing (advanced) 501 | #[arg(short = 'z', default_value_t = 15)] 502 | max_subslice_size: usize, 503 | 504 | /// Max generate depth when generating recursive nodes (advanced) 505 | #[arg(short = 'G', default_value_t = 2)] 506 | generate_depth: usize, 507 | 508 | /// AFL++ LLVM_DICT2FILE 509 | #[arg(short = 'x')] 510 | dict_file: Option, 511 | 512 | /// Use AFL++'s cmplog feature 513 | #[arg(short = 'e')] 514 | cmplog: bool, 515 | 516 | /// capture strings from the binary (only useful if you have a lot of String nodes) 517 | #[arg(short = 'S')] 518 | get_strings: bool, 519 | } 520 | 521 | #[macro_export] 522 | macro_rules! 
debug_grammar { 523 | ($t:ty) => { 524 | use $crate::{Node, Visitor}; 525 | let mut visitor = Visitor::new( 526 | $crate::fuzzer::current_nanos(), 527 | $crate::DepthInfo { 528 | generate: 105, 529 | iterate: 500, 530 | }, 531 | ); 532 | <$t>::__autarkie_register(&mut visitor, None, 0); 533 | visitor.calculate_recursion(); 534 | let gen_depth = visitor.generate_depth(); 535 | loop { 536 | /* println!( 537 | "{:?}", 538 | <$t>::__autarkie_generate(&mut visitor, &mut gen_depth.clone(), &mut 0) 539 | ); 540 | println!("--------------------------------"); */ 541 | } 542 | }; 543 | } 544 | #[cfg(feature = "libfuzzer")] 545 | fn create_monitor_closure() -> impl Fn(&str) + Clone { 546 | #[cfg(unix)] 547 | let stderr_fd = std::os::fd::RawFd::from_str(&std::env::var(STDERR_FD_VAR).unwrap()).unwrap(); // set in main 548 | move |s| { 549 | #[cfg(unix)] 550 | { 551 | use std::os::fd::FromRawFd; 552 | 553 | // unfortunate requirement to meet Clone... thankfully, this does not 554 | // generate effectively any overhead (no allocations, calls get merged) 555 | let mut stderr = unsafe { std::fs::File::from_raw_fd(stderr_fd) }; 556 | writeln!(stderr, "{s}").expect("Could not write to stderr???"); 557 | std::mem::forget(stderr); // do not close the descriptor! 
// (tail of create_monitor_closure, continued from above)
        }
        #[cfg(not(unix))]
        eprintln!("{s}");
    }
}
#[cfg(feature = "libfuzzer")]
/// Communicate the stderr duplicated fd to subprocesses
pub const STDERR_FD_VAR: &str = "_LIBAFL_LIBFUZZER_STDERR_FD";
-------------------------------------------------------------------------------- /autarkie/src/fuzzer/mutators/commons.rs: --------------------------------------------------------------------------------
use std::ops::Range;

use crate::Visitor;

/// Pick a random sub-range of a sequence of length `len`, capped at `max` elements.
///
/// `visitor.random_range` is inclusive on both ends (its other call sites use
/// `random_range(0, xs.len() - 1)` as an index), so `end` may be exactly `len`,
/// which is a valid exclusive upper bound for `start..end`.
pub fn calculate_subslice_bounds(len: usize, max: usize, visitor: &mut Visitor) -> Range<usize> {
    // BUG FIX: `len - 1` underflowed for an empty sequence. Current callers
    // only pass len > 3, but keep the function total.
    if len == 0 {
        return 0..0;
    }
    // minus 1 because we zero index and len is always +1
    let start = visitor.random_range(0, len - 1);
    let mut end = visitor.random_range(start, len);
    // Cap the subslice length at `max` elements.
    if end - start > max {
        end = start + max;
    }
    start..end
}
-------------------------------------------------------------------------------- /autarkie/src/fuzzer/mutators/mod.rs: --------------------------------------------------------------------------------
mod commons;
pub mod recurse_mutate;
pub mod splice;
pub mod splice_append;
-------------------------------------------------------------------------------- /autarkie/src/fuzzer/mutators/recurse_mutate.rs: --------------------------------------------------------------------------------
use crate::Visitor;
use crate::{MutationType, Node};
use libafl::{
    corpus::Corpus,
    mutators::{MutationResult, Mutator},
    state::{HasCorpus, HasRand},
    HasMetadata,
};
#[cfg(feature = "introspection")]
use libafl::{mark_feature_time, start_timer};
use libafl_bolts::{HasLen, Named};
use std::{borrow::Cow, cell::RefCell, collections::VecDeque, marker::PhantomData, rc::Rc};

use crate::fuzzer::Context;

use super::commons::calculate_subslice_bounds;

pub const RECURSE_STACK: usize = 100;

/// Mutator that regenerates random sub-trees of the input in place.
pub struct AutarkieRecurseMutator<I> {
    max_subslice_size: usize,
    visitor: Rc<RefCell<Visitor>>,
    phantom: PhantomData<I>,
}

impl<I, S> Mutator<I, S> for AutarkieRecurseMutator<I>
where
    I: Node,
    S: HasCorpus<I> + HasRand + HasMetadata,
{
    fn mutate(&mut self, state: &mut S, input: &mut I) -> Result<MutationResult, libafl::Error> {
        let mut metadata = state.metadata_mut::<Context>()?;
        #[cfg(feature = "introspection")]
        start_timer!(state);
        // Enumerate every reachable field path of the input.
        input.__autarkie_fields(&mut self.visitor.borrow_mut(), 0);
        let mut fields = self.visitor.borrow_mut().fields();
        #[cfg(feature = "introspection")]
        mark_feature_time!(state, Data::Fields);
        let field_splice_index = self.visitor.borrow_mut().random_range(0, fields.len() - 1);
        let field = &mut fields[field_splice_index];
        let ((id, node_ty), ty) = field.last().unwrap();
        // Half the time allow the full generation depth budget, otherwise
        // generate minimally (depth 0).
        let bias = if self.visitor.borrow_mut().coinflip() {
            self.visitor.borrow().generate_depth()
        } else {
            0
        };
        if let crate::NodeType::Iterable(is_fixed_len, field_len, inner_ty) = node_ty {
            if *field_len < 3 {
                return Ok(MutationResult::Skipped);
            }
            // NOTE: the original also built an unused `path` here before the
            // loop; it was dead code (shadowed inside the loop) and is removed.
            let subslice_bounds = calculate_subslice_bounds(
                *field_len,
                self.max_subslice_size,
                &mut self.visitor.borrow_mut(),
            );
            for index in subslice_bounds {
                let mut path = VecDeque::from_iter(field.iter().map(|(i, ty)| i.0));
                path.push_back(index);
                #[cfg(debug_assertions)]
                println!("recursive_mutate | subslice | {:?}", field);
                input.__autarkie_mutate(
                    &mut MutationType::GenerateReplace(bias),
                    &mut self.visitor.borrow_mut(),
                    path,
                );
            }
            metadata.add_mutation(crate::fuzzer::context::MutationMetadata::RecurseMutateSubsplice);
        } else {
            let path = VecDeque::from_iter(field.iter().map(|(i, ty)| i.0));
            #[cfg(debug_assertions)]
            println!("recursive_mutate | single | {:?}", field);
            input.__autarkie_mutate(
                &mut MutationType::GenerateReplace(bias),
                &mut self.visitor.borrow_mut(),
                path,
            );
            metadata.add_mutation(crate::fuzzer::context::MutationMetadata::RecurseMutateSingle);
        }
        Ok(MutationResult::Mutated)
    }

    fn post_exec(
        &mut self,
        _state: &mut S,
        _new_corpus_id: Option<CorpusId>,
    ) -> Result<(), libafl::Error> {
        Ok(())
    }
}

impl<I> Named for AutarkieRecurseMutator<I> {
    fn name(&self) -> &std::borrow::Cow<'static, str> {
        &Cow::Borrowed("AutarkieRecurseMutator")
    }
}

impl<I> AutarkieRecurseMutator<I> {
    pub fn new(visitor: Rc<RefCell<Visitor>>, max_subslice_size: usize) -> Self {
        Self {
            visitor,
            max_subslice_size,
            phantom: PhantomData,
        }
    }
}
-------------------------------------------------------------------------------- /autarkie/src/fuzzer/mutators/splice.rs: --------------------------------------------------------------------------------
use crate::Visitor;
use crate::{MutationType, Node};
use libafl::{
    corpus::Corpus,
    mutators::{MutationResult, Mutator},
    state::{HasCorpus, HasRand},
    HasMetadata,
};
#[cfg(feature = "introspection")]
use libafl::{mark_feature_time, start_timer};
use libafl_bolts::{current_time, AsSlice, Named};
use std::{borrow::Cow, cell::RefCell, collections::VecDeque, marker::PhantomData, rc::Rc};

use crate::fuzzer::Context;

use super::commons::calculate_subslice_bounds;

pub const SPLICE_STACK: usize = 100;

/// Mutator that splices serialized chunks of matching type from the corpus
/// into a randomly chosen field of the input.
pub struct AutarkieSpliceMutator<I> {
    visitor: Rc<RefCell<Visitor>>,
    max_subslice_size: usize,
    phantom: PhantomData<I>,
}

#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[repr(u8)]
pub enum Data {
    Fields,
}

impl<I, S> Mutator<I, S> for AutarkieSpliceMutator<I>
where
    I: Node,
    S: HasCorpus<I> + HasRand + HasMetadata,
{
    fn mutate(&mut self, state: &mut S, input: &mut I) -> Result<MutationResult, libafl::Error> {
        let mut metadata = state.metadata_mut::<Context>()?;
        // NOTE: the original tracked a `mutated_path` variable that was written
        // in every branch but never read; it is removed as dead code.
        input.__autarkie_fields(&mut self.visitor.borrow_mut(), 0);
        let fields = self.visitor.borrow_mut().fields();
        let field_splice_index = self.visitor.borrow_mut().random_range(0, fields.len() - 1);
        let field = &fields[field_splice_index];
        let ((id, node_ty), ty) = field.last().unwrap();
        if let crate::NodeType::Iterable(is_fixed_len, field_len, inner_ty) = node_ty {
            // 60% of the time splice a random sub-range element-by-element.
            let subslice = self.visitor.borrow_mut().coinflip_with_prob(0.6);
            if subslice && *field_len > 3 {
                let Some(possible_splices) = metadata.get_inputs_for_type(&inner_ty) else {
                    return Ok(MutationResult::Skipped);
                };
                let path = VecDeque::from_iter(field.iter().map(|(i, ty)| i.0));
                let subslice_bounds = calculate_subslice_bounds(
                    *field_len,
                    self.max_subslice_size,
                    &mut self.visitor.borrow_mut(),
                );
                for index in subslice_bounds {
                    let mut child_path = path.clone();
                    child_path.push_back(index);
                    let random_splice = possible_splices
                        .get(
                            self.visitor
                                .borrow_mut()
                                .random_range(0, possible_splices.len() - 1),
                        )
                        .unwrap();
                    // TODO: cache this in memory
                    let data = std::fs::read(random_splice).unwrap();
                    #[cfg(debug_assertions)]
                    println!("splice | subslice | {:?}", (&field, &path));
                    input.__autarkie_mutate(
                        &mut MutationType::Splice(&mut data.as_slice()),
                        &mut self.visitor.borrow_mut(),
                        child_path,
                    );
                }
                metadata.add_mutation(crate::fuzzer::context::MutationMetadata::SpliceSubSplice);
            } else {
                let Some(possible_splices) = metadata.get_inputs_for_type(&inner_ty) else {
                    return Ok(MutationResult::Skipped);
                };
                // unfortunately we need to replace the exact amount.
                // cause we don't differentiate between vec and slice
                let path = VecDeque::from_iter(field.iter().map(|(i, ty)| i.0));
                let items = (0..*field_len)
                    .map(|_| {
                        std::fs::read(
                            possible_splices
                                .get(
                                    self.visitor
                                        .borrow_mut()
                                        .random_range(0, possible_splices.len() - 1),
                                )
                                .expect("NZkjgWib____"),
                        )
                        .expect("could not read splice file")
                    })
                    .collect::<Vec<_>>();
                // Variable-length sequences carry a serialized length prefix.
                // (The original's `if *field_len > 0 { *field_len } else { 0 }`
                // was a no-op and is simplified.)
                let mut data = if !*is_fixed_len {
                    crate::serialize_vec_len(*field_len)
                } else {
                    vec![]
                };
                data.extend(items.iter().flatten());
                #[cfg(debug_assertions)]
                println!("splice | full | {:?}", field);
                input.__autarkie_mutate(
                    &mut MutationType::Splice(&mut data.as_slice()),
                    &mut self.visitor.borrow_mut(),
                    path,
                );
                metadata.add_mutation(crate::fuzzer::context::MutationMetadata::SpliceFull);
            }
        } else {
            let Some(possible_splices) = metadata.get_inputs_for_type(ty) else {
                return Ok(MutationResult::Skipped);
            };
            let path = VecDeque::from_iter(field.iter().map(|(i, ty)| i.0));
            let random_splice = possible_splices
                .get(
                    self.visitor
                        .borrow_mut()
                        .random_range(0, possible_splices.len() - 1),
                )
                .unwrap();
            let data = std::fs::read(random_splice).unwrap();
            #[cfg(debug_assertions)]
            println!("splice | one | {:?} {:?}", field, path);
            input.__autarkie_mutate(
                &mut MutationType::Splice(&mut data.as_slice()),
                &mut self.visitor.borrow_mut(),
                path,
            );
            metadata.add_mutation(crate::fuzzer::context::MutationMetadata::SpliceSingle);
        }
        Ok(MutationResult::Mutated)
    }

    fn post_exec(
        &mut self,
        _state: &mut S,
        _new_corpus_id: Option<CorpusId>,
    ) -> Result<(), libafl::Error> {
// (tail of AutarkieSpliceMutator's post_exec, continued from above)
        Ok(())
    }
}

impl<I> Named for AutarkieSpliceMutator<I> {
    fn name(&self) -> &std::borrow::Cow<'static, str> {
        &Cow::Borrowed("AutarkieSpliceMutator")
    }
}

impl<I> AutarkieSpliceMutator<I> {
    pub fn new(visitor: Rc<RefCell<Visitor>>, max_subslice_size: usize) -> Self {
        Self {
            visitor,
            max_subslice_size,
            phantom: PhantomData,
        }
    }
}
-------------------------------------------------------------------------------- /autarkie/src/fuzzer/mutators/splice_append.rs: --------------------------------------------------------------------------------
use crate::fuzzer::context::MutationMetadata;
use crate::Node;
use crate::Visitor;
use libafl::{
    corpus::Corpus,
    mutators::{MutationResult, Mutator},
    state::{HasCorpus, HasRand},
    HasMetadata,
};
#[cfg(feature = "introspection")]
use libafl::{mark_feature_time, start_timer};
use libafl_bolts::{AsSlice, Named};
use std::{borrow::Cow, cell::RefCell, collections::VecDeque, marker::PhantomData, rc::Rc};

use crate::fuzzer::Context;

pub const SPLICE_APPEND_STACK: usize = 5;

/// Mutator that appends corpus chunks of matching type to a randomly chosen
/// variable-length (non-fixed) iterable field of the input.
pub struct AutarkieSpliceAppendMutator<I> {
    visitor: Rc<RefCell<Visitor>>,
    phantom: PhantomData<I>,
}

impl<I, S> Mutator<I, S> for AutarkieSpliceAppendMutator<I>
where
    I: Node,
    S: HasCorpus<I> + HasRand + HasMetadata,
{
    fn mutate(&mut self, state: &mut S, input: &mut I) -> Result<MutationResult, libafl::Error> {
        let mut metadata = state.metadata_mut::<Context>().unwrap();
        #[cfg(feature = "introspection")]
        start_timer!(state);
        input.__autarkie_fields(&mut self.visitor.borrow_mut(), 0);
        #[cfg(feature = "introspection")]
        mark_feature_time!(state, Data::Fields);
        // Keep only fields whose innermost node is an iterable.
        //
        // BUG FIX: the original wrote `matches!(&crate::NodeType::Iterable, last)`
        // with the expression and pattern arguments swapped; `last` is an
        // irrefutable binding pattern, so the filter accepted EVERY field and
        // non-iterables were only weeded out by the re-check further down,
        // skewing field selection. Match the node type properly.
        let fields = self
            .visitor
            .borrow_mut()
            .fields()
            .into_iter()
            .filter(|inner| {
                inner
                    .last()
                    .is_some_and(|((_, node_ty), _)| {
                        matches!(node_ty, crate::NodeType::Iterable(..))
                    })
            })
            .collect::<Vec<_>>();
        if fields.is_empty() {
            return Ok(MutationResult::Skipped);
        }
        let field_splice_index = self.visitor.borrow_mut().random_range(0, fields.len() - 1);
        let field = &fields[field_splice_index];
        let ((id, node_ty), ty) = field.last().unwrap();
        if let crate::NodeType::Iterable(is_fixed_len, field_len, inner_ty) = node_ty {
            // Fixed-length sequences cannot grow.
            if *is_fixed_len {
                return Ok(MutationResult::Skipped);
            }
            if let Some(possible_splices) = metadata.get_inputs_for_type(&inner_ty) {
                // calculate subsplice size
                let iter_count = self.visitor.borrow().iterate_depth();
                let append_count = self.visitor.borrow_mut().random_range(1, iter_count);
                let path = VecDeque::from_iter(field.iter().map(|(i, ty)| i.0));
                for _ in 0..append_count {
                    let random_splice = possible_splices
                        .get(
                            self.visitor
                                .borrow_mut()
                                .random_range(0, possible_splices.len() - 1),
                        )
                        .unwrap();
                    // TODO: cache this in memory
                    let data = std::fs::read(random_splice).unwrap();
                    #[cfg(debug_assertions)]
                    println!("splice | splice_append | {:?}", (&field, &path));
                    input.__autarkie_mutate(
                        &mut crate::MutationType::SpliceAppend(&mut data.as_slice()),
                        &mut self.visitor.borrow_mut(),
                        path.clone(),
                    );
                }
                metadata.add_mutation(crate::fuzzer::context::MutationMetadata::SpliceAppend);
                return Ok(MutationResult::Mutated);
            } else {
                return Ok(MutationResult::Skipped);
            }
        }
        Ok(MutationResult::Skipped)
    }

    fn post_exec(
        &mut self,
        _state: &mut S,
        _new_corpus_id: Option<CorpusId>,
    ) -> Result<(), libafl::Error> {
        Ok(())
    }
}

impl<I> Named for AutarkieSpliceAppendMutator<I> {
    fn name(&self) -> &std::borrow::Cow<'static, str> {
        &Cow::Borrowed("AutarkieSpliceAppendMutator")
    }
}

impl<I> AutarkieSpliceAppendMutator<I> {
    pub fn new(visitor: Rc<RefCell<Visitor>>) -> Self {
        Self {
            visitor,
            phantom: PhantomData,
        }
    }
}
109 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/stages/binary_mutator.rs: -------------------------------------------------------------------------------- 1 | //! Stage that wraps mutating stages for stats and cleanup 2 | use crate::fuzzer::context::MutationMetadata; 3 | use crate::Visitor; 4 | use crate::{fuzzer::Context, Node}; 5 | use core::{marker::PhantomData, time::Duration}; 6 | use libafl::inputs::BytesInput; 7 | use libafl::mutators::MutatorsTuple; 8 | use libafl::state::HasRand; 9 | use libafl_bolts::rands::Rand; 10 | use libafl_bolts::{current_time, Error}; 11 | use std::cell::RefCell; 12 | use std::num::NonZero; 13 | use std::rc::Rc; 14 | 15 | use libafl::{ 16 | events::EventFirer, 17 | executors::Executor, 18 | mutators::{MutationResult, Mutator}, 19 | stages::{Restartable, Stage}, 20 | state::HasCurrentTestcase, 21 | Evaluator, HasMetadata, 22 | }; 23 | 24 | #[derive(Debug)] 25 | pub struct AutarkieBinaryMutatorStage { 26 | inner: M, 27 | stack: usize, 28 | mutation_ty: MutationMetadata, 29 | phantom: PhantomData<(I, S)>, 30 | } 31 | 32 | impl AutarkieBinaryMutatorStage { 33 | /// Create a `AutarkieBinaryMutatorStage` 34 | pub fn new(inner: M, stack: usize, mutation_ty: MutationMetadata) -> Self { 35 | Self { 36 | stack, 37 | inner, 38 | mutation_ty, 39 | phantom: PhantomData, 40 | } 41 | } 42 | } 43 | 44 | impl Stage for AutarkieBinaryMutatorStage 45 | where 46 | I: Node, 47 | E: Executor, 48 | Z: Evaluator, 49 | EM: EventFirer, 50 | S: HasMetadata + HasCurrentTestcase + HasRand, 51 | M: MutatorsTuple, S>, 52 | { 53 | fn perform( 54 | &mut self, 55 | fuzzer: &mut Z, 56 | executor: &mut E, 57 | state: &mut S, 58 | manager: &mut EM, 59 | ) -> Result<(), Error> { 60 | let mut metadata = state.metadata_mut::().expect("fxeZamEw____"); 61 | let mut input = crate::serialize(&state.current_input_cloned().unwrap()); 62 | let mut metadata = state.metadata_mut::().unwrap(); 63 | 
metadata.generated_input(); 64 | for _ in 0..self.stack { 65 | let mutation = state 66 | .rand_mut() 67 | .below(unsafe { NonZero::new(self.inner.len()).unwrap_unchecked() }) 68 | .into(); 69 | self.inner.get_and_mutate(mutation, state, &mut input); 70 | #[cfg(not(feature = "scale"))] 71 | let Some(deserialized) = crate::maybe_deserialize(&input) else { 72 | return Ok(()); 73 | }; 74 | #[cfg(feature = "scale")] 75 | let Some(deserialized) = crate::maybe_deserialize(&mut input.as_slice()) else { 76 | return Ok(()); 77 | }; 78 | let mut metadata = state.metadata_mut::().unwrap(); 79 | metadata.add_mutation(self.mutation_ty.clone()); 80 | fuzzer.evaluate_input(state, executor, manager, &deserialized)?; 81 | } 82 | let mut metadata = state.metadata_mut::().expect("fxeZamEw____"); 83 | metadata.default_input(); 84 | Ok(()) 85 | } 86 | } 87 | 88 | impl Restartable for AutarkieBinaryMutatorStage { 89 | fn should_restart(&mut self, state: &mut S) -> Result { 90 | Ok(true) 91 | } 92 | 93 | fn clear_progress(&mut self, state: &mut S) -> Result<(), Error> { 94 | Ok(()) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/stages/cmp.rs: -------------------------------------------------------------------------------- 1 | use crate::{MutationType, Node, Visitor}; 2 | use libafl::{ 3 | corpus::Corpus, 4 | events::EventFirer, 5 | executors::{Executor, HasObservers}, 6 | observers::{AFLppCmpValuesMetadata, CmpValues, ObserversTuple}, 7 | stages::{Restartable, Stage}, 8 | state::HasCurrentTestcase, 9 | Evaluator, HasMetadata, 10 | }; 11 | use libafl_bolts::{ 12 | tuples::{Handle, MatchNameRef}, 13 | AsSlice, 14 | }; 15 | use libafl_targets::AFLppCmpLogObserver; 16 | use serde::Serialize; 17 | use std::{ 18 | cell::RefCell, 19 | collections::{HashSet, VecDeque}, 20 | marker::PhantomData, 21 | rc::Rc, 22 | }; 23 | 24 | use crate::fuzzer::Context; 25 | 26 | #[derive(Debug)] 27 | pub struct CmpLogStage<'a, TE, I> { 28 | 
// (fields of CmpLogStage, continued from the struct header above)
    visitor: Rc<RefCell<Visitor>>,
    tracer_executor: TE,
    cmplog_observer_handle: Handle<AFLppCmpLogObserver<'a>>,
    phantom: PhantomData<I>,
}

impl<'a, TE, I> CmpLogStage<'a, TE, I> {
    pub fn new(
        visitor: Rc<RefCell<Visitor>>,
        tracer_executor: TE,
        cmplog_observer_handle: Handle<AFLppCmpLogObserver<'a>>,
    ) -> Self {
        Self {
            cmplog_observer_handle,
            tracer_executor,
            visitor,
            phantom: PhantomData,
        }
    }
}

impl<E, EM, I, S, TE, Z> Stage<E, EM, S, Z> for CmpLogStage<'_, TE, I>
where
    I: Node + Serialize + Clone,
    S: HasCurrentTestcase<I> + HasMetadata,
    E: Executor<EM, I, S, Z>,
    EM: EventFirer<I, S>,
    TE: Executor<EM, I, S, Z> + HasObservers,
    TE::Observers: MatchNameRef + ObserversTuple<I, S>,
    Z: Evaluator<E, EM, I, S>,
{
    fn perform(
        &mut self,
        fuzzer: &mut Z,
        executor: &mut E,
        state: &mut S,
        manager: &mut EM,
    ) -> Result<(), libafl_bolts::Error> {
        // Only trace a testcase the first time it is scheduled.
        if state.current_testcase().unwrap().scheduled_count() > 1 {
            return Ok(());
        }

        let unmutated_input = state.current_input_cloned()?;

        // Arm the cmplog observer, then run the target once to record
        // comparison operands.
        let mut obs = self.tracer_executor.observers_mut();
        let ob = obs.get_mut(&self.cmplog_observer_handle).unwrap();
        ob.set_original(true);
        self.tracer_executor
            .observers_mut()
            .pre_exec_all(state, &unmutated_input)?;

        let exit_kind =
            self.tracer_executor
                .run_target(fuzzer, state, manager, &unmutated_input)?;
        self.tracer_executor
            .observers_mut()
            .post_exec_all(state, &unmutated_input, &exit_kind)?;

        // Collect (left, right) operand pairs in both orders, widened to u64,
        // so either side can be spliced over the other.
        let mut reduced = HashSet::new();
        if let Ok(data) = state.metadata::<AFLppCmpValuesMetadata>() {
            for item in data.orig_cmpvals().values() {
                for i in item.iter() {
                    match i {
                        CmpValues::U16((left, right, is_const)) => {
                            reduced.insert((*left as u64, *right as u64));
                            reduced.insert((*right as u64, *left as u64));
                        }
                        CmpValues::U32((left, right, is_const)) => {
                            reduced.insert((*left as u64, *right as u64));
                            reduced.insert((*right as u64, *left as u64));
                        }
                        CmpValues::U64((left, right, is_const)) => {
                            reduced.insert((*left, *right));
                            reduced.insert((*right, *left));
                        }
                        CmpValues::Bytes((left, right)) => {
                            // TODO
                        }
                        // ignore U8
                        CmpValues::U8(_) => {}
                    }
                }
            }
        }

        // Keep the context lookup as an existence assertion; the binding
        // itself was never used (dead `metadata` variable removed).
        let _ = state
            .metadata_mut::<Context>()
            .expect("we must have context!");

        // For every recorded comparison, find input paths whose value matches
        // one operand and splice in the serialized other operand.
        for cmp in reduced {
            unmutated_input.__autarkie_cmps(&mut self.visitor.borrow_mut(), 0, cmp);
            let matches = self.visitor.borrow_mut().cmps();
            for path in matches {
                let cmp_path = path.0.iter().map(|(i, ty)| i.0).collect::<VecDeque<_>>();
                let mut serialized_alternative = path.1.as_slice();
                let mut input = unmutated_input.clone();
                // (Dead `let before = crate::serialize(&input)` and the unused
                // `res` binding from the original are removed.)
                #[cfg(debug_assertions)]
                println!("cmplog_splice | one | {:?}", path.0);
                input.__autarkie_mutate(
                    &mut MutationType::Splice(&mut serialized_alternative),
                    &mut self.visitor.borrow_mut(),
                    cmp_path,
                );
                fuzzer.evaluate_input(state, executor, manager, &input)?;
            }
        }

        // walk all fields in the input and capture the paths where reduced is present and store
        // those paths as potentially interesting.
        Ok(())
    }
}

impl<'a, TE, I, S> Restartable<S> for CmpLogStage<'a, TE, I> {
    fn should_restart(&mut self, _state: &mut S) -> Result<bool, libafl::Error> {
        Ok(true)
    }

    fn clear_progress(&mut self, _state: &mut S) -> Result<(), libafl::Error> {
        Ok(())
    }
}
-------------------------------------------------------------------------------- /autarkie/src/fuzzer/stages/generate.rs: --------------------------------------------------------------------------------
use crate::{fuzzer::context::Context, Node, Visitor};
use libafl::{
    corpus::Corpus,
    events::EventFirer,
    executors::Executor,
    stages::{Restartable, Stage},
    state::{HasCorpus, HasCurrentTestcase},
    Evaluator, HasMetadata,
};
use serde::Serialize;
use std::{cell::RefCell, marker::PhantomData, rc::Rc};

/// Stage that generates a fresh input from scratch and evaluates it.
#[derive(Debug)]
pub struct GenerateStage<I> {
    visitor: Rc<RefCell<Visitor>>,
    phantom: PhantomData<I>,
}

impl<I> GenerateStage<I> {
    pub fn new(visitor: Rc<RefCell<Visitor>>) -> Self {
        Self {
            visitor,
            phantom: PhantomData,
        }
    }
}

impl<E, EM, I, S, Z> Stage<E, EM, S, Z> for GenerateStage<I>
where
    I: Node + Serialize,
    S: HasCurrentTestcase<I> + HasCorpus<I> + HasMetadata,
    E: Executor<EM, I, S, Z>,
    EM: EventFirer<I, S>,
    Z: Evaluator<E, EM, I, S>,
{
    fn perform(
        &mut self,
        fuzzer: &mut Z,
        executor: &mut E,
        state: &mut S,
        manager: &mut EM,
    ) -> Result<(), libafl_bolts::Error> {
        // Mark stats as "generated" while this stage produces inputs, and
        // restore the default mode on every exit path.
        let mut metadata = state.metadata_mut::<Context>()?;
        metadata.generated_input();
        let Some(generated) = generate(&mut self.visitor.borrow_mut()) else {
            metadata.default_input();
            return Ok(());
        };
        metadata.add_mutation(crate::fuzzer::context::MutationMetadata::Generate);
        fuzzer.evaluate_input(state, executor, manager, &generated)?;
        let mut metadata = state.metadata_mut::<Context>()?;
        metadata.default_input();
        Ok(())
    }
}

pub fn generate(visitor: &mut Visitor) -> Option
58 | where 59 | I: Node, 60 | { 61 | I::__autarkie_generate(visitor, &mut visitor.generate_depth(), &mut 0) 62 | } 63 | 64 | impl Restartable for GenerateStage { 65 | fn should_restart(&mut self, state: &mut S) -> Result { 66 | Ok(true) 67 | } 68 | 69 | fn clear_progress(&mut self, state: &mut S) -> Result<(), libafl::Error> { 70 | Ok(()) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/stages/libfuzzer_cmp.rs: -------------------------------------------------------------------------------- 1 | use crate::{MutationType, Node, Visitor}; 2 | use libafl::{ 3 | corpus::Corpus, 4 | events::EventFirer, 5 | executors::{Executor, HasObservers}, 6 | observers::{AFLppCmpValuesMetadata, CmpValues, CmpValuesMetadata, ObserversTuple}, 7 | stages::{Restartable, Stage}, 8 | state::HasCurrentTestcase, 9 | Evaluator, HasMetadata, 10 | }; 11 | use libafl_bolts::{ 12 | tuples::{Handle, MatchNameRef}, 13 | AsSlice, 14 | }; 15 | use serde::Serialize; 16 | use std::{ 17 | cell::RefCell, 18 | collections::{HashSet, VecDeque}, 19 | marker::PhantomData, 20 | rc::Rc, 21 | }; 22 | 23 | use crate::fuzzer::Context; 24 | 25 | #[derive(Debug)] 26 | pub struct LibfuzzerCmplogStage { 27 | visitor: Rc>, 28 | phantom: PhantomData, 29 | } 30 | 31 | impl LibfuzzerCmplogStage { 32 | pub fn new(visitor: Rc>) -> Self { 33 | Self { 34 | visitor, 35 | phantom: PhantomData, 36 | } 37 | } 38 | } 39 | 40 | impl Stage for LibfuzzerCmplogStage 41 | where 42 | I: Node + Serialize + Clone, 43 | S: HasCurrentTestcase + HasMetadata, 44 | E: Executor, 45 | EM: EventFirer, 46 | Z: Evaluator, 47 | { 48 | fn perform( 49 | &mut self, 50 | fuzzer: &mut Z, 51 | executor: &mut E, 52 | state: &mut S, 53 | manager: &mut EM, 54 | ) -> Result<(), libafl_bolts::Error> { 55 | if state.current_testcase().unwrap().scheduled_count() > 1 { 56 | return Ok(()); 57 | } 58 | let unmutated_input = state.current_input_cloned()?; 59 | let mut reduced = HashSet::new(); 
60 | if let Ok(data) = state.metadata::() { 61 | for i in data.list.clone() { 62 | match i { 63 | CmpValues::U16((left, right, is_const)) => { 64 | reduced.insert((left as u64, right as u64)); 65 | reduced.insert((right as u64, left as u64)); 66 | } 67 | CmpValues::U32((left, right, is_const)) => { 68 | reduced.insert((left as u64, right as u64)); 69 | reduced.insert((right as u64, left as u64)); 70 | } 71 | CmpValues::U64((left, right, is_const)) => { 72 | reduced.insert((left, right)); 73 | reduced.insert((right, left)); 74 | } 75 | CmpValues::Bytes((left, right)) => { 76 | if left.as_slice() 77 | != [ 78 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80 | ] 81 | && right.as_slice() != left.as_slice() 82 | { 83 | // TODO 84 | } 85 | } 86 | // ignore U8 87 | CmpValues::U8(_) => {} 88 | } 89 | } 90 | } 91 | 92 | let metadata = state 93 | .metadata_mut::() 94 | .expect("we must have context!"); 95 | metadata.generated_input(); 96 | for cmp in reduced { 97 | unmutated_input.__autarkie_cmps(&mut self.visitor.borrow_mut(), 0, cmp); 98 | let matches = self.visitor.borrow_mut().cmps(); 99 | for path in matches { 100 | let cmp_path = path.0.iter().map(|(i, ty)| i.0).collect::>(); 101 | let mut serialized_alternative = path.1.as_slice(); 102 | let mut input = unmutated_input.clone(); 103 | let before = crate::serialize(&input); 104 | #[cfg(debug_assertions)] 105 | println!("cmplog_splice | one | {:?}", path.0); 106 | input.__autarkie_mutate( 107 | &mut MutationType::Splice(&mut serialized_alternative), 108 | &mut self.visitor.borrow_mut(), 109 | cmp_path, 110 | ); 111 | 112 | let metadata = state 113 | .metadata_mut::() 114 | .expect("we must have context!"); 115 | metadata.add_mutation(crate::fuzzer::context::MutationMetadata::Cmplog); 116 | let res = fuzzer.evaluate_input(state, executor, manager, &input)?; 117 | } 118 | } 119 | let metadata = state 120 | .metadata_mut::() 121 | .expect("we must have context!"); 122 
| metadata.default_input(); 123 | 124 | // walk all fields in the input and capture the paths where reduced is present and store 125 | // those paths as potentially interesting. 126 | Ok(()) 127 | } 128 | } 129 | 130 | impl Restartable for LibfuzzerCmplogStage { 131 | fn should_restart(&mut self, state: &mut S) -> Result { 132 | Ok(true) 133 | } 134 | 135 | fn clear_progress(&mut self, state: &mut S) -> Result<(), libafl::Error> { 136 | Ok(()) 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/stages/minimization.rs: -------------------------------------------------------------------------------- 1 | use crate::{fuzzer::stages::stats::AutarkieStats, MutationType, Node, NodeType, Visitor}; 2 | use libafl::{ 3 | corpus::Corpus, 4 | events::EventFirer, 5 | executors::{Executor, HasObservers}, 6 | feedbacks::{HasObserverHandle, MapIndexesMetadata, MapNoveltiesMetadata}, 7 | observers::{CanTrack, MapObserver, ObserversTuple}, 8 | stages::{Restartable, Stage}, 9 | state::{HasCorpus, HasCurrentTestcase}, 10 | Evaluator, HasMetadata, 11 | }; 12 | use libafl_bolts::{tuples::Handle, AsIter, Named}; 13 | use num_traits::Bounded; 14 | use serde::{Deserialize, Serialize}; 15 | use std::{ 16 | borrow::{Borrow, Cow}, 17 | cell::RefCell, 18 | collections::{HashMap, HashSet, VecDeque}, 19 | fmt::Debug, 20 | marker::PhantomData, 21 | rc::Rc, 22 | }; 23 | 24 | use crate::fuzzer::Context; 25 | 26 | #[derive(Debug)] 27 | pub struct MinimizationStage { 28 | map_observer_handle: Handle, 29 | map_name: Cow<'static, str>, 30 | visitor: Rc>, 31 | phantom: PhantomData<(E, O, OT, S, I)>, 32 | } 33 | 34 | impl MinimizationStage 35 | where 36 | O: MapObserver, 37 | for<'it> O: AsIter<'it, Item = O::Entry>, 38 | C: AsRef + CanTrack, 39 | OT: ObserversTuple, 40 | { 41 | pub fn new(visitor: Rc>, map_feedback: &F) -> Self 42 | where 43 | F: HasObserverHandle + Named, 44 | { 45 | let map_name = map_feedback.name().clone(); 46 | 
Self { 47 | map_observer_handle: map_feedback.observer_handle().clone(), 48 | map_name: map_name.clone(), 49 | visitor, 50 | phantom: PhantomData, 51 | } 52 | } 53 | } 54 | 55 | impl Stage for MinimizationStage 56 | where 57 | I: Node + Serialize + Clone, 58 | S: HasCurrentTestcase + HasCorpus + HasMetadata, 59 | E: Executor + HasObservers, 60 | EM: EventFirer, 61 | Z: Evaluator, 62 | O: MapObserver, 63 | C: AsRef + CanTrack, 64 | for<'de> ::Entry: 65 | Serialize + Deserialize<'de> + 'static + Default + Debug + Bounded, 66 | OT: ObserversTuple, 67 | { 68 | fn perform( 69 | &mut self, 70 | fuzzer: &mut Z, 71 | executor: &mut E, 72 | state: &mut S, 73 | manager: &mut EM, 74 | ) -> Result<(), libafl_bolts::Error> { 75 | if state.current_testcase()?.scheduled_count() > 0 { 76 | return Ok(()); 77 | } 78 | 79 | let metadata = state.metadata::().unwrap(); 80 | let novelties = state 81 | .current_testcase() 82 | .unwrap() 83 | .borrow() 84 | .metadata::() 85 | .unwrap() 86 | .list 87 | .clone(); 88 | let mut current = state.current_input_cloned().unwrap(); 89 | current.__autarkie_fields(&mut self.visitor.borrow_mut(), 0); 90 | let mut skip = 0; 91 | let mut fields = self.visitor.borrow_mut().fields(); 92 | shuffle(&mut fields, &mut self.visitor.borrow_mut()); 93 | let mut found = false; 94 | loop { 95 | let field = fields.pop(); 96 | if field.is_none() { 97 | break; 98 | } 99 | let field = field.unwrap(); 100 | let ((id, node_ty), ty) = field.last().unwrap(); 101 | if let NodeType::Iterable(is_fixed_len, field_len, inner_ty) = node_ty { 102 | let path = VecDeque::from_iter(field.iter().map(|(i, ty)| i.0)); 103 | // NOTE: -1 because we zero index 104 | let mut len = field_len.saturating_sub(1); 105 | let mut counter = 0; 106 | if *is_fixed_len { 107 | continue; 108 | } 109 | loop { 110 | if len == 0 || counter >= len { 111 | break; 112 | } 113 | let mut inner = current.clone(); 114 | inner.__autarkie_mutate( 115 | &mut MutationType::IterablePop(counter), 116 | &mut 
self.visitor.borrow_mut(), 117 | path.clone(), 118 | ); 119 | let run = fuzzer.evaluate_input(state, executor, manager, &inner)?; 120 | let map = &executor.observers()[&self.map_observer_handle] 121 | .as_ref() 122 | .how_many_set(&novelties); 123 | if *map == novelties.len() { 124 | found = true; 125 | current = inner; 126 | current.__autarkie_fields(&mut self.visitor.borrow_mut(), 0); 127 | fields = self.visitor.borrow_mut().fields(); 128 | len = len.saturating_sub(1); 129 | } 130 | counter += 1; 131 | } 132 | } 133 | } 134 | state.current_testcase_mut()?.set_input(current.clone()); 135 | if found { 136 | let metadata = state 137 | .metadata_mut::() 138 | .unwrap() 139 | .add_new_input_mutation( 140 | crate::fuzzer::context::MutationMetadata::IterableMinimization, 141 | ); 142 | } 143 | Ok(()) 144 | } 145 | } 146 | impl Restartable for MinimizationStage { 147 | fn should_restart(&mut self, state: &mut S) -> Result { 148 | Ok(true) 149 | } 150 | 151 | fn clear_progress(&mut self, state: &mut S) -> Result<(), libafl::Error> { 152 | Ok(()) 153 | } 154 | } 155 | 156 | /// Fisher–Yates shuffle driven by the fuzzer's `Visitor` RNG. Empty and single-element slices are no-ops (the range `1..len` is empty). NOTE(review): assumes `random_range(0, i)` can return `i` itself for an unbiased shuffle — confirm the bound is inclusive. 157 | pub fn shuffle<T>(data: &mut [T], v: &mut Visitor) { 158 | for i in (1..data.len()).rev() { 159 | let j = v.random_range(0, i); 160 | data.swap(i, j); 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/stages/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod binary_mutator; 2 | #[cfg(feature = "afl")] 3 | pub mod cmp; 4 | pub mod generate; 5 | #[cfg(feature = "libfuzzer")] 6 | pub mod libfuzzer_cmp; 7 | pub mod minimization; 8 | pub mod mutating; 9 | pub mod mutational; 10 | pub mod novelty_minimization; 11 | pub mod recursive_minimization; 12 | pub mod stats; 13 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/stages/mutating.rs:
-------------------------------------------------------------------------------- 1 | //! Stage that wraps mutating stages for stats and cleanup 2 | use crate::fuzzer::Context; 3 | use crate::Visitor; 4 | use core::{marker::PhantomData, time::Duration}; 5 | use libafl_bolts::{current_time, Error}; 6 | use std::cell::RefCell; 7 | use std::rc::Rc; 8 | 9 | use libafl::{ 10 | stages::{Restartable, Stage}, 11 | HasMetadata, 12 | }; 13 | /// Wrapper around an inner mutating `Stage`: after the inner stage runs, it clears the per-run mutation metadata recorded in `Context` and drains the visitor's serialized-field buffer so state does not leak into the next stage. (Previous doc claimed it tracked execution time; no timing happens here.) NOTE(review): generic parameters were reconstructed after `<...>` was stripped by text extraction — confirm against upstream. 14 | #[derive(Debug)] 15 | pub struct MutatingStageWrapper<S, ST> { 16 | inner: ST, 17 | visitor: Rc<RefCell<Visitor>>, 18 | phantom: PhantomData<S>, 19 | } 20 | 21 | impl<S, ST> MutatingStageWrapper<S, ST> { 22 | /// Create a `MutatingStageWrapper` around `inner`, sharing the fuzzer-wide `visitor`. 23 | pub fn new(inner: ST, visitor: Rc<RefCell<Visitor>>) -> Self { 24 | Self { 25 | inner, 26 | visitor, 27 | phantom: PhantomData, 28 | } 29 | } 30 | } 31 | 32 | impl<E, M, Z, S, ST> Stage<E, M, S, Z> for MutatingStageWrapper<S, ST> 33 | where 34 | S: HasMetadata, 35 | ST: Stage<E, M, S, Z>, 36 | { 37 | fn perform( 38 | &mut self, 39 | fuzzer: &mut Z, 40 | executor: &mut E, 41 | state: &mut S, 42 | manager: &mut M, 43 | ) -> Result<(), Error> { 44 | // Run the wrapped stage first; cleanup below happens only on success ('?' propagates errors). 45 | self.inner.perform(fuzzer, executor, state, manager)?; 46 | let _ = state.metadata_mut::<Context>().unwrap().clear_mutations(); 47 | let _ = self.visitor.borrow_mut().serialized(); 48 | Ok(()) 49 | } 50 | } 51 | impl<S, ST> Restartable<S> for MutatingStageWrapper<S, ST> 52 | where 53 | ST: Restartable<S>, 54 | { 55 | fn should_restart(&mut self, state: &mut S) -> Result<bool, Error> { 56 | // Delegate restart policy to the wrapped stage. 57 | self.inner.should_restart(state) 58 | } 59 | fn clear_progress(&mut self, state: &mut S) -> Result<(), Error> { 60 | self.inner.clear_progress(state) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/stages/mutational.rs: -------------------------------------------------------------------------------- 1 | //!
Stage that wraps mutating stages for stats and cleanup 2 | use crate::fuzzer::Context; 3 | use crate::Visitor; 4 | use core::{marker::PhantomData, time::Duration}; 5 | use libafl_bolts::{current_time, Error}; 6 | use std::cell::RefCell; 7 | use std::rc::Rc; 8 | 9 | use libafl::{ 10 | events::EventFirer, 11 | executors::Executor, 12 | mutators::{MutationResult, Mutator}, 13 | stages::{Restartable, Stage}, 14 | state::HasCurrentTestcase, 15 | Evaluator, HasMetadata, 16 | }; 17 | 18 | #[derive(Debug)] 19 | pub struct AutarkieMutationalStage { 20 | inner: M, 21 | stack: usize, 22 | phantom: PhantomData<(I, S)>, 23 | } 24 | 25 | impl AutarkieMutationalStage { 26 | /// Create a `AutarkieMutationalStage` 27 | pub fn new(inner: M, stack: usize) -> Self { 28 | Self { 29 | inner, 30 | stack, 31 | phantom: PhantomData, 32 | } 33 | } 34 | } 35 | 36 | impl Stage for AutarkieMutationalStage 37 | where 38 | E: Executor, 39 | Z: Evaluator, 40 | EM: EventFirer, 41 | S: HasMetadata + HasCurrentTestcase, 42 | M: Mutator, 43 | { 44 | fn perform( 45 | &mut self, 46 | fuzzer: &mut Z, 47 | executor: &mut E, 48 | state: &mut S, 49 | manager: &mut EM, 50 | ) -> Result<(), Error> { 51 | let mut current = state.current_input_cloned().unwrap(); 52 | for i in 0..self.stack { 53 | if self.inner.mutate(state, &mut current)? 
== MutationResult::Mutated { 54 | fuzzer.evaluate_input(state, executor, manager, ¤t)?; 55 | } 56 | } 57 | Ok(()) 58 | } 59 | } 60 | 61 | impl Restartable for AutarkieMutationalStage { 62 | fn should_restart(&mut self, state: &mut S) -> Result { 63 | Ok(true) 64 | } 65 | 66 | fn clear_progress(&mut self, state: &mut S) -> Result<(), Error> { 67 | Ok(()) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/stages/novelty_minimization.rs: -------------------------------------------------------------------------------- 1 | use crate::{fuzzer::stages::stats::AutarkieStats, MutationType, Node, NodeType, Visitor}; 2 | use libafl::{ 3 | corpus::Corpus, 4 | events::EventFirer, 5 | executors::{Executor, HasObservers}, 6 | feedbacks::{HasObserverHandle, MapIndexesMetadata, MapNoveltiesMetadata}, 7 | observers::{CanTrack, MapObserver, ObserversTuple}, 8 | stages::{Restartable, Stage}, 9 | state::{HasCorpus, HasCurrentTestcase}, 10 | Evaluator, HasMetadata, 11 | }; 12 | use libafl_bolts::{tuples::Handle, AsIter, Named}; 13 | use num_traits::Bounded; 14 | use serde::{Deserialize, Serialize}; 15 | use std::{ 16 | borrow::{Borrow, Cow}, 17 | cell::RefCell, 18 | collections::{HashMap, HashSet, VecDeque}, 19 | fmt::Debug, 20 | marker::PhantomData, 21 | rc::Rc, 22 | }; 23 | 24 | use crate::fuzzer::Context; 25 | 26 | #[derive(Debug)] 27 | pub struct NoveltyMinimizationStage { 28 | map_observer_handle: Handle, 29 | map_name: Cow<'static, str>, 30 | visitor: Rc>, 31 | phantom: PhantomData<(E, O, OT, S, I)>, 32 | } 33 | 34 | impl NoveltyMinimizationStage 35 | where 36 | O: MapObserver, 37 | for<'it> O: AsIter<'it, Item = O::Entry>, 38 | O::Entry: 'static + Default + Debug + serde::de::DeserializeOwned + serde::Serialize, 39 | C: AsRef + CanTrack, 40 | OT: ObserversTuple, 41 | { 42 | pub fn new(visitor: Rc>, map_feedback: &F) -> Self 43 | where 44 | F: HasObserverHandle + Named, 45 | { 46 | let map_name = 
map_feedback.name().clone(); 47 | Self { 48 | map_observer_handle: map_feedback.observer_handle().clone(), 49 | map_name: map_name.clone(), 50 | visitor, 51 | phantom: PhantomData, 52 | } 53 | } 54 | } 55 | 56 | impl Stage for NoveltyMinimizationStage 57 | where 58 | I: Node + Serialize + Clone, 59 | S: HasCurrentTestcase + HasCorpus + HasMetadata, 60 | E: Executor + HasObservers, 61 | EM: EventFirer, 62 | Z: Evaluator, 63 | O: MapObserver, 64 | C: AsRef + CanTrack, 65 | for<'de> ::Entry: 66 | Serialize + Deserialize<'de> + 'static + Default + Debug + Bounded, 67 | OT: ObserversTuple, 68 | { 69 | fn perform( 70 | &mut self, 71 | fuzzer: &mut Z, 72 | executor: &mut E, 73 | state: &mut S, 74 | manager: &mut EM, 75 | ) -> Result<(), libafl_bolts::Error> { 76 | if state.current_testcase()?.scheduled_count() > 0 { 77 | return Ok(()); 78 | } 79 | 80 | let metadata = state.metadata::().unwrap(); 81 | let novelties = state 82 | .current_testcase() 83 | .unwrap() 84 | .borrow() 85 | .metadata::() 86 | .unwrap() 87 | .list 88 | .clone(); 89 | let mut current = state.current_input_cloned().unwrap(); 90 | current.__autarkie_fields(&mut self.visitor.borrow_mut(), 0); 91 | let mut skip = 0; 92 | let mut fields = self.visitor.borrow_mut().fields(); 93 | let mut found = false; 94 | loop { 95 | let field = fields.pop(); 96 | if field.is_none() { 97 | break; 98 | } 99 | let field = field.unwrap(); 100 | let ((id, node_ty), ty) = field.last().unwrap(); 101 | if let NodeType::Iterable(is_fixed_len, field_len, inner_ty) = node_ty { 102 | let path = VecDeque::from_iter(field.iter().map(|(i, ty)| i.0)); 103 | // NOTE: -1 because we zero index 104 | let mut len = field_len.saturating_sub(1); 105 | let mut counter = 0; 106 | if *is_fixed_len { 107 | continue; 108 | } 109 | loop { 110 | if len == 0 || counter >= len { 111 | break; 112 | } 113 | let mut inner = current.clone(); 114 | inner.__autarkie_mutate( 115 | &mut MutationType::IterablePop(counter), 116 | &mut self.visitor.borrow_mut(), 
117 | path.clone(), 118 | ); 119 | let run = fuzzer.evaluate_input(state, executor, manager, &inner)?; 120 | let map = &executor.observers()[&self.map_observer_handle] 121 | .as_ref() 122 | .how_many_set(&novelties); 123 | if *map == novelties.len() { 124 | found = true; 125 | current = inner; 126 | current.__autarkie_fields(&mut self.visitor.borrow_mut(), 0); 127 | fields = self.visitor.borrow_mut().fields(); 128 | len = len.saturating_sub(1); 129 | } 130 | counter += 1; 131 | } 132 | } 133 | } 134 | if found { 135 | let metadata = state 136 | .metadata_mut::() 137 | .unwrap() 138 | .add_new_input_mutation( 139 | crate::fuzzer::context::MutationMetadata::NoveltyMinimization, 140 | ); 141 | fuzzer.add_input(state, executor, manager, current)?; 142 | } 143 | Ok(()) 144 | } 145 | } 146 | impl Restartable for NoveltyMinimizationStage { 147 | fn should_restart(&mut self, state: &mut S) -> Result { 148 | Ok(true) 149 | } 150 | 151 | fn clear_progress(&mut self, state: &mut S) -> Result<(), libafl::Error> { 152 | Ok(()) 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/stages/recursive_minimization.rs: -------------------------------------------------------------------------------- 1 | use crate::{MutationType, Node, NodeType, Visitor}; 2 | use libafl::{ 3 | corpus::Corpus, 4 | events::EventFirer, 5 | executors::{Executor, HasObservers}, 6 | feedbacks::{HasObserverHandle, MapIndexesMetadata}, 7 | observers::{CanTrack, MapObserver, ObserversTuple}, 8 | stages::{Restartable, Stage}, 9 | state::{HasCorpus, HasCurrentTestcase}, 10 | Evaluator, HasMetadata, 11 | }; 12 | use libafl_bolts::{tuples::Handle, AsIter, Named}; 13 | use num_traits::Bounded; 14 | use serde::{Deserialize, Serialize}; 15 | use std::{ 16 | borrow::{Borrow, Cow}, 17 | cell::RefCell, 18 | collections::{HashMap, HashSet, VecDeque}, 19 | fmt::Debug, 20 | marker::PhantomData, 21 | rc::Rc, 22 | }; 23 | 24 | use crate::fuzzer::Context; 25 
| 26 | use super::stats::AutarkieStats; 27 | 28 | #[derive(Debug)] 29 | pub struct RecursiveMinimizationStage { 30 | map_observer_handle: Handle, 31 | map_name: Cow<'static, str>, 32 | visitor: Rc>, 33 | phantom: PhantomData<(E, O, OT, S, I)>, 34 | } 35 | 36 | impl RecursiveMinimizationStage 37 | where 38 | O: MapObserver, 39 | for<'it> O: AsIter<'it, Item = O::Entry>, 40 | C: AsRef + CanTrack, 41 | OT: ObserversTuple, 42 | { 43 | pub fn new(visitor: Rc>, map_feedback: &F) -> Self 44 | where 45 | F: HasObserverHandle + Named, 46 | { 47 | let map_name = map_feedback.name().clone(); 48 | Self { 49 | map_observer_handle: map_feedback.observer_handle().clone(), 50 | map_name: map_name.clone(), 51 | visitor, 52 | phantom: PhantomData, 53 | } 54 | } 55 | } 56 | 57 | impl Stage for RecursiveMinimizationStage 58 | where 59 | I: Node + Serialize + Clone, 60 | S: HasCurrentTestcase + HasCorpus + HasMetadata, 61 | E: Executor + HasObservers, 62 | Z: Evaluator, 63 | EM: EventFirer, 64 | O: MapObserver, 65 | C: AsRef + CanTrack, 66 | for<'de> ::Entry: 67 | Serialize + Deserialize<'de> + 'static + Default + Debug + Bounded, 68 | OT: ObserversTuple, 69 | { 70 | fn perform( 71 | &mut self, 72 | fuzzer: &mut Z, 73 | executor: &mut E, 74 | state: &mut S, 75 | manager: &mut EM, 76 | ) -> Result<(), libafl_bolts::Error> { 77 | if state.current_testcase()?.scheduled_count() > 0 { 78 | return Ok(()); 79 | } 80 | let metadata = state.metadata::().unwrap(); 81 | let indexes = state 82 | .current_testcase() 83 | .unwrap() 84 | .borrow() 85 | .metadata::() 86 | .unwrap() 87 | .list 88 | .clone(); 89 | 90 | let mut current = state.current_input_cloned().unwrap(); 91 | current.__autarkie_fields(&mut self.visitor.borrow_mut(), 0); 92 | let mut fields = self.visitor.borrow_mut().fields(); 93 | 94 | let mut skip = 0; 95 | let mut found = false; 96 | let mut cur_iter = 0; 97 | 98 | loop { 99 | let field = fields.pop(); 100 | if field.is_none() { 101 | break; 102 | } 103 | let field = 
field.unwrap(); 104 | let ((id, node_ty), ty) = field.last().unwrap(); 105 | if let NodeType::Recursive = node_ty { 106 | if cur_iter < skip { 107 | cur_iter += 1; 108 | continue; 109 | } 110 | let path = VecDeque::from_iter(field.iter().map(|(i, ty)| i.0)); 111 | let mut inner = current.clone(); 112 | // We are only trying to replace with one non recursive variant (maybe try to replace with ALL possible non recursive varaints?) 113 | inner.__autarkie_mutate( 114 | &mut MutationType::RecursiveReplace, 115 | &mut self.visitor.borrow_mut(), 116 | path.clone(), 117 | ); 118 | let run = fuzzer.evaluate_input(state, executor, manager, &inner)?; 119 | let map = &executor.observers()[&self.map_observer_handle] 120 | .as_ref() 121 | .to_vec(); 122 | let map = map 123 | .into_iter() 124 | .enumerate() 125 | .filter(|i| i.1 != &O::Entry::default()) 126 | .map(|i| i.0) 127 | .collect::>(); 128 | if map == indexes { 129 | found = true; 130 | cur_iter = 0; 131 | current = inner; 132 | current.__autarkie_fields(&mut self.visitor.borrow_mut(), 0); 133 | fields = self.visitor.borrow_mut().fields(); 134 | } else { 135 | skip += 1; 136 | } 137 | cur_iter += 1; 138 | } 139 | } 140 | if found { 141 | let metadata = state 142 | .metadata_mut::() 143 | .unwrap() 144 | .add_new_input_mutation( 145 | crate::fuzzer::context::MutationMetadata::RecursiveMinimization, 146 | ); 147 | } 148 | state.current_testcase_mut()?.set_input(current); 149 | Ok(()) 150 | } 151 | } 152 | 153 | impl Restartable for RecursiveMinimizationStage { 154 | fn should_restart(&mut self, state: &mut S) -> Result { 155 | Ok(true) 156 | } 157 | 158 | fn clear_progress(&mut self, state: &mut S) -> Result<(), libafl::Error> { 159 | Ok(()) 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /autarkie/src/fuzzer/stages/stats.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | fuzzer::context::{Context, 
MutationMetadata}, 3 | Node, Visitor, 4 | }; 5 | use libafl::{ 6 | corpus::Corpus, 7 | events::EventFirer, 8 | executors::Executor, 9 | stages::{Restartable, Stage}, 10 | state::{HasCorpus, HasCurrentTestcase}, 11 | Evaluator, HasMetadata, 12 | }; 13 | use serde::Serialize; 14 | use std::{ 15 | cell::RefCell, 16 | collections::{BTreeMap, HashSet}, 17 | marker::PhantomData, 18 | path::PathBuf, 19 | rc::Rc, 20 | time::{Duration, Instant}, 21 | }; 22 | 23 | #[derive(Debug)] 24 | pub struct StatsStage { 25 | last_run: Instant, 26 | out_dir: PathBuf, 27 | phantom: PhantomData, 28 | } 29 | 30 | impl StatsStage { 31 | pub fn new(out_dir: PathBuf) -> Self { 32 | Self { 33 | last_run: Instant::now(), 34 | out_dir, 35 | phantom: PhantomData, 36 | } 37 | } 38 | } 39 | 40 | impl Stage for StatsStage 41 | where 42 | I: Node + Serialize, 43 | S: HasCurrentTestcase + HasCorpus + HasMetadata, 44 | E: Executor, 45 | EM: EventFirer, 46 | Z: Evaluator, 47 | { 48 | fn perform( 49 | &mut self, 50 | fuzzer: &mut Z, 51 | executor: &mut E, 52 | state: &mut S, 53 | manager: &mut EM, 54 | ) -> Result<(), libafl_bolts::Error> { 55 | if Instant::now() - self.last_run > Duration::from_secs(5) { 56 | let mut metadata = state.metadata_mut::()?; 57 | std::fs::write( 58 | self.out_dir.join("stats.json"), 59 | serde_json::to_string_pretty(&metadata).expect("____YR5BenN6"), 60 | ) 61 | .expect("____weNooV3S"); 62 | self.last_run = Instant::now(); 63 | } 64 | Ok(()) 65 | } 66 | } 67 | 68 | impl Restartable for StatsStage { 69 | fn should_restart(&mut self, state: &mut S) -> Result { 70 | Ok(true) 71 | } 72 | 73 | fn clear_progress(&mut self, state: &mut S) -> Result<(), libafl::Error> { 74 | Ok(()) 75 | } 76 | } 77 | 78 | #[derive(Debug, Clone, serde::Serialize, serde::Deserialize, libafl::SerdeAny, Default)] 79 | pub struct AutarkieStats { 80 | mutations: BTreeMap, 81 | } 82 | 83 | impl AutarkieStats { 84 | pub fn add_new_input_mutations(&mut self, mutations: HashSet) { 85 | for m in mutations { 86 
| self.mutations 87 | .entry(m) 88 | .and_modify(|v| { 89 | *v += 1; 90 | }) 91 | .or_insert(1); 92 | } 93 | } 94 | pub fn add_new_input_mutation(&mut self, m: MutationMetadata) { 95 | self.mutations 96 | .entry(m) 97 | .and_modify(|v| { 98 | *v += 1; 99 | }) 100 | .or_insert(1); 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /autarkie/src/graph.rs: -------------------------------------------------------------------------------- 1 | use petgraph::graphmap::{GraphMap, NodeTrait}; 2 | use petgraph::visit::IntoNeighbors; 3 | use petgraph::Directed; 4 | use std::collections::HashSet; 5 | use std::fmt::Debug; 6 | 7 | /// Find cycles in the type tree to determine which types are recursive. Runs a DFS from every node not already finished; returns each discovered cycle as the sequence of nodes on the stack from the back-edge target onward. NOTE(review): generic parameters were reconstructed after `<...>` was stripped by text extraction — confirm bounds against upstream. 8 | pub fn find_cycles<N: NodeTrait + Debug>( 9 | graph: &GraphMap<N, (), Directed>, 10 | ) -> HashSet<Vec<N>> { 11 | let mut cycles = HashSet::new(); 12 | let mut visited = HashSet::new(); 13 | let mut stack = Vec::new(); 14 | let mut done = HashSet::new(); 15 | for node in graph.nodes() { 16 | // `clear()` is the idiomatic way to empty the set between roots (was `drain()` with the returned iterator discarded). 17 | visited.clear(); 18 | if !done.contains(&node) { 19 | dfs_cycle(graph, node, &mut visited, &mut stack, &mut cycles); 20 | } 21 | done.insert(node); 22 | } 23 | cycles 24 | } 25 | 26 | /// DFS helper: records the current path in `stack`; a neighbor that is already on the stack closes a cycle, and the stack suffix starting at that neighbor is stored in `cycles`. fn dfs_cycle<N: NodeTrait + Debug>( 27 | graph: &GraphMap<N, (), Directed>, 28 | node: N, 29 | visited: &mut HashSet<N>, 30 | stack: &mut Vec<N>, 31 | cycles: &mut HashSet<Vec<N>>, 32 | ) { 33 | visited.insert(node); 34 | stack.push(node); 35 | for neighbor in graph.neighbors(node) { 36 | if !visited.contains(&neighbor) { 37 | dfs_cycle(graph, neighbor, visited, stack, cycles); 38 | } else if stack.contains(&neighbor) { 39 | let cycle_start = stack.iter().position(|&x| x == neighbor).unwrap(); 40 | let cycle = stack[cycle_start..].to_vec(); 41 | cycles.insert(cycle); 42 | } 43 | } 44 | 45 | stack.pop(); 46 | } 47 | -------------------------------------------------------------------------------- /autarkie/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(warnings)] 2 | #![feature(core_intrinsics)] 3 | 4 | #[cfg(feature
= "autarkie_derive")] 5 | pub use autarkie_derive::Grammar; 6 | 7 | pub use blake3::hash; 8 | pub use libafl::corpus::CorpusId; 9 | pub use libafl::executors::ExitKind as LibAFLExitKind; 10 | pub use libafl::inputs::Input; 11 | pub use libafl::inputs::TargetBytesConverter; 12 | pub use libafl_bolts::ownedref::OwnedSlice; 13 | pub use libafl_bolts::Error as LibAFLError; 14 | 15 | pub mod tree; 16 | pub mod visitor; 17 | pub use tree::*; 18 | pub use visitor::*; 19 | 20 | mod graph; 21 | 22 | #[cfg(feature = "bincode")] 23 | pub mod serde; 24 | #[cfg(feature = "bincode")] 25 | pub use serde::*; 26 | 27 | #[cfg(feature = "scale")] 28 | pub mod scale; 29 | #[cfg(feature = "scale")] 30 | pub use scale::*; 31 | 32 | pub mod fuzzer; 33 | pub use fuzzer::afl; 34 | pub use fuzzer::libfuzzer; 35 | -------------------------------------------------------------------------------- /autarkie/src/scale.rs: -------------------------------------------------------------------------------- 1 | /// Primitives for parity-scale-codec 2 | use parity_scale_codec::{Compact, Decode, Encode}; 3 | use serde::{Deserialize, Serialize}; 4 | 5 | use crate::Node; 6 | 7 | #[derive(Debug, Clone, Encode, Decode, Serialize, Deserialize)] 8 | pub struct Compactu8(#[codec(compact)] pub u8); 9 | 10 | #[derive(Debug, Clone, Encode, Decode, Serialize, Deserialize)] 11 | pub struct Compactu16(#[codec(compact)] pub u16); 12 | 13 | #[derive(Debug, Clone, Encode, Decode, Serialize, Deserialize)] 14 | pub struct Compactu32(#[codec(compact)] pub u32); 15 | 16 | #[derive(Debug, Clone, Encode, Decode, Serialize, Deserialize)] 17 | pub struct Compactu64(#[codec(compact)] pub u64); 18 | 19 | #[derive(Debug, Clone, Encode, Decode, Serialize, Deserialize)] 20 | pub struct Compactu128(#[codec(compact)] pub u128); 21 | 22 | macro_rules! 
impl_generate_compact { 23 | ($type: ty, $inner: ty, $num_bytes: literal) => { 24 | impl Node for $type { 25 | fn __autarkie_generate( 26 | v: &mut crate::Visitor, 27 | depth: &mut usize, 28 | cur_depth: &mut usize, 29 | ) -> Option { 30 | let inner = 31 | crate::deserialize::<$inner>(&mut v.generate_bytes($num_bytes).as_slice()); 32 | Some(Self(inner)) 33 | } 34 | fn __autarkie_cmps(&self, v: &mut crate::Visitor, index: usize, val: (u64, u64)) { 35 | if val.0 == self.0 as u64 { 36 | v.register_cmp(crate::serialize(&(val.1 as $inner))); 37 | }; 38 | } 39 | } 40 | }; 41 | // we don't do cmps for u8 42 | (u8, $num_bytes: literal) => { 43 | impl Node for $type { 44 | fn __autarkie_generate(v: &mut Visitor) -> Option { 45 | let inner = 46 | crate::deserialize::<$inner>(&mut v.generate_bytes($num_bytes).as_slice()); 47 | Some(Self(inner)) 48 | } 49 | } 50 | }; 51 | } 52 | 53 | impl_generate_compact!(Compactu8, u8, 1); 54 | impl_generate_compact!(Compactu16, u16, 2); 55 | impl_generate_compact!(Compactu32, u32, 4); 56 | impl_generate_compact!(Compactu64, u64, 8); 57 | impl_generate_compact!(Compactu128, u128, 32); 58 | -------------------------------------------------------------------------------- /autarkie/src/serde.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::VecDeque, fmt::Debug}; 2 | 3 | use crate::{deserialize, serialize, MutationType, Node, Visitor}; 4 | 5 | macro_rules! 
impl_node_serde_array { 6 | ($n: literal) => { 7 | impl Node for [T; $n] 8 | where 9 | T: Node, 10 | { 11 | fn __autarkie_generate( 12 | visitor: &mut Visitor, 13 | depth: &mut usize, 14 | cur_depth: &mut usize, 15 | ) -> Option { 16 | Some( 17 | (0..$n) 18 | .map(|_| T::__autarkie_generate(visitor, depth, cur_depth)) 19 | .filter_map(|i| i) 20 | .collect::>() 21 | .try_into() 22 | .ok()?, 23 | ) 24 | } 25 | 26 | fn __autarkie_serialized(&self, visitor: &mut Visitor) { 27 | for item in self { 28 | visitor.add_serialized(serialize(&item), T::__autarkie_id()); 29 | item.__autarkie_serialized(visitor); 30 | } 31 | } 32 | 33 | fn __autarkie_node_ty(&self, visitor: &Visitor) -> crate::NodeType { 34 | crate::NodeType::Iterable(true, $n, T::__autarkie_id()) 35 | } 36 | 37 | fn __autarkie_register(v: &mut Visitor, parent: Option, variant: usize) { 38 | if !v.is_recursive(T::__autarkie_id()) { 39 | T::__autarkie_register(v, parent, variant); 40 | } else { 41 | v.register_ty(parent, T::__autarkie_id(), variant); 42 | v.pop_ty(); 43 | } 44 | } 45 | 46 | fn __autarkie_mutate( 47 | &mut self, 48 | ty: &mut MutationType, 49 | visitor: &mut Visitor, 50 | mut path: VecDeque, 51 | ) { 52 | if let Some(popped) = path.pop_front() { 53 | self.get_mut(popped) 54 | .expect("mdNWnhI6____") 55 | .__autarkie_mutate(ty, visitor, path); 56 | } else { 57 | match ty { 58 | MutationType::Splice(other) => { 59 | *self = deserialize(other); 60 | } 61 | MutationType::GenerateReplace(ref mut bias) => { 62 | if let Some(generated) = 63 | Self::__autarkie_generate(visitor, bias, &mut 0) 64 | { 65 | *self = generated; 66 | self.__autarkie_serialized(visitor); 67 | } 68 | } 69 | _ => unreachable!("tAL6LPUb____"), 70 | } 71 | } 72 | } 73 | fn __autarkie_fields(&self, visitor: &mut Visitor, index: usize) { 74 | for (index, child) in self.iter().enumerate() { 75 | visitor.register_field_stack(( 76 | ((index, child.__autarkie_node_ty(visitor))), 77 | T::__autarkie_id(), 78 | )); 79 | 
child.__autarkie_fields(visitor, 0); 80 | visitor.pop_field(); 81 | } 82 | } 83 | 84 | fn __autarkie_cmps(&self, visitor: &mut Visitor, index: usize, val: (u64, u64)) { 85 | for (index, child) in self.iter().enumerate() { 86 | visitor.register_field_stack(( 87 | ((index, child.__autarkie_node_ty(visitor))), 88 | T::__autarkie_id(), 89 | )); 90 | child.__autarkie_cmps(visitor, index, val); 91 | visitor.pop_field(); 92 | } 93 | } 94 | } 95 | }; 96 | } 97 | 98 | impl_node_serde_array!(1usize); 99 | impl_node_serde_array!(2usize); 100 | impl_node_serde_array!(3usize); 101 | impl_node_serde_array!(4usize); 102 | impl_node_serde_array!(5usize); 103 | impl_node_serde_array!(6usize); 104 | impl_node_serde_array!(7usize); 105 | impl_node_serde_array!(8usize); 106 | impl_node_serde_array!(9usize); 107 | impl_node_serde_array!(10usize); 108 | impl_node_serde_array!(11usize); 109 | impl_node_serde_array!(12usize); 110 | impl_node_serde_array!(13usize); 111 | impl_node_serde_array!(14usize); 112 | impl_node_serde_array!(15usize); 113 | impl_node_serde_array!(16usize); 114 | impl_node_serde_array!(17usize); 115 | impl_node_serde_array!(18usize); 116 | impl_node_serde_array!(19usize); 117 | impl_node_serde_array!(20usize); 118 | impl_node_serde_array!(21usize); 119 | impl_node_serde_array!(22usize); 120 | impl_node_serde_array!(23usize); 121 | impl_node_serde_array!(24usize); 122 | impl_node_serde_array!(25usize); 123 | impl_node_serde_array!(26usize); 124 | impl_node_serde_array!(27usize); 125 | impl_node_serde_array!(28usize); 126 | impl_node_serde_array!(29usize); 127 | impl_node_serde_array!(30usize); 128 | impl_node_serde_array!(31usize); 129 | impl_node_serde_array!(32usize); 130 | -------------------------------------------------------------------------------- /autarkie/src/visitor.rs: -------------------------------------------------------------------------------- 1 | use crate::Id; 2 | use libafl_bolts::rands::{Rand, StdRand}; 3 | use num_traits::CheckedSub; 4 | use 
petgraph::{ 5 | data::Build, 6 | dot::{Config, Dot}, 7 | graph::DiGraph, 8 | graphmap::DiGraphMap, 9 | Directed, 10 | }; 11 | use std::collections::{BTreeMap, BTreeSet, HashMap}; 12 | 13 | /// The `Visitor` struct is the primary way to communicate with the Fuzz-ed type during runtime. 14 | /// Unforuntately procedural macros are rather limiting, so we must delegate effort to the runtime. 15 | /// For example, it is impossible to statically know what fields a struct may have due to Enums. 16 | /// Yes, I know this is not a Visitor in the traditional GoF sense, but so what. 17 | #[derive(Debug, Clone)] 18 | pub struct Visitor { 19 | /// The maximum depth used to constrain generation and mutation of inputs 20 | depth: DepthInfo, 21 | /// Pool of strings the fuzzer uses. 22 | strings: StringPool, 23 | /// The list of fields inside a Fuzz-ed type's Instance 24 | fields: Vec>, 25 | /// The stack of fields inside a Fuzz-ed type's Instance. 26 | field_stack: Vec, 27 | /// For cmplog, we map fields which match the bytes provided 28 | matching_cmps: Vec<(Vec, Vec)>, 29 | /// A map of types which are mapped to their variants and their fields. 30 | /// Examples: 31 | /// a struct will be { Struct: {0: { usize, u32 } } } 32 | /// an enum will be { Enum: {variant_0: { usize, u32 }, variant_1: {u8}} } 33 | ty_map: BTreeMap>>, 34 | /// Types we have already analyzed. to prevent infinite recursion 35 | ty_done: BTreeSet, 36 | /// A stack of types we are analyzing, to prevent infinite recursion 37 | ty_map_stack: Vec, 38 | /// Fields which are serialized by the Fuzz-ed type's instance. 
Used to save to corpora for splicing 39 | serialized: Vec<(Vec, Id)>, 40 | ty_generate_map: BTreeMap>>, 41 | /// State of randomnes 42 | rng: StdRand, 43 | } 44 | 45 | impl Visitor { 46 | pub fn get_string(&mut self) -> String { 47 | self.strings.get_string(&mut self.rng) 48 | } 49 | pub fn register_string(&mut self, string: String) { 50 | self.strings.register_string(string) 51 | } 52 | 53 | pub fn generate_bytes(&mut self, amount: usize) -> Vec { 54 | // TODO: possible to make more efficient? 55 | (0..amount) 56 | .map(|_| self.rng.next() as u8) 57 | .collect::>() 58 | } 59 | 60 | pub fn coinflip(&mut self) -> bool { 61 | self.rng.coinflip(0.5) 62 | } 63 | 64 | pub fn coinflip_with_prob(&mut self, prob: f64) -> bool { 65 | self.rng.coinflip(prob) 66 | } 67 | 68 | pub fn random_range(&mut self, min: usize, max: usize) -> usize { 69 | self.rng.between(min, max) 70 | } 71 | 72 | pub fn register_field(&mut self, item: FieldLocation) { 73 | self.field_stack.push(item); 74 | self.fields.push(self.field_stack.clone()); 75 | } 76 | 77 | pub fn register_cmp(&mut self, data: Vec) { 78 | self.matching_cmps.push((self.field_stack.clone(), data)); 79 | } 80 | 81 | pub fn register_field_stack(&mut self, item: FieldLocation) { 82 | self.field_stack.push(item); 83 | } 84 | 85 | pub fn pop_field(&mut self) { 86 | self.field_stack.pop(); 87 | } 88 | 89 | pub fn cmps(&mut self) -> Vec<(Vec, Vec)> { 90 | let cmps = std::mem::take(&mut self.matching_cmps); 91 | self.fields.clear(); 92 | self.field_stack.clear(); 93 | cmps 94 | } 95 | 96 | pub fn fields(&mut self) -> Vec> { 97 | let fields = std::mem::take(&mut self.fields); 98 | self.field_stack.clear(); 99 | fields 100 | } 101 | 102 | pub fn add_serialized(&mut self, serialized_data: Vec, id: Id) { 103 | self.serialized.push((serialized_data, id)) 104 | } 105 | 106 | pub fn serialized(&mut self) -> Vec<(Vec, Id)> { 107 | let serialized = std::mem::take(&mut self.serialized); 108 | serialized 109 | } 110 | 111 | pub fn 
generate_depth(&self) -> usize { 112 | self.depth.generate 113 | } 114 | 115 | pub fn iterate_depth(&self) -> usize { 116 | self.depth.iterate 117 | } 118 | 119 | /// This function adds a type to the type map 120 | pub fn register_ty(&mut self, parent: Option, id: Id, variant: usize) { 121 | self.ty_map_stack.push(id.clone()); 122 | #[cfg(debug_assertions)] 123 | let parent = parent.unwrap_or("AutarkieInternalFuzzData".to_string()); 124 | #[cfg(not(debug_assertions))] 125 | // Let's hope we get no collisions! 126 | let parent = parent.unwrap_or(u128::MIN); 127 | if !self.ty_map.get(&parent).is_some() { 128 | self.ty_map.insert( 129 | parent.clone(), 130 | BTreeMap::from_iter([(variant, BTreeSet::new())]), 131 | ); 132 | } 133 | self.ty_map 134 | .get_mut(&parent) 135 | .expect("____rwBG5LkVKH") 136 | .entry(variant) 137 | .and_modify(|i| { 138 | i.insert(id.clone()); 139 | }) 140 | .or_insert(BTreeSet::from_iter([id.clone()])); 141 | } 142 | 143 | pub fn pop_ty(&mut self) { 144 | let popped = self.ty_map_stack.pop().expect("____mZiIy3hlu8"); 145 | self.ty_done.insert(popped); 146 | } 147 | 148 | pub fn is_recursive(&mut self, id: Id) -> bool { 149 | self.ty_map_stack.contains(&id) || self.ty_done.contains(&id) 150 | } 151 | 152 | // TODO: optimize 153 | // TODO: refactor ffs 154 | // TODO: document algorithm 155 | /// Automatically determine recursive types 156 | pub fn calculate_recursion(&mut self) -> BTreeMap> { 157 | let mut recursive_nodes = BTreeMap::new(); 158 | let mut g = DiGraphMap::<_, usize>::new(); 159 | for (ty, variants) in self.ty_map.iter() { 160 | for (variant_id, variant_tys) in variants { 161 | g.add_edge((ty, -1), (ty, *variant_id as isize), 1); 162 | for variant_ty in variant_tys { 163 | g.add_edge((ty, *variant_id as isize), (variant_ty, -1), 1); 164 | } 165 | } 166 | } 167 | let cycles = crate::graph::find_cycles(&g); 168 | for cycle in cycles { 169 | let (root_ty, root_variant) = cycle.first().unwrap(); 170 | let root = 
self.ty_map.get(cycle.first().unwrap().0).unwrap(); 171 | let (last_ty, last_variant) = cycle.last().unwrap(); 172 | let last = self.ty_map.get(cycle.last().unwrap().0).unwrap(); 173 | if *root_ty == *last_ty { 174 | // a type may be recursive to it's own so we ignore 175 | if last_variant.gt(&-1) { 176 | recursive_nodes 177 | .entry(root_ty.clone().clone()) 178 | .and_modify(|inner: &mut BTreeSet| { 179 | inner.insert(last_variant.clone().try_into().unwrap_or(0)); 180 | }) 181 | .or_insert(BTreeSet::from_iter([last_variant 182 | .clone() 183 | .try_into() 184 | .unwrap_or(0)])); 185 | } 186 | } else { 187 | let root_index = 1; 188 | let last_index = cycle.len().checked_sub(1).unwrap(); 189 | let (root_ty, root_variant) = cycle.get(root_index).unwrap(); 190 | let (last_ty, last_variant) = cycle.get(last_index).unwrap(); 191 | let root_variant_count = self.ty_map.get(root_ty.clone()).unwrap().len(); 192 | let last_variant_count = self.ty_map.get(last_ty.clone()).unwrap().len(); 193 | if root_variant_count > last_variant_count { 194 | recursive_nodes 195 | .entry(root_ty.clone().clone()) 196 | .and_modify(|inner: &mut BTreeSet| { 197 | inner.insert(root_variant.clone().try_into().unwrap_or(0)); 198 | }) 199 | .or_insert(BTreeSet::from_iter([root_variant 200 | .clone() 201 | .try_into() 202 | .unwrap_or(0)])); 203 | } else if last_variant_count > root_variant_count { 204 | recursive_nodes 205 | .entry(last_ty.clone().clone()) 206 | .and_modify(|inner: &mut BTreeSet| { 207 | inner.insert(last_variant.clone().try_into().unwrap_or(0)); 208 | }) 209 | .or_insert(BTreeSet::from_iter([last_variant 210 | .clone() 211 | .try_into() 212 | .unwrap_or(0)])); 213 | } 214 | } 215 | } 216 | for (ty, map) in &self.ty_map { 217 | let r_variants = recursive_nodes 218 | .get(ty) 219 | .unwrap_or(&BTreeSet::default()) 220 | .clone(); 221 | self.ty_generate_map.insert( 222 | ty.clone(), 223 | BTreeMap::from_iter([(GenerateType::Recursive, r_variants.clone())]), 224 | ); 225 | let mut 
nr_variants = map.keys().cloned().collect::>(); 226 | if r_variants.len() > 0 { 227 | nr_variants = nr_variants 228 | .into_iter() 229 | .filter(|item| !r_variants.contains(item)) 230 | .collect::>(); 231 | } 232 | self.ty_generate_map 233 | .entry(ty.clone()) 234 | .and_modify(|inner| { 235 | inner.insert(GenerateType::NonRecursive, nr_variants.clone()); 236 | }) 237 | .or_insert(BTreeMap::from_iter([( 238 | GenerateType::NonRecursive, 239 | nr_variants, 240 | )])); 241 | } 242 | return recursive_nodes; 243 | } 244 | 245 | #[inline] 246 | pub fn is_recursive_variant(&self, id: Id, variant: usize) -> bool { 247 | self.ty_generate_map 248 | .get(&id) 249 | .expect("____H2PJrlvAdz") 250 | .get(&GenerateType::Recursive) 251 | .expect("oBdODZ8L____") 252 | .contains(&variant) 253 | } 254 | 255 | #[inline] 256 | // TODO: refactor 257 | /// This function is used by enums to determine which variant to generate. 258 | /// Since some variant are recursive, we check whether our depth is under the recursive depth 259 | /// limit. 260 | /// If so, we MAY pick a recursive variant 261 | /// If not, we MAY NOT pick a recursive variant 262 | /// If we do not have any non-recursive variants we return None and the Input 263 | /// generation/mutation fails. 
264 | pub fn generate(&mut self, id: &Id, depth: &usize) -> Option<(usize, bool)> { 265 | let consider_recursive = *depth < self.depth.generate; 266 | let (variant, is_recursive) = if consider_recursive { 267 | let variants = self.ty_generate_map.get(id).expect("____VbO3rGYTSf"); 268 | let nr_variants = variants 269 | .get(&GenerateType::NonRecursive) 270 | .expect("____lCAftArdHS"); 271 | let r_variants = variants 272 | .get(&GenerateType::Recursive) 273 | .expect("____q154Wl5zf2"); 274 | let nr_variants_len = nr_variants.len().saturating_sub(1); 275 | let r_variants_len = r_variants.len().saturating_sub(1); 276 | let id = self.rng.between(0, nr_variants_len + r_variants_len); 277 | if id <= nr_variants_len { 278 | if let Some(nr_variant) = nr_variants.iter().nth(id) { 279 | (nr_variant.clone(), false) 280 | } else { 281 | ( 282 | r_variants.iter().nth(id).expect("nd5oh1G2____").clone(), 283 | true, 284 | ) 285 | } 286 | } else { 287 | ( 288 | r_variants 289 | .iter() 290 | .nth(id.checked_sub(nr_variants_len).expect("____ibvCjQB5oX")) 291 | .expect("____LaawYczeqc") 292 | .clone(), 293 | true, 294 | ) 295 | } 296 | } else { 297 | let variants = self 298 | .ty_generate_map 299 | .get(id) 300 | .expect("____clESlzqUbX") 301 | .get(&GenerateType::NonRecursive) 302 | .expect("____ffxyyA6Nub"); 303 | if variants.len() == 0 { 304 | return None; 305 | } 306 | let variants_len = variants.len().saturating_sub(1); 307 | let nth = self.rng.between(0, variants_len); 308 | ( 309 | variants.iter().nth(nth).expect("____pvPK973BLH").clone(), 310 | false, 311 | ) 312 | }; 313 | Some((variant, is_recursive)) 314 | } 315 | 316 | pub fn new(seed: u64, depth: DepthInfo) -> Self { 317 | let mut visitor = Self { 318 | ty_generate_map: BTreeMap::default(), 319 | ty_done: BTreeSet::default(), 320 | ty_map_stack: vec![], 321 | depth, 322 | fields: vec![], 323 | field_stack: vec![], 324 | matching_cmps: vec![], 325 | serialized: vec![], 326 | strings: StringPool::new(), 327 | ty_map: 
BTreeMap::new(), 328 | rng: StdRand::with_seed(seed), 329 | }; 330 | visitor.strings.add_strings(&mut visitor.rng, 100, 10); 331 | return visitor; 332 | } 333 | } 334 | 335 | #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] 336 | pub enum NodeType { 337 | /// A normal node 338 | NonRecursive, 339 | /// A node with a field which of type Self. eg: Box 340 | Recursive, 341 | /// An iterable node. eg Vec 342 | Iterable( 343 | /// if fixed element count (eg: [u8; 32]) 344 | bool, 345 | /// Amount of Elements 346 | usize, 347 | /// type id of Elements 348 | Id, 349 | ), 350 | } 351 | 352 | impl NodeType { 353 | pub fn is_recursive(&self) -> bool { 354 | matches!(self, NodeType::Recursive) 355 | } 356 | 357 | pub fn is_iterable(&self) -> bool { 358 | matches!(self, NodeType::Iterable(_, _, _)) 359 | } 360 | } 361 | 362 | #[derive(Debug, Clone)] 363 | /// The DepthInfo struct throttles the generation and mutation of inputs. 364 | /// We need to set a recursive depth on Inputs so self referencing types do not result in a stack overflow 365 | /// We need to set a limit on the amount of elements in an iterable for performance reasons. 366 | pub struct DepthInfo { 367 | /// For recursive generation (eg. 
if an enum is recursive (eg: Box)) 368 | pub generate: usize, 369 | /// For iterative generation (Vec/HashMap) 370 | pub iterate: usize, 371 | } 372 | 373 | #[derive(Ord, PartialEq, Eq, PartialOrd, Debug, Clone)] 374 | enum GenerateType { 375 | Recursive, 376 | NonRecursive, 377 | } 378 | 379 | pub type FieldLocation = ((usize, NodeType), Id); 380 | 381 | /// Pool of Strings used by the fuzzer 382 | #[derive(Debug, Clone)] 383 | pub struct StringPool { 384 | strings: Vec, 385 | } 386 | 387 | impl StringPool { 388 | /// Fetch a random string from the string pool 389 | pub fn get_string(&mut self, r: &mut StdRand) -> String { 390 | let string_count = self.strings.len() - 1; 391 | let index = r.between(0, string_count); 392 | self.strings.get(index).expect("5hxil4dq____").clone() 393 | } 394 | 395 | pub fn new() -> Self { 396 | Self { strings: vec![] } 397 | } 398 | 399 | /// Add a string manually 400 | pub fn register_string(&mut self, string: String) { 401 | if !self.strings.contains(&string) { 402 | self.strings.push(string); 403 | } 404 | } 405 | 406 | /// Add `num` amount of unique strings of `max_len` 407 | pub fn add_strings(&mut self, r: &mut StdRand, num: usize, max_len: usize) { 408 | while self.strings.len() < num { 409 | let element_count = r.between(1, max_len); 410 | let printables = 411 | "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".as_bytes(); 412 | let res = (0..element_count) 413 | .map(|_| printables[r.between(0, printables.len() - 1)]) 414 | .collect::>(); 415 | let string = String::from_utf8(res).unwrap(); 416 | if !self.strings.contains(&string) { 417 | self.strings.push(string); 418 | } 419 | } 420 | } 421 | } 422 | -------------------------------------------------------------------------------- /autarkie_derive/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "autarkie_derive" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [lib] 7 | proc-macro = true 8 
| 9 | [dependencies] 10 | blake3 = "1.5.4" 11 | proc-macro2 = "1.0" 12 | quote = "1.0" 13 | syn = { version = "2", features = ['derive', 'full', 'visit', 'parsing', 'extra-traits'] } 14 | 15 | # different encodings 16 | bincode = {version = "1.3.3"} 17 | serde = { version = "1.0.215", features = ["derive"], optional = true } 18 | proc-macro-crate = "3.1.0" 19 | regex = "1.11.1" 20 | -------------------------------------------------------------------------------- /autarkie_derive/src/trait_bounds.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Parity Technologies 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::iter; 16 | 17 | use proc_macro2::Ident; 18 | use syn::{ 19 | parse_quote, 20 | spanned::Spanned, 21 | visit::{self, Visit}, 22 | Generics, Result, Type, TypePath, 23 | }; 24 | 25 | use crate::utils::{self}; 26 | 27 | /// Visits the ast and checks if one of the given idents is found. 28 | struct ContainIdents<'a> { 29 | result: bool, 30 | idents: &'a [Ident], 31 | } 32 | 33 | impl<'ast> Visit<'ast> for ContainIdents<'_> { 34 | fn visit_ident(&mut self, i: &'ast Ident) { 35 | if self.idents.iter().any(|id| id == i) { 36 | self.result = true; 37 | } 38 | } 39 | } 40 | 41 | /// Checks if the given type contains one of the given idents. 
42 | fn type_contain_idents(ty: &Type, idents: &[Ident]) -> bool { 43 | let mut visitor = ContainIdents { 44 | result: false, 45 | idents, 46 | }; 47 | visitor.visit_type(ty); 48 | visitor.result 49 | } 50 | 51 | /// Visits the ast and checks if the a type path starts with the given ident. 52 | struct TypePathStartsWithIdent<'a> { 53 | result: bool, 54 | ident: &'a Ident, 55 | } 56 | 57 | impl<'ast> Visit<'ast> for TypePathStartsWithIdent<'_> { 58 | fn visit_type_path(&mut self, i: &'ast TypePath) { 59 | if let Some(segment) = i.path.segments.first() { 60 | if &segment.ident == self.ident { 61 | self.result = true; 62 | return; 63 | } 64 | } 65 | 66 | visit::visit_type_path(self, i); 67 | } 68 | } 69 | 70 | /// Checks if the given type path or any containing type path starts with the given ident. 71 | fn type_path_or_sub_starts_with_ident(ty: &TypePath, ident: &Ident) -> bool { 72 | let mut visitor = TypePathStartsWithIdent { 73 | result: false, 74 | ident, 75 | }; 76 | visitor.visit_type_path(ty); 77 | visitor.result 78 | } 79 | 80 | /// Checks if the given type or any containing type path starts with the given ident. 81 | fn type_or_sub_type_path_starts_with_ident(ty: &Type, ident: &Ident) -> bool { 82 | let mut visitor = TypePathStartsWithIdent { 83 | result: false, 84 | ident, 85 | }; 86 | visitor.visit_type(ty); 87 | visitor.result 88 | } 89 | 90 | /// Visits the ast and collects all type paths that do not start or contain the given ident. 91 | /// 92 | /// Returns `T`, `N`, `A` for `Vec<(Recursive, A)>` with `Recursive` as ident. 
93 | struct FindTypePathsNotStartOrContainIdent<'a> { 94 | result: Vec, 95 | ident: &'a Ident, 96 | } 97 | 98 | impl<'ast> Visit<'ast> for FindTypePathsNotStartOrContainIdent<'_> { 99 | fn visit_type_path(&mut self, i: &'ast TypePath) { 100 | if type_path_or_sub_starts_with_ident(i, self.ident) { 101 | visit::visit_type_path(self, i); 102 | } else { 103 | self.result.push(i.clone()); 104 | } 105 | } 106 | } 107 | 108 | /// Collects all type paths that do not start or contain the given ident in the given type. 109 | /// 110 | /// Returns `T`, `N`, `A` for `Vec<(Recursive, A)>` with `Recursive` as ident. 111 | fn find_type_paths_not_start_or_contain_ident(ty: &Type, ident: &Ident) -> Vec { 112 | let mut visitor = FindTypePathsNotStartOrContainIdent { 113 | result: Vec::new(), 114 | ident, 115 | }; 116 | visitor.visit_type(ty); 117 | visitor.result 118 | } 119 | 120 | pub fn add(input_ident: &Ident, generics: &mut Generics, data: &syn::Data) { 121 | let ty_params = generics 122 | .type_params() 123 | .map(|tp| tp.ident.clone()) 124 | .collect::>(); 125 | let types_with_bounds = 126 | get_types_to_add_trait_bound(input_ident, data, &ty_params, false).unwrap(); 127 | let where_clause = generics.make_where_clause(); 128 | types_with_bounds.into_iter().for_each(|ty| { 129 | where_clause 130 | .predicates 131 | .push(parse_quote!(#ty : ::autarkie::Node)) 132 | }); 133 | } 134 | /// Returns all types that must be added to the where clause with the respective trait bound. 135 | pub fn get_types_to_add_trait_bound( 136 | input_ident: &Ident, 137 | data: &syn::Data, 138 | ty_params: &[Ident], 139 | dumb_trait_bound: bool, 140 | ) -> Result> { 141 | if dumb_trait_bound { 142 | Ok(ty_params.iter().map(|t| parse_quote!( #t )).collect()) 143 | } else { 144 | let needs_codec_bound = |f: &syn::Field| { 145 | !utils::is_compact(f) 146 | && utils::get_encoded_as_type(f).is_none() 147 | && !utils::should_skip(&f.attrs) 148 | }; 149 | let res = collect_types(data, needs_codec_bound)? 
150 | .into_iter() 151 | // Only add a bound if the type uses a generic 152 | .filter(|ty| type_contain_idents(ty, ty_params)) 153 | // If a struct contains itself as field type, we can not add this type into the where 154 | // clause. This is required to work a round the following compiler bug: https://github.com/rust-lang/rust/issues/47032 155 | .flat_map(|ty| { 156 | find_type_paths_not_start_or_contain_ident(&ty, input_ident) 157 | .into_iter() 158 | .map(Type::Path) 159 | // Remove again types that do not contain any of our generic parameters 160 | .filter(|ty| type_contain_idents(ty, ty_params)) 161 | // Add back the original type, as we don't want to loose it. 162 | .chain(iter::once(ty)) 163 | }) 164 | // Remove all remaining types that start/contain the input ident to not have them in the 165 | // where clause. 166 | .filter(|ty| !type_or_sub_type_path_starts_with_ident(ty, input_ident)) 167 | .collect(); 168 | 169 | Ok(res) 170 | } 171 | } 172 | 173 | fn collect_types(data: &syn::Data, type_filter: fn(&syn::Field) -> bool) -> Result> { 174 | use syn::*; 175 | 176 | let types = match *data { 177 | Data::Struct(ref data) => match &data.fields { 178 | Fields::Named(FieldsNamed { named: fields, .. }) 179 | | Fields::Unnamed(FieldsUnnamed { 180 | unnamed: fields, .. 181 | }) => fields 182 | .iter() 183 | .filter(|f| type_filter(f)) 184 | .map(|f| f.ty.clone()) 185 | .collect(), 186 | 187 | Fields::Unit => Vec::new(), 188 | }, 189 | 190 | Data::Enum(ref data) => data 191 | .variants 192 | .iter() 193 | .filter(|variant| !utils::should_skip(&variant.attrs)) 194 | .flat_map(|variant| match &variant.fields { 195 | Fields::Named(FieldsNamed { named: fields, .. }) 196 | | Fields::Unnamed(FieldsUnnamed { 197 | unnamed: fields, .. 
198 | }) => fields 199 | .iter() 200 | .filter(|f| type_filter(f)) 201 | .map(|f| f.ty.clone()) 202 | .collect(), 203 | 204 | Fields::Unit => Vec::new(), 205 | }) 206 | .collect(), 207 | 208 | Data::Union(ref data) => { 209 | return Err(Error::new( 210 | data.union_token.span(), 211 | "Union types are not supported.", 212 | )) 213 | } 214 | }; 215 | 216 | Ok(types) 217 | } 218 | -------------------------------------------------------------------------------- /autarkie_derive/src/utils.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2018-2020 Parity Technologies 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //! Various internal utils. 16 | //! 17 | //! NOTE: attributes finder must be checked using check_attribute first, 18 | //! otherwise the macro can panic. 
19 | 20 | use std::str::FromStr; 21 | 22 | use proc_macro2::TokenStream; 23 | use quote::quote; 24 | use syn::{ 25 | parse::Parse, parse_quote, punctuated::Punctuated, spanned::Spanned, token, Attribute, Expr, 26 | ExprLit, Field, Lit, Meta, Path, Token, 27 | }; 28 | 29 | fn find_meta_item<'a, F, R, I, M>(mut itr: I, mut pred: F) -> Option 30 | where 31 | F: FnMut(M) -> Option + Clone, 32 | I: Iterator, 33 | M: Parse, 34 | { 35 | itr.find_map(|attr| { 36 | attr.path() 37 | .is_ident("codec") 38 | .then(|| pred(attr.parse_args().ok()?)) 39 | .flatten() 40 | }) 41 | } 42 | 43 | /// Look for a `#[codec(encoded_as = "SomeType")]` outer attribute on the given 44 | /// `Field`. 45 | pub fn get_encoded_as_type(field: &Field) -> Option { 46 | find_meta_item(field.attrs.iter(), |meta| { 47 | if let Meta::NameValue(ref nv) = meta { 48 | if nv.path.is_ident("encoded_as") { 49 | if let Expr::Lit(ExprLit { 50 | lit: Lit::Str(ref s), 51 | .. 52 | }) = nv.value 53 | { 54 | return Some( 55 | TokenStream::from_str(&s.value()) 56 | .expect("Internal error, encoded_as attribute must have been checked"), 57 | ); 58 | } 59 | } 60 | } 61 | 62 | None 63 | }) 64 | } 65 | 66 | /// Look for a `#[codec(compact)]` outer attribute on the given `Field`. If the attribute is found, 67 | /// return the compact type associated with the field type. 68 | pub fn get_compact_type(field: &Field, crate_path: &syn::Path) -> Option { 69 | find_meta_item(field.attrs.iter(), |meta| { 70 | if let Meta::Path(ref path) = meta { 71 | if path.is_ident("compact") { 72 | let field_type = &field.ty; 73 | return Some(quote! {<#field_type as #crate_path::HasCompact>::Type}); 74 | } 75 | } 76 | 77 | None 78 | }) 79 | } 80 | 81 | /// Look for a `#[codec(compact)]` outer attribute on the given `Field`. 82 | pub fn is_compact(field: &Field) -> bool { 83 | get_compact_type(field, &parse_quote!(::crate)).is_some() 84 | } 85 | 86 | /// Look for a `#[codec(skip)]` in the given attributes. 
87 | pub fn should_skip(attrs: &[Attribute]) -> bool { 88 | find_meta_item(attrs.iter(), |meta| { 89 | if let Meta::Path(ref path) = meta { 90 | if path.is_ident("skip") { 91 | return Some(path.span()); 92 | } 93 | } 94 | 95 | None 96 | }) 97 | .is_some() 98 | } 99 | 100 | /// This struct matches `crate = ...` where the ellipsis is a `Path`. 101 | struct CratePath { 102 | _crate_token: Token![crate], 103 | _eq_token: Token![=], 104 | path: Path, 105 | } 106 | 107 | impl Parse for CratePath { 108 | fn parse(input: syn::parse::ParseStream) -> syn::Result { 109 | Ok(CratePath { 110 | _crate_token: input.parse()?, 111 | _eq_token: input.parse()?, 112 | path: input.parse()?, 113 | }) 114 | } 115 | } 116 | 117 | impl From for Path { 118 | fn from(CratePath { path, .. }: CratePath) -> Self { 119 | path 120 | } 121 | } 122 | 123 | /// Parse `name(T: Bound, N: Bound)` or `name(skip_type_params(T, N))` as a custom trait bound. 124 | pub enum CustomTraitBound { 125 | SpecifiedBounds { 126 | _name: N, 127 | _paren_token: token::Paren, 128 | _bounds: Punctuated, 129 | }, 130 | SkipTypeParams { 131 | _name: N, 132 | _paren_token_1: token::Paren, 133 | _skip_type_params: skip_type_params, 134 | _paren_token_2: token::Paren, 135 | _type_names: Punctuated, 136 | }, 137 | } 138 | 139 | impl Parse for CustomTraitBound { 140 | fn parse(input: syn::parse::ParseStream) -> syn::Result { 141 | let mut content; 142 | let _name: N = input.parse()?; 143 | let _paren_token = syn::parenthesized!(content in input); 144 | if content.peek(skip_type_params) { 145 | Ok(Self::SkipTypeParams { 146 | _name, 147 | _paren_token_1: _paren_token, 148 | _skip_type_params: content.parse::()?, 149 | _paren_token_2: syn::parenthesized!(content in content), 150 | _type_names: content.parse_terminated(syn::Ident::parse, Token![,])?, 151 | }) 152 | } else { 153 | Ok(Self::SpecifiedBounds { 154 | _name, 155 | _paren_token, 156 | _bounds: content.parse_terminated(syn::WherePredicate::parse, Token![,])?, 157 | }) 
158 | } 159 | } 160 | } 161 | 162 | syn::custom_keyword!(encode_bound); 163 | syn::custom_keyword!(decode_bound); 164 | syn::custom_keyword!(decode_with_mem_tracking_bound); 165 | syn::custom_keyword!(mel_bound); 166 | syn::custom_keyword!(skip_type_params); 167 | -------------------------------------------------------------------------------- /autarkie_test/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "autarkie_test" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | autarkie = {path = "../autarkie", features = ["derive", "bincode"]} 8 | serde = { version = "1.0.217", features = ["derive"] } 9 | -------------------------------------------------------------------------------- /autarkie_test/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[allow(unused_imports)] 2 | use autarkie::{Grammar, Node}; 3 | use serde::{Deserialize, Serialize}; 4 | 5 | #[derive(Clone, Debug, Grammar, Serialize, Deserialize)] 6 | pub enum Expr { 7 | Literal(String), 8 | Number(u128), 9 | // recursive 10 | Add(Box, Box), 11 | // potentially recursive 12 | Vec(Vec), 13 | // potentially recursive 14 | What(Box>), 15 | // TODO 5 recursive 16 | WhatTwo(InnerBoxed), 17 | // recursive 18 | WhatTwoInner(InnerBoxedEnum), 19 | // recursive 20 | SayWhat((usize, Box)), 21 | // TODO: 8 potentially recursive 22 | Res(Result), 23 | // recursive 24 | Stmt(Box), 25 | } 26 | 27 | #[derive(Clone, Debug, Grammar, Serialize, Deserialize)] 28 | pub struct Inner { 29 | what: Expr, 30 | who: u64, 31 | } 32 | 33 | #[derive(Clone, Debug, Grammar, Serialize, Deserialize)] 34 | pub struct InnerBoxed { 35 | what: InnerInnerBoxed, 36 | } 37 | 38 | #[derive(Clone, Debug, Grammar, Serialize, Deserialize)] 39 | pub struct InnerInnerBoxed { 40 | what: Box, 41 | } 42 | 43 | #[derive(Clone, Debug, Grammar, Serialize, Deserialize)] 44 | pub enum InnerBoxedEnum { 45 | Test(Box), 46 | } 47 
| 48 | #[derive(Clone, Debug, Grammar, Serialize, Deserialize)] 49 | pub enum Statement { 50 | Exp(Expr), 51 | } 52 | #[cfg(test)] 53 | mod tests { 54 | use std::collections::{BTreeMap, BTreeSet}; 55 | 56 | use autarkie::Visitor; 57 | 58 | use super::*; 59 | #[test] 60 | fn register_ty() { 61 | let mut visitor = Visitor::new( 62 | 0, 63 | autarkie::DepthInfo { 64 | generate: 2, 65 | iterate: 2, 66 | }, 67 | ); 68 | Statement::__autarkie_register(&mut visitor, None, 0); 69 | assert_eq!( 70 | visitor.calculate_recursion(), 71 | BTreeMap::from_iter([ 72 | ( 73 | "autarkie_test::Expr".to_string(), 74 | BTreeSet::from_iter([2, 3, 4, 5, 6, 7, 8, 9]) 75 | ), 76 | ( 77 | "core::option::Option".to_string(), 78 | BTreeSet::from_iter([1]) 79 | ), 80 | ( 81 | "core::result::Result".to_string(), 82 | BTreeSet::from_iter([0]) 83 | ) 84 | ]) 85 | ); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /guides/rbpf.md: -------------------------------------------------------------------------------- 1 | # Fuzzing SBPF 2 | 3 | ## Clone the project 4 | 1. Clone the target 5 | ``` 6 | git clone https://github.com/anza-xyz/sbpf 7 | cd sbpf 8 | ``` 9 | 10 | ## Deriving the grammar 11 | For fuzzing projects with cargo-fuzz, Autarkie expects the grammar to be defined in a crate. The crate MUST expose the struct ``FuzzData``. 12 | This is because it automatically builds an inprocess fuzzer. 13 | 14 | Usually, the grammar is already defined for us. 15 | For example, ``sbpf`` already has an ``Insn`` struct but it includes the ``ptr`` field which we do not need. So we need to re-define it slightly. 16 | 17 | 1. Add ``autarkie`` and ``serde`` as a dependency 18 | For autarkie, we need to pick a serialization primtive. Autarkie supports ``serde``, ``borsh`` and ``scale``. 19 | We will use ``serde`` as our serialization primitive. 
20 | ``` 21 | # there are some dumb conflicts with the package libc 22 | rm Cargo.lock 23 | cargo add autarkie --git https://github.com/R9295/autarkie --features bincode --features derive 24 | cargo add serde --features derive 25 | ``` 26 | 27 | 2. Define our Grammar in ``lib.rs`` 28 | 29 | ``sbpf`` already has an ``Insn`` struct but it includes the ``ptr`` field which we do not need. 30 | So we just copy the struct without the ``ptr`` field. 31 | 32 | ``` rust 33 | /// An EBPF instruction 34 | #[derive(serde::Serialize, serde::Deserialize, autarkie::Grammar, Debug, Clone)] 35 | pub struct FuzzInsn { 36 | /// Operation code. 37 | pub opc: u8, 38 | /// Destination register operand. 39 | pub dst: u8, 40 | /// Source register operand. 41 | pub src: u8, 42 | /// Offset operand. 43 | pub off: i16, 44 | /// Immediate value operand. 45 | pub imm: i64, 46 | } 47 | /// Autarkie's FuzzData 48 | #[derive(serde::Serialize, serde::Deserialize, autarkie::Grammar, Debug, Clone)] 49 | pub struct FuzzData { 50 | // list of instructions. 51 | pub insns: Vec, 52 | // Initial data for the interpreter's Stack 53 | pub mem: Vec 54 | } 55 | 56 | /// Implement necessary traits for LibAFL 57 | autarkie::fuzz_libfuzzer!(FuzzData); 58 | ``` 59 | 60 | That's it! That's your grammar! 61 | 62 | ## Creating our harness 63 | Let's create our fuzzing harness. 64 | In the root of the project: 65 | ``` 66 | cargo fuzz add autarkie_harness 67 | vim fuzz/fuzz_targets/autarkie_harness.rs 68 | ``` 69 | Don't worry, the next section will analyze the harness. 
70 | ``` rust 71 | #![no_main] 72 | use libfuzzer_sys::fuzz_target; 73 | use solana_sbpf::{FuzzData, FuzzInsn}; 74 | use solana_sbpf::{ 75 | ebpf, 76 | elf::Executable, 77 | insn_builder::IntoBytes, 78 | memory_region::MemoryRegion, 79 | program::{BuiltinFunction, BuiltinProgram, FunctionRegistry, SBPFVersion}, 80 | verifier::{RequisiteVerifier, Verifier}, 81 | }; 82 | use test_utils::{create_vm, TestContextObject}; 83 | 84 | fn to_bytes(insns: &[FuzzInsn]) -> Vec { 85 | let mut data = vec![]; 86 | for insn in insns { 87 | data.extend([ 88 | insn.opc, 89 | insn.src << 4 | insn.dst, 90 | insn.off as u8, 91 | (insn.off >> 8) as u8, 92 | insn.imm as u8, 93 | (insn.imm >> 8) as u8, 94 | (insn.imm >> 16) as u8, 95 | (insn.imm >> 24) as u8, 96 | ]); 97 | } 98 | data 99 | } 100 | 101 | fuzz_target!(|data: &[u8]| { 102 | let Ok(fuzz_data) = bincode::deserialize::(data) else { 103 | return; 104 | }; 105 | let prog = to_bytes(&fuzz_data.insns); 106 | let config = solana_sbpf::vm::Config::default(); 107 | let function_registry = FunctionRegistry::default(); 108 | let syscall_registry = FunctionRegistry::>::default(); 109 | 110 | if RequisiteVerifier::verify( 111 | &prog, 112 | &config, 113 | SBPFVersion::V3, 114 | &function_registry, 115 | &syscall_registry, 116 | ) 117 | .is_err() 118 | { 119 | // verify please 120 | return; 121 | } 122 | 123 | #[allow(unused_mut)] 124 | let mut executable = Executable::::from_text_bytes( 125 | &prog, 126 | std::sync::Arc::new(BuiltinProgram::new_loader(config)), 127 | SBPFVersion::V3, 128 | function_registry, 129 | ) 130 | .unwrap(); 131 | let mut interp_mem = fuzz_data.mem.clone(); 132 | let mut interp_context_object = TestContextObject::new(1 << 16); 133 | let interp_mem_region = MemoryRegion::new_writable(&mut interp_mem, ebpf::MM_INPUT_START); 134 | create_vm!( 135 | interp_vm, 136 | &executable, 137 | &mut interp_context_object, 138 | interp_stack, 139 | interp_heap, 140 | vec![interp_mem_region], 141 | None 142 | ); 143 | 
#[allow(unused)] 144 | let (_interp_ins_count, interp_res) = interp_vm.execute_program(&executable, true); 145 | 146 | #[cfg(all(not(target_os = "windows"), target_arch = "x86_64"))] 147 | if executable.jit_compile().is_ok() { 148 | let mut jit_mem = fuzz_data.mem; 149 | let mut jit_context_object = TestContextObject::new(1 << 16); 150 | let jit_mem_region = MemoryRegion::new_writable(&mut jit_mem, ebpf::MM_INPUT_START); 151 | create_vm!( 152 | jit_vm, 153 | &executable, 154 | &mut jit_context_object, 155 | jit_stack, 156 | jit_heap, 157 | vec![jit_mem_region], 158 | None 159 | ); 160 | let (_jit_ins_count, jit_res) = jit_vm.execute_program(&executable, false); 161 | if format!("{:?}", interp_res) != format!("{:?}", jit_res) { 162 | // spot check: there's a meaningless bug where ExceededMaxInstructions is different due to jump calculations 163 | if format!("{:?}", interp_res).contains("ExceededMaxInstructions") 164 | && format!("{:?}", jit_res).contains("ExceededMaxInstructions") 165 | { 166 | return; 167 | } 168 | panic!("Expected {:?}, but got {:?}", interp_res, jit_res); 169 | } 170 | if interp_res.is_ok() { 171 | // we know jit res must be ok if interp res is by this point 172 | if interp_context_object.remaining != jit_context_object.remaining { 173 | panic!( 174 | "Expected {} insts remaining, but got {}", 175 | interp_context_object.remaining, jit_context_object.remaining 176 | ); 177 | } 178 | if interp_mem != jit_mem { 179 | panic!( 180 | "Expected different memory. From interpreter: {:?}\nFrom JIT: {:?}", 181 | interp_mem, jit_mem 182 | ); 183 | } 184 | } 185 | } 186 | }); 187 | ``` 188 | 189 | ## Understanding the harness 190 | It is mostly copied from ``fuzz/fuzz_targets/smart_jit_diff.rs`` 191 | But we introduce some key autarkie functionality. 192 | 1. Importing our grammar 193 | 194 | ``` rust 195 | use solana_sbpf::{FuzzData, FuzzInsn}; 196 | ``` 197 | 2. Converting instructions into native bytes. 
198 | 199 | Since the EBPF format expects the instructions to be in a particular format, we need to convert our list of instructions into the native EBPF instruction format. 200 | ``` rust 201 | fn to_bytes(insns: &[FuzzInsn]) -> Vec { 202 | let mut data = vec![]; 203 | for insn in insns { 204 | data.extend([ 205 | insn.opc, 206 | insn.src << 4 | insn.dst, 207 | insn.off as u8, 208 | (insn.off >> 8) as u8, 209 | insn.imm as u8, 210 | (insn.imm >> 8) as u8, 211 | (insn.imm >> 16) as u8, 212 | (insn.imm >> 24) as u8, 213 | ]); 214 | } 215 | data 216 | } 217 | ``` 218 | 3. Deserializing the bytes into our grammar. 219 | 220 | 221 | **Please note**: Autarkie will always send valid data, and thus, the deserialization will always be successful. 222 | We only add the return clause so that the harness can be re-used for other fuzzers who may produce structurally invalid input. 223 | ``` rust 224 | let Ok(fuzz_data) = bincode::deserialize::(data) else { 225 | return; 226 | }; 227 | let prog = to_bytes(&fuzz_data.insns); 228 | ``` 229 | 230 | ## Running the fuzzer 231 | 1. Autarkie has a ``libfuzzer`` shim, based on ``libafl_libfuzzer``. Let's replace the libfuzzer with Autarkie's libfuzzer 232 | ``` 233 | vim fuzz/Cargo.toml 234 | ``` 235 | ``` toml 236 | # replace 237 | # libfuzzer-sys = "0.4" 238 | # add 239 | libfuzzer-sys = {git = "https://github.com/R9295/autarkie", package = "libafl_libfuzzer"} 240 | ``` 241 | 2. Let's add bincode to deserialize 242 | ``` 243 | cd fuzz 244 | cargo add bincode@1 245 | cd .. 246 | ``` 247 | 248 | Run! 249 | We run autarkie with 1 core(core_id = 0) with the output directory of ``./output_dir`` 250 | 251 | For more cores, use ``-c 0-7`` for 8 cores and cores ``-c 0-15`` for 16 cores etc.. 252 | 253 | We also give autarkie the path to the crate which contains the grammar (which exports ``FuzzData``). 254 | It is ``pwd`` since we are in the root directory of the project. 
255 | ```bash 256 | $ pwd 257 | /fuzz/sbpf 258 | AUTARKIE_GRAMMAR_SRC=$(pwd) cargo fuzz run autarkie_harness -- -o ./output_dir -c0 259 | ``` 260 | ## Important Note 261 | The fuzzing target MAY have autarkie as a dependency (in this case, it does). This means that autarkie, and all its dependencies will also be instrumented for coverage, but they won't ever be run. So the "edges discovered" metric will be rather low. 262 | 263 | This will result in a big inflation of edges and the percentage of edges discovered as a metric will be severely biased. 264 | For example, when fuzzing sbpf, autarkie will report that there are ``edges: ..../935178 (0%)`` discovered. This cannot be true as the project isn't large enough to have 935178 edges, it's biased by autarkie and its dependencies. 265 | 266 | Take this percentage of edges discovered metric with a grain of salt and rather look at the number of edges directly. Also, run coverage reports often. 267 | Autarkie is working hard, it's just that many edges will be unreachable! 268 | 269 | ## For help: 270 | ```bash 271 | $ pwd 272 | /fuzz/sbpf 273 | # For help 274 | AUTARKIE_GRAMMAR_SRC=$(pwd) cargo fuzz run autarkie_harness -- --help 275 | ``` 276 | 277 | ## Further work 278 | Please report bugs and/or suggestions! 279 | -------------------------------------------------------------------------------- /guides/sql.md: -------------------------------------------------------------------------------- 1 | # Fuzzing SQLite3 2 | 3 | This example focuses on fuzzing with the AFL++ Forkserver. 4 | 5 | We need a grammar source to fuzz sqlite3. Since the grammar must be defined in Rust, we can re-use [Apache's](https://github.com/apache/datafusion-sqlparser-rs) ``datafusion-sqlparser-rs``. 6 | 7 | This example shows the magic of Autarkie. We build a highly sophisticated grammar fuzzer in less than 5 minutes. 8 | 9 | ## Clone the project 10 | 
Clone the target 11 | ``` 12 | cd /tmp 13 | git clone https://github.com/apache/datafusion-sqlparser-rs 14 | cd datafusion-sqlparser-rs 15 | # go to a fixed commit 16 | git reset --hard 7703fd0d3180c2e8b347c11394084c3a2458be14 17 | ``` 18 | 19 | ## Deriving the grammar 20 | 1. Add ``autarkie`` as a dependency. 21 | For autarkie, we need to pick a serialization primtive. Autarkie supports ``serde``, ``borsh`` and ``scale``. 22 | We will use ``serde`` as our serialization primitive. Since the project already has serde serialization and deserialization support, we do not need to add it as a dependency. 23 | ``` bash 24 | cargo add autarkie --git https://github.com/R9295/autarkie --features bincode --features derive 25 | ``` 26 | 2. Derive ``autarkie::Grammar`` macro for the AST. 27 | Since the parser already has serde support, we can simply find all places which have the ``Serialize`` macro and add autarkie's ``Grammar`` macro too. 28 | ``` bash 29 | rg "Serialize" --files-with-matches | xargs sed -i 's/Serialize,/Serialize, autarkie::Grammar,/g' 30 | ``` 31 | 3. Modify the grammar slightly 32 | 33 | We need to modify the datafusion-parser's code a bit because it does not allow us to render potentially invalid SQL. 34 | Our fuzzer may generate potentially invalid SQL (for example, the quote may not be ``"``, but a random character). 35 | ``` bash 36 | # delete an assert statement 37 | sed -i '390d' ./src/ast/mod.rs 38 | # remove a panic 39 | rg "panic!\(\"unexpected quote style\"\)" --files-with-matches | xargs sed -i 's/panic!("unexpected quote style")/write!(f, "\\\"{}\\\"", value::escape_quoted_string(\&self.value, \'"\'))/g' 40 | ``` 41 | 42 | That's it! Let's test it to see if it builds. 43 | We need to use the serde feature flag since the serde is feature gated. 44 | ``` 45 | cargo build --features serde 46 | ``` 47 | That's it! Too easy? We have our grammar source fully instrumented. 
48 | 49 | ## Building our fuzzer 50 | Since we are fuzzing C code, we need to create a fuzzer from the grammar. We cannot fuzz inprocess, like with [sbpf](/guides/rbpf.md). 51 | 1. Initialize the fuzzer 52 | ```bash 53 | cd /tmp 54 | mkdir sql-fuzzer 55 | cd sql-fuzzer 56 | cargo init 57 | ``` 58 | 2. Add our dependencies 59 | 60 | We need to add the grammar source, serde and autarkie as dependencies. 61 | 62 | The grammar source is the macro instrumented ``datafusion-sqlparser-rs`` 63 | ``` bash 64 | # we add serde 65 | cargo add serde --features derive 66 | # we add autarkie with the afl, bincode and derive features 67 | cargo add autarkie --git https://github.com/R9295/autarkie --features bincode --features derive --features afl 68 | # we add the grammar source WITH the serde feature 69 | cargo add sqlparser --path /tmp/datafusion-sqlparser-rs --features serde 70 | ``` 71 | 72 | 3. Fuzzer code 73 | ``` 74 | vim src/main.rs 75 | ``` 76 | ``` rust 77 | use sqlparser::ast::Statement; 78 | 79 | /// A list of statements to execute 80 | /// This will be given to our fuzzing harness 81 | #[derive(serde::Serialize, serde::Deserialize, autarkie::Grammar, Debug, Clone)] 82 | pub struct FuzzData { 83 | statements: Vec, 84 | } 85 | 86 | // We need to render the internal type to a harness supported format. 87 | // Autarkie's macro allows us to provide a custom render function. 88 | // the sqlparser package provides a ``to_string`` function which we can 89 | // use to render the internal representation into text SQL. 
90 | autarkie::fuzz_afl!(FuzzData, |data: &FuzzData| -> Vec { 91 | let mut ret = vec![]; 92 | for statement in &data.statements { 93 | ret.extend(statement.to_string().as_bytes()) 94 | } 95 | ret 96 | }); 97 | ``` 98 | Normally, when fuzzing a target which can decode our input on the other end (if they also use ``bincode``/``borsh``) we can simply use the macro as the following: 99 | 100 | ``` rust 101 | autarkie::fuzz_afl!(FuzzData); 102 | ``` 103 | This will automatically use ``bincode``/``borsh`` to serialize the input to bytes for the fuzzing target. 104 | 105 | **But** in this case, we need to render the input to a harness supported type. This is common when fuzzing programming languages for example. 106 | 107 | That's it! Our fuzzer is ready. Let's build 108 | ``` 109 | cargo build --release 110 | ``` 111 | 112 | ## Building the Harness 113 | Let's build oss-fuzz's sqlite. Make sure to install oss-fuzz pre-requisites. 114 | 115 | 1. Build 116 | 117 | ``` 118 | cd /tmp 119 | git clone https://github.com/google/oss-fuzz/ 120 | cd oss-fuzz 121 | python3 infra/helper.py build_fuzzers --engine afl sqlite3 122 | ``` 123 | 124 | 2. Copy the harness to our fuzzer directory 125 | 126 | ```bash 127 | cp ./build/out/sqlite3/ossfuzz /tmp/sql-fuzzer/ 128 | ``` 129 | 130 | ## Running the fuzzer 131 | 132 | We run autarkie with 1 core(core_id = 0) with the output directory of ``./output_dir`` 133 | For more cores, use ``-c 0-7`` for 8 cores and cores ``-c 0-15`` for 16 cores etc.. 134 | 135 | ``` 136 | cd /tmp/sql-fuzzer/ 137 | cargo build --release 138 | ./target/release/sql-fuzzer -o ./output_dir -c0 -m100 ./ossfuzz 139 | ``` 140 | :) 141 | 142 | 143 | ## Further work 144 | Please report bugs and/or suggestions! 
145 | -------------------------------------------------------------------------------- /libafl_libfuzzer/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "libafl_libfuzzer" 3 | version = "0.1.0" 4 | description = "libFuzzer shim which uses LibAFL with common defaults" 5 | repository = "https://github.com/AFLplusplus/LibAFL/" 6 | readme = "../README.md" 7 | license = "MIT OR Apache-2.0" 8 | keywords = ["fuzzing", "testing", "security"] 9 | edition = "2021" 10 | rust-version = "1.85" 11 | categories = ["development-tools::testing"] 12 | 13 | include = [ 14 | "/src", 15 | "/Cargo.toml", 16 | "/build.rs", 17 | "/runtime", 18 | "LICENSE-MIT", 19 | "LICENSE-APACHE", 20 | ] 21 | 22 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 23 | 24 | [build-dependencies] 25 | cc = "1.2" 26 | rustversion = "1.0.17" 27 | toml = { version = "0.8.19", features = ["preserve_order"] } 28 | 29 | [features] 30 | default = ["fork"] 31 | 32 | #! ## Feature Flags 33 | 34 | ## Enables the derive macros for the arbitrary dependency, transparently forwarded from libfuzzer-sys 35 | arbitrary-derive = ["libfuzzer-sys/arbitrary-derive"] 36 | 37 | ## Enables forking in the fuzzer runtime for restarting managers for Unix systems (on by default) 38 | fork = [] 39 | 40 | ## Embeds the built libafl_libfuzzer_runtime library into the crate with include_bytes! 
for use 41 | ## in downstream cases like libafl_cc linking the runtime with: 42 | ## `-fsanitize=fuzzer-no-link -l:libafl_libfuzzer_runtime.a` 43 | embed-runtime = [] 44 | 45 | ## 🐇 46 | rabbit = [] 47 | 48 | ## For testing and publishing purposes only: enforce that the runtime uses versions rather than paths 49 | libafl-libfuzzer-use-version = [] 50 | 51 | [dependencies] 52 | libfuzzer-sys = { version = "0.4.7", default-features = false } 53 | 54 | [package.metadata.docs.rs] 55 | all-features = true 56 | 57 | rustdoc-args = ["--cfg", "docsrs"] 58 | 59 | -------------------------------------------------------------------------------- /libafl_libfuzzer/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /libafl_libfuzzer/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /libafl_libfuzzer/README.md: -------------------------------------------------------------------------------- 1 | # libafl_libfuzzer 2 | 3 | `libafl_libfuzzer` is a shim for [libFuzzer] which may be used in place of libFuzzer in most contexts. 4 | It can be used both as a direct shim for existing libFuzzer-compatible targets which are simply linked with libFuzzer 5 | (e.g., `clang -fsanitize=fuzzer`) and as a Rust crate for [`libfuzzer-sys`]-based harnesses. 6 | 7 | ## Background 8 | 9 | `libafl_libfuzzer` was first developed as a shim in light of the [de-facto deprecation of libFuzzer]. 10 | Given the widespread use of libFuzzer and that LibAFL already supported most of the instrumentation used by libFuzzer, 11 | we sought to develop a replacement which could directly replace it without much additional effort from the end user. 
12 | To do so, `libafl_libfuzzer` provides the same interface and uses the same instrumentation as libFuzzer so that 13 | libFuzzer users can change over to a more modern LibAFL-based runtime without needing extensive changes to their 14 | fuzzing environment or updating their harnesses. 15 | 16 | ## Usage 17 | 18 | `libafl_libfuzzer` currently has known support for Rust, C, and C++ targets on Linux and macOS. 19 | Windows is not currently supported, as we do not currently test or develop for Windows machines, but [we will happily 20 | hear what issues you face and patch them as possible](https://github.com/AFLplusplus/LibAFL/issues/1563). 21 | 22 | For both cases, you should install a recent **nightly** version of Rust via `rustup` and add the `llvm-tools` component 23 | with `rustup component add llvm-tools`. 24 | 25 | ### Usage with Rust harnesses 26 | 27 | To use `libafl_libfuzzer` on Rust harnesses which use `libfuzzer-sys`, all you need to do is change the following line 28 | in your Cargo.toml: 29 | 30 | ```toml 31 | libfuzzer-sys = { version = "...", features = ["your", "features", "here"] } 32 | ``` 33 | 34 | to 35 | 36 | ```toml 37 | libfuzzer-sys = { version = "0.11.0", features = ["your", "features", "here"], package = "libafl_libfuzzer" } 38 | ``` 39 | 40 | To use the most up-to-date version (with experimental changes), use: 41 | 42 | ```toml 43 | libfuzzer-sys = { git = "https://github.com/AFLplusplus/LibAFL.git", features = ["your", "features", "here"], package = "libafl_libfuzzer" } 44 | ``` 45 | 46 | As the repository generally offers the highest performance version of `libafl_libfuzzer`, we recommend the latter. 47 | Remember to `cargo update` often if using the experimental changes, and please [submit an issue] 48 | if you encounter problems while using the git branch! 49 | 50 | For stability purposes, consider [specifying a commit](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#choice-of-commit). 
51 | 52 | #### macOS 53 | 54 | On macOS, you will need to add weak linking for some functions in a `build.rs` file: 55 | 56 | ```rust 57 | fn main() { 58 | for func in [ 59 | "_libafl_main", 60 | "_LLVMFuzzerCustomMutator", 61 | "_LLVMFuzzerCustomCrossOver", 62 | ] { 63 | println!("cargo:rustc-link-arg=-Wl,-U,{func}"); 64 | } 65 | } 66 | ``` 67 | 68 | #### Caveats 69 | 70 | Like harnesses built with `libfuzzer-sys`, Rust targets which build other libraries (e.g. C/C++ FFI) may not 71 | automatically apply instrumentation. 72 | In addition to installing clang, you may also wish to set the following environmental variables: 73 | 74 | ```bash 75 | CC=clang 76 | CXX=clang++ 77 | CFLAGS='-fsanitize=fuzzer-no-link' 78 | CXXFLAGS='-fsanitize=fuzzer-no-link' 79 | ``` 80 | 81 | ### Usage as a standalone library (for C/C++/etc.) 82 | 83 | The runtime for `libafl_libfuzzer` may be used standalone as a direct replacement for libFuzzer with other targets as 84 | well. 85 | To do so, [ensure a recent nightly version of Rust is installed](https://rustup.rs/), then enter the 86 | [`libafl_libfuzzer_runtime`](../libafl_libfuzzer_runtime) folder and build the runtime with the following command: 87 | 88 | ```bash 89 | ./build.sh 90 | ``` 91 | 92 | The static library will be available at `libFuzzer.a` in the [`libafl_libfuzzer_runtime`](../libafl_libfuzzer_runtime) 93 | directory. 94 | If you encounter build failures without clear error outputs that help you resolve the issue, please [submit an issue]. 95 | 96 | This library may now be used in place of libFuzzer. 97 | To do so, change your CFLAGS/CXXFLAGS from `-fsanitize=fuzzer` to: 98 | 99 | ``` 100 | -fsanitize=fuzzer-no-link -L/path/to/libafl_libfuzzer_runtime -lFuzzer 101 | ``` 102 | 103 | Alternatively, you may directly overwrite the system libFuzzer library and use `-fsanitize=fuzzer` as normal. 104 | This changes per system, but on my machine is located at `/usr/lib64/clang/16/lib/linux/libclang_rt.fuzzer-x86_64.a`. 
105 | 106 | #### Caveats 107 | 108 | This standalone library is _not_ compatible with Rust targets; you must instead use the crate-based dependency. 109 | This is due to potential symbol conflict between your harness and the fuzzer runtime, which is resolved by additional 110 | build steps provided in the `libafl_libfuzzer` crate itself. 111 | 112 | ## Flags 113 | 114 | You can pass additional flags to the libFuzzer runtime in `cargo-fuzz` like so: 115 | 116 | ```bash 117 | cargo fuzz run fuzz_target -- -extra_flag=1 118 | ``` 119 | 120 | When the runtime is used standalone, flags may be passed just like normal libFuzzer. 121 | 122 | You will commonly need this for flags such as `-ignore_crashes=1` and `-timeout=5`. In addition 123 | to partial support of libfuzzer flags, `libafl_libfuzzer` offers: 124 | 125 | - `-dedup=n`, with `n` = 1 enabling deduplication of crashes by stacktrace. 126 | - `-grimoire=n`, with `n` set to 0 or 1 disabling or enabling [grimoire] mutations, respectively. 127 | - if not specified explicitly, `libafl_libfuzzer` will select based on whether existing inputs are UTF-8 128 | - you should disable grimoire if your target is not string-like 129 | - `-report=n`, with `n` = 1 causing `libafl_libfuzzer` to emit a report on the corpus content. 130 | - `-skip_tracing=n`, with `n` = 1 causing `libafl_libfuzzer` to disable cmplog tracing. 131 | - you should do this if your target performs many comparisons on memory sequences which are 132 | not contained in the input 133 | - `-tui=n`, with `n` = 1 enabling a graphical terminal interface. 134 | - experimental; some users report inconsistent behaviour with tui enabled 135 | 136 | ### Supported flags from libfuzzer 137 | 138 | - `-merge` 139 | - `-minimize_crash` 140 | - `-artifact_prefix` 141 | - `-timeout` 142 | - unlike libfuzzer, `libafl_libfuzzer` supports partial second timeouts (e.g. 
`-timeout=.5`) 143 | - `-dict` 144 | - `-fork` and `-jobs` 145 | - in `libafl_libfuzzer`, these are synonymous 146 | - `-ignore_crashes`, `-ignore_ooms`, and `-ignore_timeouts` 147 | - note that setting `-tui=1` enables these flags by default, so you'll need to explicitly mention `-ignore_...=0` to 148 | disable them 149 | - `-rss_limit_mb` and `-malloc_limit_mb` 150 | - `-ignore_remaining_args` 151 | - `-shrink` 152 | - `-runs` 153 | - `-close_fd_mask` 154 | 155 | [libFuzzer]: https://llvm.org/docs/LibFuzzer.html 156 | 157 | [`libfuzzer-sys`]: https://docs.rs/libfuzzer-sys/ 158 | 159 | [de-facto deprecation of libFuzzer]: https://llvm.org/docs/LibFuzzer.html#status 160 | 161 | [submit an issue]: https://github.com/AFLplusplus/LibAFL/issues/new/choose 162 | 163 | [grimoire]: https://www.usenix.org/conference/usenixsecurity19/presentation/blazytko 164 | -------------------------------------------------------------------------------- /libafl_libfuzzer/build.rs: -------------------------------------------------------------------------------- 1 | /// Keep in sync with https://github.com/AFLplusplus/LibAFL/blob/main/libafl_libfuzzer/build.rs 2 | use core::error::Error; 3 | use std::{ 4 | fs::{self, File}, 5 | io::{BufRead, BufReader, BufWriter, Write}, 6 | path::{Path, PathBuf}, 7 | process::{Command, Stdio}, 8 | str::FromStr, 9 | }; 10 | 11 | #[cfg(feature = "rabbit")] 12 | const NAMESPACE: &str = "🐇"; 13 | #[cfg(not(feature = "rabbit"))] 14 | const NAMESPACE: &str = "__libafl"; 15 | const NAMESPACE_LEN: usize = NAMESPACE.len(); 16 | 17 | #[expect(clippy::too_many_lines)] 18 | fn main() -> Result<(), Box> { 19 | if cfg!(any(clippy, docsrs)) { 20 | return Ok(()); // skip when clippy or docs is running 21 | } 22 | 23 | if cfg!(not(any(target_os = "linux", target_os = "macos"))) { 24 | println!( 25 | "cargo:warning=The libafl_libfuzzer runtime may only be built for linux or macos; failing fast." 
26 | ); 27 | return Ok(()); 28 | } 29 | 30 | println!("cargo:rerun-if-changed=libafl_libfuzzer_runtime/src"); 31 | println!("cargo:rerun-if-changed=libafl_libfuzzer_runtime/build.rs"); 32 | 33 | let custom_lib_dir = 34 | AsRef::::as_ref(&std::env::var_os("OUT_DIR").unwrap()).join("libafl_libfuzzer"); 35 | let custom_lib_target = custom_lib_dir.join("target"); 36 | fs::create_dir_all(&custom_lib_target) 37 | .expect("Couldn't create the output directory for the fuzzer runtime build"); 38 | 39 | let lib_src: PathBuf = AsRef::::as_ref(&std::env::var_os("CARGO_MANIFEST_DIR").unwrap()) 40 | .join("libafl_libfuzzer_runtime"); 41 | 42 | let mut command = Command::new(std::env::var_os("CARGO").unwrap()); 43 | command 44 | .env_remove("RUSTFLAGS") 45 | .env_remove("CARGO_ENCODED_RUSTFLAGS"); 46 | 47 | for (var, _) in std::env::vars() { 48 | if var.starts_with("CARGO_PKG_") || var.starts_with("CARGO_FEATURE_") { 49 | command.env_remove(var); 50 | } 51 | } 52 | 53 | command 54 | .env("PATH", std::env::var_os("PATH").unwrap()) 55 | .current_dir(&lib_src); 56 | 57 | command.arg("build"); 58 | 59 | let mut features = vec![]; 60 | 61 | if cfg!(any(feature = "fork")) { 62 | features.push("fork"); 63 | } 64 | 65 | if !features.is_empty() { 66 | command.arg("--features").arg(features.join(",")); 67 | } 68 | 69 | command 70 | .arg("--release") 71 | .arg("--no-default-features") 72 | .arg("--target-dir") 73 | .arg(&custom_lib_target) 74 | .arg("--target") 75 | .arg(std::env::var_os("TARGET").unwrap()); 76 | 77 | command.current_dir("../libafl_libfuzzer_runtime"); 78 | // autarkie: make sure we have a grammar source. 
79 | let Ok(grammar_source) = std::env::var("AUTARKIE_GRAMMAR_SRC") else { 80 | eprintln!("Autarkie: missing path to grammar source (AUTARKIE_GRAMMAR_SRC)"); 81 | panic!("Autarkie: missing path to grammar source (AUTARKIE_GRAMMAR_SRC)"); 82 | }; 83 | 84 | let grammar_source = PathBuf::from_str(&grammar_source)?; 85 | assert!( 86 | grammar_source.is_absolute(), 87 | "grammar source must be an absolute path." 88 | ); 89 | let mut grammar_source_toml = 90 | toml::from_str(&std::fs::read_to_string(grammar_source.join("Cargo.toml"))?)?; 91 | let toml::Value::Table(grammar_source_toml) = &mut grammar_source_toml else { 92 | unreachable!("Invalid Cargo.toml"); 93 | }; 94 | let Some(toml::Value::Table(name)) = grammar_source_toml.get("package") else { 95 | unreachable!("Invalid Cargo.toml"); 96 | }; 97 | let Some(toml::Value::Table(grammar_deps)) = grammar_source_toml.get("dependencies") else { 98 | unreachable!("Invalid Cargo.toml"); 99 | }; 100 | let name = name.get("name").unwrap().to_string(); 101 | 102 | let mut template = toml::from_str(&std::fs::read_to_string( 103 | "../libafl_libfuzzer_runtime/Cargo.toml", 104 | )?)?; 105 | let toml::Value::Table(root) = &mut template else { 106 | unreachable!("Invalid Cargo.toml"); 107 | }; 108 | let Some(toml::Value::Table(deps)) = root.get_mut("dependencies") else { 109 | unreachable!("Invalid Cargo.toml"); 110 | }; 111 | // TODO: remove old grammar 112 | if deps.contains_key("grammar_source") { 113 | deps.remove("grammar_source"); 114 | } 115 | // remove old autarkie dependency 116 | // We need to re-add it because serialization primives may change 117 | if deps.contains_key("autarkie") { 118 | deps.remove("autarkie"); 119 | } 120 | let mut grammar_autarkie = grammar_deps 121 | .get("autarkie") 122 | .expect("Grammar source must have autarkie as a dependency") 123 | .clone(); 124 | if let Some(autarkie_path) = grammar_autarkie.get("path") { 125 | assert!( 126 | PathBuf::from(autarkie_path.to_string().replace("\"", 
"")).is_absolute(), 127 | "Autarkie's path in the grammar source must either be absolute or a git repository" 128 | ); 129 | } 130 | let Some(toml::Value::Array(autarkie_features)) = grammar_autarkie.get_mut("features") else { 131 | unreachable!("Invalid autarkie declaration"); 132 | }; 133 | if !autarkie_features.contains(&toml::Value::String("libfuzzer".to_string())) { 134 | autarkie_features.push("libfuzzer".into()); 135 | } 136 | 137 | let mut dep = toml::map::Map::from_iter([ 138 | ( 139 | "path".to_string(), 140 | toml::Value::String(grammar_source.to_str().unwrap().to_string()), 141 | ), 142 | ( 143 | "package".to_string(), 144 | toml::Value::String(name.replace("\"", "")), 145 | ), 146 | ]); 147 | if let Ok(features) = std::env::var("AUTARKIE_GRAMMAR_SRC_FEATURES") { 148 | let features = features.replace(" ", ""); 149 | dep.insert( 150 | "features".to_string(), 151 | toml::Value::Array( 152 | features 153 | .split(",") 154 | .map(|i| toml::Value::String(i.to_string())) 155 | .collect::>(), 156 | ), 157 | ); 158 | } 159 | deps.insert("grammar_source".to_string(), toml::Value::Table(dep)); 160 | deps.insert("autarkie".to_string(), grammar_autarkie); 161 | let serialized = toml::to_string(&template)?; 162 | fs::write("../libafl_libfuzzer_runtime/Cargo.toml", serialized)?; 163 | assert!( 164 | command.status().is_ok_and(|s| s.success()), 165 | "Couldn't build runtime crate! Did you remember to use nightly? 
(`rustup default nightly` to install)" 166 | ); 167 | 168 | let mut archive_path = custom_lib_target.join(std::env::var_os("TARGET").unwrap()); 169 | archive_path.push("release"); 170 | 171 | archive_path.push("libafl_libfuzzer_runtime.a"); 172 | let target_libdir = Command::new("rustc") 173 | .args(["--print", "target-libdir"]) 174 | .output() 175 | .expect("Couldn't find rustc's target-libdir"); 176 | let target_libdir = String::from_utf8(target_libdir.stdout).unwrap(); 177 | let target_libdir = Path::new(target_libdir.trim()); 178 | 179 | // NOTE: depends on llvm-tools 180 | let rust_objcopy = target_libdir.join("../bin/llvm-objcopy"); 181 | let nm = target_libdir.join("../bin/llvm-nm"); 182 | 183 | let redefined_archive_path = custom_lib_target.join("libFuzzer.a"); 184 | let redefined_symbols = custom_lib_target.join("redefs.txt"); 185 | 186 | let mut nm_child = Command::new(nm) 187 | .arg(&archive_path) 188 | .stdout(Stdio::piped()) 189 | .spawn() 190 | .expect("llvm-nm does not work (are you using nightly? 
or did you install by rustup component add llvm-tools?)"); 191 | 192 | let mut redefinitions_file = BufWriter::new(File::create(&redefined_symbols).unwrap()); 193 | 194 | let rn_prefix = if cfg!(target_os = "macos") { 195 | // macOS symbols have an extra `_` 196 | "__RN" 197 | } else { 198 | "_RN" 199 | }; 200 | 201 | let zn_prefix = if cfg!(target_os = "macos") { 202 | // macOS symbols have an extra `_` 203 | "__ZN" 204 | } else { 205 | "_ZN" 206 | }; 207 | 208 | let replacement = format!("{zn_prefix}{NAMESPACE_LEN}{NAMESPACE}"); 209 | // redefine all the rust-mangled symbols we can 210 | // TODO this will break when v0 mangling is stabilised 211 | for line in BufReader::new(nm_child.stdout.take().unwrap()).lines() { 212 | let line = line.unwrap(); 213 | 214 | // Skip headers 215 | if line.ends_with(':') || line.is_empty() { 216 | continue; 217 | } 218 | let (_, symbol) = line.rsplit_once(' ').unwrap(); 219 | 220 | if symbol.starts_with(rn_prefix) { 221 | let (_prefix, renamed) = symbol.split_once("__rustc").unwrap(); 222 | let (size, renamed) = renamed.split_once('_').unwrap(); 223 | writeln!(redefinitions_file, "{symbol} {replacement}{size}{renamed}E").unwrap(); 224 | } else if symbol.starts_with(zn_prefix) { 225 | writeln!( 226 | redefinitions_file, 227 | "{symbol} {}", 228 | symbol.replacen(zn_prefix, &replacement, 1) 229 | ) 230 | .unwrap(); 231 | } 232 | } 233 | redefinitions_file.flush().unwrap(); 234 | drop(redefinitions_file); 235 | 236 | assert!( 237 | nm_child.wait().is_ok_and(|s| s.success()), 238 | "Couldn't link runtime crate! Do you have the llvm-tools component installed? 
(`rustup component add llvm-tools-preview` to install)" 239 | ); 240 | 241 | let mut objcopy_command = Command::new(rust_objcopy); 242 | 243 | for symbol in [ 244 | "libafl_cmplog_enabled", 245 | "libafl_cmplog_map", 246 | "libafl_cmp_map", 247 | "rust_begin_unwind", 248 | "rust_panic", 249 | "rust_eh_personality", 250 | "__rust_drop_panic", 251 | "__rust_foreign_exception", 252 | "__rg_oom", 253 | "__rdl_oom", 254 | "__rdl_alloc", 255 | "__rust_alloc", 256 | "__rdl_dealloc", 257 | "__rust_dealloc", 258 | "__rdl_realloc", 259 | "__rust_realloc", 260 | "__rdl_alloc_zeroed", 261 | "__rust_alloc_zeroed", 262 | "__rust_alloc_error_handler", 263 | "__rust_no_alloc_shim_is_unstable", 264 | "__rust_alloc_error_handler_should_panic", 265 | ] { 266 | let mut symbol = symbol.to_string(); 267 | // macOS symbols have an extra `_` 268 | if cfg!(target_os = "macos") { 269 | symbol.insert(0, '_'); 270 | } 271 | 272 | objcopy_command 273 | .arg("--redefine-sym") 274 | .arg(format!("{symbol}={symbol}_libafl_libfuzzer_runtime")); 275 | } 276 | 277 | objcopy_command 278 | .arg("--redefine-syms") 279 | .arg(redefined_symbols) 280 | .args([&archive_path, &redefined_archive_path]); 281 | 282 | assert!( 283 | objcopy_command.status().is_ok_and(|s| s.success()), 284 | "Couldn't rename allocators in the runtime crate! Do you have the llvm-tools component installed? 
(`rustup component add llvm-tools-preview` to install)" 285 | ); 286 | 287 | #[cfg(feature = "embed-runtime")] 288 | { 289 | // NOTE: lib, .a are added always on unix-like systems as described in: 290 | // https://gist.github.com/novafacing/1389cbb2f0a362d7eb103e67b4468e2b 291 | println!( 292 | "cargo:rustc-env=LIBAFL_LIBFUZZER_RUNTIME_PATH={}", 293 | redefined_archive_path.display() 294 | ); 295 | } 296 | 297 | println!( 298 | "cargo:rustc-link-search=native={}", 299 | custom_lib_target.to_str().unwrap() 300 | ); 301 | println!("cargo:rustc-link-lib=static=Fuzzer"); 302 | 303 | if cfg!(target_os = "macos") { 304 | println!("cargo:rustc-link-lib=c++"); 305 | } else { 306 | println!("cargo:rustc-link-lib=stdc++"); 307 | } 308 | Ok(()) 309 | } 310 | -------------------------------------------------------------------------------- /libafl_libfuzzer/runtime/Cargo.toml.template: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "libafl_libfuzzer_runtime" 3 | version = "0.15.1" 4 | edition = "2021" 5 | publish = false 6 | 7 | [features] 8 | default = ["fork"] 9 | fork = ["libafl/fork"] 10 | track_hit_feedbacks = ["libafl/track_hit_feedbacks", "libafl_targets/track_hit_feedbacks"] 11 | 12 | [profile.release] 13 | lto = true 14 | codegen-units = 1 15 | opt-level = 3 16 | debug = true 17 | 18 | [profile.release-fuzzbench] 19 | inherits = "release" 20 | debug = false 21 | strip = true 22 | 23 | [lib] 24 | name = "afl_libfuzzer_runtime" 25 | crate-type = ["staticlib", "rlib"] 26 | 27 | [dependencies] 28 | libc = "0.2.159" 29 | rand = "0.8.5" 30 | utf8-chars = "3.0.4" 31 | env_logger = "0.11.5" 32 | 33 | [dependencies.libafl] 34 | workspace = true 35 | 36 | [dependencies.libafl_bolts] 37 | workspace = true 38 | 39 | [dependencies.libafl_targets] 40 | workspace = true 41 | default-features = false 42 | features = ["sancov_8bit", "libfuzzer", "cmplog", "sancov_cmplog", "sancov_pcguard", "libfuzzer_define_run_driver"] 43 | 44 
| [dependencies.ahash] 45 | version = "0.8.11" 46 | default-features = false 47 | 48 | [dependencies.log] 49 | version = "0.4.22" 50 | features = ["release_max_level_info"] 51 | 52 | [dependencies.mimalloc] 53 | version = "0.1.43" 54 | default-features = false 55 | 56 | [dependencies.num-traits] 57 | version = "0.2.19" 58 | default-features = true 59 | 60 | [dependencies.serde] 61 | version = "1.0.210" 62 | default-features = true 63 | features = ["derive"] 64 | 65 | [dependencies.hashbrown] 66 | version = "0.14.5" 67 | default-features = true 68 | 69 | 70 | [build-dependencies] 71 | bindgen = "0.71.1" 72 | 73 | [build-dependencies.cc] 74 | version = "1.1.22" 75 | features = ["parallel"] 76 | -------------------------------------------------------------------------------- /libafl_libfuzzer/runtime/build.rs: -------------------------------------------------------------------------------- 1 | use std::{env, path::Path}; 2 | 3 | #[expect(clippy::too_many_lines)] 4 | fn main() { 5 | let out_dir = env::var_os("OUT_DIR").unwrap(); 6 | 7 | println!("cargo:rerun-if-changed=src/harness_wrap.h"); 8 | println!("cargo:rerun-if-changed=src/harness_wrap.cpp"); 9 | 10 | let build = bindgen::builder() 11 | .header("src/harness_wrap.h") 12 | .generate_comments(true) 13 | .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) 14 | .generate() 15 | .expect("Couldn't generate the harness wrapper!"); 16 | 17 | build 18 | .write_to_file(Path::new(&out_dir).join("harness_wrap.rs")) 19 | .expect("Couldn't write the harness wrapper!"); 20 | 21 | cc::Build::new() 22 | .cpp(true) 23 | .file("src/harness_wrap.cpp") 24 | .compile("harness_wrap"); 25 | } 26 | -------------------------------------------------------------------------------- /libafl_libfuzzer/runtime/src/fuzz.rs: -------------------------------------------------------------------------------- 1 | use autarkie::TargetBytesConverter; 2 | use core::ffi::c_int; 3 | use grammar_source::{FuzzData, FuzzDataTargetBytesConverter}; 4 | 
use libafl::executors::ExitKind; 5 | use libafl::Error; 6 | use libafl_bolts::AsSlice; 7 | 8 | fn fuzz_many_forking(harness: &extern "C" fn(*const u8, usize) -> c_int) -> Result<(), Error> { 9 | let harness = |input: &FuzzData| { 10 | let target = FuzzDataTargetBytesConverter::new().to_target_bytes(input); 11 | let buf = target.as_slice(); 12 | let result = unsafe { 13 | crate::libafl_libfuzzer_test_one_input(Some(*harness), buf.as_ptr(), buf.len()) 14 | }; 15 | match result { 16 | -2 => ExitKind::Crash, 17 | _ => ExitKind::Ok, 18 | } 19 | }; 20 | autarkie::fuzzer::run_fuzzer(FuzzDataTargetBytesConverter::new(), Some(harness)); 21 | Ok(()) 22 | } 23 | 24 | pub fn fuzz(harness: &extern "C" fn(*const u8, usize) -> c_int) -> Result<(), Error> { 25 | fuzz_many_forking(harness) 26 | } 27 | -------------------------------------------------------------------------------- /libafl_libfuzzer/runtime/src/harness_wrap.cpp: -------------------------------------------------------------------------------- 1 | #include "harness_wrap.h" 2 | 3 | extern "C" int libafl_libfuzzer_test_one_input( 4 | int (*harness)(const uint8_t *, size_t), const uint8_t *data, size_t len) { 5 | try { 6 | return harness(data, len); 7 | } catch (...) { 8 | return -2; // custom code for "we died!" 
9 | } 10 | } 11 | -------------------------------------------------------------------------------- /libafl_libfuzzer/runtime/src/harness_wrap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | int libafl_libfuzzer_test_one_input(int (*harness)(const uint8_t *, size_t), 11 | const uint8_t *data, size_t len); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | -------------------------------------------------------------------------------- /libafl_libfuzzer/runtime/src/lib.rs: -------------------------------------------------------------------------------- 1 | use core::ffi::{c_char, c_int}; 2 | 3 | use env_logger::Target; 4 | use libafl::Error; 5 | use mimalloc::MiMalloc; 6 | use std::os::fd::RawFd; 7 | #[global_allocator] 8 | static GLOBAL: MiMalloc = MiMalloc; 9 | 10 | mod fuzz; 11 | 12 | mod harness_wrap { 13 | #![allow(non_snake_case)] 14 | #![allow(non_camel_case_types)] 15 | #![allow(non_upper_case_globals)] 16 | #![allow(unused)] 17 | #![allow(improper_ctypes)] 18 | #![allow(clippy::unreadable_literal)] 19 | #![allow(missing_docs)] 20 | #![allow(unused_qualifications)] 21 | include!(concat!(env!("OUT_DIR"), "/harness_wrap.rs")); 22 | } 23 | 24 | /// Starts to fuzz on a single node 25 | pub fn start_fuzzing_single( 26 | mut fuzz_single: F, 27 | initial_state: Option, 28 | mgr: EM, 29 | ) -> Result<(), Error> 30 | where 31 | F: FnMut(Option, EM, usize) -> Result<(), Error>, 32 | { 33 | fuzz_single(initial_state, mgr, 0) 34 | } 35 | 36 | pub(crate) use harness_wrap::libafl_libfuzzer_test_one_input; 37 | unsafe extern "C" { 38 | // redeclaration against libafl_targets because the pointers in our case may be mutable 39 | fn libafl_targets_libfuzzer_init(argc: *mut c_int, argv: *mut *mut *const c_char) -> i32; 40 | } 41 | 42 | /// A method to start the fuzzer at a later point in time from a library. 
43 | /// To quote the `libfuzzer` docs: 44 | /// > when it’s ready to start fuzzing, it can call `LLVMFuzzerRunDriver`, passing in the program arguments and a callback. This callback is invoked just like `LLVMFuzzerTestOneInput`, and has the same signature. 45 | /// 46 | /// # Safety 47 | /// Will dereference all parameters. 48 | /// This will then call the (potentially unsafe) harness. 49 | /// The fuzzer itself should catch any side effects and, hence be reasonably safe, if the `harness_fn` parameter is correct. 50 | #[expect(clippy::similar_names)] 51 | #[unsafe(no_mangle)] 52 | pub unsafe extern "C" fn LLVMFuzzerRunDriver( 53 | argc: *mut c_int, 54 | argv: *mut *mut *const c_char, 55 | harness_fn: Option c_int>, 56 | ) -> c_int { 57 | let harness = harness_fn 58 | .as_ref() 59 | .expect("Illegal harness provided to libafl."); 60 | // early duplicate the stderr fd so we can close it later for the target 61 | #[cfg(unix)] 62 | { 63 | use std::{ 64 | os::fd::{AsRawFd, FromRawFd}, 65 | str::FromStr, 66 | }; 67 | 68 | let stderr_fd = std::env::var(autarkie::fuzzer::STDERR_FD_VAR) 69 | .map_err(Error::from) 70 | .and_then(|s| RawFd::from_str(&s).map_err(Error::from)) 71 | .unwrap_or_else(|_| { 72 | let stderr = unsafe { libc::dup(std::io::stderr().as_raw_fd()) }; 73 | unsafe { 74 | std::env::set_var(autarkie::fuzzer::STDERR_FD_VAR, stderr.to_string()); 75 | } 76 | stderr 77 | }); 78 | let stderr = unsafe { std::fs::File::from_raw_fd(stderr_fd) }; 79 | env_logger::builder() 80 | .parse_default_env() 81 | .target(Target::Pipe(Box::new(stderr))) 82 | .init(); 83 | } 84 | 85 | // it appears that no one, not even libfuzzer, uses this return value 86 | // https://github.com/llvm/llvm-project/blob/llvmorg-15.0.7/compiler-rt/lib/fuzzer/FuzzerDriver.cpp#L648 87 | unsafe { 88 | libafl_targets_libfuzzer_init(argc, argv); 89 | } 90 | let res = crate::fuzz::fuzz(harness); 91 | match res { 92 | Ok(()) | Err(Error::ShuttingDown) => 0, 93 | Err(err) => { 94 | eprintln!("Encountered 
error while performing libfuzzer shimming: {err}"); 95 | 1 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /libafl_libfuzzer/src/lib.rs: -------------------------------------------------------------------------------- 1 | use core::ffi::{c_char, c_int}; 2 | 3 | pub use libfuzzer_sys::*; 4 | 5 | unsafe extern "C" { 6 | /// `LLVMFuzzerRunDriver` allows for harnesses which specify their own main. See: 7 | /// 8 | /// You can call this function inside of a main function in your harness, or specify `#![no_main]` 9 | /// to accept the default runtime driver. 10 | pub fn LLVMFuzzerRunDriver( 11 | argc: *mut c_int, 12 | argv: *mut *mut *const c_char, 13 | harness_fn: Option c_int>, 14 | ) -> c_int; 15 | } 16 | 17 | #[cfg(all( 18 | feature = "embed-runtime", 19 | target_family = "unix", 20 | // Disable when building with clippy, as it will complain about the missing environment 21 | // variable which is set by the build script, which is not run under clippy. 
22 | not(clippy) 23 | ))] 24 | pub const LIBAFL_LIBFUZZER_RUNTIME_LIBRARY: &'static [u8] = 25 | include_bytes!(env!("LIBAFL_LIBFUZZER_RUNTIME_PATH")); 26 | 27 | #[cfg(test)] 28 | mod tests { 29 | #[cfg(all(feature = "embed-runtime", not(clippy)))] 30 | #[test] 31 | fn test_embed_runtime_sized() { 32 | use crate::LIBAFL_LIBFUZZER_RUNTIME_LIBRARY; 33 | 34 | assert_ne!( 35 | LIBAFL_LIBFUZZER_RUNTIME_LIBRARY.len(), 36 | 0, 37 | "Runtime library empty" 38 | ); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /libafl_libfuzzer_runtime/Cargo.toml: -------------------------------------------------------------------------------- 1 | ../libafl_libfuzzer/runtime/Cargo.toml.template -------------------------------------------------------------------------------- /libafl_libfuzzer_runtime/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /libafl_libfuzzer_runtime/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /libafl_libfuzzer_runtime/README.md: -------------------------------------------------------------------------------- 1 | # libafl_libfuzzer_runtime 2 | 3 | This is the runtime for `libafl_libfuzzer`. 4 | 5 | Please see the [`libafl_libfuzzer`](../libafl_libfuzzer) documentation for details. 6 | This crate should not be used alone except in very special circumstances. 
-------------------------------------------------------------------------------- /libafl_libfuzzer_runtime/build.rs: -------------------------------------------------------------------------------- 1 | use std::{env, path::Path}; 2 | 3 | fn main() { 4 | let out_dir = env::var_os("OUT_DIR").unwrap(); 5 | 6 | println!("cargo:rerun-if-changed=src/harness_wrap.h"); 7 | println!("cargo:rerun-if-changed=src/harness_wrap.cpp"); 8 | 9 | let build = bindgen::builder() 10 | .header("src/harness_wrap.h") 11 | .generate_comments(true) 12 | .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) 13 | .generate() 14 | .expect("Couldn't generate the harness wrapper!"); 15 | 16 | build 17 | .write_to_file(Path::new(&out_dir).join("harness_wrap.rs")) 18 | .expect("Couldn't write the harness wrapper!"); 19 | 20 | cc::Build::new() 21 | .cpp(true) 22 | .file("src/harness_wrap.cpp") 23 | .compile("harness_wrap"); 24 | } 25 | -------------------------------------------------------------------------------- /libafl_libfuzzer_runtime/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) 6 | 7 | cd "${SCRIPT_DIR}" || exit 1 8 | 9 | if [ -z ${1+x} ]; then 10 | profile=release 11 | else 12 | profile="$1" 13 | fi 14 | 15 | if ! cargo +nightly --version >& /dev/null; then 16 | echo -e "You must install a recent Rust nightly to build the libafl_libfuzzer runtime!" 
17 | exit 1 18 | fi 19 | 20 | cargo +nightly build --profile "$profile" 21 | 22 | if [[ "$OSTYPE" == "darwin"* ]]; then 23 | # MacOS and iOS 24 | "${CXX:-clang++}" -dynamiclib -Wl,-force_load target/release/libafl_libfuzzer_runtime.a \ 25 | -Wl,-U,_LLVMFuzzerInitialize -Wl,-U,_LLVMFuzzerCustomMutator -Wl,-U,_LLVMFuzzerCustomCrossOver -Wl,-U,_libafl_main \ 26 | -o libafl_libfuzzer_runtime.dylib 27 | else 28 | # Linux and *BSD 29 | RUSTC_BIN="$(cargo +nightly rustc -Zunstable-options --print target-libdir)/../bin" 30 | RUST_LLD="${RUSTC_BIN}/rust-lld" 31 | RUST_AR="${RUSTC_BIN}/llvm-ar" 32 | 33 | if ! [ -f "${RUST_LLD}" ] && [ -f "${RUST_AR}" ]; then 34 | echo -e "You must install the llvm-tools component: \`rustup component add llvm-tools'" 35 | exit 1 36 | fi 37 | 38 | tmpdir="" 39 | 40 | cleanup() { 41 | rm -rf "${tmpdir}" 42 | exit 43 | } 44 | trap cleanup INT TERM 45 | 46 | tmpdir="$(mktemp -d)" 47 | "${RUST_LLD}" -flavor gnu -r --whole-archive target/release/libafl_libfuzzer_runtime.a -o "${tmpdir}/libFuzzer.o" 48 | "${RUST_AR}" cr libFuzzer.a "${tmpdir}/libFuzzer.o" 49 | 50 | echo "Done! Wrote the runtime to \`${SCRIPT_DIR}/libFuzzer.a'" 51 | cleanup 52 | fi 53 | 54 | -------------------------------------------------------------------------------- /libafl_libfuzzer_runtime/src: -------------------------------------------------------------------------------- 1 | ../libafl_libfuzzer/runtime/src --------------------------------------------------------------------------------