├── .gitignore
├── shared
│   ├── aht
│   │   ├── .gitignore
│   │   ├── Cargo.lock
│   │   ├── Cargo.toml
│   │   └── src
│   │       └── lib.rs
│   ├── atomicvec
│   │   ├── .gitignore
│   │   ├── Cargo.lock
│   │   ├── Cargo.toml
│   │   └── src
│   │       └── lib.rs
│   └── falkhash
│       ├── .gitignore
│       ├── Cargo.toml
│       └── src
│           └── lib.rs
├── ldscript.ld
├── Cargo.toml
├── Cargo.lock
├── src
│   ├── primitive.rs
│   ├── mutate.rs
│   ├── affinity.rs
│   ├── jitcache.rs
│   ├── mmu.rs
│   ├── main.rs
│   └── emulator.rs
└── LICENSE
/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /shared/aht/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | 3 | -------------------------------------------------------------------------------- /shared/atomicvec/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | 3 | -------------------------------------------------------------------------------- /shared/falkhash/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /ldscript.ld: -------------------------------------------------------------------------------- 1 | SECTIONS 2 | { 3 | . = 0x10000; 4 | .text : { *(.text) } 5 | .rodata : { *(.rodata) } 6 | .bss : { *(.bss) } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /shared/aht/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "aht" 5 | version = "0.1.0" 6 | -------------------------------------------------------------------------------- /shared/atomicvec/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | [[package]] 4 | name = "atomicvec" 5 | version = "0.1.0" 6 | -------------------------------------------------------------------------------- /shared/aht/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "aht" 3 | version = "0.1.0" 4 | authors = ["Brandon Falk "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | -------------------------------------------------------------------------------- /shared/atomicvec/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "atomicvec" 3 | version = "0.1.0" 4 | authors = ["Brandon Falk "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | -------------------------------------------------------------------------------- /shared/falkhash/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "falkhash" 3 | version = "0.1.0" 4 | authors = ["Brandon Falk "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | 11 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fuzz_with_emus" 3 | version = "0.1.0" 4 | authors = ["Brandon Falk "] 5 | edition = "2018" 6 | license = "MIT" 7 | 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 9 | 10 | [dependencies] 11 | falkhash = { path = "shared/falkhash" } 12 | aht = { path = "shared/aht" } 13 | atomicvec = { path = "shared/atomicvec" } 14 | basic_mutator = { path = "../mutator" } 15 | 16 | [profile.release] 17 | overflow-checks=true 18 | 19 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "aht" 5 | version = "0.1.0" 6 | 7 | [[package]] 8 | name = "atomicvec" 9 | version = "0.1.0" 10 | 11 | [[package]] 12 | name = "basic_mutator" 13 | version = "0.1.0" 14 | 15 | [[package]] 16 | name = "falkhash" 17 | version = "0.1.0" 18 | 19 | [[package]] 20 | name = "fuzz_with_emus" 21 | version = "0.1.0" 22 | dependencies = [ 23 | "aht", 24 | "atomicvec", 25 | "basic_mutator", 26 | "falkhash", 27 | ] 28 | -------------------------------------------------------------------------------- /src/primitive.rs: -------------------------------------------------------------------------------- 1 | //! 
Generic support for primitive types which are safe to cast to and from raw bytes 2 | 3 | pub unsafe trait Primitive: Default + Clone + Copy {} 4 | unsafe impl Primitive for u8 {} 5 | unsafe impl Primitive for u16 {} 6 | unsafe impl Primitive for u32 {} 7 | unsafe impl Primitive for u64 {} 8 | unsafe impl Primitive for u128 {} 9 | unsafe impl Primitive for usize {} 10 | unsafe impl Primitive for i8 {} 11 | unsafe impl Primitive for i16 {} 12 | unsafe impl Primitive for i32 {} 13 | unsafe impl Primitive for i64 {} 14 | unsafe impl Primitive for i128 {} 15 | unsafe impl Primitive for isize {} 16 | 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Gamozo Labs, LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/mutate.rs: -------------------------------------------------------------------------------- 1 | //! Basic fuzzer mutation strategies 2 | 3 | struct Mutation { 4 | /// Input vector to mutate, this is just an entire input file's bytes 5 | pub input: Vec<u8>, 6 | 7 | /// If non-zero length, this contains a list of valid indices into 8 | /// `input`, indicating which bytes of the input should be mutated. This often 9 | /// comes from instrumentation like access tracking or taint tracking to 10 | /// indicate which parts of the input are used. This will prevent us from 11 | /// corrupting parts of the file which have zero effect on the program. 12 | /// 13 | /// You can have this take any meaning you want; all it does 14 | /// is limit the corruption/splicing locations to the indices in this 15 | /// vector. Feel free to change this to have different meanings, like 16 | /// indicating indices which are used in comparison instructions! 17 | pub accessed: Vec<usize>, 18 | } 19 | 20 | impl Mutation { 21 | /// Performs standard mutation of the input 22 | pub fn mutate(&mut self) { 23 | let strategies = [ 24 | Self::shrink 25 | ]; 26 | } 27 | 28 | pub fn shrink(&mut self) { 29 | } 30 | } 31 |
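The `shrink` strategy above is just a stub in this snapshot. As a minimal sketch of one way it could behave (not the original implementation), removing a random span while honoring the `accessed` restriction; the `rng` parameter is hypothetical, modeled on the xorshift `Rng` in src/main.rs:

    impl Mutation {
        /// Remove a random span of bytes from the input (sketch)
        pub fn shrink(&mut self, rng: &mut Rng) {
            if self.input.is_empty() { return; }
            // Pick a starting byte, restricted to accessed bytes if present
            let start = if self.accessed.is_empty() {
                rng.rand() % self.input.len()
            } else {
                self.accessed[rng.rand() % self.accessed.len()]
            };
            // Remove up to 16 bytes, clamped to the remaining input length
            let len = core::cmp::min(1 + rng.rand() % 16,
                                     self.input.len() - start);
            self.input.drain(start..start + len);
        }
    }

Note that any length-changing strategy invalidates the indices in `accessed`, so they would need to be refreshed before the next mutation.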
-------------------------------------------------------------------------------- /src/affinity.rs: -------------------------------------------------------------------------------- 1 | //! Functions to set affinity in an OS-agnostic way 2 | 3 | #[cfg(unix)] 4 | pub fn set_affinity(core: usize) -> Result<(), ()> { 5 | extern "system" { 6 | fn sched_setaffinity(pid: usize, cpusetsize: usize, 7 | mask: *const usize) -> i32; 8 | } 9 | 10 | const USIZE_BITS: usize = core::mem::size_of::<usize>() * 8; 11 | 12 | let mut mask = [0usize; 32]; 13 | mask[core / USIZE_BITS] |= 1 << (core % USIZE_BITS); 14 | 15 | unsafe { 16 | if sched_setaffinity(0, std::mem::size_of_val(&mask), 17 | mask.as_ptr()) == 0 { 18 | Ok(()) 19 | } else { 20 | Err(()) 21 | } 22 | } 23 | } 24 | 25 | #[cfg(windows)] 26 | pub fn set_affinity(core: usize) -> Result<(), ()> { 27 | extern "system" { 28 | fn GetCurrentThread() -> usize; 29 | fn SetThreadAffinityMask(hThread: usize, 30 | dwThreadAffinityMask: usize) -> usize; 31 | } 32 | 33 | assert!(core < 64, "Yeah, we don't support more than 64 cores here"); 34 | 35 | unsafe { 36 | if SetThreadAffinityMask(GetCurrentThread(), 1usize << core) != 0 { 37 | Ok(()) 38 | } else { 39 | Err(()) 40 | } 41 | } 42 | } 43 | 44 |
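Typical use (a sketch, not code from this repo): pin one worker thread per core before entering its fuzz loop. `num_cores` is a hypothetical value queried from the OS:

    for core in 0..num_cores {
        std::thread::spawn(move || {
            // Keep this worker from migrating between cores
            affinity::set_affinity(core).expect("Failed to set affinity");
            // ... per-core fuzz loop ...
        });
    }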
-------------------------------------------------------------------------------- /shared/atomicvec/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! An atomic vector with a fixed size capacity and insert-only semantics 2 | 3 | #![no_std] 4 | #![feature(const_generics)] 5 | #![allow(incomplete_features)] 6 | 7 | extern crate alloc; 8 | 9 | use core::sync::atomic::{AtomicPtr, AtomicUsize, Ordering}; 10 | use core::alloc::Layout; 11 | use alloc::boxed::Box; 12 | use alloc::alloc::alloc_zeroed; 13 | 14 | /// A fixed-capacity insert-only vector which allows multi-threaded insertion 15 | /// via atomics. 16 | pub struct AtomicVec<T, const N: usize> { 17 | /// The backing for the atomic vector 18 | /// 19 | /// The entries are null pointers when invalid, and when they become valid 20 | /// they turn into non-null pointers. 21 | backing: Box<[AtomicPtr<T>; N]>, 22 | 23 | /// Number of entries in use in the vector 24 | in_use: AtomicUsize, 25 | } 26 | 27 | impl<T, const N: usize> AtomicVec<T, N> { 28 | /// Create a new `AtomicVec` which contains a vector of pointers to type 29 | /// `T`s, with a capacity of `N` 30 | #[track_caller] 31 | pub fn new() -> Self { 32 | // Determine the layout for an allocation to satisfy an array of `N` 33 | // `AtomicPtr<T>`'s 34 | let layout = Layout::array::<AtomicPtr<T>>(N) 35 | .expect("Invalid shape for AtomicVec"); 36 | 37 | // Create a zeroed allocation, which will be all null atomic pointers 38 | let allocation = unsafe { alloc_zeroed(layout) }; 39 | let allocation = allocation as *mut [AtomicPtr<T>; N]; 40 | assert!(!allocation.is_null(), "Allocation failure for AtomicVec"); 41 | 42 | // Return out the empty `AtomicVec` 43 | AtomicVec { 44 | backing: unsafe { Box::from_raw(allocation) }, 45 | in_use: AtomicUsize::new(0), 46 | } 47 | } 48 | 49 | /// Get the length of this vector, in elements 50 | pub fn len(&self) -> usize { self.in_use.load(Ordering::SeqCst) } 51 | 52 | /// Get the capacity of this vector, in elements 53 | pub const fn capacity(&self) -> usize { N } 54 | 55 | /// Push an element to the vector 56 | #[track_caller] 57 | pub fn push(&self, element: Box<T>) { 58 | // Get a unique index for insertion. We don't do a fetch_add here so 59 | // that we can make sure we do not overflow the capacity 60 | let idx = loop { 61 | // Get the current in use 62 | let cur = self.in_use.load(Ordering::SeqCst); 63 | assert!(cur < N, "AtomicVec out of capacity"); 64 | 65 | // Attempt to reserve this index 66 | if self.in_use.compare_and_swap(cur, cur + 1, 67 | Ordering::SeqCst) == cur { 68 | break cur; 69 | } 70 | }; 71 | 72 | // Store the element into the array! 73 | let ptr = Box::into_raw(element); 74 | assert!(!ptr.is_null(), "Whoa, can't use a null pointer in AtomicVec"); 75 | self.backing[idx].store(ptr, Ordering::SeqCst); 76 | } 77 | 78 | /// Get a reference to the element at `idx` in the `AtomicVec` 79 | #[track_caller] 80 | pub fn get(&self, idx: usize) -> Option<&T> { 81 | // Get the element pointer 82 | let ptr = self.backing.get(idx)?.load(Ordering::SeqCst); 83 | 84 | // If the pointer is null, this entry is not filled in yet, thus return 85 | // `None` 86 | if ptr.is_null() { return None; } 87 | 88 | // Return out a Rust reference to the contents 89 | Some(unsafe { &*ptr }) 90 | } 91 | } 92 | 93 | impl<T, const N: usize> Drop for AtomicVec<T, N> { 94 | fn drop(&mut self) { 95 | // Go through each entry in the vector 96 | for ii in 0..self.len() { 97 | // Get the old pointer so we can drop it 98 | let ptr = self.backing[ii].load(Ordering::SeqCst); 99 | 100 | // If the pointer was non-null, convert it back into a `Box` and 101 | // let it drop 102 | if !ptr.is_null() { 103 | unsafe { Box::from_raw(ptr); } 104 | } 105 | } 106 | } 107 | } 108 | 109 | #[cfg(test)] 110 | mod tests { 111 | use crate::*; 112 | 113 | #[test] 114 | fn it_works() { 115 | let foo: AtomicVec<u32, 4096> = AtomicVec::new(); 116 | for _ in 0..4096 { 117 | foo.push(Box::new(5)); 118 | } 119 | } 120 | } 121 | 122 |
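A sketch of the intended multi-threaded usage (illustrative only; std threads are used here even though the crate itself is no_std):

    use std::sync::Arc;

    let vec: Arc<AtomicVec<u64, 1024>> = Arc::new(AtomicVec::new());
    let handles: Vec<_> = (0..4u64).map(|t| {
        let vec = vec.clone();
        std::thread::spawn(move || {
            // Many threads may push concurrently without any locks
            for i in 0..16u64 { vec.push(Box::new(t * 100 + i)); }
        })
    }).collect();
    for h in handles { h.join().unwrap(); }
    assert_eq!(vec.len(), 64);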
-------------------------------------------------------------------------------- /src/jitcache.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Mutex; 2 | use std::sync::atomic::{AtomicUsize, Ordering}; 3 | use std::collections::BTreeMap; 4 | use crate::mmu::VirtAddr; 5 | 6 | #[cfg(target_os="windows")] 7 | pub fn alloc_rwx(size: usize) -> &'static mut [u8] { 8 | extern { 9 | fn VirtualAlloc(lpAddress: *const u8, dwSize: usize, 10 | flAllocationType: u32, flProtect: u32) -> *mut u8; 11 | } 12 | 13 | unsafe { 14 | const PAGE_EXECUTE_READWRITE: u32 = 0x40; 15 | 16 | const MEM_COMMIT: u32 = 0x00001000; 17 | const MEM_RESERVE: u32 = 0x00002000; 18 | 19 | let ret = VirtualAlloc(0 as *const _, size, MEM_COMMIT | MEM_RESERVE, 20 | PAGE_EXECUTE_READWRITE); 21 | assert!(!ret.is_null()); 22 | 23 | std::slice::from_raw_parts_mut(ret, size) 24 | } 25 | } 26 | 27 | #[cfg(target_os="linux")] 28 | pub fn alloc_rwx(size: usize) -> &'static mut [u8] { 29 | extern { 30 | fn mmap(addr: *mut u8, length: usize, prot: i32, flags: i32, fd: i32, 31 | offset: usize) -> *mut u8; 32 | } 33 | 34 | unsafe { 35 | // Alloc RWX and MAP_PRIVATE | MAP_ANON 36 | let ret = mmap(0 as *mut u8, size, 7, 34, -1, 0); 37 | assert!(ret as isize != -1, "mmap() failed (mmap reports failure as MAP_FAILED (-1), not null)"); 38 | 39 | std::slice::from_raw_parts_mut(ret, size) 40 | } 41 | } 42 | 43 | /// A cache which stores cached JIT blocks and translation tables to them 44 | pub struct JitCache { 45 | /// A vector which contains the addresses of JIT code for the corresponding 46 | /// guest virtual address. 47 | /// 48 | /// Ex. jit_addr = jitcache.blocks[Guest Virtual Address / 4]; 49 | /// 50 | /// An entry which is a zero indicates the block has not yet been 51 | /// translated. 52 | /// 53 | /// The blocks are referenced by the guest virtual address divided by 4 54 | /// because all RISC-V instructions are 4 bytes (for the non-compressed 55 | /// variant) 56 | blocks: Box<[AtomicUsize]>, 57 | 58 | /// The raw JIT RWX backing, the amount of bytes in use, and a dedup 59 | /// table 60 | jit: Mutex<(&'static mut [u8], usize, BTreeMap<Box<[u8]>, usize>)>, 61 | } 62 | 63 | // JIT calling convention 64 | // rax - Scratch 65 | // rbx - Scratch 66 | // rcx - Scratch 67 | // rdx - Scratch 68 | // rsi - Scratchpad memory 69 | // r8 - Pointer to the base of mmu.memory 70 | // r9 - Pointer to the base of mmu.permissions 71 | // r10 - Pointer to the base of mmu.dirty 72 | // r11 - Pointer to the base of mmu.dirty_bitmap 73 | // r12 - Dirty index for the dirty list 74 | // r13 - Pointer to emu.registers 75 | // r14 - Pointer to the base of jitcache.blocks 76 | // r15 - Number of instructions executed 77 | // 78 | // JIT return code (in rax) 79 | // In all cases rbx = PC to resume execution at upon reentry 80 | // 1 - Branch resolution issue 81 | // 2 - ECALL instruction 82 | // 3 - EBREAK instruction 83 | // 4 - Read fault, rcx = guest faulting address 84 | // 5 - Write fault, rcx = guest faulting address 85 | // 6 - Instruction timeout 86 | // 7 - Breakpoint, rcx = reentry point 87 | // 8 - Invalid opcode 88 | 89 | impl JitCache { 90 | /// Allocates a new `JitCache` which is capable of handling up to 91 | /// `max_guest_addr` in executable code. 92 | pub fn new(max_guest_addr: VirtAddr) -> Self { 93 | JitCache { 94 | // Allocate a zeroed out block cache 95 | blocks: (0..(max_guest_addr.0 + 3) / 4).map(|_| { 96 | AtomicUsize::new(0) 97 | }).collect::<Vec<_>>().into_boxed_slice(), 98 | jit: 99 | Mutex::new((alloc_rwx(256 * 1024 * 1024), 0, BTreeMap::new())), 100 | } 101 | } 102 | 103 | /// Get the address of the JIT block translation table 104 | #[inline] 105 | pub fn translation_table(&self) -> usize { 106 | self.blocks.as_ptr() as usize 107 | } 108 | 109 | /// Returns the maximum number of blocks this `JitCache` can translate 110 | pub fn num_blocks(&self) -> usize { 111 | self.blocks.len() 112 | } 113 | 114 | /// Look up the JIT address for a given guest address 115 | #[inline] 116 | pub fn lookup(&self, addr: VirtAddr) -> Option<usize> { 117 | // Make sure the address is aligned 118 | if addr.0 & 3 != 0 { 119 | return None; 120 | } 121 | 122 | let addr = self.blocks.get(addr.0 / 4)?.load(Ordering::SeqCst); 123 | if addr == 0 { 124 | None 125 | } else { 126 | Some(addr) 127 | } 128 | } 129 | 130 | /// Add a JIT to the JIT cache; `code` is the raw bytes of the 131 | /// compiled JIT and the `BTreeMap` converts guest addresses into JIT 132 | /// addresses 133 | pub fn add_mappings(&self, addr: VirtAddr, code: &[u8], 134 | mappings: &BTreeMap<VirtAddr, usize>) -> usize { 135 | // Get exclusive access to the JIT 136 | let mut jit = self.jit.lock().unwrap(); 137 | 138 | // Determine if any of the guest addresses are new to the JIT, if even 139 | // one is, then we have to insert the JIT into the cache 140 | let has_new = mappings.keys().any(|&x| self.lookup(x).is_none()); 141 | if !has_new { 142 | // We have nothing new, just give the JIT address `addr` 143 | return self.lookup(addr).unwrap(); 144 | } 145 | 146 | // Check if we already have identical code 147 | let new_addr = if let Some(&existing) = jit.2.get(code) { 148 | // We have identical code, alias this code for the requested PC 149 | existing 150 | } else { 151 | // Compute the aligned size of code, this ensures we can do aligned 152 | // vector operations
because we ensure alignment of loaded JITs 153 | let align_size = (code.len() + 0x3f) & !0x3f; 154 | 155 | // Number of remaining bytes in the JIT storage 156 | let jit_inuse = jit.1; 157 | let jit_remain = jit.0.len() - jit_inuse; 158 | assert!(jit_remain > align_size, "Out of space in JIT"); 159 | 160 | // Copy the new code into the JIT 161 | jit.0[jit_inuse..jit_inuse + code.len()].copy_from_slice(code); 162 | 163 | // Compute the address of the JIT we're inserting 164 | let new_addr = jit.0[jit_inuse..].as_ptr() as usize; 165 | 166 | // Update the in use for the JIT 167 | jit.1 += align_size; 168 | 169 | // Update the dedup table 170 | assert!(jit.2.insert(code.into(), new_addr).is_none()); 171 | 172 | new_addr 173 | }; 174 | 175 | // Update the JIT lookup address 176 | for (addr, offset) in mappings { 177 | self.blocks[addr.0 / 4].store(new_addr + offset, Ordering::SeqCst); 178 | } 179 | 180 | // Return the newly allocated JIT 181 | self.lookup(addr).unwrap() 182 | } 183 | } 184 | 185 |
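The register assignments and return codes documented in jitcache.rs imply a host-side dispatch loop. The emulator itself (src/emulator.rs) is not included in this section, so the following is only a sketch with hypothetical names, mapping exit codes 1/2/4/5 from the table onto actions:

    /// Hypothetical decoded form of the JIT's rax/rbx/rcx exit protocol
    enum JitExit {
        BranchMiss { pc: usize },   // code 1: rbx = PC to resume at
        Ecall      { pc: usize },   // code 2: syscall requested
        ReadFault  { addr: usize }, // code 4: rcx = faulting address
        WriteFault { addr: usize }, // code 5: rcx = faulting address
    }

    fn run(mut pc: usize, step: impl Fn(usize) -> JitExit) -> Result<(), String> {
        loop {
            match step(pc) {
                JitExit::BranchMiss { pc: new_pc } => pc = new_pc,
                JitExit::Ecall { pc: new_pc } => {
                    // handle the syscall here, then resume
                    pc = new_pc;
                }
                JitExit::ReadFault { addr } =>
                    return Err(format!("read fault at {:#x}", addr)),
                JitExit::WriteFault { addr } =>
                    return Err(format!("write fault at {:#x}", addr)),
            }
        }
    }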
-------------------------------------------------------------------------------- /shared/aht/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Atomic hash table. Allows thread-safe atomic hash table insertions without 2 | //! needing locks 3 | 4 | #![feature(const_generics)] 5 | #![allow(incomplete_features)] 6 | #![no_std] 7 | 8 | extern crate alloc; 9 | 10 | use core::mem::MaybeUninit; 11 | use core::borrow::Borrow; 12 | use core::alloc::Layout; 13 | use core::sync::atomic::{AtomicPtr, AtomicUsize, Ordering}; 14 | use alloc::boxed::Box; 15 | use alloc::alloc::alloc_zeroed; 16 | use alloc::borrow::ToOwned; 17 | 18 | /// Type for an internal hash table entry. Tuple is 19 | /// (pointer to boxed value, key) 20 | type HashTableEntry<K, V> = (AtomicPtr<V>, MaybeUninit<K>); 21 | 22 | /// Type used for a hash table internal table 23 | type HashTable<K, V, const N: usize> = [HashTableEntry<K, V>; N]; 24 | 25 | /// An enum which contains information of whether an entry was inserted or 26 | /// already existed for returning from `entry_or_insert` 27 | pub enum Entry<'a, V> { 28 | /// `V` is a reference to a value that was just inserted into the table 29 | Inserted(&'a V), 30 | 31 | /// `V` is a reference to an old entry in the table 32 | Exists(&'a V), 33 | } 34 | 35 | impl<'a, V> Entry<'a, V> { 36 | /// Gets a `bool` indicating if the entry was inserted 37 | pub fn inserted(&self) -> bool { matches!(self, Entry::Inserted(..)) } 38 | 39 | /// Gets a `bool` indicating if the entry already exists 40 | pub fn exists(&self) -> bool { matches!(self, Entry::Exists(..)) } 41 | 42 | /// Gets the reference to the entry 43 | pub fn entry(&self) -> &'a V { 44 | match self { 45 | Entry::Inserted(x) => x, 46 | Entry::Exists(x) => x, 47 | } 48 | } 49 | } 50 | 51 | /// An atomic hash table that allows insertions and lookups in parallel. 52 | /// However resizing of the hash table or removal of entries is not supported. 53 | pub struct Aht<K, V, const N: usize> { 54 | /// Raw hash table 55 | hash_table: Box<HashTable<K, V, N>>, 56 | 57 | /// Number of entries currently present in the hash table 58 | entries: AtomicUsize, 59 | } 60 | 61 | impl<K, V, const N: usize> Aht<K, V, N> { 62 | /// Create a new atomic hash table 63 | pub fn new() -> Self { 64 | // Determine the layout for an allocation to satisfy an array of `N` 65 | // `HashTableEntry`'s 66 | let layout = Layout::array::<HashTableEntry<K, V>>(N) 67 | .expect("Invalid shape for Aht"); 68 | 69 | // Create a new, initialized-as-zero allocation 70 | // This will create uninitialized keys, which are held in `MaybeUninit` 71 | // and zeroed out `AtomicPtr`s, which are "empty" entries in the table 72 | let allocation = unsafe { alloc_zeroed(layout) }; 73 | let allocation = allocation as *mut HashTable<K, V, N>; 74 | assert!(!allocation.is_null(), "Allocation failure for Aht"); 75 | 76 | // Convert the new allocation into a `Box` 77 | let boxed = unsafe { Box::from_raw(allocation) }; 78 | 79 | Aht { 80 | hash_table: boxed, 81 | entries: AtomicUsize::new(0), 82 | } 83 | } 84 | 85 | /// Get the number of entries in this hash table 86 | pub fn len(&self) -> usize { self.entries.load(Ordering::SeqCst) } 87 | 88 | /// Insert a `key` into the hash table using `hash` as the first index 89 | /// into the table. 90 | /// 91 | /// If `key` is not present in the hash table, `insert` will be invoked to 92 | /// produce a value which will be inserted. 93 | /// 94 | /// Returns a reference to the inserted or old entry in the table. 95 | /// If the key was already in the table, returns `Entry::Exists(ref old entry)`, 96 | /// otherwise it returns `Entry::Inserted(ref new entry)` 97 | pub fn entry_or_insert<Q, F>(&self, key: &Q, mut hash: usize, 98 | insert: F) -> Entry<V> 99 | where F: FnOnce() -> Box<V>, 100 | K: Borrow<Q>, 101 | Q: Eq + ToOwned + ?Sized, 102 | Q::Owned: Into<K> { 103 | let empty: *mut V = 0 as *mut V; 104 | let filling: *mut V = !0 as *mut V; 105 | 106 | for attempts in 0usize.. { 107 | // Check if there are no free entries left in the hash table 108 | assert!(attempts < N, "Out of entries in the atomic hash table"); 109 | 110 | // Get the index into the hash table for this entry 111 | let hti = hash % N; 112 | 113 | // Try to get exclusive access to this hash table entry 114 | if self.hash_table[hti].0.load(Ordering::SeqCst) == empty && 115 | self.hash_table[hti].0 116 | .compare_and_swap(empty, filling, 117 | Ordering::SeqCst) == empty { 118 | // Request the caller to create the entry 119 | let ent = Box::into_raw(insert()); 120 | 121 | // Make sure the pointer doesn't end up turning into one of 122 | // the reserved values we use for our hash table internals. 123 | assert!(ent != empty && ent != filling, 124 | "Invalid pointer value for Aht"); 125 | 126 | // Save the key into the table. It is safe to fill this entry 127 | // in with an immutable reference as we have exclusive access 128 | // to it 129 | unsafe { 130 | let ht = self.hash_table[hti].1.as_ptr() as *mut K; 131 | core::ptr::write(ht, key.to_owned().into()); 132 | } 133 | 134 | // Fill in the entry 135 | self.hash_table[hti].0.store(ent, Ordering::SeqCst); 136 | 137 | // Update number of entries in our table 138 | self.entries.fetch_add(1, Ordering::SeqCst); 139 | 140 | // Return a reference to the newly created data 141 | return Entry::Inserted(unsafe { &*ent }); 142 | } else { 143 | // Either we lost the race, or the entry was valid. Let's wait 144 | // for it to become valid first.
145 | 146 | // Loop forever until this entry in the hash table is valid 147 | while self.hash_table[hti] 148 | .0.load(Ordering::SeqCst) == filling {} 149 | 150 | // Now that we know the entry is valid, check if the keys match 151 | if key == unsafe { 152 | (*self.hash_table[hti].1.as_ptr()).borrow() } { 153 | // Entry is already in the map, just return the existing 154 | // entry! 155 | let reference = self.hash_table[hti].0 156 | .load(Ordering::SeqCst) as *const V; 157 | return Entry::Exists(unsafe { &*reference }); 158 | } else { 159 | // There was a collision in the hash table for this entry. 160 | // We were stored at the same index, however we were not 161 | // a matching entry. Move to the next entry in the hash 162 | // table by falling through and going to the next iteration 163 | // of this loop. 164 | } 165 | } 166 | 167 | // Advance to the next index in the hash table 168 | hash = hash.wrapping_add(1); 169 | } 170 | 171 | unreachable!("Unreachable"); 172 | } 173 | } 174 | 175 | impl<K, V, const N: usize> Drop for Aht<K, V, N> { 176 | fn drop(&mut self) { 177 | for ii in 0..N { 178 | // Get the entry 179 | let ptr = self.hash_table[ii].0.load(Ordering::SeqCst); 180 | 181 | // It should be impossible to `Drop` while an entry is being filled 182 | // in 183 | assert!(ptr != !0usize as *mut V); 184 | 185 | if !ptr.is_null() { 186 | // Drop the value 187 | unsafe { Box::from_raw(ptr); } 188 | 189 | // Drop the key as well, as it's not automatically dropped due 190 | // to `MaybeUninit` 191 | unsafe { 192 | core::ptr::drop_in_place( 193 | self.hash_table[ii].1.as_mut_ptr()) 194 | } 195 | } 196 | } 197 | } 198 | } 199 | 200 | #[cfg(test)] 201 | mod test { 202 | use crate::*; 203 | 204 | extern crate std; 205 | use alloc::string::String; 206 | 207 | #[test] 208 | fn test() { 209 | let table: Aht<u32, u32, 1024> = Aht::new(); 210 | let foo1 = table.entry_or_insert(&11, 50, || Box::new(57)); 211 | assert!(*foo1.entry() == 57); 212 | let foo2 = table.entry_or_insert(&15, 50, || Box::new(52)); 213 | assert!(*foo2.entry() == 52); 214 | let foo3 = table.entry_or_insert(&11, 50, || Box::new(1111)); 215 | assert!(*foo3.entry() == 57); 216 | } 217 | } 218 | 219 |
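One way the fuzzer can use this table (a sketch; `input` is a hypothetical byte buffer, and truncating the 128-bit falkhash digest to `usize` for the probe index is an assumption, not code from this repo):

    // Deduplicate an input by content hash: insert only if never seen
    let hasher = FalkHasher::new();
    let corpus: Aht<u128, Vec<u8>, 1024> = Aht::new();
    let hash = hasher.hash(&input);
    let entry = corpus.entry_or_insert(&hash, hash as usize,
                                       || Box::new(input.clone()));
    if entry.inserted() {
        // First time this exact input has been observed
    }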
-------------------------------------------------------------------------------- /shared/falkhash/src/lib.rs: -------------------------------------------------------------------------------- 1 | use core::arch::x86_64::*; 2 | 3 | /// Structure which gives access to a `hash` member function, allowing 128-bit 4 | /// non-cryptographic-hashing of a slice of bytes 5 | /// 6 | /// This structure exists only to protect access to the `hash` function by first 7 | /// validating that the current CPU has AES-NI instructions available for use. 8 | /// This check is done when `FalkHasher::new()` is used to create a new hasher 9 | /// and never again. This makes `FalkHasher::hash()` have no feature detection 10 | /// overhead while still guarding use of falkhash with a runtime CPU feature 11 | /// check. 12 | pub struct FalkHasher(()); 13 | 14 | impl FalkHasher { 15 | /// Create a new `FalkHasher` 16 | pub fn new() -> Self { 17 | // Make sure AES-NI is present on this CPU 18 | assert!(std::is_x86_feature_detected!("aes"), 19 | "AES-NI instructions not present, required for falkhash"); 20 | 21 | // If AES is present it's safe to return an object which allows 22 | // use of `falkhash` from this point on 23 | FalkHasher(()) 24 | } 25 | 26 | /// A non-cryptographically-safe hash leveraging AES instructions on x86 to 27 | /// quickly generate a 128-bit hash of the input `buffer` 28 | pub fn hash(&self, buffer: &[u8]) -> u128 { 29 | unsafe { crate::falkhash_int(buffer) } 30 | } 31 | } 32 | 33 | /// A non-cryptographically-safe hash leveraging AES instructions on x86 to 34 | /// quickly generate a 128-bit hash of the input `buffer` 35 | #[target_feature(enable = "aes")] 36 | unsafe fn falkhash_int(buffer: &[u8]) -> u128 { 37 | // Seed is initialized with random values, and also takes into account the 38 | // buffer length 39 | let seed = 40 | _mm_set_epi64x(0x2a4ba81ac0bfd4fe + buffer.len() as i64, 41 | 0x52c8611d3941be6a); 42 | 43 | // Hash starts out as the seed value 44 | let mut hash = seed; 45 | 46 | // Scratch buffer used to pad out buffers to 0x50 bytes if they are not 47 | // evenly divisible by 0x50 48 | let mut tmp = [0u8; 0x50]; 49 | 50 | // Go through each 0x50 byte chunk 51 | for chunk in buffer.chunks(0x50) { 52 | // Check if this chunk is large enough for our operation size 53 | let ptr = if chunk.len() < 0x50 { 54 | // Pad with zeros by copying to the temporary buffer 55 | tmp[..chunk.len()].copy_from_slice(chunk); 56 | &tmp[..] 57 | } else { 58 | // Chunk was exactly 0x50 bytes, leave it as is 59 | chunk 60 | }; 61 | 62 | // Load up all the raw data 63 | let p0 = _mm_loadu_si128((ptr.as_ptr() as *const __m128i).offset(0)); 64 | let p1 = _mm_loadu_si128((ptr.as_ptr() as *const __m128i).offset(1)); 65 | let p2 = _mm_loadu_si128((ptr.as_ptr() as *const __m128i).offset(2)); 66 | let p3 = _mm_loadu_si128((ptr.as_ptr() as *const __m128i).offset(3)); 67 | let p4 = _mm_loadu_si128((ptr.as_ptr() as *const __m128i).offset(4)); 68 | 69 | // Xor against `seed` 70 | let p0 = _mm_xor_si128(p0, seed); 71 | let p1 = _mm_xor_si128(p1, seed); 72 | let p2 = _mm_xor_si128(p2, seed); 73 | let p3 = _mm_xor_si128(p3, seed); 74 | let p4 = _mm_xor_si128(p4, seed); 75 | 76 | // `aesenc` to merge into `p0` 77 | let p0 = _mm_aesenc_si128(p0, p1); 78 | let p0 = _mm_aesenc_si128(p0, p2); 79 | let p0 = _mm_aesenc_si128(p0, p3); 80 | let p0 = _mm_aesenc_si128(p0, p4); 81 | 82 | // Finalize by `aesenc`ing against `seed` 83 | let p0 = _mm_aesenc_si128(p0, seed); 84 | 85 | // Merge this block into the hash 86 | hash = _mm_aesenc_si128(hash, p0); 87 | } 88 | 89 | // Finalize hash by `aesenc`ing against the seed four times 90 | hash = _mm_aesenc_si128(hash, seed); 91 | hash = _mm_aesenc_si128(hash, seed); 92 | hash = _mm_aesenc_si128(hash, seed); 93 | hash = _mm_aesenc_si128(hash, seed); 94 | 95 | // Return out the hash! 96 | *((&hash as *const __m128i) as *const u128) 97 | } 98 | 99 | #[test] 100 | fn validate_correctness() { 101 | // Hash a buffer full of 'A's at different sizes and make sure we get the 102 | // expected results for the hash. 103 | // We try a bunch of different sizes to make sure we're correctly handling 104 | // padding of data when it's less than the internal hash chunk size.
105 | 106 | // Create a new `FalkHasher` 107 | let fh = FalkHasher::new(); 108 | 109 | // Buffer of 'A's 110 | let test_data = [0x41u8; 128]; 111 | 112 | assert!(fh.hash(&test_data[..0x00]) == 0x4208942bcc22d29ce42a0c56daaf5088); 113 | assert!(fh.hash(&test_data[..0x01]) == 0x489903837004cd2617a44fae84df6e64); 114 | assert!(fh.hash(&test_data[..0x02]) == 0x3db8c8b575d65c8017411771965c667b); 115 | assert!(fh.hash(&test_data[..0x03]) == 0x8bcadd96fe92478b756752736b2afc5e); 116 | assert!(fh.hash(&test_data[..0x04]) == 0x77d97e0e05ca147689729bd9cb3d25f9); 117 | assert!(fh.hash(&test_data[..0x05]) == 0xd673c188d4ea71f106416a3a6476abc1); 118 | assert!(fh.hash(&test_data[..0x06]) == 0xce9cb1472235b776e6b16a340cd4a36d); 119 | assert!(fh.hash(&test_data[..0x07]) == 0x8d68a27f5c2c26710b080eacdb96f3a4); 120 | assert!(fh.hash(&test_data[..0x08]) == 0x6d53b4eb5ea247bf0dfd453ad8ad5e6f); 121 | assert!(fh.hash(&test_data[..0x09]) == 0xc36870d8bae6d870c840df4cb4e13b05); 122 | assert!(fh.hash(&test_data[..0x0a]) == 0x556ad9cd2c556ebe31613046b1668bfa); 123 | assert!(fh.hash(&test_data[..0x0b]) == 0x528fbdd299fcd286e579afb2b588dedc); 124 | assert!(fh.hash(&test_data[..0x0c]) == 0x50e9dc0cc0d37464984dddc3fea801e7); 125 | assert!(fh.hash(&test_data[..0x0d]) == 0xd2a878fc3ba87a573f76d27bbccddfe3); 126 | assert!(fh.hash(&test_data[..0x0e]) == 0xb4cb18caf0ede9b822b140b5b5108c0f); 127 | assert!(fh.hash(&test_data[..0x0f]) == 0x38a6eee841f4cc496a6df40300835d90); 128 | assert!(fh.hash(&test_data[..0x10]) == 0xb11e14830381b1f77c421d6388f005d2); 129 | assert!(fh.hash(&test_data[..0x11]) == 0xfcf434743799df67707e7d028359ffea); 130 | assert!(fh.hash(&test_data[..0x12]) == 0x56413713a7fbd1822c4a4086bc30bc0f); 131 | assert!(fh.hash(&test_data[..0x13]) == 0x5b43f47f1694fcbe4dab2723923dcd25); 132 | assert!(fh.hash(&test_data[..0x14]) == 0xf8820c5bf51e39df6b0fe680317bed50); 133 | assert!(fh.hash(&test_data[..0x15]) == 0x6c568012d2c3ffac7725727b4e6abbdc); 134 | assert!(fh.hash(&test_data[..0x16]) == 0x9e14134e254e93a2de37a54b80cb5d0d); 135 | assert!(fh.hash(&test_data[..0x17]) == 0xc25baa45a8477a8e4f356d4141d47a68); 136 | assert!(fh.hash(&test_data[..0x18]) == 0x9492255bb3f0b26b6dde29c1caab41c8); 137 | assert!(fh.hash(&test_data[..0x19]) == 0xf6d214cb9a86ed2d4426a77f591cfbce); 138 | assert!(fh.hash(&test_data[..0x1a]) == 0x3f8943ab20c6809887b45c0f3dfc3118); 139 | assert!(fh.hash(&test_data[..0x1b]) == 0xadc0a2c1c2b556403678c37c190f7a77); 140 | assert!(fh.hash(&test_data[..0x1c]) == 0x6d79539f0eeae4d1bd99b3c688f321d8); 141 | assert!(fh.hash(&test_data[..0x1d]) == 0xdb180909e29b37acd858055aa71b0d37); 142 | assert!(fh.hash(&test_data[..0x1e]) == 0xc2d21f2fc4bf4cabd0d5c433229ae657); 143 | assert!(fh.hash(&test_data[..0x1f]) == 0x695e7481c2db7defb13d7933a7d335ee); 144 | assert!(fh.hash(&test_data[..0x20]) == 0x96bb8bb54a0212f71b5a6be72addc913); 145 | assert!(fh.hash(&test_data[..0x21]) == 0x4ec919773abf7660acf3f8d078a702ac); 146 | assert!(fh.hash(&test_data[..0x22]) == 0x107a9d7af19b3d9c2d8a44dcac947302); 147 | assert!(fh.hash(&test_data[..0x23]) == 0x6db6313c39dbdce322b46b9de6140431); 148 | assert!(fh.hash(&test_data[..0x24]) == 0x7289c5dba45a814d6bd85ee0b2673f89); 149 | assert!(fh.hash(&test_data[..0x25]) == 0x2a4a4790b528df5652671071af084610); 150 | assert!(fh.hash(&test_data[..0x26]) == 0x0b58ffcab3292ac18e387adc0e429d06); 151 | assert!(fh.hash(&test_data[..0x27]) == 0xeb370058e83b603e158df11921ee7f25); 152 | assert!(fh.hash(&test_data[..0x28]) == 0x2b3106e63b234bb4aa3671a32ea28068); 153 | assert!(fh.hash(&test_data[..0x29]) == 
0x3899e92051e82c0d53816a1bf84fa9bd); 154 | assert!(fh.hash(&test_data[..0x2a]) == 0x9f05cadfe76b2de64723c5d0284b055e); 155 | assert!(fh.hash(&test_data[..0x2b]) == 0xeb99a42e69fc8211d123f3a7699619bd); 156 | assert!(fh.hash(&test_data[..0x2c]) == 0x94f4d167b239faedef84b52072ad5ee7); 157 | assert!(fh.hash(&test_data[..0x2d]) == 0x9bad00b29b997f0962acd56ebeb91302); 158 | assert!(fh.hash(&test_data[..0x2e]) == 0x24c260c9a415b890af0f0d5ba274e07e); 159 | assert!(fh.hash(&test_data[..0x2f]) == 0xcad210a611ecce1991a7971f2410fb1f); 160 | assert!(fh.hash(&test_data[..0x30]) == 0x20925b42385d71a0994649534b7572ee); 161 | assert!(fh.hash(&test_data[..0x31]) == 0x87a4d86880c7a8ced1c4a4e185a508a9); 162 | assert!(fh.hash(&test_data[..0x32]) == 0xb06bef3d05681aa41fb1fccac2f5ff17); 163 | assert!(fh.hash(&test_data[..0x33]) == 0x51587eb5a4ae727a2c10082154ce487d); 164 | assert!(fh.hash(&test_data[..0x34]) == 0x8605c3378154a19ed166ccbc518ce950); 165 | assert!(fh.hash(&test_data[..0x35]) == 0x71e100167a049854685577695ecc3966); 166 | assert!(fh.hash(&test_data[..0x36]) == 0x0f84c5ae68063821376ac9c84e916dd1); 167 | assert!(fh.hash(&test_data[..0x37]) == 0x0577e3325e69ae592222b41f654b4f39); 168 | assert!(fh.hash(&test_data[..0x38]) == 0xe79943d1088e52eec253f616f9517f1b); 169 | assert!(fh.hash(&test_data[..0x39]) == 0x116bba02b2717caf756de21739ca436f); 170 | assert!(fh.hash(&test_data[..0x3a]) == 0xcb8da9b5cd74f35fcd3f9593eb5ed601); 171 | assert!(fh.hash(&test_data[..0x3b]) == 0x18e25a2269dbd3c49fb84acd55503f78); 172 | assert!(fh.hash(&test_data[..0x3c]) == 0x79f1f6c070662b9be4a02b18dfb9fd13); 173 | assert!(fh.hash(&test_data[..0x3d]) == 0xe070c8c0158ce4d8278a68899f6a82e5); 174 | assert!(fh.hash(&test_data[..0x3e]) == 0x6bc91aee6308bd3318e1d14d7a3d49d5); 175 | assert!(fh.hash(&test_data[..0x3f]) == 0x3bc6da3ca213b637b0c630302d4c96a9); 176 | assert!(fh.hash(&test_data[..0x40]) == 0xb8363ca2747026cbbc38aee9babf115e); 177 | assert!(fh.hash(&test_data[..0x41]) == 0xf15e6ca7d3f39bb42bfb174cd47601d2); 178 | assert!(fh.hash(&test_data[..0x42]) == 0x8ff97a04055abf60fc158974dae33e3a); 179 | assert!(fh.hash(&test_data[..0x43]) == 0xcbe80bb1894aa411465ef3d5b09aa9cf); 180 | assert!(fh.hash(&test_data[..0x44]) == 0x1826f61a6773d90c1d684439a85cde7f); 181 | assert!(fh.hash(&test_data[..0x45]) == 0x9088c619dc6c4993e464c5854eb00fef); 182 | assert!(fh.hash(&test_data[..0x46]) == 0xdabe91cf7b80c2b5fb7d32f81d90060f); 183 | assert!(fh.hash(&test_data[..0x47]) == 0x1673f80d93d65fb1def14e095580a4f3); 184 | assert!(fh.hash(&test_data[..0x48]) == 0x23983232158b8999256972203df8020c); 185 | assert!(fh.hash(&test_data[..0x49]) == 0x0a3c8c6c4b066bb9faaca704c61b6e32); 186 | assert!(fh.hash(&test_data[..0x4a]) == 0x003209cbf8ea9a4085656adfe215265b); 187 | assert!(fh.hash(&test_data[..0x4b]) == 0xb887392707af86556b1097a1b89d3e0d); 188 | assert!(fh.hash(&test_data[..0x4c]) == 0xb67ef5d61cb027f2a132df75b0ef9f4e); 189 | assert!(fh.hash(&test_data[..0x4d]) == 0x595951ec2bf4cbf05b8a0382b0fa5921); 190 | assert!(fh.hash(&test_data[..0x4e]) == 0x3e98b49ddaa6327a8064205acd7ed114); 191 | assert!(fh.hash(&test_data[..0x4f]) == 0x33fd8878d2de0fe10119e0e9ed813a73); 192 | assert!(fh.hash(&test_data[..0x50]) == 0x28310c491c3605f71922f1cb4a827ce9); 193 | assert!(fh.hash(&test_data[..0x51]) == 0xbe83f0b1a23c22fd1c3709513671711e); 194 | assert!(fh.hash(&test_data[..0x52]) == 0xf58579eceae78f038dde8369372e2973); 195 | assert!(fh.hash(&test_data[..0x53]) == 0xf3b47b70eaf05f0fecb11058a9d9d2b9); 196 | assert!(fh.hash(&test_data[..0x54]) == 
0x28baa7bc1b1eb62d75a5bd3bea5390c7); 197 | assert!(fh.hash(&test_data[..0x55]) == 0xa4f3f3c8f043cc4e7a7e00f39c31dad6); 198 | assert!(fh.hash(&test_data[..0x56]) == 0x9944d09410d705234031c26862125426); 199 | assert!(fh.hash(&test_data[..0x57]) == 0x3a689420ceaf9fa42f5209784d1bd508); 200 | assert!(fh.hash(&test_data[..0x58]) == 0x73d20fa06762a542eaf25020db35c2f1); 201 | assert!(fh.hash(&test_data[..0x59]) == 0xe7304fcf1ecc199f11d376e5abf11724); 202 | assert!(fh.hash(&test_data[..0x5a]) == 0x31fb056271e1c3ffde8a60aeb10ac9f0); 203 | assert!(fh.hash(&test_data[..0x5b]) == 0x4595eda877b512f16759c15ca1c6d6c6); 204 | assert!(fh.hash(&test_data[..0x5c]) == 0x11ea5290120c784f513cb09753c9eff8); 205 | assert!(fh.hash(&test_data[..0x5d]) == 0x9137260e034d3f20f46ccf8c95920c3e); 206 | assert!(fh.hash(&test_data[..0x5e]) == 0xd1caeabee2b2184427a496f3d617a929); 207 | assert!(fh.hash(&test_data[..0x5f]) == 0x2c37a3d8c37e7cf46614748fde2740f6); 208 | assert!(fh.hash(&test_data[..0x60]) == 0x5ca6be3c15723fcdc126da32a900e756); 209 | assert!(fh.hash(&test_data[..0x61]) == 0x5ee6ea3d25ad6b63f19eba491fdf5fd0); 210 | assert!(fh.hash(&test_data[..0x62]) == 0x86d1f5893a3762e1be161f4abd4860bd); 211 | assert!(fh.hash(&test_data[..0x63]) == 0x106f9d93575fe2ae4e9e4a980209a1d2); 212 | assert!(fh.hash(&test_data[..0x64]) == 0x2ac25f909974c21e02ffc38bdb67f8c5); 213 | assert!(fh.hash(&test_data[..0x65]) == 0x1b8d8ee55850f59b2760f79c04ac41bb); 214 | assert!(fh.hash(&test_data[..0x66]) == 0xaf517df53c73cc63541d059349428c85); 215 | assert!(fh.hash(&test_data[..0x67]) == 0x15c90f5346e9c0c10e18b6948cff7def); 216 | assert!(fh.hash(&test_data[..0x68]) == 0xb827406e56e52147aa64e87730b45053); 217 | assert!(fh.hash(&test_data[..0x69]) == 0xf7c07cd7b54659b4e0fa9b6f876c67c4); 218 | assert!(fh.hash(&test_data[..0x6a]) == 0x249a2282072ef3290e137789b6397918); 219 | assert!(fh.hash(&test_data[..0x6b]) == 0x8ec980a5dfba8033af83b544866825d9); 220 | assert!(fh.hash(&test_data[..0x6c]) == 0xeffd0d6be048336bf51f346096cea90e); 221 | assert!(fh.hash(&test_data[..0x6d]) == 0x743958c67392f3459eccc4b03d4509b8); 222 | assert!(fh.hash(&test_data[..0x6e]) == 0x3d4ead5cdc0423f49c6b1772b314fd69); 223 | assert!(fh.hash(&test_data[..0x6f]) == 0xe1a67abb501b5febe0686f38ee215964); 224 | assert!(fh.hash(&test_data[..0x70]) == 0x980bc0179a5ffd74e70de96a053beff8); 225 | assert!(fh.hash(&test_data[..0x71]) == 0x426327d413ed8925f441df43c539e1de); 226 | assert!(fh.hash(&test_data[..0x72]) == 0x441ab6fe573769e60d2fb1bb5d2b70cb); 227 | assert!(fh.hash(&test_data[..0x73]) == 0xedadeb80ad2dbb1e586455dbee535f4d); 228 | assert!(fh.hash(&test_data[..0x74]) == 0xf49c2b8742471c62bdfd64bb697a7d9a); 229 | assert!(fh.hash(&test_data[..0x75]) == 0x0e2bb0091da73d6fe227ac910d0ff929); 230 | assert!(fh.hash(&test_data[..0x76]) == 0x2f38ef36ee76f013aea3d6ab77b4a92d); 231 | assert!(fh.hash(&test_data[..0x77]) == 0x812b397763acd48889ed19055024dc1b); 232 | assert!(fh.hash(&test_data[..0x78]) == 0x814cee548741e96992715789cfc905ad); 233 | assert!(fh.hash(&test_data[..0x79]) == 0x32e271accb857b3140201686e494c3be); 234 | assert!(fh.hash(&test_data[..0x7a]) == 0x93333372751d99e6307837ae6c74653f); 235 | assert!(fh.hash(&test_data[..0x7b]) == 0xa4b5706b9198301fb06ba0d49e2d796a); 236 | assert!(fh.hash(&test_data[..0x7c]) == 0x5cace7793572149cf2f8817aec2ff832); 237 | assert!(fh.hash(&test_data[..0x7d]) == 0x2807e6952b39447cfd43412078268e1f); 238 | assert!(fh.hash(&test_data[..0x7e]) == 0x2c6fdb32d030e19c70afe6bc399e0ea0); 239 | assert!(fh.hash(&test_data[..0x7f]) == 
0x84878347cb3091a055024fb9d5beddbb); 240 | } 241 |
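A quick usage sketch (hypothetical `input` buffer): the 128-bit digest is handy as a key for dedup sets, since `FalkHasher::new()` has already verified AES-NI support:

    use std::collections::HashSet;

    let hasher = FalkHasher::new(); // panics if AES-NI is unavailable
    let mut seen: HashSet<u128> = HashSet::new();
    if seen.insert(hasher.hash(&input)) {
        // Never-before-seen input contents
    }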
-------------------------------------------------------------------------------- /src/mmu.rs: -------------------------------------------------------------------------------- 1 | //! A software MMU with byte level permissions and uninitialized memory access 2 | //! detection 3 | 4 | use std::path::Path; 5 | use std::collections::HashMap; 6 | use crate::emulator::VmExit; 7 | use crate::primitive::Primitive; 8 | 9 | /// Block size used for resetting and tracking memory which has been modified 10 | /// The larger this is, the fewer but more expensive memcpys() need to occur, 11 | /// the smaller, the more frequent but less expensive memcpys() need to occur. 12 | /// It seems the sweet spot is often 128-4096 bytes 13 | pub const DIRTY_BLOCK_SIZE: usize = 64; 14 | 15 | /// If `true` the logic for uninitialized memory tracking will be disabled and 16 | /// all memory will be marked as readable if it has the RAW bit set 17 | const DISABLE_UNINIT: bool = true; 18 | 19 | // Don't change these, they're hardcoded in the JIT (namely write vs raw dist, 20 | // during raw bit updates in writes) 21 | pub const PERM_READ: u8 = 1 << 0; 22 | pub const PERM_WRITE: u8 = 1 << 1; 23 | pub const PERM_EXEC: u8 = 1 << 2; 24 | pub const PERM_RAW: u8 = 1 << 3; 25 | 26 | /// Accessed bit, set when the byte is read, but not when it is written 27 | pub const PERM_ACC: u8 = 1 << 4; 28 | 29 | /// A permissions byte which corresponds to a memory byte and defines the 30 | /// permissions it has 31 | #[repr(transparent)] 32 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] 33 | pub struct Perm(pub u8); 34 | 35 | /// A guest virtual address 36 | #[repr(transparent)] 37 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 38 | pub struct VirtAddr(pub usize); 39 | 40 | /// Section information for a file 41 | pub struct Section { 42 | pub file_off: usize, 43 | pub virt_addr: VirtAddr, 44 | pub file_size: usize, 45 | pub mem_size: usize, 46 | pub permissions: Perm, 47 | } 48 | 49 | /// An isolated memory space 50 | #[derive(PartialEq)] 51 | pub struct Mmu { 52 | /// Block of memory for this address space 53 | /// Offset 0 corresponds to address 0 in the guest address space 54 | memory: Vec<u8>, 55 | 56 | /// Holds the permission bytes for the corresponding byte in memory 57 | permissions: Vec<Perm>, 58 | 59 | /// Dirtied memory information 60 | dirty_state: DirtyState, 61 | 62 | /// Current base address of the next allocation 63 | cur_alc: VirtAddr, 64 | 65 | /// Map an active allocation to its size 66 | active_alcs: HashMap<VirtAddr, usize>, 67 | } 68 | 69 | /// Tracks the state of dirtied memory 70 | #[derive(PartialEq)] 71 | pub struct DirtyState { 72 | /// Tracks block indices in `memory` which are dirty 73 | dirty: Vec<usize>, 74 | 75 | /// Tracks which parts of memory have been dirtied 76 | dirty_bitmap: Vec<u64>, 77 | } 78 | 79 | impl DirtyState { 80 | /// Updates the dirty map indicating that the byte at `addr` has been 81 | /// dirtied 82 | fn update_dirty(&mut self, addr: VirtAddr) { 83 | let block = addr.0 / DIRTY_BLOCK_SIZE; 84 | 85 | // Determine the bitmap position of the dirty block 86 | let idx = block / 64; 87 | let bit = block % 64; 88 | 89 | // Check if the block is not dirty 90 | if self.dirty_bitmap[idx] & (1 << bit) == 0 { 91 | // Block is not dirty, add it to the dirty list 92 | self.dirty.push(block); 93 | 94 | // Update the dirty bitmap 95 | self.dirty_bitmap[idx] |= 1 << bit; 96 | } 97 | } 98 | } 99 | 100 | impl Mmu { 101 | /// Create a new memory space which can hold `size` bytes 102 | pub fn new(size: usize) -> Self { 103 | Mmu { 104 | memory: vec![0; size], 105 | permissions: vec![Perm(0); size], 106 | cur_alc: VirtAddr(0x10000), 107 | active_alcs: Default::default(), 108 | dirty_state: DirtyState { 109 | dirty: Vec::with_capacity(size / DIRTY_BLOCK_SIZE + 1), 110 | dirty_bitmap: vec![0u64; size / DIRTY_BLOCK_SIZE / 64 + 1], 111 | }, 112 | } 113 | } 114 | 115 | /// Fork from an existing MMU 116 | pub fn fork(&self) -> Self { 117 | let size = self.memory.len(); 118 | 119 | Mmu { 120 | memory: self.memory.clone(), 121 | permissions: self.permissions.clone(), 122 | cur_alc: self.cur_alc.clone(), 123 | active_alcs: self.active_alcs.clone(), 124 | dirty_state: DirtyState { 125 | dirty: Vec::with_capacity(size / DIRTY_BLOCK_SIZE + 1), 126 | dirty_bitmap: vec![0u64; size / DIRTY_BLOCK_SIZE / 64 + 1], 127 | } 128 | } 129 | } 130 | 131 | /// Restores memory back to the original state (e.g. restores all dirty 132 | /// blocks to the state of `other`) 133 | pub fn reset(&mut self, other: &Mmu) { 134 | for &block in &self.dirty_state.dirty { 135 | // Get the start and end addresses of the dirtied memory 136 | let start = block * DIRTY_BLOCK_SIZE; 137 | let end = (block + 1) * DIRTY_BLOCK_SIZE; 138 | 139 | // Zero the bitmap. This hits wide, but it's fine, we have to do 140 | // a 64-bit write anyways, no reason to compute the bit index 141 | self.dirty_state.dirty_bitmap[block / 64] = 0; 142 | 143 | // Restore memory state 144 | self.memory[start..end].copy_from_slice(&other.memory[start..end]); 145 | 146 | // Restore permissions 147 | self.permissions[start..end].copy_from_slice( 148 | &other.permissions[start..end]); 149 | } 150 | 151 | // Clear the dirty list 152 | self.dirty_state.dirty.clear(); 153 | 154 | // Restore allocator state 155 | self.cur_alc = other.cur_alc; 156 | 157 | // Clear active allocation state 158 | self.active_alcs.clear(); 159 | self.active_alcs.extend(other.active_alcs.iter()); 160 | 161 | if false { 162 | // Tests to make sure everything reset perfectly 163 | assert!(self.cur_alc == other.cur_alc); 164 | assert!(self.memory == other.memory); 165 | assert!(self.permissions == other.permissions); 166 | assert!(self.active_alcs == other.active_alcs); 167 | } 168 | } 169 | 170 | /// Allocate a region of memory as RW in the address space 171 | pub fn allocate(&mut self, size: usize) -> Option<VirtAddr> { 172 | // Add some padding and alignment 173 | let align_size = (size + 0x1f) & !0xf; 174 | 175 | // Get the current allocation base 176 | let base = self.cur_alc; 177 | 178 | // Cannot allocate 179 | if base.0 >= self.memory.len() { 180 | return None; 181 | } 182 | 183 | // Update the allocation size 184 | self.cur_alc = VirtAddr(self.cur_alc.0.checked_add(align_size)?); 185 | 186 | // Could not satisfy allocation without going OOM 187 | if self.cur_alc.0 > self.memory.len() { 188 | return None; 189 | } 190 | 191 | // Mark the memory as un-initialized and writable 192 | self.set_permissions(base, size, Perm(PERM_RAW | PERM_WRITE)); 193 | 194 | // Log the allocation 195 | self.active_alcs.insert(base, size); 196 | 197 | Some(base) 198 | } 199 | 200 | /// Get the size of an active allocation if `base` is an active allocation 201 | pub fn get_alc(&self, base: VirtAddr) -> Option<usize> { 202 | self.active_alcs.get(&base).copied() 203 | } 204 | 205 | /// Free a region of memory based on the allocation from a prior `allocate` 206 | /// call 207 | pub fn free(&mut self, base: VirtAddr) -> Result<(), VmExit> { 208 | if let Some(size) =
self.active_alcs.remove(&base) { 209 | // Clear permissions 210 | self.set_permissions(base, size, Perm(0)); 211 | 212 | Ok(()) 213 | } else { 214 | Err(VmExit::InvalidFree(base)) 215 | } 216 | } 217 | 218 | /// Apply permissions to a region of memory 219 | pub fn set_permissions(&mut self, addr: VirtAddr, size: usize, 220 | mut perm: Perm) -> Option<()> { 221 | // Fast path, nothing to change 222 | if size == 0 { return Some(()); } 223 | 224 | if DISABLE_UNINIT { 225 | // If memory is marked as RAW, mark it as readable right away if 226 | // we have uninit tracking disabled 227 | if perm.0 & PERM_RAW != 0 { perm.0 |= PERM_READ; } 228 | } 229 | 230 | // Apply permissions 231 | self.permissions.get_mut(addr.0..addr.0.checked_add(size)?)? 232 | .iter_mut().for_each(|x| *x = perm); 233 | 234 | // Compute dirty bit blocks 235 | let block_start = addr.0 / DIRTY_BLOCK_SIZE; 236 | let block_end = (addr.0 + size) / DIRTY_BLOCK_SIZE; 237 | for block in block_start..=block_end { 238 | // Determine the bitmap position of the dirty block 239 | let idx = block / 64; 240 | let bit = block % 64; 241 | 242 | // Check if the block is not dirty 243 | if self.dirty_state.dirty_bitmap[idx] & (1 << bit) == 0 { 244 | // Block is not dirty, add it to the dirty list 245 | self.dirty_state.dirty.push(block); 246 | 247 | // Update the dirty bitmap 248 | self.dirty_state.dirty_bitmap[idx] |= 1 << bit; 249 | } 250 | } 251 | 252 | Some(()) 253 | } 254 | 255 | /// Get the maximum size of guest memory 256 | #[inline] 257 | pub fn len(&self) -> usize { 258 | self.memory.len() 259 | } 260 | 261 | /// Get the dirty list length 262 | #[inline] 263 | pub fn dirty_len(&self) -> usize { 264 | self.dirty_state.dirty.len() 265 | } 266 | 267 | /// Set the dirty list length 268 | #[inline] 269 | pub unsafe fn set_dirty_len(&mut self, len: usize) { 270 | self.dirty_state.dirty.set_len(len); 271 | } 272 | 273 | /// Get the tuple of (memory ptr, permissions pointer, dirty pointer, 274 | /// dirty bitmap pointer) 275 | #[inline] 276 | pub fn jit_addrs(&self) -> (usize, usize, usize, usize) { 277 | ( 278 | self.memory.as_ptr() as usize, 279 | self.permissions.as_ptr() as usize, 280 | self.dirty_state.dirty.as_ptr() as usize, 281 | self.dirty_state.dirty_bitmap.as_ptr() as usize, 282 | ) 283 | } 284 | 285 | /// Write the bytes from `buf` into `addr` 286 | pub fn write_from(&mut self, addr: VirtAddr, buf: &[u8]) 287 | -> Result<(), VmExit> { 288 | let perms = 289 | self.permissions.get_mut(addr.0..addr.0.checked_add(buf.len()) 290 | .ok_or(VmExit::AddressIntegerOverflow)?) 291 | .ok_or(VmExit::AddressMiss(addr, buf.len()))?; 292 | 293 | // Check permissions 294 | let mut has_raw = false; 295 | for (idx, &perm) in perms.iter().enumerate() { 296 | // Accumulate if any permission has the raw bit set, this will 297 | // allow us to bypass permission updates if no RAW is in use 298 | has_raw |= (perm.0 & PERM_RAW) != 0; 299 | 300 | if (perm.0 & PERM_WRITE) == 0 { 301 | // Permission denied, return error 302 | return Err(VmExit::WriteFault(VirtAddr(addr.0 + idx))); 303 | } 304 | } 305 | 306 | // Copy the buffer into memory! 
307 | self.memory[addr.0..addr.0 + buf.len()].copy_from_slice(buf); 308 | 309 | // Compute dirty bit blocks 310 | let block_start = addr.0 / DIRTY_BLOCK_SIZE; 311 | let block_end = (addr.0 + buf.len()) / DIRTY_BLOCK_SIZE; 312 | for block in block_start..=block_end { 313 | // Determine the bitmap position of the dirty block 314 | let idx = block / 64; 315 | let bit = block % 64; 316 | 317 | // Check if the block is not dirty 318 | if self.dirty_state.dirty_bitmap[idx] & (1 << bit) == 0 { 319 | // Block is not dirty, add it to the dirty list 320 | self.dirty_state.dirty.push(block); 321 | 322 | // Update the dirty bitmap 323 | self.dirty_state.dirty_bitmap[idx] |= 1 << bit; 324 | } 325 | } 326 | 327 | // Update RaW bits 328 | if has_raw { 329 | perms.iter_mut().for_each(|x| { 330 | if (x.0 & PERM_RAW) != 0 { 331 | // Mark memory as readable 332 | *x = Perm((x.0 | PERM_READ) & (!PERM_RAW)); 333 | } 334 | }); 335 | } 336 | 337 | Ok(()) 338 | } 339 | 340 | /// Return a mutable slice to permissions at `addr` for `size` bytes 341 | pub fn peek_perms(&mut self, addr: VirtAddr, size: usize) 342 | -> Result<&mut [Perm], VmExit> { 343 | self.permissions.get_mut(addr.0..addr.0.checked_add(size) 344 | .ok_or(VmExit::AddressIntegerOverflow)?) 345 | .ok_or(VmExit::AddressMiss(addr, size)) 346 | } 347 | 348 | /// Return a mutable slice to memory at `addr` for `size` bytes that 349 | /// has been validated to match all `exp_perms` 350 | pub fn peek(&mut self, addr: VirtAddr, size: usize, 351 | exp_perms: Perm) -> Result<&mut [u8], VmExit> { 352 | let perms = 353 | self.permissions.get_mut(addr.0..addr.0.checked_add(size) 354 | .ok_or(VmExit::AddressIntegerOverflow)?) 355 | .ok_or(VmExit::AddressMiss(addr, size))?; 356 | 357 | // Check permissions 358 | for (idx, perm) in perms.iter().enumerate() { 359 | if (perm.0 & exp_perms.0) != exp_perms.0 { 360 | if exp_perms.0 == PERM_READ && (perm.0 & PERM_RAW) != 0 { 361 | // If we were attempting a normal read, and the readable 362 | // memory was unreadable but had the RAW bit set, report 363 | // it as an uninitialized memory access rather than a read 364 | // access 365 | return Err(VmExit::UninitFault(VirtAddr(addr.0 + idx))); 366 | } else if exp_perms.0 == PERM_WRITE { 367 | return Err(VmExit::WriteFault(VirtAddr(addr.0 + idx))); 368 | } else { 369 | return Err(VmExit::ReadFault(VirtAddr(addr.0 + idx))); 370 | } 371 | } 372 | } 373 | 374 | // Update dirty bits 375 | for (ii, perm) in perms.iter_mut().enumerate() { 376 | // Check if we're getting write access 377 | if (exp_perms.0 & PERM_WRITE) != 0 { 378 | // Propagate RAW 379 | if (perm.0 & PERM_RAW) != 0 { 380 | perm.0 |= PERM_READ; 381 | } 382 | 383 | // Update dirty bits 384 | self.dirty_state.update_dirty(VirtAddr(addr.0 + ii)); 385 | } 386 | 387 | // Indicate the memory has been accessed 388 | if (exp_perms.0 & PERM_READ) != 0 { 389 | perm.0 |= PERM_ACC; 390 | self.dirty_state.update_dirty(VirtAddr(addr.0 + ii)); 391 | } 392 | } 393 | 394 | // Return a slice to the memory 395 | Ok(&mut self.memory[addr.0..addr.0 + size]) 396 | } 397 | 398 | /// Read the memory at `addr` into `buf` 399 | /// This function checks to see if all bits in `exp_perms` are set in the 400 | /// permission bytes. If this is zero, we ignore permissions entirely. 401 | pub fn read_into_perms(&mut self, addr: VirtAddr, buf: &mut [u8], 402 | exp_perms: Perm) -> Result<(), VmExit> { 403 | let perms = 404 | self.permissions.get_mut(addr.0..addr.0.checked_add(buf.len()) 405 | .ok_or(VmExit::AddressIntegerOverflow)?) 
.ok_or(VmExit::AddressMiss(addr, buf.len()))?; 407 | 408 | // Check permissions 409 | for (idx, &perm) in perms.iter().enumerate() { 410 | if (perm.0 & exp_perms.0) != exp_perms.0 { 411 | if exp_perms.0 == PERM_READ && (perm.0 & PERM_RAW) != 0 { 412 | // If we were attempting a normal read, and the readable 413 | // memory was unreadable but had the RAW bit set, report 414 | // it as an uninitialized memory access rather than a read 415 | // access 416 | return Err(VmExit::UninitFault(VirtAddr(addr.0 + idx))); 417 | } else { 418 | return Err(VmExit::ReadFault(VirtAddr(addr.0 + idx))); 419 | } 420 | } 421 | } 422 | 423 | // Copy the memory 424 | buf.copy_from_slice(&self.memory[addr.0..addr.0 + buf.len()]); 425 | 426 | // Indicate that this memory has been accessed 427 | for (ii, perm) in perms.iter_mut().enumerate() { 428 | perm.0 |= PERM_ACC; 429 | self.dirty_state.update_dirty(VirtAddr(addr.0 + ii)); 430 | } 431 | 432 | Ok(()) 433 | } 434 | 435 | /// Read the memory at `addr` into `buf` 436 | pub fn read_into(&mut self, addr: VirtAddr, buf: &mut [u8]) 437 | -> Result<(), VmExit> { 438 | self.read_into_perms(addr, buf, Perm(PERM_READ)) 439 | } 440 | 441 | /// Read a type `T` at `addr` expecting `exp_perms` 442 | pub fn read_perms<T: Primitive>(&mut self, addr: VirtAddr, 443 | exp_perms: Perm) -> Result<T, VmExit> { 444 | let mut tmp = [0u8; 16]; 445 | self.read_into_perms(addr, &mut tmp[..core::mem::size_of::<T>()], 446 | exp_perms)?; 447 | Ok(unsafe { core::ptr::read_unaligned(tmp.as_ptr() as *const T) }) 448 | } 449 | 450 | /// Read a type `T` at `addr` 451 | pub fn read<T: Primitive>(&mut self, addr: VirtAddr) -> Result<T, VmExit> { 452 | self.read_perms(addr, Perm(PERM_READ)) 453 | } 454 | 455 | /// Write a `val` to `addr` 456 | pub fn write<T: Primitive>(&mut self, addr: VirtAddr, 457 | val: T) -> Result<(), VmExit> { 458 | let tmp = unsafe { 459 | core::slice::from_raw_parts(&val as *const T as *const u8, 460 | core::mem::size_of::<T>()) 461 | }; 462 | 463 | self.write_from(addr, tmp) 464 | } 465 | 466 | /// Load a file into the emulator's address space using the sections as 467 | /// described 468 | pub fn load<P: AsRef<Path>>(&mut self, filename: P, 469 | sections: &[Section]) -> Option<()> { 470 | // Read the input file 471 | let contents = std::fs::read(filename).ok()?; 472 | 473 | // Go through each section and load it 474 | for section in sections { 475 | // Set memory to writable 476 | self.set_permissions(section.virt_addr, section.mem_size, 477 | Perm(PERM_WRITE))?; 478 | 479 | // Write in the original file contents 480 | self.write_from(section.virt_addr, 481 | contents.get( 482 | section.file_off.. 483 | section.file_off.checked_add(section.file_size)?)?
484 | ).ok()?; 485 | 486 | // Write in any padding with zeros 487 | if section.mem_size > section.file_size { 488 | let padding = vec![0u8; section.mem_size - section.file_size]; 489 | self.write_from( 490 | VirtAddr(section.virt_addr.0 491 | .checked_add(section.file_size)?), 492 | &padding).ok()?; 493 | } 494 | 495 | // Demote permissions to originals 496 | self.set_permissions(section.virt_addr, section.mem_size, 497 | section.permissions)?; 498 | 499 | // Update the allocator beyond any sections we load 500 | self.cur_alc = VirtAddr(std::cmp::max( 501 | self.cur_alc.0, 502 | (section.virt_addr.0 + section.mem_size + 0xfff) & !0xfff 503 | )); 504 | } 505 | 506 | Some(()) 507 | } 508 | } 509 | 510 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | pub mod primitive; 2 | pub mod mmu; 3 | pub mod emulator; 4 | pub mod jitcache; 5 | pub mod affinity; 6 | 7 | use std::fs::File; 8 | use std::io::{self, Write}; 9 | use std::path::Path; 10 | use std::sync::{Arc, Mutex}; 11 | use std::sync::atomic::AtomicU64; 12 | use std::time::{Duration, Instant}; 13 | use std::convert::TryInto; 14 | use std::collections::{BTreeMap, BTreeSet}; 15 | use mmu::{VirtAddr, Perm, Section, PERM_READ, PERM_WRITE, PERM_EXEC, PERM_ACC}; 16 | use emulator::{Emulator, Register, VmExit, EmuFile, FaultType, AddressType}; 17 | use emulator::{CoverageType, COVERAGE_ENTRY_EMPTY}; 18 | use jitcache::JitCache; 19 | 20 | use aht::Aht; 21 | use falkhash::FalkHasher; 22 | use atomicvec::AtomicVec; 23 | use basic_mutator::{Mutator, InputDatabase, EmptyDatabase}; 24 | 25 | /// If set, uses the enclosed string as a filename and uses it as the input 26 | /// without any corruption 27 | const REPRO_MODE: Option<&str> = None; //Some("crashes/0x69478_Read_Normal.crash"); 28 | 29 | /// If set, prints information about all hooked allocations 30 | const VERBOSE_ALLOCS: bool = false; 31 | 32 | /// If `true` the guest writes to stdout and stderr will be printed to our own 33 | /// stdout and stderr 34 | const VERBOSE_GUEST_PRINTS: bool = false; 35 | 36 | fn rdtsc() -> u64 { 37 | unsafe { std::arch::x86_64::_rdtsc() } 38 | } 39 | 40 | struct Rng(u64); 41 | 42 | impl Rng { 43 | /// Create a new random number generator 44 | fn new() -> Self { 45 | //Rng(0x8644d6eb17b7ab1a ^ rdtsc()) 46 | Rng(0x8644d6eb17b7ab1a) 47 | } 48 | 49 | /// Generate a random number 50 | #[inline] 51 | fn rand(&mut self) -> usize { 52 | let val = self.0; 53 | self.0 ^= self.0 << 13; 54 | self.0 ^= self.0 >> 17; 55 | self.0 ^= self.0 << 43; 56 | val as usize 57 | } 58 | } 59 | 60 | /// Stat structure from kernel_stat64 61 | #[repr(C)] 62 | #[derive(Default, Debug)] 63 | struct Stat { 64 | st_dev: u64, 65 | st_ino: u64, 66 | st_mode: u32, 67 | st_nlink: u32, 68 | st_uid: u32, 69 | st_gid: u32, 70 | st_rdev: u64, 71 | __pad1: u64, 72 | 73 | st_size: i64, 74 | st_blksize: i32, 75 | __pad2: i32, 76 | 77 | st_blocks: i64, 78 | 79 | st_atime: u64, 80 | st_atimensec: u64, 81 | st_mtime: u64, 82 | st_mtimensec: u64, 83 | st_ctime: u64, 84 | st_ctimensec: u64, 85 | 86 | __glibc_reserved: [i32; 2], 87 | } 88 | 89 | fn handle_syscall(emu: &mut Emulator) -> Result<(), VmExit> { 90 | // Get the syscall number 91 | let num = emu.reg(Register::A7); 92 | 93 | //print!("Syscall {}\n", num); 94 | 95 | match num { 96 | 214 => { 97 | // brk() 98 | let req_base = emu.reg(Register::A0); 99 | if req_base == 0 { 100 | emu.set_reg(Register::A0, 0); 101 | return Ok(()); 
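(Aside: the `Rng` defined near the top of this file is a plain xorshift-style PRNG. Below is a standalone copy to illustrate its behavior; note that with the `rdtsc()` seed mixing commented out, every thread starts from the same constant seed and therefore draws the same deterministic sequence, which helps reproducibility.)

```rust
// Standalone copy of the xorshift-style generator from this file. Three
// shift/xor steps permute the 64-bit state, and the pre-update state is
// what gets returned.
struct Rng(u64);

impl Rng {
    fn rand(&mut self) -> usize {
        let val = self.0;
        self.0 ^= self.0 << 13;
        self.0 ^= self.0 >> 17;
        self.0 ^= self.0 << 43;
        val as usize
    }
}

fn main() {
    // Same (nonzero) seed, same stream: handy for repro, no wall-clock entropy
    let mut a = Rng(0x8644d6eb17b7ab1a);
    let mut b = Rng(0x8644d6eb17b7ab1a);
    assert_eq!(a.rand(), b.rand());

    // Values are bounded by modulo at the call site, e.g. `rng.rand() % 32`
    // in the worker's corpus-selection logic
    let sel = a.rand() % 32;
    assert!(sel < 32);
}
```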
102 | } 103 | 104 | panic!("Not expecting brk"); 105 | 106 | /* 107 | let increment = if req_base != 0 { 108 | (req_base as i64).checked_sub(cur_base.0 as i64) 109 | .ok_or(VmExit::SyscallIntegerOverflow)? 110 | } else { 111 | 0 112 | }; 113 | 114 | // We don't handle negative brks yet 115 | if increment < 0 { 116 | emu.set_reg(Register::A0, cur_base.0 as u64); 117 | return Ok(()); 118 | } 119 | 120 | // Attempt to extend data section by increment 121 | if let Some(_) = emu.memory.allocate(increment as usize) { 122 | let new_base = cur_base.0 + increment as usize; 123 | emu.set_reg(Register::A0, new_base as u64); 124 | } else { 125 | emu.set_reg(Register::A0, !0); 126 | } 127 | 128 | Ok(())*/ 129 | } 130 | 64 => { 131 | // write() 132 | let fd = emu.reg(Register::A0) as usize; 133 | let buf = emu.reg(Register::A1); 134 | let len = emu.reg(Register::A2); 135 | 136 | let file = emu.files.get_file(fd); 137 | if let Some(Some(file)) = file { 138 | if file == &EmuFile::Stdout || file == &EmuFile::Stderr { 139 | // Writes to stdout and stderr 140 | 141 | // Get access to the underlying bytes to write 142 | let bytes = emu.memory.peek(VirtAddr(buf as usize), 143 | len as usize, Perm(PERM_READ))?; 144 | 145 | if VERBOSE_GUEST_PRINTS { 146 | if let Ok(st) = core::str::from_utf8(bytes) { 147 | print!("{}", st); 148 | } 149 | } 150 | 151 | // Set that all bytes were read 152 | emu.set_reg(Register::A0, len); 153 | } else { 154 | panic!("Write to valid but unhandled FD"); 155 | } 156 | } else { 157 | // Unknown FD 158 | emu.set_reg(Register::A0, !0); 159 | } 160 | 161 | Ok(()) 162 | } 163 | 169 => { 164 | // gettimeofday() 165 | emu.set_reg(Register::A0, 0); 166 | Ok(()) 167 | } 168 | 63 => { 169 | // read() 170 | let fd = emu.reg(Register::A0) as usize; 171 | let buf = emu.reg(Register::A1) as usize; 172 | let len = emu.reg(Register::A2) as usize; 173 | 174 | // Check if the FD is valid 175 | let file = emu.files.get_file(fd); 176 | if file.is_none() || file.as_ref().unwrap().is_none() { 177 | // FD was not valid, return out with an error 178 | emu.set_reg(Register::A0, !0); 179 | return Ok(()); 180 | } 181 | 182 | if let Some(Some(EmuFile::FuzzInput { ref mut cursor })) = file { 183 | // Compute the ending cursor from this read 184 | let result_cursor = core::cmp::min( 185 | cursor.saturating_add(len), 186 | emu.fuzz_input.len()); 187 | 188 | // Write in the bytes 189 | emu.memory.write_from(VirtAddr(buf), 190 | &emu.fuzz_input[*cursor..result_cursor])?; 191 | 192 | // Compute bytes read 193 | let bread = result_cursor - *cursor; 194 | 195 | // Update the cursor 196 | *cursor = result_cursor; 197 | 198 | // Return number of bytes read 199 | emu.set_reg(Register::A0, bread as u64); 200 | } else { 201 | unreachable!(); 202 | } 203 | 204 | Ok(()) 205 | } 206 | 62 => { 207 | // lseek() 208 | let fd = emu.reg(Register::A0) as usize; 209 | let offset = emu.reg(Register::A1) as i64; 210 | let whence = emu.reg(Register::A2) as i32; 211 | 212 | const SEEK_SET: i32 = 0; 213 | const SEEK_CUR: i32 = 1; 214 | const SEEK_END: i32 = 2; 215 | 216 | // Check if the FD is valid 217 | let file = emu.files.get_file(fd); 218 | if file.is_none() || file.as_ref().unwrap().is_none() { 219 | // FD was not valid, return out with an error 220 | emu.set_reg(Register::A0, !0); 221 | return Ok(()); 222 | } 223 | 224 | if let Some(Some(EmuFile::FuzzInput { ref mut cursor })) = file { 225 | let new_cursor = match whence { 226 | SEEK_SET => offset, 227 | SEEK_CUR => (*cursor as i64).saturating_add(offset), 228 | SEEK_END => 
(emu.fuzz_input.len() as i64) 229 | .saturating_add(offset), 230 | _ => { 231 | // Invalid whence, return error 232 | emu.set_reg(Register::A0, !0); 233 | return Ok(()); 234 | } 235 | }; 236 | 237 | // Make sure the cursor falls in bounds of [0, file_size] 238 | let new_cursor = core::cmp::max(0i64, new_cursor); 239 | let new_cursor = 240 | core::cmp::min(new_cursor, emu.fuzz_input.len() as i64); 241 | 242 | // Update the cursor 243 | *cursor = new_cursor as usize; 244 | 245 | // Return the new cursor position 246 | emu.set_reg(Register::A0, new_cursor as u64); 247 | } else { 248 | unreachable!(); 249 | } 250 | 251 | Ok(()) 252 | } 253 | 1024 => { 254 | // open() 255 | let filename = emu.reg(Register::A0) as usize; 256 | let flags = emu.reg(Register::A1); 257 | let _mode = emu.reg(Register::A2); 258 | 259 | assert!(flags == 0, "Currently we only handle O_RDONLY"); 260 | 261 | // Determine the length of the filename 262 | let mut fnlen = 0; 263 | while emu.memory.read::(VirtAddr(filename + fnlen))? != 0 { 264 | fnlen += 1; 265 | } 266 | 267 | // Get the filename bytes 268 | let bytes = emu.memory.peek(VirtAddr(filename), 269 | fnlen, Perm(PERM_READ))?; 270 | 271 | print!("Open {:x?}\n", bytes); 272 | 273 | if bytes == b"testfn" { 274 | // Create a new file descriptor 275 | let fd = emu.alloc_file(); 276 | 277 | // Get access to the file, unwrap here is safe because there's 278 | // no way the file is not a valid FD if we got it from our own 279 | // APIs 280 | let file = emu.files.get_file(fd).unwrap(); 281 | 282 | // Mark that this file should be backed by our fuzz input 283 | *file = Some(EmuFile::FuzzInput { cursor: 0 }); 284 | 285 | // Return a new fd 286 | emu.set_reg(Register::A0, fd as u64); 287 | } else { 288 | // Unknown filename 289 | emu.set_reg(Register::A0, !0); 290 | } 291 | 292 | Ok(()) 293 | } 294 | 1038 => { 295 | // stat() 296 | let filename = emu.reg(Register::A0) as usize; 297 | let statbuf = emu.reg(Register::A1); 298 | 299 | // Determine the length of the filename 300 | let mut fnlen = 0; 301 | while emu.memory.read::(VirtAddr(filename + fnlen))? 
!= 0 { 302 | fnlen += 1; 303 | } 304 | 305 | // Get the filename bytes 306 | let bytes = emu.memory.peek(VirtAddr(filename), 307 | fnlen, Perm(PERM_READ))?; 308 | 309 | if bytes == b"testfn" { 310 | let mut stat = Stat::default(); 311 | stat.st_dev = 0x803; 312 | stat.st_ino = 0x81889; 313 | stat.st_mode = 0x81a4; 314 | stat.st_nlink = 0x1; 315 | stat.st_uid = 0x3e8; 316 | stat.st_gid = 0x3e8; 317 | stat.st_rdev = 0x0; 318 | stat.st_size = emu.fuzz_input.len() as i64; 319 | stat.st_blksize = 0x1000; 320 | stat.st_blocks = (emu.fuzz_input.len() as i64 + 511) / 512; 321 | stat.st_atime = 0x5f0fe246; 322 | stat.st_mtime = 0x5f0fe244; 323 | stat.st_ctime = 0x5f0fe244; 324 | 325 | // Cast the stat structure to raw bytes 326 | let stat = unsafe { 327 | core::slice::from_raw_parts( 328 | &stat as *const Stat as *const u8, 329 | core::mem::size_of_val(&stat)) 330 | }; 331 | 332 | // Write in the stat data 333 | emu.memory.write_from(VirtAddr(statbuf as usize), stat)?; 334 | emu.set_reg(Register::A0, 0); 335 | } else { 336 | // Error 337 | emu.set_reg(Register::A0, !0); 338 | } 339 | 340 | Ok(()) 341 | } 342 | 80 => { 343 | // fstat() 344 | let fd = emu.reg(Register::A0) as usize; 345 | let statbuf = emu.reg(Register::A1); 346 | 347 | // Check if the FD is valid 348 | let file = emu.files.get_file(fd); 349 | if file.is_none() || file.as_ref().unwrap().is_none() { 350 | // FD was not valid, return out with an error 351 | emu.set_reg(Register::A0, !0); 352 | return Ok(()); 353 | } 354 | 355 | if let Some(Some(EmuFile::FuzzInput { .. })) = file { 356 | let mut stat = Stat::default(); 357 | stat.st_dev = 0x803; 358 | stat.st_ino = 0x81889; 359 | stat.st_mode = 0x81a4; 360 | stat.st_nlink = 0x1; 361 | stat.st_uid = 0x3e8; 362 | stat.st_gid = 0x3e8; 363 | stat.st_rdev = 0x0; 364 | stat.st_size = emu.fuzz_input.len() as i64; 365 | stat.st_blksize = 0x1000; 366 | stat.st_blocks = (emu.fuzz_input.len() as i64 + 511) / 512; 367 | stat.st_atime = 0x5f0fe246; 368 | stat.st_mtime = 0x5f0fe244; 369 | stat.st_ctime = 0x5f0fe244; 370 | 371 | // Cast the stat structure to raw bytes 372 | let stat = unsafe { 373 | core::slice::from_raw_parts( 374 | &stat as *const Stat as *const u8, 375 | core::mem::size_of_val(&stat)) 376 | }; 377 | 378 | // Write in the stat data 379 | emu.memory.write_from(VirtAddr(statbuf as usize), stat)?; 380 | emu.set_reg(Register::A0, 0); 381 | } else { 382 | // Error 383 | emu.set_reg(Register::A0, !0); 384 | } 385 | 386 | Ok(()) 387 | } 388 | 57 => { 389 | // close() 390 | let fd = emu.reg(Register::A0) as usize; 391 | 392 | if let Some(file) = emu.files.get_file(fd) { 393 | if file.is_some() { 394 | // File was present and currently open, close it 395 | 396 | // Close the file 397 | *file = None; 398 | 399 | // Just return success for now 400 | emu.set_reg(Register::A0, 0); 401 | } else { 402 | // File was in a closed state 403 | emu.set_reg(Register::A0, !0); 404 | } 405 | } else { 406 | // FD out of bounds 407 | emu.set_reg(Register::A0, !0); 408 | } 409 | 410 | Ok(()) 411 | } 412 | 93 => { 413 | // exit() 414 | Err(VmExit::Exit) 415 | } 416 | _ => { 417 | panic!("Unhandled syscall {} @ {:#x}\n", num, 418 | emu.reg(Register::Pc)); 419 | } 420 | } 421 | } 422 | 423 | #[derive(Default)] 424 | /// Statistics during fuzzing 425 | struct Statistics { 426 | /// Number of fuzz cases 427 | fuzz_cases: u64, 428 | 429 | /// Number of risc-v instructions executed 430 | instrs_execed: u64, 431 | 432 | /// Total number of crashes 433 | crashes: u64, 434 | 435 | /// Total number of CPU cycles spent 
in the workers
436 |     total_cycles: u64,
437 |
438 |     /// Total number of CPU cycles spent resetting the guest
439 |     reset_cycles: u64,
440 |
441 |     /// Total number of CPU cycles spent emulating
442 |     vm_cycles: u64,
443 |
444 |     /// Frequencies of vmexits
445 |     vmexits: BTreeMap<VmExit, u64>,
446 | }
447 |
448 | fn worker(thr_id: usize, mut emu: Emulator, original: Arc<Emulator>,
449 |           stats: Arc<Mutex<Statistics>>, corpus: Arc<Corpus>) {
450 |     // Pin to a core
451 |     affinity::set_affinity(thr_id).unwrap();
452 |
453 |     // Create a new random number generator
454 |     let mut rng = Rng::new();
455 |
456 |     // Get the buffer and the length for the input
457 |     let buf = emu.reg(Register::A0);
458 |     let len = emu.reg(Register::A1) as usize;
459 |
460 |     // Create a mutator
461 |     let mut mutator = Mutator::new().seed(rng.rand() as u64)
462 |         .max_input_size(1024 * 1024);
463 |
464 |     loop {
465 |         // Start a timer
466 |         let batch_start = rdtsc();
467 |
468 |         let mut local_stats = Statistics::default();
469 |
470 |         let it = rdtsc();
471 |         while (rdtsc() - it) < 500_000_000 {
472 |             // Reset emu to original state
473 |             let it = rdtsc();
474 |             emu.reset(&*original, &corpus, |emu| {
475 |                 let perms = emu.memory.peek_perms(VirtAddr(buf as usize),
476 |                     len).unwrap();
477 |                 perms.iter().map(|perm| (perm.0 & PERM_ACC) != 0).collect()
478 |             });
479 |             local_stats.reset_cycles += rdtsc() - it;
480 |
481 |             // Number of instructions executed this fuzz case
482 |             let mut run_instrs = 0u64;
483 |
484 |             // Clear the fuzz input
485 |             emu.fuzz_input.clear();
486 |             mutator.input.clear();
487 |             mutator.accessed.clear();
488 |
489 |             let mut options: u32 = rng.rand() as u32;
490 |
491 |             // Pick a random file from the corpus as an input
492 |             if (rng.rand() % 32) != 0 && corpus.inputs.len() > 0 {
493 |                 // Build upon a previous input from the coverage guided inputs
494 |                 let sel = rng.rand() % corpus.inputs.len();
495 |                 if let Some(input) = corpus.inputs.get(sel) {
496 |                     // Copy the input
497 |                     options = u32::from_ne_bytes(
498 |                         input.data[input.data.len() - 4..]
499 |                         .try_into().unwrap());
500 |                     mutator.input.extend_from_slice(
501 |                         &input.data[..input.data.len() - 4]);
502 |
503 |                     // Set a timeout which can reach all coverage for this
504 |                     // input
505 |                     emu.set_timeout(input.instrs + 1_000_000);
506 |
507 |                     // Update accessed information
508 |                     //mutator.accessed.extend_from_slice(&input.accessed);
509 |                 }
510 |             }
511 |
512 |             if mutator.input.len() == 0 && corpus.corpus.len() > 0 {
513 |                 // Build upon an input from the corpus
514 |                 let sel = rng.rand() % corpus.corpus.len();
515 |                 if let Some(input) = corpus.corpus.get(sel) {
516 |                     mutator.input.extend_from_slice(&input);
517 |                 }
518 |             }
519 |
520 |             if mutator.input.len() == 0 {
521 |                 // Just make a blank input
522 |                 mutator.input.resize(1024 * 1024, 0u8);
523 |             }
524 |
525 |             // Mutate!
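(Aside on the selection logic above: saved inputs carry a 4-byte `options` word appended to the raw data. The worker peels it back off with `from_ne_bytes`, and `InputDatabase::input()` later in this file likewise truncates the last four bytes before the mutator ever sees them. A minimal sketch of that framing; the `pack`/`unpack` helper names are illustrative, not part of the project.)

```rust
use std::convert::TryInto;

// Append the options word when storing an input (mirrors
// `emu.fuzz_input.extend_from_slice(&options.to_ne_bytes())` below)
fn pack(data: &[u8], options: u32) -> Vec<u8> {
    let mut stored = data.to_vec();
    stored.extend_from_slice(&options.to_ne_bytes());
    stored
}

// Split a stored input back into (raw data, options); assumes the 4-byte
// trailer is always present, as the corpus format guarantees
fn unpack(stored: &[u8]) -> (&[u8], u32) {
    let (data, tail) = stored.split_at(stored.len() - 4);
    (data, u32::from_ne_bytes(tail.try_into().unwrap()))
}

fn main() {
    let stored = pack(b"fuzz input", 0x1337);
    let (data, options) = unpack(&stored);
    assert_eq!(data, b"fuzz input");
    assert_eq!(options, 0x1337);
}
```

The `mutator.mutate()` call that follows corrupts only the data portion; the options word is re-appended (and sometimes re-rolled) before the input is injected into the guest.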
526 | mutator.mutate(rng.rand() % 10, &*corpus); 527 | 528 | // 1 in 8 Chance to change options 529 | if rng.rand() % 8 == 0 { 530 | options = rng.rand() as u32; 531 | } 532 | 533 | // 1 in 2 Chance of no extra options 534 | if rng.rand() % 2 == 0 { 535 | options = 0; 536 | } 537 | 538 | emu.fuzz_input.extend_from_slice(&mutator.input); 539 | assert!(emu.fuzz_input.len() <= len, "{} {}\n", 540 | emu.fuzz_input.len(), len); 541 | 542 | emu.fuzz_input.extend_from_slice(&options.to_ne_bytes()); 543 | 544 | // If we're in repro mode, use the repro file 545 | if let Some(repro_file) = REPRO_MODE { 546 | emu.fuzz_input.clear(); 547 | emu.fuzz_input.extend_from_slice(&std::fs::read(repro_file) 548 | .expect("Failed to read repro file")); 549 | } 550 | 551 | // Inject the input 552 | emu.memory.write_from(VirtAddr(buf as usize), &emu.fuzz_input) 553 | .unwrap(); 554 | emu.set_reg(Register::A1, emu.fuzz_input.len() as u64); 555 | 556 | let vmexit = loop { 557 | let vmexit = emu.run(&mut run_instrs, 558 | &mut local_stats.vm_cycles, 559 | &*corpus) 560 | .expect_err("Failed to execute emulator"); 561 | 562 | match vmexit { 563 | VmExit::Syscall => { 564 | if let Err(vmexit) = handle_syscall(&mut emu) { 565 | break vmexit; 566 | } 567 | 568 | // Advance PC 569 | let pc = emu.reg(Register::Pc); 570 | emu.set_reg(Register::Pc, pc.wrapping_add(4)); 571 | } 572 | _ => break vmexit, 573 | } 574 | }; 575 | 576 | if let Some((fault_type, vaddr)) = vmexit.is_crash() { 577 | // Update crash stats 578 | local_stats.crashes += 1; 579 | 580 | // Attempt to update hash table 581 | let pc = VirtAddr(emu.reg(Register::Pc) as usize); 582 | let key = (pc, fault_type, AddressType::from(vaddr)); 583 | corpus.unique_crashes.entry_or_insert(&key, pc.0, || { 584 | // Save the input and log it in the hash table 585 | let hash = corpus.hasher.hash(&emu.fuzz_input); 586 | corpus.input_hashes.entry_or_insert( 587 | &hash, hash as usize, || { 588 | let perms = emu.memory.peek_perms(VirtAddr(buf as usize), 589 | len).unwrap(); 590 | let accessed: Vec = 591 | perms.iter().map(|perm| (perm.0 & PERM_ACC) != 0) 592 | .collect(); 593 | corpus.inputs.push( 594 | Box::new(Input::new(emu.timeout(), 595 | emu.fuzz_input.clone(), accessed))); 596 | Box::new(()) 597 | }); 598 | 599 | // Save the crashing file 600 | let crash_fn = Path::new("crashes").join( 601 | format!("{:#x}_{:?}_{:?}.crash", 602 | (key.0).0, key.1, key.2)); 603 | let reg_fn = crash_fn.with_extension("regs"); 604 | print!("New crash {:?}\n", crash_fn); 605 | std::fs::write(&crash_fn, 606 | &emu.fuzz_input).expect("Failed to write fuzz input"); 607 | std::fs::write(®_fn, 608 | &format!("{}", emu)) 609 | .expect("Failed to write crash register state"); 610 | 611 | Box::new(()) 612 | }); 613 | } 614 | 615 | // Update vmexit frequencies 616 | *local_stats.vmexits.entry(vmexit).or_insert(0) += 1; 617 | 618 | local_stats.instrs_execed += run_instrs; 619 | local_stats.fuzz_cases += 1; 620 | } 621 | 622 | // Get access to statistics 623 | let mut stats = stats.lock().unwrap(); 624 | 625 | stats.fuzz_cases += local_stats.fuzz_cases; 626 | stats.crashes += local_stats.crashes; 627 | stats.instrs_execed += local_stats.instrs_execed; 628 | stats.reset_cycles += local_stats.reset_cycles; 629 | stats.vm_cycles += local_stats.vm_cycles; 630 | 631 | for (vme, freq) in local_stats.vmexits.iter() { 632 | *stats.vmexits.entry(*vme).or_insert(0) += freq; 633 | } 634 | local_stats.vmexits.clear(); 635 | 636 | // Compute amount of time during the batch 637 | let batch_elapsed = rdtsc() - 
batch_start;
638 |         stats.total_cycles += batch_elapsed;
639 |     }
640 | }
641 |
642 | pub struct Input {
643 |     /// The instruction count of the most recently generated coverage from this
644 |     /// input. This allows us to know how "deep" we need to fuzz this input
645 |     instrs: u64,
646 |
647 |     /// The raw input
648 |     data: Vec<u8>,
649 |
650 |     /// A sorted vector of indices from `data` which are used during the fuzz
651 |     /// case
652 |     accessed: Vec<usize>,
653 | }
654 |
655 | impl Input {
656 |     /// Creates a new input from an instruction count, a raw input, and the
657 |     /// accessed mapping associating `data` bytes to accessed ones
658 |     pub fn new(instrs: u64, data: Vec<u8>, accessed: Vec<bool>) -> Input {
659 |         // Sorted vector of accessed indices in `data`
660 |         let mut avec = Vec::new();
661 |
662 |         // Create the vector of indices
663 |         let num_acc = std::cmp::min(data.len(), accessed.len());
664 |         for (ii, &is_acc) in accessed[..num_acc].iter().enumerate() {
665 |             if is_acc {
666 |                 avec.push(ii);
667 |             }
668 |         }
669 |
670 |         /*
671 |         print!("New input {} bytes, {} accessed [{:.4}]\n",
672 |             data.len(), avec.len(), avec.len() as f64 / data.len() as f64);*/
673 |
674 |         Input {
675 |             instrs,
676 |             data,
677 |             accessed: avec
678 |         }
679 |     }
680 | }
681 |
682 | /// Information about inputs and coverage
683 | pub struct Corpus {
684 |     /// Input hash table to dedup inputs
685 |     pub input_hashes: Aht<u128, (), 1048576>,
686 |
687 |     /// Linear list of all inputs
688 |     pub inputs: AtomicVec<Input, 1048576>,
689 |
690 |     /// Linear list of all corpus inputs
691 |     pub corpus: AtomicVec<Vec<u8>, 1048576>,
692 |
693 |     /// Unique crashes
694 |     /// Tuple is (PC, FaultType, AddressType)
695 |     pub unique_crashes: Aht<(VirtAddr, FaultType, AddressType), (), 1048576>,
696 |
697 |     /// Code coverage, (from, to) edges for _all_ branches, including
698 |     /// taken, not taken, indirect, and unconditional
699 |     pub code_coverage: Aht<(VirtAddr, VirtAddr), (), 1048576>,
700 |
701 |     /// Coverage for all types of coverage, (typ, info0, info1, info2)
702 |     pub coverage: Aht<(CoverageType, u64, u64, u64), (), 134217728>,
703 |
704 |     /// Hasher
705 |     pub hasher: FalkHasher,
706 |
707 |     /// Coverage table
708 |     pub coverage_table: Vec<(AtomicU64, AtomicU64)>,
709 |
710 |     /// Coverage log file
711 |     coverage_log: Mutex<File>,
712 |
713 |     /// Lighthouse coverage log file
714 |     lighthouse_log: Mutex<File>,
715 |
716 |     /// Active compile jobs
717 |     compile_jobs: Mutex<BTreeSet<VirtAddr>>,
718 | }
719 |
720 | impl InputDatabase for Corpus {
721 |     fn num_inputs(&self) -> usize { self.inputs.len() }
722 |     fn input(&self, idx: usize) -> Option<&[u8]> {
723 |         self.inputs.get(idx).map(|x| {
724 |             &x.data[..x.data.len() - 4]
725 |         })
726 |     }
727 | }
728 |
729 | fn malloc_bp(emu: &mut Emulator) -> Result<(), VmExit> {
730 |     if let Some(alc) = emu.memory.allocate(emu.reg(Register::A1) as usize) {
731 |         emu.set_reg(Register::A0, alc.0 as u64);
732 |         emu.set_reg(Register::Pc, emu.reg(Register::Ra));
733 |
734 |         if VERBOSE_ALLOCS {
735 |             print!("malloc returned {:#018x} - size was {:#x}\n",
736 |                 alc.0, emu.reg(Register::A1));
737 |         }
738 |
739 |         Ok(())
740 |     } else {
741 |         // Cannot satisfy allocation, return out
742 |         Err(VmExit::OutOfMemory)
743 |     }
744 | }
745 |
746 | fn _calloc_bp(emu: &mut Emulator) -> Result<(), VmExit> {
747 |     let nmemb = emu.reg(Register::A1) as usize;
748 |     let size = emu.reg(Register::A2) as usize;
749 |
750 |     let result = size.checked_mul(nmemb).and_then(|size| {
751 |         let alc = emu.memory.allocate(size)?;
752 |         let tmp = emu.memory.peek(alc, size, Perm(PERM_WRITE))
753 |             .expect("New
allocation not writable?"); 754 | tmp.iter_mut().for_each(|x| *x = 0); 755 | Some(alc) 756 | }).unwrap_or(VirtAddr(0)); 757 | 758 | if result.0 == 0 { 759 | // Cannot satisfy allocation, return out 760 | return Err(VmExit::OutOfMemory); 761 | } 762 | 763 | if VERBOSE_ALLOCS { 764 | print!("calloc returned {:#018x} - size was {:#x}\n", result.0, 765 | size * nmemb); 766 | } 767 | 768 | emu.set_reg(Register::A0, result.0 as u64); 769 | emu.set_reg(Register::Pc, emu.reg(Register::Ra)); 770 | Ok(()) 771 | } 772 | 773 | fn _realloc_bp(emu: &mut Emulator) -> Result<(), VmExit> { 774 | let old_alc = VirtAddr(emu.reg(Register::A1) as usize); 775 | let size = emu.reg(Register::A2) as usize; 776 | 777 | // Get the old allocation size 778 | let old_size = if old_alc == VirtAddr(0) { 779 | // No previous allocation specified, thus no size 780 | 0 781 | } else { 782 | // Attempt to get the old allocation size 783 | emu.memory.get_alc(old_alc).ok_or(VmExit::InvalidFree(old_alc))? 784 | }; 785 | 786 | // Compute the size to copy 787 | let to_copy = core::cmp::min(size, old_size); 788 | 789 | // Allocate the new memory 790 | let new_alc = emu.memory.allocate(size).and_then(|new_alc| { 791 | if VERBOSE_ALLOCS { 792 | print!("realloc {:#018x} -> {:#018x} - size {:#x} -> {:#x}\n", 793 | old_alc.0, 794 | new_alc.0, 795 | old_size, size); 796 | } 797 | 798 | if old_alc != VirtAddr(0) { 799 | // Copy memory 800 | for ii in 0..to_copy { 801 | if let Ok(old) = 802 | emu.memory.read::(VirtAddr(old_alc.0 + ii)) { 803 | // Copy the memory only if we could read it from the old 804 | // allocation. This will preserve the uninitialized state 805 | // of bytes which haven't been initialized in the old 806 | // allocation 807 | emu.memory.write(VirtAddr(new_alc.0 + ii), old).unwrap(); 808 | } 809 | } 810 | 811 | // Free the old allocation 812 | emu.memory.free(old_alc).expect("Failed to free old allocation?"); 813 | } 814 | 815 | Some(new_alc) 816 | }).unwrap_or(VirtAddr(0)); 817 | 818 | if new_alc.0 == 0 { 819 | // Cannot satisfy allocation, return out 820 | return Err(VmExit::OutOfMemory); 821 | } 822 | 823 | emu.set_reg(Register::A0, new_alc.0 as u64); 824 | emu.set_reg(Register::Pc, emu.reg(Register::Ra)); 825 | Ok(()) 826 | } 827 | 828 | fn free_bp(emu: &mut Emulator) -> Result<(), VmExit> { 829 | let base = VirtAddr(emu.reg(Register::A1) as usize); 830 | if base != VirtAddr(0) { 831 | if VERBOSE_ALLOCS { 832 | print!("free {:#018x}\n", base.0); 833 | } 834 | //emu.memory.free(base)?; 835 | } 836 | emu.set_reg(Register::Pc, emu.reg(Register::Ra)); 837 | Ok(()) 838 | } 839 | 840 | fn _end_case(_emu: &mut Emulator) -> Result<(), VmExit> { 841 | Err(VmExit::Exit) 842 | } 843 | 844 | fn snapshot(_emu: &mut Emulator) -> Result<(), VmExit> { 845 | Err(VmExit::Snapshot) 846 | } 847 | 848 | pub fn load_elf>(filename: P, emu: &mut Emulator) 849 | -> io::Result<()> { 850 | use std::process::Command; 851 | 852 | // Invoke readelf to get the LOAD section offsets and information 853 | let output = Command::new("readelf") 854 | .arg("-W") 855 | .arg("-l") 856 | .arg(filename.as_ref().to_str().unwrap()) 857 | .output()?; 858 | assert!(output.status.success(), "readelf returned error"); 859 | let stdout = core::str::from_utf8(&output.stdout) 860 | .expect("Failed to get readelf stdout as a string"); 861 | 862 | let mut entry_point = None; 863 | for line in stdout.lines() { 864 | if line.starts_with("Entry point 0x") { 865 | // Parse out the entry point 866 | entry_point = Some(u64::from_str_radix(&line[14..], 16) 867 | 
.expect("Entry point line malformed")); 868 | } else { 869 | let mut info = line.split_whitespace(); 870 | 871 | // Check if this is a line indicating a load section 872 | if info.next() != Some("LOAD") { 873 | continue; 874 | } 875 | 876 | // Parse out info from the readelf output 877 | let offset = info.next().and_then(|x| 878 | usize::from_str_radix(&x[2..], 16).ok()) 879 | .expect("Failed to parse offset"); 880 | let virt_addr = info.next().and_then(|x| 881 | usize::from_str_radix(&x[2..], 16).ok()) 882 | .expect("Failed to parse virt addr"); 883 | let _phys_addr = info.next(); 884 | let file_size = info.next().and_then(|x| 885 | usize::from_str_radix(&x[2..], 16).ok()) 886 | .expect("Failed to parse file size"); 887 | let mem_size = info.next().and_then(|x| 888 | usize::from_str_radix(&x[2..], 16).ok()) 889 | .expect("Failed to parse memory size"); 890 | let _align = info.next_back(); 891 | 892 | let mut flags = info.fold(String::new(), |acc, x| acc + x + " "); 893 | flags += " "; 894 | 895 | let read = if &flags[0..1] == "R" { PERM_READ } else { 0 }; 896 | let write = if &flags[1..2] == "W" { PERM_WRITE } else { 0 }; 897 | let exec = if &flags[2..3] == "E" { PERM_EXEC } else { 0 }; 898 | 899 | // Load into memory 900 | emu.memory.load(&filename, &[ 901 | Section { 902 | file_off: offset, 903 | virt_addr: VirtAddr(virt_addr), 904 | file_size: file_size, 905 | mem_size: mem_size, 906 | permissions: Perm(read | write | exec), 907 | }, 908 | ]).expect("Failed to load into emulator"); 909 | } 910 | } 911 | 912 | // Invoke nm to get some symbol information 913 | let output = Command::new("nm") 914 | .arg(filename.as_ref().to_str().unwrap()) 915 | .output()?; 916 | assert!(output.status.success(), "nm returned error"); 917 | let stdout = core::str::from_utf8(&output.stdout) 918 | .expect("Failed to get nm stdout as a string"); 919 | 920 | // Parse NM output 921 | for line in stdout.lines() { 922 | let mut info = line.split_whitespace(); 923 | if info.clone().count() != 3 { continue; } 924 | 925 | let addr = usize::from_str_radix(info.next().unwrap(), 16).unwrap(); 926 | let _flag = info.next().unwrap(); 927 | let name = info.next().unwrap(); 928 | 929 | // Register this symbol 930 | emu.add_symbol(name, VirtAddr(addr)); 931 | } 932 | 933 | // Set the program entry point 934 | emu.set_reg(Register::Pc, entry_point.unwrap()); 935 | Ok(()) 936 | } 937 | 938 | fn main() -> io::Result<()> { 939 | std::fs::create_dir_all("inputs")?; 940 | std::fs::create_dir_all("crashes")?; 941 | 942 | // Create a corpus 943 | let corpus = Arc::new(Corpus { 944 | input_hashes: Aht::new(), 945 | inputs: AtomicVec::new(), 946 | hasher: FalkHasher::new(), 947 | unique_crashes: Aht::new(), 948 | code_coverage: Aht::new(), 949 | coverage: Aht::new(), 950 | corpus: AtomicVec::new(), 951 | coverage_log: Mutex::new(File::create("coverage.txt") 952 | .expect("Failed to create coverage file")), 953 | lighthouse_log: Mutex::new(File::create("lighthouse.txt") 954 | .expect("Failed to create lighthouse coverage file")), 955 | compile_jobs: Default::default(), 956 | coverage_table: (0..32 * 1024 * 1024).map(|_| { 957 | (AtomicU64::new(COVERAGE_ENTRY_EMPTY), AtomicU64::new(0)) 958 | }).collect(), 959 | }); 960 | 961 | // Create a JIT cache 962 | let _jit_cache = Arc::new(JitCache::new(VirtAddr(4 * 1024 * 1024))); 963 | 964 | // Create an emulator using the JIT 965 | let emu = Emulator::new(64 * 1024 * 1024); 966 | let mut emu = if REPRO_MODE.is_some() { 967 | emu 968 | } else { 969 | emu.enable_jit(_jit_cache) 970 | }; 971 | 
972 | // Load the initial corpus 973 | for filename in std::fs::read_dir("inputs")?{ 974 | let filename = filename?.path(); 975 | let data = std::fs::read(filename)?; 976 | 977 | // Add the corpus input to the corpus 978 | corpus.corpus.push(Box::new(data)); 979 | } 980 | 981 | // Load the ELF into the memory 982 | load_elf("/home/pleb/fuzz_xml/fuzzer/fuzzer.sym", &mut emu)?; 983 | 984 | const FUZZ_START_SYM: &str = "fuzzme"; 985 | 986 | // Register breakpoints 987 | emu.add_breakpoint(emu.resolve_symbol("_malloc_r").unwrap(), malloc_bp); 988 | emu.add_breakpoint(emu.resolve_symbol("_calloc_r").unwrap(), _calloc_bp); 989 | emu.add_breakpoint(emu.resolve_symbol("_realloc_r").unwrap(), _realloc_bp); 990 | emu.add_breakpoint(emu.resolve_symbol("_free_r").unwrap(), free_bp); 991 | emu.add_breakpoint(emu.resolve_symbol(FUZZ_START_SYM).unwrap(), snapshot); 992 | 993 | // Set up a stack 994 | let stack = emu.memory.allocate(1024 * 1024) 995 | .expect("Failed to allocate stack"); 996 | emu.set_reg(Register::Sp, stack.0 as u64 + 1024 * 1024); 997 | 998 | // Set up the program name 999 | let progname = emu.memory.allocate(4096) 1000 | .expect("Failed to allocate program name"); 1001 | emu.memory.write_from(progname, b"objdump\0") 1002 | .expect("Failed to write program name"); 1003 | let arg1 = emu.memory.allocate(4096) 1004 | .expect("Failed to allocate arg1"); 1005 | emu.memory.write_from(arg1, b"testfn\0") 1006 | .expect("Failed to write arg2"); 1007 | 1008 | macro_rules! push { 1009 | ($expr:expr) => { 1010 | let sp = emu.reg(Register::Sp) - 1011 | core::mem::size_of_val(&$expr) as u64; 1012 | emu.memory.write(VirtAddr(sp as usize), $expr) 1013 | .expect("Push failed"); 1014 | emu.set_reg(Register::Sp, sp); 1015 | } 1016 | } 1017 | 1018 | // Set up the initial program stack state 1019 | push!(0u64); // Auxp 1020 | push!(0u64); // Envp 1021 | push!(0u64); // Argv end 1022 | push!(arg1.0); // Argv 1023 | push!(progname.0); // Argv 1024 | push!(2u64); // Argc 1025 | 1026 | 1027 | loop { 1028 | // Run the emulator to a certain point 1029 | let mut tmp = 0; 1030 | let vmexit = emu.run_emu(&mut tmp, &*corpus) 1031 | .expect_err("Failed to execute emulator"); 1032 | 1033 | match vmexit { 1034 | VmExit::Snapshot => { 1035 | emu.remove_breakpoint( 1036 | emu.resolve_symbol(FUZZ_START_SYM).unwrap()); 1037 | break; 1038 | } 1039 | VmExit::Syscall => { 1040 | print!("Syscall {}\n", emu.reg(Register::A7)); 1041 | if let Err(_vmexit) = handle_syscall(&mut emu) { 1042 | break; 1043 | } 1044 | 1045 | // Advance PC 1046 | let pc = emu.reg(Register::Pc); 1047 | emu.set_reg(Register::Pc, pc.wrapping_add(4)); 1048 | } 1049 | _ => break, 1050 | } 1051 | } 1052 | 1053 | print!("Took snapshot at {:#x}\n", emu.reg(Register::Pc)); 1054 | 1055 | // Wrap the original emulator in an `Arc` 1056 | let emu = Arc::new(emu); 1057 | 1058 | // Create a new stats structure 1059 | let stats = Arc::new(Mutex::new(Statistics::default())); 1060 | 1061 | // Create the stats thread 1062 | { 1063 | let corpus = corpus.clone(); 1064 | let stats = stats.clone(); 1065 | std::thread::spawn(move || { 1066 | // Start a timer 1067 | let start = Instant::now(); 1068 | 1069 | let mut last_time = Instant::now(); 1070 | 1071 | let mut log = File::create("stats.txt").unwrap(); 1072 | loop { 1073 | std::thread::sleep(Duration::from_millis(10)); 1074 | 1075 | // Get access to the stats structure 1076 | let stats = stats.lock().unwrap(); 1077 | let elapsed = start.elapsed().as_secs_f64(); 1078 | 1079 | write!(log, "{:.6},{},{},{},{}\n", elapsed, 
stats.fuzz_cases, 1080 | corpus.code_coverage.len(), corpus.unique_crashes.len(), 1081 | corpus.inputs.len()) 1082 | .unwrap(); 1083 | 1084 | if last_time.elapsed() >= Duration::from_millis(1000) { 1085 | let fuzz_cases = stats.fuzz_cases; 1086 | let instrs = stats.instrs_execed; 1087 | 1088 | // Compute performance numbers 1089 | let resetc = stats.reset_cycles as f64 / 1090 | stats.total_cycles as f64; 1091 | let vmc = stats.vm_cycles as f64 / 1092 | stats.total_cycles as f64; 1093 | 1094 | print!("[{:10.4}] cases {:10} | inputs {:10} | \ 1095 | crashes {:8} | \ 1096 | fcps {:8.0} | code {:7} | cov {:10} | \ 1097 | eff Minst/sec {:10.1} | \ 1098 | reset {:8.4} | vm {:8.4}\n", 1099 | elapsed, fuzz_cases, corpus.inputs.len(), 1100 | corpus.unique_crashes.len(), 1101 | fuzz_cases as f64 / elapsed, 1102 | corpus.code_coverage.len(), 1103 | corpus.coverage.len(), 1104 | (instrs as f64 / elapsed / 1_000_000.) / vmc, 1105 | resetc, vmc); 1106 | 1107 | for (vmexit, &freq) in stats.vmexits.iter() { 1108 | if freq as f64 / fuzz_cases as f64 > 0.01 { 1109 | print!("{:15} [{:8.6}] {:x?}\n", 1110 | freq, freq as f64 / fuzz_cases as f64, 1111 | vmexit); 1112 | } 1113 | } 1114 | 1115 | last_time = Instant::now(); 1116 | } 1117 | } 1118 | }); 1119 | } 1120 | 1121 | // Limit cores during repro mode 1122 | let num_cores = if REPRO_MODE.is_some() { 1123 | 1 1124 | } else { 1125 | 192 1126 | }; 1127 | 1128 | for thr_id in 0..num_cores { 1129 | let new_emu = emu.fork(); 1130 | let stats = stats.clone(); 1131 | let parent = emu.clone(); 1132 | let corpus = corpus.clone(); 1133 | std::thread::spawn(move || { 1134 | worker(thr_id, new_emu, parent, stats, corpus); 1135 | }); 1136 | } 1137 | 1138 | loop { 1139 | std::thread::sleep(Duration::from_millis(5000)); 1140 | } 1141 | } 1142 | 1143 | -------------------------------------------------------------------------------- /src/emulator.rs: -------------------------------------------------------------------------------- 1 | //! A 64-bit RISC-V RV64i interpreter 2 | 3 | use std::io::Write; 4 | use std::fmt; 5 | use std::sync::Arc; 6 | use std::sync::atomic::Ordering; 7 | use std::path::Path; 8 | use std::time::Duration; 9 | use std::convert::TryInto; 10 | use std::process::Command; 11 | use std::collections::{BTreeMap, BTreeSet, VecDeque}; 12 | use crate::rdtsc; 13 | use crate::{Input, Corpus}; 14 | use crate::mmu::{VirtAddr, Perm, PERM_READ, PERM_WRITE, PERM_EXEC, PERM_RAW}; 15 | use crate::mmu::{Mmu, DIRTY_BLOCK_SIZE, PERM_ACC}; 16 | use crate::jitcache::JitCache; 17 | 18 | /// If `true` code coverage will be collected 19 | const CODE_COVERAGE: bool = true; 20 | 21 | /// If `true` compares will generate coverage for each unique combination of 22 | /// matching bytes during conditional branches. This means that as more bytes 23 | /// are found to match, coverage events will be generated and the input will 24 | /// be saved. 25 | const COMPARE_COVERAGE: bool = false; 26 | 27 | /// If `true` the call stack will be maintained for the emulated code 28 | const USE_CALL_STACK: bool = true; 29 | 30 | /// If set, all register state will be saved before the execution of every 31 | /// instruction. 
32 | /// This is INCREDIBLY slow and should only be used for debugging 33 | const ENABLE_TRACING: bool = false; 34 | 35 | /// Depth of the call stack for the program under test 36 | const MAX_CALL_STACK: usize = 16 * 1024; 37 | 38 | /// Indicates that a coverage entry is empty 39 | pub const COVERAGE_ENTRY_EMPTY: u64 = 0xe66dd519dba260bb; 40 | 41 | /// Indicates that a coverage entry is currently being populated 42 | pub const COVERAGE_ENTRY_PENDING: u64 = 0xe66dd519dba260bc; 43 | 44 | /// Make sure this stays in sync with the C++ JIT version of this structure 45 | #[repr(C)] 46 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 47 | pub enum ExitReason { 48 | None, 49 | IndirectBranch, 50 | ReadFault, 51 | WriteFault, 52 | Ecall, 53 | Ebreak, 54 | Timeout, 55 | Breakpoint, 56 | InvalidOpcode, 57 | Coverage, 58 | CmpCoverage, 59 | CallStackFull, 60 | } 61 | 62 | /// Different types of coverage 63 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 64 | pub enum CoverageType { 65 | /// Coverage from new code being hit 66 | Code, 67 | 68 | /// Coverage from unique compares 69 | Compare, 70 | } 71 | 72 | /// Make sure this stays in sync with the C++ JIT version of this structure 73 | #[repr(C)] 74 | #[derive(Clone, Copy)] 75 | struct GuestState { 76 | exit_reason: ExitReason, 77 | reenter_pc: u64, 78 | cov_from: u64, 79 | cov_to: u64, 80 | regs: [u64; 33], 81 | memory: usize, 82 | permissions: usize, 83 | dirty: usize, 84 | dirty_idx: usize, 85 | dirty_bitmap: usize, 86 | trace_buffer: usize, 87 | trace_idx: usize, 88 | trace_len: usize, 89 | cov_table: usize, 90 | instrs_execed: u64, 91 | timeout: u64, 92 | 93 | call_stack: [u64; MAX_CALL_STACK], 94 | call_stack_ents: usize, 95 | call_stack_hash: u64, 96 | 97 | path_hash: u64, 98 | 99 | blocks: usize, 100 | blocks_len: usize, 101 | 102 | revision: u64, 103 | } 104 | 105 | impl Default for GuestState { 106 | fn default() -> Self { 107 | GuestState { 108 | exit_reason: ExitReason::None, 109 | reenter_pc: 0, 110 | cov_from: 0, 111 | cov_to: 0, 112 | regs: [0; 33], 113 | memory: 0, 114 | permissions: 0, 115 | dirty: 0, 116 | dirty_idx: 0, 117 | dirty_bitmap: 0, 118 | trace_buffer: 0, 119 | trace_idx: 0, 120 | trace_len: 0, 121 | cov_table: 0, 122 | instrs_execed: 0, 123 | timeout: 10_000_000, 124 | 125 | call_stack: [0; MAX_CALL_STACK], 126 | call_stack_ents: 0, 127 | call_stack_hash: 0, 128 | 129 | path_hash: 0, 130 | 131 | blocks: 0, 132 | blocks_len: 0, 133 | revision: 0, 134 | } 135 | } 136 | } 137 | 138 | /// An R-type instruction 139 | #[derive(Debug)] 140 | struct Rtype { 141 | funct7: u32, 142 | rs2: Register, 143 | rs1: Register, 144 | funct3: u32, 145 | rd: Register, 146 | } 147 | 148 | impl From for Rtype { 149 | fn from(inst: u32) -> Self { 150 | Rtype { 151 | funct7: (inst >> 25) & 0b1111111, 152 | rs2: Register::from((inst >> 20) & 0b11111), 153 | rs1: Register::from((inst >> 15) & 0b11111), 154 | funct3: (inst >> 12) & 0b111, 155 | rd: Register::from((inst >> 7) & 0b11111), 156 | } 157 | } 158 | } 159 | 160 | /// An S-type instruction 161 | #[derive(Debug)] 162 | struct Stype { 163 | imm: i32, 164 | rs2: Register, 165 | rs1: Register, 166 | funct3: u32, 167 | } 168 | 169 | impl From for Stype { 170 | fn from(inst: u32) -> Self { 171 | let imm115 = (inst >> 25) & 0b1111111; 172 | let imm40 = (inst >> 7) & 0b11111; 173 | 174 | let imm = (imm115 << 5) | imm40; 175 | let imm = ((imm as i32) << 20) >> 20; 176 | 177 | Stype { 178 | imm: imm, 179 | rs2: Register::from((inst >> 20) & 0b11111), 180 | rs1: Register::from((inst >> 15) & 0b11111), 181 
| funct3: (inst >> 12) & 0b111, 182 | } 183 | } 184 | } 185 | 186 | /// A J-type instruction 187 | #[derive(Debug)] 188 | struct Jtype { 189 | imm: i32, 190 | rd: Register, 191 | } 192 | 193 | impl From for Jtype { 194 | fn from(inst: u32) -> Self { 195 | let imm20 = (inst >> 31) & 1; 196 | let imm101 = (inst >> 21) & 0b1111111111; 197 | let imm11 = (inst >> 20) & 1; 198 | let imm1912 = (inst >> 12) & 0b11111111; 199 | 200 | let imm = (imm20 << 20) | (imm1912 << 12) | (imm11 << 11) | 201 | (imm101 << 1); 202 | let imm = ((imm as i32) << 11) >> 11; 203 | 204 | Jtype { 205 | imm: imm, 206 | rd: Register::from((inst >> 7) & 0b11111), 207 | } 208 | } 209 | } 210 | 211 | /// A B-type instruction 212 | #[derive(Debug)] 213 | struct Btype { 214 | imm: i32, 215 | rs2: Register, 216 | rs1: Register, 217 | funct3: u32, 218 | } 219 | 220 | impl From for Btype { 221 | fn from(inst: u32) -> Self { 222 | let imm12 = (inst >> 31) & 1; 223 | let imm105 = (inst >> 25) & 0b111111; 224 | let imm41 = (inst >> 8) & 0b1111; 225 | let imm11 = (inst >> 7) & 1; 226 | 227 | let imm = (imm12 << 12) | (imm11 << 11) |(imm105 << 5) | (imm41 << 1); 228 | let imm = ((imm as i32) << 19) >> 19; 229 | 230 | Btype { 231 | imm: imm, 232 | rs2: Register::from((inst >> 20) & 0b11111), 233 | rs1: Register::from((inst >> 15) & 0b11111), 234 | funct3: (inst >> 12) & 0b111, 235 | } 236 | } 237 | } 238 | 239 | /// An I-type instruction 240 | #[derive(Debug)] 241 | struct Itype { 242 | imm: i32, 243 | rs1: Register, 244 | funct3: u32, 245 | rd: Register, 246 | } 247 | 248 | impl From for Itype { 249 | fn from(inst: u32) -> Self { 250 | let imm = (inst as i32) >> 20; 251 | Itype { 252 | imm: imm, 253 | rs1: Register::from((inst >> 15) & 0b11111), 254 | funct3: (inst >> 12) & 0b111, 255 | rd: Register::from((inst >> 7) & 0b11111), 256 | } 257 | } 258 | } 259 | 260 | #[derive(Debug)] 261 | struct Utype { 262 | imm: i32, 263 | rd: Register, 264 | } 265 | 266 | impl From for Utype { 267 | fn from(inst: u32) -> Self { 268 | Utype { 269 | imm: (inst & !0xfff) as i32, 270 | rd: Register::from((inst >> 7) & 0b11111), 271 | } 272 | } 273 | } 274 | 275 | /// An open file 276 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 277 | pub enum EmuFile { 278 | Stdin, 279 | Stdout, 280 | Stderr, 281 | 282 | // A file which is backed by the current fuzz input 283 | FuzzInput { cursor: usize }, 284 | } 285 | 286 | /// A list of all open files 287 | #[derive(Clone, Debug, PartialEq, Eq)] 288 | pub struct Files(Vec>); 289 | 290 | impl Files { 291 | /// Get access to a file descriptor for `fd` 292 | pub fn get_file(&mut self, fd: usize) -> Option<&mut Option> { 293 | self.0.get_mut(fd) 294 | } 295 | } 296 | 297 | /// Callback for breakpoints 298 | type BreakpointCallback = fn(&mut Emulator) -> Result<(), VmExit>; 299 | 300 | /// All the state of the emulated system 301 | pub struct Emulator { 302 | /// Memory for the emulator 303 | pub memory: Mmu, 304 | 305 | /// All RV64i registers 306 | state: GuestState, 307 | 308 | /// Fuzz input for the program 309 | pub fuzz_input: Vec, 310 | 311 | /// Number of resets on this emulator, not copied on a fork 312 | resets: u64, 313 | 314 | /// File handle table (indexed by file descriptor) 315 | pub files: Files, 316 | 317 | /// Mapping of symbol names to virtual addresses 318 | symbols: BTreeMap, 319 | 320 | /// Mapping of virtual addresses to their symbols 321 | vaddr_to_symbol: BTreeMap, 322 | 323 | /// Breakpoint callbacks 324 | breakpoints: BTreeMap, 325 | 326 | /// JIT cache, if we are using a JIT 327 | jit_cache: 
Option>, 328 | 329 | /// Trace of register states prior to every instruction execution 330 | /// Only allocated if `ENABLE_TRACING` is `true` 331 | trace: Vec<[u64; 33]>, 332 | 333 | /// Tracks if the current fuzz case has generated new unique coverage. 334 | /// If `Some`, contains the instruction count of the most recent coverage 335 | /// increase. 336 | /// This allows us to defer reporting the input until the case is complete, 337 | /// and thus we can latch the timeout which was used to hit the coverage. 338 | new_coverage: Option, 339 | } 340 | 341 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] 342 | /// Reasons why the VM exited 343 | pub enum VmExit { 344 | /// The VM exited due to a syscall instruction 345 | Syscall, 346 | 347 | /// The VM exited cleanly as requested by the VM 348 | Exit, 349 | 350 | /// A RISC-V software breakpoint instruction was hit 351 | Ebreak, 352 | 353 | /// The instruction count limit was hit and a timeout has occurred 354 | Timeout, 355 | 356 | /// An invalid opcode was lifted 357 | InvalidOpcode, 358 | 359 | /// A free of an invalid region was performed 360 | InvalidFree(VirtAddr), 361 | 362 | /// An integer overflow occured during a syscall due to bad supplied 363 | /// arguments by the program 364 | SyscallIntegerOverflow, 365 | 366 | /// A read or write memory request overflowed the address size 367 | AddressIntegerOverflow, 368 | 369 | /// The address requested was not in bounds of the guest memory space 370 | AddressMiss(VirtAddr, usize), 371 | 372 | /// An read of `VirtAddr` failed due to missing permissions 373 | ReadFault(VirtAddr), 374 | 375 | /// An execution of a `VirtAddr` failed 376 | ExecFault(VirtAddr), 377 | 378 | /// A read of memory which is uninitialized, but otherwise readable failed 379 | /// at `VirtAddr` 380 | UninitFault(VirtAddr), 381 | 382 | /// An write of `VirtAddr` failed due to missing permissions 383 | WriteFault(VirtAddr), 384 | 385 | /// Used by breakpoints to indicate to take a snapshot 386 | Snapshot, 387 | 388 | /// The call stack was exhausted, likely infinite recursion or an uncommon 389 | /// call/ret instruction sequence leading to a broken call stack 390 | CallStackFull, 391 | 392 | /// The guest ran out of virtual memory and could not continue 393 | OutOfMemory, 394 | } 395 | 396 | /// Different types of faults 397 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] 398 | pub enum FaultType { 399 | /// Access occurred outside of program memory 400 | Bounds, 401 | 402 | /// Invalid free (eg, double free or corrupt free address) 403 | Free, 404 | 405 | /// An invalid opcode was executed (or lifted) 406 | InvalidOpcode, 407 | 408 | /// A breakpoint occurred in the target binary 409 | SoftwareBreakpoint, 410 | 411 | /// The call stack was exhausted, likely infinite recursion or an uncommon 412 | /// call/ret instruction sequence leading to a broken call stack 413 | CallStackFull, 414 | 415 | Exec, 416 | Read, 417 | Write, 418 | Uninit, 419 | } 420 | 421 | /// Different buckets for addresses 422 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] 423 | pub enum AddressType { 424 | /// Address was between [0, 32 KiB) 425 | Null, 426 | 427 | /// Address was between [-32 KiB, 0) 428 | Negative, 429 | 430 | /// Address was normal 431 | Normal, 432 | } 433 | 434 | impl From for AddressType { 435 | fn from(val: VirtAddr) -> Self { 436 | match val.0 as i64 { 437 | (0..=32767) => AddressType::Null, 438 | (-32768..=-1) => AddressType::Negative, 439 | _ => AddressType::Normal, 440 | } 
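        // Note: bucketing fault addresses into Null / Negative / Normal means
        // a near-null dereference (e.g. reading field +0x18 of a null
        // pointer) and a small underflow below null dedup separately from
        // wild pointers, while the exact faulting address stays out of the
        // `unique_crashes` key, keeping the crash table small.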
441 | } 442 | } 443 | 444 | impl VmExit { 445 | /// If this is a crash it returns the faulting address and the fault type 446 | #[inline] 447 | pub fn is_crash(&self) -> Option<(FaultType, VirtAddr)> { 448 | match *self { 449 | VmExit::AddressMiss(addr, _) => Some((FaultType::Bounds, addr)), 450 | VmExit::ReadFault(addr) => Some((FaultType::Read, addr)), 451 | VmExit::ExecFault(addr) => Some((FaultType::Exec, addr)), 452 | VmExit::UninitFault(addr) => Some((FaultType::Uninit, addr)), 453 | VmExit::WriteFault(addr) => Some((FaultType::Write, addr)), 454 | VmExit::InvalidFree(addr) => Some((FaultType::Free, addr)), 455 | VmExit::InvalidOpcode => 456 | Some((FaultType::InvalidOpcode, VirtAddr(0))), 457 | VmExit::Ebreak => 458 | Some((FaultType::SoftwareBreakpoint, VirtAddr(0))), 459 | VmExit::CallStackFull => 460 | Some((FaultType::CallStackFull, VirtAddr(0))), 461 | _ => None, 462 | } 463 | } 464 | } 465 | 466 | impl fmt::Display for Emulator { 467 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 468 | write!(f, 469 | r#"zero {:016x} ra {:016x} sp {:016x} gp {:016x} 470 | tp {:016x} t0 {:016x} t1 {:016x} t2 {:016x} 471 | s0 {:016x} s1 {:016x} a0 {:016x} a1 {:016x} 472 | a2 {:016x} a3 {:016x} a4 {:016x} a5 {:016x} 473 | a6 {:016x} a7 {:016x} s2 {:016x} s3 {:016x} 474 | s4 {:016x} s5 {:016x} s6 {:016x} s7 {:016x} 475 | s8 {:016x} s9 {:016x} s10 {:016x} s11 {:016x} 476 | t3 {:016x} t4 {:016x} t5 {:016x} t6 {:016x} 477 | pc {:016x}"#, 478 | self.reg(Register::Zero), 479 | self.reg(Register::Ra), 480 | self.reg(Register::Sp), 481 | self.reg(Register::Gp), 482 | self.reg(Register::Tp), 483 | self.reg(Register::T0), 484 | self.reg(Register::T1), 485 | self.reg(Register::T2), 486 | self.reg(Register::S0), 487 | self.reg(Register::S1), 488 | self.reg(Register::A0), 489 | self.reg(Register::A1), 490 | self.reg(Register::A2), 491 | self.reg(Register::A3), 492 | self.reg(Register::A4), 493 | self.reg(Register::A5), 494 | self.reg(Register::A6), 495 | self.reg(Register::A7), 496 | self.reg(Register::S2), 497 | self.reg(Register::S3), 498 | self.reg(Register::S4), 499 | self.reg(Register::S5), 500 | self.reg(Register::S6), 501 | self.reg(Register::S7), 502 | self.reg(Register::S8), 503 | self.reg(Register::S9), 504 | self.reg(Register::S10), 505 | self.reg(Register::S11), 506 | self.reg(Register::T3), 507 | self.reg(Register::T4), 508 | self.reg(Register::T5), 509 | self.reg(Register::T6), 510 | self.reg(Register::Pc)) 511 | } 512 | } 513 | 514 | /// 64-bit RISC-V registers 515 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 516 | #[repr(usize)] 517 | pub enum Register { 518 | Zero = 0, 519 | Ra, 520 | Sp, 521 | Gp, 522 | Tp, 523 | T0, 524 | T1, 525 | T2, 526 | S0, 527 | S1, 528 | A0, 529 | A1, 530 | A2, 531 | A3, 532 | A4, 533 | A5, 534 | A6, 535 | A7, 536 | S2, 537 | S3, 538 | S4, 539 | S5, 540 | S6, 541 | S7, 542 | S8, 543 | S9, 544 | S10, 545 | S11, 546 | T3, 547 | T4, 548 | T5, 549 | T6, 550 | Pc, 551 | } 552 | 553 | impl From for Register { 554 | fn from(val: u32) -> Self { 555 | assert!(val < 33); 556 | unsafe { 557 | core::ptr::read_unaligned(&(val as usize) as 558 | *const usize as *const Register) 559 | } 560 | } 561 | } 562 | 563 | impl Emulator { 564 | /// Creates a new emulator with `size` bytes of memory 565 | pub fn new(size: usize) -> Self { 566 | assert!(size >= 8, "Must have at least 8 bytes of memory"); 567 | 568 | Emulator { 569 | memory: Mmu::new(size), 570 | state: GuestState::default(), 571 | fuzz_input: Vec::new(), 572 | jit_cache: None, 573 | breakpoints: 
BTreeMap::new(),
574 |             symbols:         BTreeMap::new(),
575 |             vaddr_to_symbol: BTreeMap::new(),
576 |             resets:          0,
577 |             new_coverage:    None,
578 |             trace: Vec::with_capacity(
579 |                 if ENABLE_TRACING { 10_000_000 } else { 0 }),
580 |             files: Files(vec![
581 |                 Some(EmuFile::Stdin),
582 |                 Some(EmuFile::Stdout),
583 |                 Some(EmuFile::Stderr),
584 |             ]),
585 |         }
586 |     }
587 |
588 |     /// Get the current timeout for the fuzz case. This may change during the
589 |     /// fuzz case; if we keep exploring new coverage, we may increase the
590 |     /// timeout.
591 |     pub fn timeout(&self) -> u64 {
592 |         self.state.timeout
593 |     }
594 |
595 |     /// Set the timeout for the fuzz case in number of instructions, this will
596 |     /// be reset to the default value upon a `reset()`
597 |     pub fn set_timeout(&mut self, timeout: u64) {
598 |         self.state.timeout = timeout;
599 |     }
600 |
601 |     /// Add a symbol to the symbol database
602 |     pub fn add_symbol<S: AsRef<str>>(&mut self, name: S, vaddr: VirtAddr) {
603 |         self.symbols.insert(name.as_ref().to_string(), vaddr);
604 |         self.vaddr_to_symbol.insert(vaddr, name.as_ref().to_string());
605 |     }
606 |
607 |     /// Resolve a symbol name into a virtual address
608 |     pub fn resolve_symbol<S: AsRef<str>>(&self, symbol: S)
609 |             -> Option<VirtAddr> {
610 |         self.symbols.get(symbol.as_ref()).copied()
611 |     }
612 |
613 |     /// Resolve a virtual address into a symbol + offset string, omitting the
614 |     /// symbol if no symbol could be resolved
615 |     pub fn get_symbol(&self, vaddr: VirtAddr) -> String {
616 |         match self.get_symbol_offset(vaddr) {
617 |             (Some(symbol), offset) => {
618 |                 format!("{}+{:#x}", symbol, offset)
619 |             }
620 |             (None, offset) => {
621 |                 format!("{:#x}", offset)
622 |             }
623 |         }
624 |     }
625 |
626 |     /// Resolve a virtual address into a symbol + offset
627 |     pub fn get_symbol_offset(&self, vaddr: VirtAddr) -> (Option<&str>, usize) {
628 |         if let Some((base, name)) = self.vaddr_to_symbol.range(..=vaddr)
629 |                 .next_back() {
630 |             (Some(name), vaddr.0 - base.0)
631 |         } else {
632 |             (None, vaddr.0)
633 |         }
634 |     }
635 |
636 |     /// Fork an emulator into a new emulator which will diff from the original
637 |     pub fn fork(&self) -> Self {
638 |         let mut state = GuestState::default();
639 |         state.regs = self.state.regs;
640 |
641 |         Emulator {
642 |             memory:          self.memory.fork(),
643 |             state:           state,
644 |             fuzz_input:      self.fuzz_input.clone(),
645 |             files:           self.files.clone(),
646 |             jit_cache:       self.jit_cache.clone(),
647 |             breakpoints:     self.breakpoints.clone(),
648 |             symbols:         self.symbols.clone(),
649 |             vaddr_to_symbol: self.vaddr_to_symbol.clone(),
650 |             resets:          0,
651 |             new_coverage:    None,
652 |             trace: Vec::with_capacity(
653 |                 if ENABLE_TRACING { 10_000_000 } else { 0 }),
654 |         }
655 |     }
656 |
657 |     /// Enable the JIT and use a specified `JitCache`
658 |     pub fn enable_jit(mut self, jit_cache: Arc<JitCache>) -> Self {
659 |         self.jit_cache = Some(jit_cache);
660 |         self
661 |     }
662 |
663 |     /// Register a new breakpoint callback
664 |     pub fn add_breakpoint(&mut self, pc: VirtAddr,
665 |             callback: BreakpointCallback) {
666 |         self.breakpoints.insert(pc, callback);
667 |     }
668 |
669 |     /// Removes a breakpoint, returns `true` if a previous breakpoint was
670 |     /// removed
671 |     pub fn remove_breakpoint(&mut self, pc: VirtAddr) -> bool {
672 |         self.breakpoints.remove(&pc).is_some()
673 |     }
674 |
675 |     /// Reset the state of `self` to `other`, assuming that `self` is
676 |     /// forked off of `other`. If it is not, the results are invalid.
677 |     pub fn reset<F>(&mut self, other: &Self, corpus: &Corpus,
678 |             accessed_bits: F)
679 |             where F: FnOnce(&mut Emulator) -> Vec<bool> {
680 |         if ENABLE_TRACING {
681 |             let mut tracestr = String::new();
682 |             let mut pctracestr = String::new();
683 |             for trace in &self.trace {
684 |                 self.state.regs = *trace;
685 |                 let sym = self.get_symbol(VirtAddr(
686 |                     self.reg(Register::Pc) as usize));
687 |                 tracestr += &format!("\n{}\n{}\n", sym, self);
688 |                 pctracestr += &format!("{:016x} {}\n",
689 |                     self.reg(Register::Pc), sym);
690 |             }
691 |             if self.trace.len() > 0 {
692 |                 std::fs::write("trace.txt", tracestr).unwrap();
693 |                 std::fs::write("pctrace.txt", pctracestr).unwrap();
694 |                 panic!();
695 |             }
696 |
697 |             // Reset trace state
698 |             self.trace.clear();
699 |         }
700 |
701 |         // Check if the input for this fuzz case should be saved
702 |         if let Some(instrs) = self.new_coverage {
703 |             // Save the input and log it in the hash table
704 |             let hash = corpus.hasher.hash(&self.fuzz_input);
705 |             corpus.input_hashes.entry_or_insert(
706 |                     &hash, hash as usize, || {
707 |                 corpus.inputs.push(
708 |                     Box::new(Input::new(instrs, self.fuzz_input.clone(),
709 |                         accessed_bits(self))));
710 |                 Box::new(())
711 |             });
712 |
713 |             // Reset that the case found new coverage
714 |             self.new_coverage = None;
715 |         }
716 |
717 |         // Restore original timeout
718 |         self.state.timeout = other.state.timeout;
719 |
720 |         // Reset memory state
721 |         self.memory.reset(&other.memory);
722 |
723 |         // Reset register state
724 |         self.state.regs = other.state.regs;
725 |
726 |         // Reset call stack
727 |         let cse = other.state.call_stack_ents as usize;
728 |         self.state.call_stack[..cse]
729 |             .copy_from_slice(&other.state.call_stack[..cse]);
730 |         self.state.call_stack_ents = other.state.call_stack_ents;
731 |         self.state.call_stack_hash = other.state.call_stack_hash;
732 |
733 |         // Reset path hash
734 |         self.state.path_hash = other.state.path_hash;
735 |
736 |         // Reset file state
737 |         self.files.0.clear();
738 |         self.files.0.extend_from_slice(&other.files.0);
739 |
740 |         // Update some stats
741 |         self.resets += 1;
742 |
743 |         // Update revision
744 |         self.state.revision += 1;
745 |     }
746 |
747 |     /// Allocate a new file descriptor
748 |     pub fn alloc_file(&mut self) -> usize {
749 |         for (fd, file) in self.files.0.iter().enumerate() {
750 |             if file.is_none() {
751 |                 // File not present, we can reuse the FD
752 |                 return fd;
753 |             }
754 |         }
755 |
756 |         // If we got here, no FD is present, create a new one
757 |         let fd = self.files.0.len();
758 |         self.files.0.push(None);
759 |         fd
760 |     }
761 |
762 |     /// Get a register from the guest
763 |     pub fn reg(&self, register: Register) -> u64 {
764 |         if register != Register::Zero {
765 |             self.state.regs[register as usize]
766 |         } else {
767 |             0
768 |         }
769 |     }
770 |
771 |     /// Set a register in the guest
772 |     pub fn set_reg(&mut self, register: Register, val: u64) {
773 |         if register != Register::Zero {
774 |             self.state.regs[register as usize] = val;
775 |         }
776 |     }
777 |
778 |     /// Run the VM using either the emulator or the JIT
779 |     pub fn run(&mut self, instrs_execed: &mut u64,
780 |             vm_cycles: &mut u64, corpus: &Corpus)
781 |             -> Result<(), VmExit> {
782 |         if self.jit_cache.is_some() {
783 |             self.run_jit(instrs_execed, vm_cycles, corpus)
784 |         } else {
785 |             let it = rdtsc();
786 |             let ret = self.run_emu(instrs_execed, corpus);
787 |             *vm_cycles += rdtsc() - it;
788 |             ret
789 |         }
790 |     }
791 |
792 |     /// Used internally by the emulator and JIT to notify us when new code
793 |     /// coverage is hit
794 |     fn
notify_code_coverage(&mut self, corpus: &Corpus, from: u64, to: u64) { 795 | // Update code coverage 796 | let key = ( 797 | VirtAddr(from as usize), 798 | VirtAddr(to as usize), 799 | ); 800 | corpus.code_coverage.entry_or_insert( 801 | &key, to as usize, || { 802 | { 803 | let new_cov = format!("{:10} {:10} {:#x} {} -> {:#x} {}", 804 | self.resets + 1, 805 | corpus.code_coverage.len() + 1, 806 | from, self.get_symbol(VirtAddr(from as usize)), 807 | to, self.get_symbol(VirtAddr(to as usize))); 808 | let mut cl = 809 | corpus.coverage_log.lock().unwrap(); 810 | write!(cl, "{}\n", new_cov).unwrap(); 811 | } 812 | 813 | { 814 | let mut cl = 815 | corpus.lighthouse_log.lock().unwrap(); 816 | write!(cl, "{:#x}\n", from).unwrap(); 817 | write!(cl, "{:#x}\n", to).unwrap(); 818 | } 819 | 820 | // Indicate that this case caused new coverage 821 | self.new_coverage = Some(self.state.instrs_execed); 822 | 823 | // Increase timeout temporarily for this fuzz case 824 | // to explore more around the new code 825 | self.state.timeout += 1_000_000; 826 | 827 | Box::new(()) 828 | }); 829 | } 830 | 831 | /// Register that new compare coverage has occurred 832 | fn notify_compare_coverage(&mut self, _corpus: &Corpus) { 833 | let key = ( 834 | CoverageType::Compare, 835 | self.state.cov_from, 836 | self.state.cov_to, 837 | (self.state.call_stack_hash & 0xf) ^ 838 | (self.state.path_hash & 0xf), 839 | ); 840 | 841 | // Update code coverage 842 | _corpus.coverage.entry_or_insert( 843 | &key, self.state.cov_from as usize, || { 844 | // Indicate that this case caused new coverage 845 | self.new_coverage = Some(self.state.instrs_execed); 846 | 847 | // Increase timeout temporarily for this fuzz case 848 | // to explore more around the new code 849 | self.state.timeout += 1_000_000; 850 | 851 | Box::new(()) 852 | }); 853 | } 854 | 855 | /// Run the VM using the emulator 856 | pub fn run_emu(&mut self, instrs_execed: &mut u64, corpus: &Corpus) 857 | -> Result<(), VmExit> { 858 | 'next_inst: loop { 859 | // Get the current program counter 860 | let pc = self.reg(Register::Pc); 861 | 862 | // Check alignment 863 | if pc & 3 != 0 { 864 | // Code was unaligned, return a code fetch fault 865 | return Err(VmExit::ExecFault(VirtAddr(pc as usize))); 866 | } 867 | 868 | // Read the instruction 869 | let inst: u32 = self.memory.read_perms(VirtAddr(pc as usize), 870 | Perm(PERM_EXEC)) 871 | .map_err(|x| VmExit::ExecFault(x.is_crash().unwrap().1))?; 872 | 873 | macro_rules! 
coverage_event { 874 | ($cov_source:expr, $from:expr, $to:expr) => { 875 | // Check for timeout 876 | if *instrs_execed > self.state.timeout { 877 | return Err(VmExit::Timeout); 878 | } 879 | 880 | // Update the path hash 881 | self.state.path_hash = 882 | self.state.path_hash.rotate_left(7) ^ $to; 883 | 884 | const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; 885 | const PRIME64_3: u64 = 0x165667B19E3779F9; 886 | 887 | // Get access to the coverage table 888 | let ct = &corpus.coverage_table; 889 | 890 | // Compute the hash 891 | let mut hash: u64 = $from; 892 | hash ^= hash >> 33; 893 | hash = hash.wrapping_mul(PRIME64_2); 894 | hash = hash.wrapping_add($to); 895 | hash ^= hash >> 29; 896 | hash = hash.wrapping_mul(PRIME64_3); 897 | hash ^= hash >> 32; 898 | 899 | // Convert the hash to a `usize` 900 | let mut hash = hash as usize; 901 | 902 | loop { 903 | // Bounds the hash to the table 904 | hash %= ct.len(); 905 | 906 | if ct[hash].0.compare_and_swap(COVERAGE_ENTRY_EMPTY, 907 | COVERAGE_ENTRY_PENDING, Ordering::SeqCst) == 908 | COVERAGE_ENTRY_EMPTY { 909 | // We own the entry, fill it in 910 | ct[hash].1.store($to, Ordering::SeqCst); 911 | ct[hash].0.store($from, Ordering::SeqCst); 912 | self.notify_code_coverage(corpus, $from, $to); 913 | } else { 914 | // We lost the race 915 | 916 | // Wait for the entry to be filled in 917 | while ct[hash].0.load(Ordering::SeqCst) == 918 | COVERAGE_ENTRY_PENDING {} 919 | 920 | if ct[hash].0.load(Ordering::Relaxed) == $from && 921 | ct[hash].1.load(Ordering::Relaxed) == $to { 922 | // Coverage already recorded 923 | break; 924 | } 925 | 926 | // Go to the next 927 | hash += 1; 928 | } 929 | } 930 | } 931 | } 932 | 933 | macro_rules! compare_coverage { 934 | ($a:expr, $b:expr) => { 935 | if COMPARE_COVERAGE { 936 | // Create a bitmap indicating which bytes in rs1 and 937 | // rs2 match 938 | let tmp = $a ^ (!$b); 939 | let tmp = (tmp >> 1) & tmp; 940 | let tmp = (tmp >> 2) & tmp; 941 | let tmp = (tmp >> 4) & tmp; 942 | let tmp = tmp & 0x0101010101010101; 943 | let hash = 944 | pc ^ (self.state.call_stack_hash & 0xf) ^ 945 | self.state.path_hash & 0xf; 946 | 947 | // Register the coverage as compare coverage for this 948 | // PC with the bitmask we identified 949 | coverage_event!("CmpCoverage", hash, tmp); 950 | } 951 | } 952 | } 953 | 954 | // Update number of instructions executed 955 | *instrs_execed += 1; 956 | 957 | //print!("Executing {:#x}\n", pc); 958 | if ENABLE_TRACING { 959 | self.trace.push(self.state.regs); 960 | } 961 | 962 | if let Some(callback) = 963 | self.breakpoints.get(&VirtAddr(pc as usize)) { 964 | // Invoke the breakpoint callback 965 | callback(self)?; 966 | 967 | if self.reg(Register::Pc) != pc { 968 | // Callback changed PC, re-start emulation loop 969 | continue 'next_inst; 970 | } 971 | } 972 | 973 | // Extract the opcode from the instruction 974 | let opcode = inst & 0b1111111; 975 | 976 | match opcode { 977 | 0b0110111 => { 978 | // LUI 979 | let inst = Utype::from(inst); 980 | self.set_reg(inst.rd, inst.imm as i64 as u64); 981 | } 982 | 0b0010111 => { 983 | // AUIPC 984 | let inst = Utype::from(inst); 985 | self.set_reg(inst.rd, 986 | (inst.imm as i64 as u64).wrapping_add(pc)); 987 | } 988 | 0b1101111 => { 989 | // JAL 990 | let inst = Jtype::from(inst); 991 | let tgt = pc.wrapping_add(inst.imm as i64 as u64); 992 | let retaddr = pc.wrapping_add(4); 993 | 994 | coverage_event!("Coverage", pc, tgt); 995 | 996 | if inst.rd == Register::Ra { 997 | if self.state.call_stack_ents >= MAX_CALL_STACK { 998 | return 
Err(VmExit::CallStackFull); 999 | } 1000 | 1001 | // Update call stack 1002 | self.state.call_stack[self.state.call_stack_ents] = 1003 | retaddr; 1004 | self.state.call_stack_ents += 1; 1005 | 1006 | // Update call stack hash 1007 | self.state.call_stack_hash = 1008 | self.state.call_stack_hash.rotate_left(7) ^ 1009 | retaddr; 1010 | } 1011 | 1012 | self.set_reg(inst.rd, retaddr); 1013 | self.set_reg(Register::Pc, tgt); 1014 | continue 'next_inst; 1015 | } 1016 | 0b1100111 => { 1017 | // We know it's an Itype 1018 | let inst = Itype::from(inst); 1019 | 1020 | match inst.funct3 { 1021 | 0b000 => { 1022 | // JALR 1023 | let target = self.reg(inst.rs1).wrapping_add( 1024 | inst.imm as i64 as u64); 1025 | 1026 | // Try to handle returns by checking whether we're 1027 | // indirectly branching to a return address 1028 | if self.state.call_stack_ents > 0 { 1029 | let cse = self.state.call_stack_ents - 1; 1030 | if target == self.state.call_stack[cse] { 1031 | self.state.call_stack_hash = 1032 | (self.state.call_stack_hash ^ target) 1033 | .rotate_right(7); 1034 | self.state.call_stack_ents -= 1; 1035 | } 1036 | } 1037 | 1038 | coverage_event!("Coverage", pc, target); 1039 | 1040 | let retaddr = pc.wrapping_add(4); 1041 | self.set_reg(inst.rd, retaddr); 1042 | self.set_reg(Register::Pc, target); 1043 | continue 'next_inst; 1044 | } 1045 | _ => unimplemented!("Unexpected 0b1100111"), 1046 | } 1047 | } 1048 | 0b1100011 => { 1049 | // We know it's an Btype 1050 | let inst = Btype::from(inst); 1051 | 1052 | let rs1 = self.reg(inst.rs1); 1053 | let rs2 = self.reg(inst.rs2); 1054 | let tgt = pc.wrapping_add(inst.imm as i64 as u64); 1055 | 1056 | // Determine if we should take a branch 1057 | let take_branch = match inst.funct3 { 1058 | 0b000 => /* BEQ */ rs1 == rs2, 1059 | 0b001 => /* BNE */ rs1 != rs2, 1060 | 0b100 => /* BLT */ (rs1 as i64) < (rs2 as i64), 1061 | 0b101 => /* BGE */ (rs1 as i64) >= (rs2 as i64), 1062 | 0b110 => /* BLTU */ (rs1 as u64) < (rs2 as u64), 1063 | 0b111 => /* BGEU */ (rs1 as u64) >= (rs2 as u64), 1064 | _ => unimplemented!("Unexpected 0b1100011"), 1065 | }; 1066 | 1067 | // Generate compare coverage 1068 | compare_coverage!(rs1, rs2); 1069 | 1070 | // Handle the conditional branch 1071 | if take_branch { 1072 | coverage_event!("Coverage", pc, tgt); 1073 | self.set_reg(Register::Pc, tgt); 1074 | continue 'next_inst; 1075 | } else { 1076 | coverage_event!("Coverage", pc, pc.wrapping_add(4)); 1077 | } 1078 | } 1079 | 0b0000011 => { 1080 | // We know it's an Itype 1081 | let inst = Itype::from(inst); 1082 | 1083 | // Compute the address 1084 | let addr = VirtAddr(self.reg(inst.rs1) 1085 | .wrapping_add(inst.imm as i64 as u64) 1086 | as usize); 1087 | 1088 | match inst.funct3 { 1089 | 0b000 => { 1090 | // LB 1091 | let mut tmp = [0u8; 1]; 1092 | self.memory.read_into(addr, &mut tmp)?; 1093 | self.set_reg(inst.rd, 1094 | i8::from_le_bytes(tmp) as i64 as u64); 1095 | } 1096 | 0b001 => { 1097 | // LH 1098 | let mut tmp = [0u8; 2]; 1099 | self.memory.read_into(addr, &mut tmp)?; 1100 | self.set_reg(inst.rd, 1101 | i16::from_le_bytes(tmp) as i64 as u64); 1102 | } 1103 | 0b010 => { 1104 | // LW 1105 | let mut tmp = [0u8; 4]; 1106 | self.memory.read_into(addr, &mut tmp)?; 1107 | self.set_reg(inst.rd, 1108 | i32::from_le_bytes(tmp) as i64 as u64); 1109 | } 1110 | 0b011 => { 1111 | // LD 1112 | let mut tmp = [0u8; 8]; 1113 | self.memory.read_into(addr, &mut tmp)?; 1114 | self.set_reg(inst.rd, 1115 | i64::from_le_bytes(tmp) as u64); 1116 | } 1117 | 0b100 => { 1118 | // LBU 1119 | 
let mut tmp = [0u8; 1]; 1120 | self.memory.read_into(addr, &mut tmp)?; 1121 | self.set_reg(inst.rd, 1122 | u8::from_le_bytes(tmp) as u64); 1123 | } 1124 | 0b101 => { 1125 | // LHU 1126 | let mut tmp = [0u8; 2]; 1127 | self.memory.read_into(addr, &mut tmp)?; 1128 | self.set_reg(inst.rd, 1129 | u16::from_le_bytes(tmp) as u64); 1130 | } 1131 | 0b110 => { 1132 | // LWU 1133 | let mut tmp = [0u8; 4]; 1134 | self.memory.read_into(addr, &mut tmp)?; 1135 | self.set_reg(inst.rd, 1136 | u32::from_le_bytes(tmp) as u64); 1137 | } 1138 | _ => unimplemented!("Unexpected 0b0000011"), 1139 | } 1140 | } 1141 | 0b0100011 => { 1142 | // We know it's an Stype 1143 | let inst = Stype::from(inst); 1144 | 1145 | // Compute the address 1146 | let addr = VirtAddr(self.reg(inst.rs1) 1147 | .wrapping_add(inst.imm as i64 as u64) 1148 | as usize); 1149 | 1150 | match inst.funct3 { 1151 | 0b000 => { 1152 | // SB 1153 | let val = self.reg(inst.rs2) as u8; 1154 | self.memory.write(addr, val)?; 1155 | } 1156 | 0b001 => { 1157 | // SH 1158 | let val = self.reg(inst.rs2) as u16; 1159 | self.memory.write(addr, val)?; 1160 | } 1161 | 0b010 => { 1162 | // SW 1163 | let val = self.reg(inst.rs2) as u32; 1164 | self.memory.write(addr, val)?; 1165 | } 1166 | 0b011 => { 1167 | // SD 1168 | let val = self.reg(inst.rs2) as u64; 1169 | self.memory.write(addr, val)?; 1170 | } 1171 | _ => unimplemented!("Unexpected 0b0100011"), 1172 | } 1173 | } 1174 | 0b0010011 => { 1175 | // We know it's an Itype 1176 | let inst = Itype::from(inst); 1177 | 1178 | let rs1 = self.reg(inst.rs1); 1179 | let imm = inst.imm as i64 as u64; 1180 | 1181 | match inst.funct3 { 1182 | 0b000 => { 1183 | // ADDI 1184 | self.set_reg(inst.rd, rs1.wrapping_add(imm)); 1185 | } 1186 | 0b010 => { 1187 | // SLTI 1188 | compare_coverage!(rs1, imm as u64); 1189 | if (rs1 as i64) < (imm as i64) { 1190 | self.set_reg(inst.rd, 1); 1191 | } else { 1192 | self.set_reg(inst.rd, 0); 1193 | } 1194 | } 1195 | 0b011 => { 1196 | // SLTIU 1197 | compare_coverage!(rs1, imm as u64); 1198 | if (rs1 as u64) < (imm as u64) { 1199 | self.set_reg(inst.rd, 1); 1200 | } else { 1201 | self.set_reg(inst.rd, 0); 1202 | } 1203 | } 1204 | 0b100 => { 1205 | // XORI 1206 | self.set_reg(inst.rd, rs1 ^ imm); 1207 | } 1208 | 0b110 => { 1209 | // ORI 1210 | self.set_reg(inst.rd, rs1 | imm); 1211 | } 1212 | 0b111 => { 1213 | // ANDI 1214 | self.set_reg(inst.rd, rs1 & imm); 1215 | } 1216 | 0b001 => { 1217 | let mode = (inst.imm >> 6) & 0b111111; 1218 | 1219 | match mode { 1220 | 0b000000 => { 1221 | // SLLI 1222 | let shamt = inst.imm & 0b111111; 1223 | self.set_reg(inst.rd, rs1 << shamt); 1224 | } 1225 | _ => unreachable!(), 1226 | } 1227 | } 1228 | 0b101 => { 1229 | let mode = (inst.imm >> 6) & 0b111111; 1230 | 1231 | match mode { 1232 | 0b000000 => { 1233 | // SRLI 1234 | let shamt = inst.imm & 0b111111; 1235 | self.set_reg(inst.rd, rs1 >> shamt); 1236 | } 1237 | 0b010000 => { 1238 | // SRAI 1239 | let shamt = inst.imm & 0b111111; 1240 | self.set_reg(inst.rd, 1241 | ((rs1 as i64) >> shamt) as u64); 1242 | } 1243 | _ => unreachable!(), 1244 | } 1245 | } 1246 | _ => unreachable!(), 1247 | } 1248 | } 1249 | 0b0110011 => { 1250 | // We know it's an Rtype 1251 | let inst = Rtype::from(inst); 1252 | 1253 | let rs1 = self.reg(inst.rs1); 1254 | let rs2 = self.reg(inst.rs2); 1255 | 1256 | match (inst.funct7, inst.funct3) { 1257 | (0b0000000, 0b000) => { 1258 | // ADD 1259 | self.set_reg(inst.rd, rs1.wrapping_add(rs2)); 1260 | } 1261 | (0b0100000, 0b000) => { 1262 | // SUB 1263 | self.set_reg(inst.rd, 
rs1.wrapping_sub(rs2)); 1264 | } 1265 | (0b0000000, 0b001) => { 1266 | // SLL 1267 | let shamt = rs2 & 0b111111; 1268 | self.set_reg(inst.rd, rs1 << shamt); 1269 | } 1270 | (0b0000000, 0b010) => { 1271 | // SLT 1272 | compare_coverage!(rs1, rs2); 1273 | if (rs1 as i64) < (rs2 as i64) { 1274 | self.set_reg(inst.rd, 1); 1275 | } else { 1276 | self.set_reg(inst.rd, 0); 1277 | } 1278 | } 1279 | (0b0000000, 0b011) => { 1280 | // SLTU 1281 | compare_coverage!(rs1, rs2); 1282 | if (rs1 as u64) < (rs2 as u64) { 1283 | self.set_reg(inst.rd, 1); 1284 | } else { 1285 | self.set_reg(inst.rd, 0); 1286 | } 1287 | } 1288 | (0b0000000, 0b100) => { 1289 | // XOR 1290 | self.set_reg(inst.rd, rs1 ^ rs2); 1291 | } 1292 | (0b0000000, 0b101) => { 1293 | // SRL 1294 | let shamt = rs2 & 0b111111; 1295 | self.set_reg(inst.rd, rs1 >> shamt); 1296 | } 1297 | (0b0100000, 0b101) => { 1298 | // SRA 1299 | let shamt = rs2 & 0b111111; 1300 | self.set_reg(inst.rd, 1301 | ((rs1 as i64) >> shamt) as u64); 1302 | } 1303 | (0b0000000, 0b110) => { 1304 | // OR 1305 | self.set_reg(inst.rd, rs1 | rs2); 1306 | } 1307 | (0b0000000, 0b111) => { 1308 | // AND 1309 | self.set_reg(inst.rd, rs1 & rs2); 1310 | } 1311 | (0b0000001, 0b000) => { 1312 | // MUL 1313 | self.set_reg(inst.rd, rs1.wrapping_mul(rs2)); 1314 | } 1315 | (0b0000001, 0b001) => { 1316 | // MULH 1317 | let rs1 = rs1 as i64 as u128; 1318 | let rs2 = rs2 as i64 as u128; 1319 | let val = rs1.wrapping_mul(rs2); 1320 | self.set_reg(inst.rd, (val >> 64) as u64); 1321 | } 1322 | (0b0000001, 0b010) => { 1323 | // MULHSU 1324 | let rs1 = rs1 as i64 as u128; 1325 | let rs2 = rs2 as u64 as u128; 1326 | let val = rs1.wrapping_mul(rs2); 1327 | self.set_reg(inst.rd, (val >> 64) as u64); 1328 | } 1329 | (0b0000001, 0b011) => { 1330 | // MULHU 1331 | let rs1 = rs1 as u64 as u128; 1332 | let rs2 = rs2 as u64 as u128; 1333 | let val = rs1.wrapping_mul(rs2); 1334 | self.set_reg(inst.rd, (val >> 64) as u64); 1335 | } 1336 | (0b0000001, 0b100) => { 1337 | // DIV 1338 | let rs1 = rs1 as i64; 1339 | let rs2 = rs2 as i64; 1340 | let val = if rs2 == 0 { 1341 | -1 1342 | } else { 1343 | rs1.wrapping_div(rs2) 1344 | }; 1345 | self.set_reg(inst.rd, val as u64); 1346 | } 1347 | (0b0000001, 0b101) => { 1348 | // DIVU 1349 | let val = if rs2 == 0 { 1350 | core::u64::MAX 1351 | } else { 1352 | rs1.wrapping_div(rs2) 1353 | }; 1354 | self.set_reg(inst.rd, val); 1355 | } 1356 | (0b0000001, 0b110) => { 1357 | // REM 1358 | let rs1 = rs1 as i64; 1359 | let rs2 = rs2 as i64; 1360 | let val = if rs2 == 0 { 1361 | rs1 1362 | } else { 1363 | rs1.wrapping_rem(rs2) 1364 | }; 1365 | self.set_reg(inst.rd, val as u64); 1366 | } 1367 | (0b0000001, 0b111) => { 1368 | // REMU 1369 | let val = if rs2 == 0 { 1370 | rs1 1371 | } else { 1372 | rs1.wrapping_rem(rs2) 1373 | }; 1374 | self.set_reg(inst.rd, val); 1375 | } 1376 | _ => unreachable!(), 1377 | } 1378 | } 1379 | 0b0111011 => { 1380 | // We know it's an Rtype 1381 | let inst = Rtype::from(inst); 1382 | 1383 | let rs1 = self.reg(inst.rs1) as u32; 1384 | let rs2 = self.reg(inst.rs2) as u32; 1385 | 1386 | match (inst.funct7, inst.funct3) { 1387 | (0b0000000, 0b000) => { 1388 | // ADDW 1389 | self.set_reg(inst.rd, 1390 | rs1.wrapping_add(rs2) as i32 as i64 as u64); 1391 | } 1392 | (0b0100000, 0b000) => { 1393 | // SUBW 1394 | self.set_reg(inst.rd, 1395 | rs1.wrapping_sub(rs2) as i32 as i64 as u64); 1396 | } 1397 | (0b0000000, 0b001) => { 1398 | // SLLW 1399 | let shamt = rs2 & 0b11111; 1400 | self.set_reg(inst.rd, 1401 | (rs1 << shamt) as i32 as i64 as u64); 1402 | } 
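// The `as i32 as i64 as u64` chains used by these W-suffixed arms
// implement RV64's rule that 32-bit results are written back
// sign-extended to 64 bits. A minimal illustration of the idiom
// (`sext32` is a hypothetical helper, not part of this file):
//
//     fn sext32(x: u32) -> u64 {
//         // Reinterpret as signed, widen with sign extension, then
//         // reinterpret the 64-bit pattern as unsigned
//         x as i32 as i64 as u64
//     }
//
//     assert_eq!(sext32(0x8000_0000), 0xffff_ffff_8000_0000);
//     assert_eq!(sext32(0x0000_0001), 0x0000_0000_0000_0001);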
1403 | (0b0000000, 0b101) => { 1404 | // SRLW 1405 | let shamt = rs2 & 0b11111; 1406 | self.set_reg(inst.rd, 1407 | (rs1 >> shamt) as i32 as i64 as u64); 1408 | } 1409 | (0b0100000, 0b101) => { 1410 | // SRAW 1411 | let shamt = rs2 & 0b11111; 1412 | self.set_reg(inst.rd, 1413 | ((rs1 as i32) >> shamt) as i64 as u64); 1414 | } 1415 | (0b0000001, 0b000) => { 1416 | // MULW 1417 | self.set_reg(inst.rd, 1418 | (rs1 as u32).wrapping_mul(rs2 as u32) 1419 | as i32 as u64); 1420 | } 1421 | (0b0000001, 0b100) => { 1422 | // DIVW 1423 | let rs1 = rs1 as i32; 1424 | let rs2 = rs2 as i32; 1425 | let val = if rs2 == 0 { 1426 | -1 1427 | } else { 1428 | rs1.wrapping_div(rs2) 1429 | }; 1430 | self.set_reg(inst.rd, val as i32 as u64); 1431 | } 1432 | (0b0000001, 0b101) => { 1433 | // DIVUW 1434 | let rs1 = rs1 as u32; 1435 | let rs2 = rs2 as u32; 1436 | let val = if rs2 == 0 { 1437 | core::u32::MAX 1438 | } else { 1439 | rs1.wrapping_div(rs2) 1440 | }; 1441 | self.set_reg(inst.rd, val as i32 as u64); 1442 | } 1443 | (0b0000001, 0b110) => { 1444 | // REMW 1445 | let rs1 = rs1 as i32; 1446 | let rs2 = rs2 as i32; 1447 | let val = if rs2 == 0 { 1448 | rs1 1449 | } else { 1450 | rs1.wrapping_rem(rs2) 1451 | }; 1452 | self.set_reg(inst.rd, val as i32 as u64); 1453 | } 1454 | (0b0000001, 0b111) => { 1455 | // REMUW 1456 | let rs1 = rs1 as u32; 1457 | let rs2 = rs2 as u32; 1458 | let val = if rs2 == 0 { 1459 | rs1 1460 | } else { 1461 | rs1.wrapping_rem(rs2) 1462 | }; 1463 | self.set_reg(inst.rd, val as i32 as u64); 1464 | } 1465 | _ => unreachable!(), 1466 | } 1467 | } 1468 | 0b0001111 => { 1469 | let inst = Itype::from(inst); 1470 | 1471 | match inst.funct3 { 1472 | 0b000 => { 1473 | // FENCE 1474 | } 1475 | _ => unreachable!(), 1476 | } 1477 | } 1478 | 0b1110011 => { 1479 | if inst == 0b00000000000000000000000001110011 { 1480 | // ECALL 1481 | return Err(VmExit::Syscall); 1482 | } else if inst == 0b00000000000100000000000001110011 { 1483 | // EBREAK 1484 | return Err(VmExit::Ebreak); 1485 | } else { 1486 | unreachable!(); 1487 | } 1488 | } 1489 | 0b0011011 => { 1490 | // We know it's an Itype 1491 | let inst = Itype::from(inst); 1492 | 1493 | let rs1 = self.reg(inst.rs1) as u32; 1494 | let imm = inst.imm as u32; 1495 | 1496 | match inst.funct3 { 1497 | 0b000 => { 1498 | // ADDIW 1499 | self.set_reg(inst.rd, 1500 | rs1.wrapping_add(imm) as i32 as i64 as u64); 1501 | } 1502 | 0b001 => { 1503 | let mode = (inst.imm >> 5) & 0b1111111; 1504 | 1505 | match mode { 1506 | 0b0000000 => { 1507 | // SLLIW 1508 | let shamt = inst.imm & 0b11111; 1509 | self.set_reg(inst.rd, 1510 | (rs1 << shamt) as i32 as i64 as u64); 1511 | } 1512 | _ => unreachable!(), 1513 | } 1514 | } 1515 | 0b101 => { 1516 | let mode = (inst.imm >> 5) & 0b1111111; 1517 | 1518 | match mode { 1519 | 0b0000000 => { 1520 | // SRLIW 1521 | let shamt = inst.imm & 0b11111; 1522 | self.set_reg(inst.rd, 1523 | (rs1 >> shamt) as i32 as i64 as u64) 1524 | } 1525 | 0b0100000 => { 1526 | // SRAIW 1527 | let shamt = inst.imm & 0b11111; 1528 | self.set_reg(inst.rd, 1529 | ((rs1 as i32) >> shamt) as i64 as u64); 1530 | } 1531 | _ => unreachable!(), 1532 | } 1533 | } 1534 | _ => unreachable!(), 1535 | } 1536 | } 1537 | _ => unimplemented!("Unhandled opcode {:#09b}\n", opcode), 1538 | } 1539 | 1540 | // Update PC to the next instruction 1541 | self.set_reg(Register::Pc, pc.wrapping_add(4)); 1542 | } 1543 | } 1544 | 1545 | /// Run the VM using the JIT 1546 | pub fn run_jit(&mut self, instrs_execed: &mut u64, 1547 | vm_cycles: &mut u64, corpus: &Corpus) 1548 | -> 
Result<(), VmExit> { 1549 | // Get the JIT addresses 1550 | let (memory, perms, dirty, dirty_bitmap) = self.memory.jit_addrs(); 1551 | 1552 | loop { 1553 | // Get the current PC 1554 | let pc = self.reg(Register::Pc); 1555 | let jit_addr = { 1556 | let jit_cache = self.jit_cache.as_ref().unwrap(); 1557 | jit_cache.lookup(VirtAddr(pc as usize)) 1558 | }; 1559 | 1560 | let jit_addr = if let Some(jit_addr) = jit_addr { 1561 | jit_addr 1562 | } else { 1563 | // Generate the JIT for this PC 1564 | let (jit, entry_points) = 1565 | self.compile_jit(VirtAddr(pc as usize), corpus)?; 1566 | 1567 | // Update the JIT tables 1568 | self.jit_cache.as_ref().unwrap().add_mappings( 1569 | VirtAddr(pc as usize), &jit, &entry_points) 1570 | }; 1571 | 1572 | // Set up the JIT state 1573 | let jit_cache = self.jit_cache.as_ref().unwrap(); 1574 | self.state.instrs_execed = *instrs_execed; 1575 | self.state.memory = memory; 1576 | self.state.permissions = perms; 1577 | self.state.dirty = dirty; 1578 | self.state.dirty_idx = self.memory.dirty_len(); 1579 | self.state.dirty_bitmap = dirty_bitmap; 1580 | self.state.trace_buffer = self.trace.as_ptr() as usize; 1581 | self.state.trace_idx = self.trace.len(); 1582 | self.state.trace_len = self.trace.capacity(); 1583 | self.state.blocks = jit_cache.translation_table(); 1584 | self.state.blocks_len = jit_cache.num_blocks(); 1585 | self.state.cov_table = 1586 | corpus.coverage_table.as_ptr() as usize; 1587 | 1588 | unsafe { 1589 | // Create a function pointer to the JIT 1590 | let func = 1591 | *(&jit_addr as *const usize as 1592 | *const fn(&mut GuestState)); 1593 | 1594 | // Invoke the JIT 1595 | let it = rdtsc(); 1596 | func(&mut self.state); 1597 | *vm_cycles += rdtsc() - it; 1598 | } 1599 | 1600 | // Update instructions executed from JIT state 1601 | *instrs_execed = self.state.instrs_execed; 1602 | 1603 | // Update the PC reentry point 1604 | self.set_reg(Register::Pc, self.state.reenter_pc); 1605 | 1606 | unsafe { 1607 | // Update trace length 1608 | self.trace.set_len(self.state.trace_idx); 1609 | 1610 | // Update the dirty state 1611 | self.memory.set_dirty_len(self.state.dirty_idx); 1612 | } 1613 | 1614 | match self.state.exit_reason { 1615 | ExitReason::None => unreachable!(), 1616 | ExitReason::CallStackFull => { 1617 | return Err(VmExit::CallStackFull); 1618 | } 1619 | ExitReason::CmpCoverage => { 1620 | self.notify_compare_coverage(corpus); 1621 | } 1622 | ExitReason::Coverage => { 1623 | self.notify_code_coverage(corpus, 1624 | self.state.cov_from, self.state.cov_to); 1625 | } 1626 | ExitReason::IndirectBranch => { 1627 | // Just fall through to translate to JIT 1628 | } 1629 | ExitReason::Ebreak => { 1630 | // RISC-V breakpoint instruction 1631 | return Err(VmExit::Ebreak); 1632 | } 1633 | ExitReason::Ecall => { 1634 | // Syscall 1635 | return Err(VmExit::Syscall); 1636 | } 1637 | ExitReason::ReadFault => { 1638 | // Read fault 1639 | // The JIT reports the address of the base of the 1640 | // access, invoke the emulator to get the specific 1641 | // byte which caused the fault 1642 | return self.run_emu(instrs_execed, corpus); 1643 | } 1644 | ExitReason::WriteFault => { 1645 | // Write fault 1646 | // The JIT reports the address of the base of the 1647 | // access, invoke the emulator to get the specific 1648 | // byte which caused the fault 1649 | return self.run_emu(instrs_execed, corpus); 1650 | } 1651 | ExitReason::Timeout => { 1652 | // Hit the instruction count timeout 1653 | return Err(VmExit::Timeout); 1654 | } 1655 | ExitReason::Breakpoint => { 
1656 | // Hit breakpoint, invoke callback 1657 | let pc = VirtAddr(self.state.reenter_pc as usize); 1658 | if let Some(callback) = self.breakpoints.get(&pc) { 1659 | callback(self)?; 1660 | } 1661 | 1662 | if self.reg(Register::Pc) == self.state.reenter_pc { 1663 | // The callback did not move PC off the breakpoint; resuming 1664 | // here would just re-enter the stub, which is not handled yet 1665 | panic!("WAT"); 1666 | } else { 1667 | // PC was changed by the breakpoint handler, 1668 | // thus we respect its change and will jump 1669 | // to the target it specified 1670 | } 1671 | } 1672 | ExitReason::InvalidOpcode => { 1673 | // An invalid opcode was executed 1674 | return Err(VmExit::InvalidOpcode); 1675 | } 1676 | } 1677 | } 1678 | } 1679 | 1680 | /// Compile a JIT function for `pc` until all paths lead to indirect 1681 | /// jumps or calls 1682 | pub fn compile_jit(&mut self, pc: VirtAddr, corpus: &Corpus) 1683 | -> Result<(Vec<u8>, BTreeMap<VirtAddr, usize>), VmExit> { 1684 | let mut visited = BTreeSet::new(); 1685 | let mut queued = VecDeque::new(); 1686 | 1687 | // Insert the program counter into the queue 1688 | queued.push_back(pc); 1689 | 1690 | let mut program = String::new(); 1691 | 1692 | macro_rules! set_reg { 1693 | ($reg:expr, $expr:expr) => { 1694 | if $reg != Register::Zero { 1695 | program += &format!(" state->regs[{}] = {};\n", 1696 | $reg as usize, $expr); 1697 | } 1698 | } 1699 | } 1700 | 1701 | macro_rules! get_reg { 1702 | ($expr:expr, $reg:expr) => { 1703 | if $reg == Register::Zero { 1704 | program += &format!(" {} = 0x0ULL;\n", $expr); 1705 | } else { 1706 | program += &format!(" {} = state->regs[{}];\n", 1707 | $expr, $reg as usize); 1708 | } 1709 | } 1710 | } 1711 | 1712 | macro_rules! set_regw { 1713 | ($reg:expr, $expr:expr) => { 1714 | if $reg != Register::Zero { 1715 | program += 1716 | &format!(" state->regs[{}] = (int32_t)({});\n", 1717 | $reg as usize, $expr); 1718 | } 1719 | } 1720 | } 1721 | 1722 | macro_rules! get_regw { 1723 | ($expr:expr, $reg:expr) => { 1724 | if $reg == Register::Zero { 1725 | program += &format!(" {} = 0x0U;\n", $expr); 1726 | } else { 1727 | program += 1728 | &format!(" {} = (uint32_t)state->regs[{}];\n", 1729 | $expr, $reg as usize); 1730 | } 1731 | } 1732 | } 1733 | 1734 | macro_rules! compare_coverage { 1735 | ($a:expr, $b:expr) => { 1736 | if COMPARE_COVERAGE { 1737 | // Create a bitmap indicating which bytes in rs1 and 1738 | // rs2 match 1739 | program += &format!("auto tmp1 = ({}) ^ (~({}));", 1740 | $a, $b); 1741 | program += "auto tmp2 = (tmp1 >> 1) & tmp1;"; 1742 | program += "auto tmp3 = (tmp2 >> 2) & tmp2;"; 1743 | program += "auto tmp4 = (tmp3 >> 4) & tmp3;"; 1744 | program += "auto res = tmp4 & 0x0101010101010101ULL;"; 1745 | 1746 | // Register the coverage as compare coverage for this 1747 | // PC with the bitmask we identified 1748 | coverage_event!("CmpCoverage", 1749 | format!( 1750 | "{:#x}ULL ^ (state->call_stack_hash & 0xf) ^ \ 1751 | (state->path_hash & 0xf)", pc.0), "res", false); 1752 | } 1753 | } 1754 | } 1755 | 1756 | macro_rules! 
indirect_branch { 1757 | ($target:expr) => { 1758 | program += &format!(r#" 1759 | {{ 1760 | // Look up the JIT address for the target PC 1761 | if(({target} / 4) < state->blocks_len) {{ 1762 | auto indir_target_addr = state->blocks[{target} / 4]; 1763 | if(indir_target_addr > 0) {{ 1764 | // We know where to branch, just jump to it directly 1765 | void (*indir_target)(struct _state *__restrict const state) = 1766 | (void (*)(struct _state *__restrict const state))indir_target_addr; 1767 | return indir_target(state); 1768 | }} 1769 | }} 1770 | 1771 | state->exit_reason = IndirectBranch; 1772 | state->reenter_pc = {target}; 1773 | return; 1774 | }} 1775 | "#, target = $target); 1776 | } 1777 | } 1778 | 1779 | // C++ function declarations 1780 | let mut decls = String::new(); 1781 | 1782 | // Translates a guest virtual address into the offset of the JIT 1783 | // buffer. This tells you where to enter the JIT for certain functions 1784 | let mut inst_offsets = BTreeMap::new(); 1785 | 1786 | while let Some(pc) = queued.pop_front() { 1787 | // Attempt to notify of a coverage edge ($from, $to) 1788 | // Note: This will cause the current instruction to be re-executed 1789 | // if the coverage is new. Thus, it is critical that no side 1790 | // effects occur prior to the coverage_event!() macro use. 1791 | macro_rules! coverage_event { 1792 | ($cov_source:expr, $from:expr, $to:expr, $oneshot:expr) => { 1793 | if CODE_COVERAGE { 1794 | program += &format!( 1795 | r#"{{ 1796 | // Check for timeout 1797 | if(state->instrs_execed > state->timeout) {{ 1798 | state->exit_reason = Timeout; 1799 | state->reenter_pc = {pc:#x}ULL; 1800 | return; 1801 | }} 1802 | 1803 | static char reported = 0; 1804 | if(!{oneshot} || !reported) {{ 1805 | reported = 1; 1806 | if(report_coverage(state, {cov_source}, {from}, {to}, {pc})) {{ 1807 | return; 1808 | }} 1809 | }} 1810 | }}"#, from = $from, to = $to, pc = pc.0, oneshot = $oneshot, 1811 | cov_source = $cov_source); 1812 | } 1813 | } 1814 | } 1815 | 1816 | if !visited.insert(pc) { 1817 | // Already JITted this PC 1818 | continue; 1819 | } 1820 | 1821 | // Check alignment 1822 | if pc.0 & 3 != 0 { 1823 | // Code was unaligned, return a code fetch fault 1824 | return Err(VmExit::ExecFault(pc)); 1825 | } 1826 | 1827 | // Read the instruction 1828 | let inst: u32 = self.memory.read_perms(pc, Perm(PERM_EXEC)) 1829 | .map_err(|x| VmExit::ExecFault(x.is_crash().unwrap().1))?; 1830 | 1831 | // Create the instruction function 1832 | program += &format!("extern \"C\" void inst_{:016x}(\ 1833 | struct _state *__restrict const state) {{\n", pc.0); 1834 | decls += &format!("extern \"C\" void inst_{:016x}(\ 1835 | struct _state *__restrict const state);\n", pc.0); 1836 | 1837 | // Create an unresolved instruction offset 1838 | inst_offsets.insert(pc, !0); 1839 | 1840 | // Update instructions executed stats 1841 | //program += " state->instrs_execed += 1;\n"; 1842 | 1843 | if ENABLE_TRACING { 1844 | program += &format!(r#" 1845 | if (state->trace_idx >= state->trace_len) {{ 1846 | __builtin_trap(); 1847 | }} 1848 | for(int ii = 0; ii < 32; ii++) {{ 1849 | state->trace_buffer[state->trace_idx * 33 + ii] = state->regs[ii]; 1850 | }} 1851 | state->trace_buffer[state->trace_idx * 33 + 32] = {:#x}ULL; 1852 | state->trace_idx++; 1853 | "#, pc.0); 1854 | } 1855 | 1856 | // Insert breakpoint if needed 1857 | if self.breakpoints.contains_key(&pc) { 1858 | program += &format!(r#" 1859 | state->exit_reason = Breakpoint; 1860 | state->reenter_pc = {:#x}ULL; 1861 | return; 1862 | "#, pc.0); 
1863 | } 1864 | 1865 | // Extract the opcode from the instruction 1866 | let opcode = inst & 0b1111111; 1867 | 1868 | match opcode { 1869 | 0b0110111 => { 1870 | // LUI 1871 | let inst = Utype::from(inst); 1872 | set_reg!(inst.rd, 1873 | format!("{:#x}ULL", inst.imm as i64 as u64)); 1874 | } 1875 | 0b0010111 => { 1876 | // AUIPC 1877 | let inst = Utype::from(inst); 1878 | let val = 1879 | (inst.imm as i64 as u64).wrapping_add(pc.0 as u64); 1880 | set_reg!(inst.rd, format!("{:#x}ULL", val)); 1881 | } 1882 | 0b1101111 => { 1883 | // JAL 1884 | let inst = Jtype::from(inst); 1885 | let retaddr = pc.0.wrapping_add(4); 1886 | let target = pc.0.wrapping_add(inst.imm as i64 as usize); 1887 | 1888 | // Record coverage 1889 | coverage_event!("Coverage", 1890 | format!("{:#x}ULL", pc.0), 1891 | format!("{:#x}ULL", target), true); 1892 | 1893 | if USE_CALL_STACK && inst.rd == Register::Ra { 1894 | program += &format!(r#" 1895 | if(state->call_stack_ents >= {MAX_CALL_STACK}) {{ 1896 | state->exit_reason = CallStackFull; 1897 | state->reenter_pc = {pc:#x}ULL; 1898 | return; 1899 | }} 1900 | 1901 | state->call_stack[state->call_stack_ents++] = {retaddr:#x}ULL; 1902 | state->call_stack_hash = 1903 | rotl64(state->call_stack_hash, 7) ^ {retaddr:#x}ULL; 1904 | "#, MAX_CALL_STACK = MAX_CALL_STACK, pc = pc.0, retaddr = retaddr); 1905 | } 1906 | 1907 | // Set the return address 1908 | set_reg!(inst.rd, retaddr); 1909 | 1910 | if inst.rd == Register::Zero { 1911 | // Unconditional branch == jal with an rd = zero 1912 | program += &format!("return inst_{:016x}(state);\n", 1913 | target); 1914 | queued.push_back(VirtAddr(target)); 1915 | } else { 1916 | // Function call, treat as an indirect branch to 1917 | // avoid inlining boatloads of function calls into 1918 | // their parents. 
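// The indirect_branch! expansion below first probes state->blocks
// (the JIT translation table indexed by guest PC / 4) and tail-calls
// the target block if it has already been compiled; only on a miss
// does it exit with IndirectBranch so run_jit() can compile the
// target and re-enter.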
1919 | indirect_branch!(format!("{:#x}ULL", target)); 1920 | } 1921 | 1922 | program += "}\n"; 1923 | continue; 1924 | } 1925 | 0b1100111 => { 1926 | // We know it's an Itype 1927 | let inst = Itype::from(inst); 1928 | 1929 | match inst.funct3 { 1930 | 0b000 => { 1931 | // JALR 1932 | let retaddr = pc.0.wrapping_add(4); 1933 | get_reg!("auto target", inst.rs1); 1934 | program += &format!(" target += {:#x}ULL;\n", 1935 | inst.imm as i64 as u64); 1936 | 1937 | if USE_CALL_STACK { 1938 | program += &format!(r#" 1939 | if(state->call_stack_ents > 0) {{ 1940 | auto cse = state->call_stack_ents - 1; 1941 | if(target == state->call_stack[cse]) {{ 1942 | state->call_stack_hash = 1943 | rotr64(state->call_stack_hash ^ target, 7); 1944 | state->call_stack_ents -= 1; 1945 | }} 1946 | }} 1947 | "#); 1948 | } 1949 | 1950 | // Record coverage 1951 | coverage_event!("Coverage", 1952 | format!("{:#x}ULL", pc.0), 1953 | "target", false); 1954 | 1955 | // Set the return address 1956 | set_reg!(inst.rd, retaddr); 1957 | 1958 | indirect_branch!("target"); 1959 | program += "}\n"; 1960 | continue; 1961 | } 1962 | _ => unimplemented!("Unexpected 0b1100111"), 1963 | } 1964 | } 1965 | 0b1100011 => { 1966 | // We know it's an Btype 1967 | let inst = Btype::from(inst); 1968 | 1969 | let (cmptyp, cmpop) = match inst.funct3 { 1970 | 0b000 => /* BEQ */ ("int64_t", "=="), 1971 | 0b001 => /* BNE */ ("int64_t", "!="), 1972 | 0b100 => /* BLT */ ("int64_t", "<"), 1973 | 0b101 => /* BGE */ ("int64_t", ">="), 1974 | 0b110 => /* BLTU */ ("uint64_t", "<"), 1975 | 0b111 => /* BGEU */ ("uint64_t", ">="), 1976 | _ => unimplemented!("Unexpected 0b1100011"), 1977 | }; 1978 | 1979 | // Compute branch target 1980 | let target = pc.0.wrapping_add(inst.imm as i64 as usize); 1981 | 1982 | get_reg!("auto rs1", inst.rs1); 1983 | get_reg!("auto rs2", inst.rs2); 1984 | 1985 | // Generate compare coverage 1986 | compare_coverage!("rs1", "rs2"); 1987 | 1988 | program += &format!(" if (({})rs1 {} ({})rs2) {{\n", 1989 | cmptyp, cmpop, cmptyp); 1990 | 1991 | // Record coverage for true condition 1992 | coverage_event!("Coverage", 1993 | format!("{:#x}ULL", pc.0), 1994 | format!("{:#x}ULL", target), true); 1995 | 1996 | program += 1997 | &format!(" return inst_{:016x}(state);\n", 1998 | target); 1999 | program += " }\n"; 2000 | 2001 | // Record coverage for false condition 2002 | coverage_event!("Coverage", 2003 | format!("{:#x}ULL", pc.0), 2004 | format!("{:#x}ULL", pc.0.wrapping_add(4)), true); 2005 | 2006 | // Queue exploration of this target 2007 | queued.push_back(VirtAddr(target)); 2008 | } 2009 | 0b0000011 => { 2010 | // We know it's an Itype 2011 | let inst = Itype::from(inst); 2012 | 2013 | let (loadtyp, access_size) = match inst.funct3 { 2014 | 0b000 => /* LB */ ("int8_t", 1), 2015 | 0b001 => /* LH */ ("int16_t", 2), 2016 | 0b010 => /* LW */ ("int32_t", 4), 2017 | 0b011 => /* LD */ ("int64_t", 8), 2018 | 0b100 => /* LBU */ ("uint8_t", 1), 2019 | 0b101 => /* LHU */ ("uint16_t", 2), 2020 | 0b110 => /* LWU */ ("uint32_t", 4), 2021 | _ => unreachable!(), 2022 | }; 2023 | 2024 | // Compute the read permission mask 2025 | let mut perm_mask = 0u64; 2026 | let mut access_mask = 0u64; 2027 | for ii in 0..access_size { 2028 | perm_mask |= (PERM_READ as u64) << (ii * 8); 2029 | access_mask |= (PERM_ACC as u64) << (ii * 8); 2030 | } 2031 | 2032 | // Compute the address 2033 | get_reg!("auto addr", inst.rs1); 2034 | program += &format!(" addr += {:#x}ULL;\n", 2035 | inst.imm as i64 as u64); 2036 | 2037 | // Check the bounds and permissions of the 
address 2038 | program += &format!(r#" 2039 | if(addr > {}ULL - sizeof({}) || 2040 | (*({}*)(state->permissions + addr) & {:#x}ULL) != {:#x}ULL) {{ 2041 | state->exit_reason = ReadFault; 2042 | state->reenter_pc = {:#x}ULL; 2043 | return; 2044 | }} 2045 | 2046 | /* 2047 | // Set the accessed bits 2048 | auto perms = *({}*)(state->permissions + addr); 2049 | *({}*)(state->permissions + addr) |= {:#x}ULL; 2050 | 2051 | auto block = addr / {}; 2052 | auto idx = block / 64; 2053 | auto bit = 1ULL << (block % 64); 2054 | if((state->dirty_bitmap[idx] & bit) == 0) {{ 2055 | state->dirty[state->dirty_idx++] = block; 2056 | state->dirty_bitmap[idx] |= bit; 2057 | }}*/ 2058 | "#, self.memory.len(), loadtyp, loadtyp, perm_mask, perm_mask, pc.0, 2059 | loadtyp, loadtyp, access_mask, DIRTY_BLOCK_SIZE); 2060 | 2061 | set_reg!(inst.rd, format!("*({}*)(state->memory + addr)", 2062 | loadtyp)); 2063 | } 2064 | 0b0100011 => { 2065 | // We know it's an Stype 2066 | let inst = Stype::from(inst); 2067 | 2068 | let (storetyp, access_size) = 2069 | match inst.funct3 { 2070 | 0b000 => /* SB */ ("uint8_t", 1), 2071 | 0b001 => /* SH */ ("uint16_t", 2), 2072 | 0b010 => /* SW */ ("uint32_t", 4), 2073 | 0b011 => /* SD */ ("uint64_t", 8), 2074 | _ => unreachable!(), 2075 | }; 2076 | 2077 | // Compute the write permission mask and the RAW permission 2078 | // mask 2079 | let mut perm_mask = 0u64; 2080 | let mut raw_mask = 0u64; 2081 | for ii in 0..access_size { 2082 | perm_mask |= (PERM_WRITE as u64) << (ii * 8); 2083 | raw_mask |= (PERM_RAW as u64) << (ii * 8); 2084 | } 2085 | 2086 | // Compute the address 2087 | get_reg!("auto addr", inst.rs1); 2088 | program += &format!(" addr += {:#x}ULL;\n", 2089 | inst.imm as i64 as u64); 2090 | 2091 | // Check the bounds and permissions of the address 2092 | program += &format!(r#" 2093 | if(addr > {}ULL - sizeof({}) || 2094 | (*({}*)(state->permissions + addr) & {:#x}ULL) != {:#x}ULL) {{ 2095 | state->exit_reason = WriteFault; 2096 | state->reenter_pc = {:#x}ULL; 2097 | return; 2098 | }} 2099 | 2100 | // Enable reads for memory with RAW set 2101 | auto perms = *({}*)(state->permissions + addr); 2102 | perms &= {:#x}ULL; 2103 | *({}*)(state->permissions + addr) |= perms >> 3; 2104 | 2105 | auto block = addr / {}; 2106 | auto idx = block / 64; 2107 | auto bit = 1ULL << (block % 64); 2108 | if((state->dirty_bitmap[idx] & bit) == 0) {{ 2109 | state->dirty[state->dirty_idx++] = block; 2110 | state->dirty_bitmap[idx] |= bit; 2111 | }} 2112 | "#, self.memory.len(), 2113 | storetyp, storetyp, perm_mask, perm_mask, pc.0, storetyp, raw_mask, 2114 | storetyp, DIRTY_BLOCK_SIZE); 2115 | 2116 | // Write the memory! 2117 | get_reg!(format!("*({}*)(state->memory + addr)", 2118 | storetyp), inst.rs2); 2119 | } 2120 | 0b0010011 => { 2121 | // We know it's an Itype 2122 | let inst = Itype::from(inst); 2123 | 2124 | match inst.funct3 { 2125 | 0b000 => { 2126 | // ADDI 2127 | get_reg!("auto rs1", inst.rs1); 2128 | set_reg!(inst.rd, format!("rs1 + {:#x}ULL", 2129 | inst.imm as i64 as u64)); 2130 | } 2131 | 0b010 => { 2132 | // SLTI 2133 | get_reg!("auto rs1", inst.rs1); 2134 | 2135 | // Compare coverage 2136 | compare_coverage!("rs1", 2137 | format!("{:#x}ULL", inst.imm as u64)); 2138 | 2139 | // Cast the immediate back to signed so a negative 2140 | // immediate (emitted as 0xfff...LL, which C++ types 2141 | // as unsigned) cannot force an unsigned compare 2142 | set_reg!(inst.rd, 2143 | format!("((int64_t)rs1 < (int64_t){:#x}LL) ? 
1 : 0", 2141 | inst.imm as i64)); 2142 | } 2143 | 0b011 => { 2144 | // SLTIU 2145 | get_reg!("auto rs1", inst.rs1); 2146 | 2147 | // Compare coverage 2148 | compare_coverage!("rs1", 2149 | format!("{:#x}ULL", inst.imm as u64)); 2150 | 2151 | set_reg!(inst.rd, 2152 | format!("((uint64_t)rs1 < {:#x}ULL) ? 1 : 0", 2153 | inst.imm as i64 as u64)); 2154 | } 2155 | 0b100 => { 2156 | // XORI 2157 | get_reg!("auto rs1", inst.rs1); 2158 | set_reg!(inst.rd, format!("rs1 ^ {:#x}ULL", 2159 | inst.imm as i64 as u64)); 2160 | } 2161 | 0b110 => { 2162 | // ORI 2163 | get_reg!("auto rs1", inst.rs1); 2164 | set_reg!(inst.rd, format!("rs1 | {:#x}ULL", 2165 | inst.imm as i64 as u64)); 2166 | } 2167 | 0b111 => { 2168 | // ANDI 2169 | get_reg!("auto rs1", inst.rs1); 2170 | set_reg!(inst.rd, format!("rs1 & {:#x}ULL", 2171 | inst.imm as i64 as u64)); 2172 | } 2173 | 0b001 => { 2174 | let mode = (inst.imm >> 6) & 0b111111; 2175 | 2176 | match mode { 2177 | 0b000000 => { 2178 | // SLLI 2179 | let shamt = inst.imm & 0b111111; 2180 | get_reg!("auto rs1", inst.rs1); 2181 | set_reg!(inst.rd, format!("rs1 << {}", 2182 | shamt)); 2183 | } 2184 | _ => unreachable!(), 2185 | } 2186 | } 2187 | 0b101 => { 2188 | let mode = (inst.imm >> 6) & 0b111111; 2189 | 2190 | match mode { 2191 | 0b000000 => { 2192 | // SRLI 2193 | let shamt = inst.imm & 0b111111; 2194 | get_reg!("auto rs1", inst.rs1); 2195 | set_reg!(inst.rd, format!("rs1 >> {}", 2196 | shamt)); 2197 | } 2198 | 0b010000 => { 2199 | // SRAI 2200 | let shamt = inst.imm & 0b111111; 2201 | get_reg!("auto rs1", inst.rs1); 2202 | set_reg!(inst.rd, 2203 | format!("(int64_t)rs1 >> {}", 2204 | shamt)); 2205 | } 2206 | _ => unreachable!(), 2207 | } 2208 | } 2209 | _ => unreachable!(), 2210 | } 2211 | } 2212 | 0b0110011 => { 2213 | // We know it's an Rtype 2214 | let inst = Rtype::from(inst); 2215 | 2216 | match (inst.funct7, inst.funct3) { 2217 | (0b0000000, 0b000) => { 2218 | // ADD 2219 | get_reg!("auto rs1", inst.rs1); 2220 | get_reg!("auto rs2", inst.rs2); 2221 | set_reg!(inst.rd, "rs1 + rs2"); 2222 | } 2223 | (0b0100000, 0b000) => { 2224 | // SUB 2225 | get_reg!("auto rs1", inst.rs1); 2226 | get_reg!("auto rs2", inst.rs2); 2227 | set_reg!(inst.rd, "rs1 - rs2"); 2228 | } 2229 | (0b0000000, 0b001) => { 2230 | // SLL 2231 | get_reg!("auto rs1", inst.rs1); 2232 | get_reg!("auto rs2", inst.rs2); 2233 | set_reg!(inst.rd, "rs1 << (rs2 & 0x3f)"); 2234 | } 2235 | (0b0000000, 0b010) => { 2236 | // SLT 2237 | get_reg!("auto rs1", inst.rs1); 2238 | get_reg!("auto rs2", inst.rs2); 2239 | 2240 | // Compare coverage 2241 | compare_coverage!("rs1", "rs2"); 2242 | 2243 | set_reg!(inst.rd, 2244 | "((int64_t)rs1 < (int64_t)rs2) ? 1 : 0"); 2245 | } 2246 | (0b0000000, 0b011) => { 2247 | // SLTU 2248 | get_reg!("auto rs1", inst.rs1); 2249 | get_reg!("auto rs2", inst.rs2); 2250 | 2251 | // Compare coverage 2252 | compare_coverage!("rs1", "rs2"); 2253 | 2254 | set_reg!(inst.rd, 2255 | "((uint64_t)rs1 < (uint64_t)rs2) ? 
1 : 0"); 2256 | } 2257 | (0b0000000, 0b100) => { 2258 | // XOR 2259 | get_reg!("auto rs1", inst.rs1); 2260 | get_reg!("auto rs2", inst.rs2); 2261 | set_reg!(inst.rd, "rs1 ^ rs2"); 2262 | } 2263 | (0b0000000, 0b101) => { 2264 | // SRL 2265 | get_reg!("auto rs1", inst.rs1); 2266 | get_reg!("auto rs2", inst.rs2); 2267 | set_reg!(inst.rd, "rs1 >> (rs2 & 0x3f)"); 2268 | } 2269 | (0b0100000, 0b101) => { 2270 | // SRA 2271 | get_reg!("auto rs1", inst.rs1); 2272 | get_reg!("auto rs2", inst.rs2); 2273 | set_reg!(inst.rd, 2274 | "(int64_t)rs1 >> ((int64_t)rs2 & 0x3f)"); 2275 | } 2276 | (0b0000000, 0b110) => { 2277 | // OR 2278 | get_reg!("auto rs1", inst.rs1); 2279 | get_reg!("auto rs2", inst.rs2); 2280 | set_reg!(inst.rd, "rs1 | rs2"); 2281 | } 2282 | (0b0000000, 0b111) => { 2283 | // AND 2284 | get_reg!("auto rs1", inst.rs1); 2285 | get_reg!("auto rs2", inst.rs2); 2286 | set_reg!(inst.rd, "rs1 & rs2"); 2287 | } 2288 | (0b0000001, 0b000) => { 2289 | // MUL 2290 | get_reg!("auto rs1", inst.rs1); 2291 | get_reg!("auto rs2", inst.rs2); 2292 | set_reg!(inst.rd, "rs1 * rs2"); 2293 | } 2294 | (0b0000001, 0b001) => { 2295 | // MULH 2296 | get_reg!("auto rs1", inst.rs1); 2297 | get_reg!("auto rs2", inst.rs2); 2298 | set_reg!(inst.rd, 2299 | "((uint128_t)(int64_t)rs1 * \ 2300 | (uint128_t)(int64_t)rs2) >> 64"); 2301 | } 2302 | (0b0000001, 0b010) => { 2303 | // MULHSU 2304 | get_reg!("auto rs1", inst.rs1); 2305 | get_reg!("auto rs2", inst.rs2); 2306 | set_reg!(inst.rd, 2307 | "((uint128_t)(int64_t)rs1 * \ 2308 | (uint128_t)(uint64_t)rs2) >> 64"); 2309 | } 2310 | (0b0000001, 0b011) => { 2311 | // MULHU 2312 | get_reg!("auto rs1", inst.rs1); 2313 | get_reg!("auto rs2", inst.rs2); 2314 | set_reg!(inst.rd, 2315 | "((uint128_t)(uint64_t)rs1 * \ 2316 | (uint128_t)(uint64_t)rs2) >> 64"); 2317 | } 2318 | (0b0000001, 0b100) => { 2319 | // DIV 2320 | get_reg!("auto rs1", inst.rs1); 2321 | get_reg!("auto rs2", inst.rs2); 2322 | set_reg!(inst.rd, 2323 | "rs2 ? (((int64_t)rs1 == INT64_MIN && \ 2324 | (int64_t)rs2 == -1) ? \ 2325 | INT64_MIN : (int64_t)rs1 / (int64_t)rs2)\ 2326 | : -1"); 2327 | } 2328 | (0b0000001, 0b101) => { 2329 | // DIVU 2330 | get_reg!("auto rs1", inst.rs1); 2331 | get_reg!("auto rs2", inst.rs2); 2332 | set_reg!(inst.rd, "rs2 ? rs1 / rs2 : UINT64_MAX") 2333 | } 2334 | (0b0000001, 0b110) => { 2335 | // REM 2336 | get_reg!("auto rs1", inst.rs1); 2337 | get_reg!("auto rs2", inst.rs2); 2338 | set_reg!(inst.rd, 2339 | "rs2 ? (((int64_t)rs1 == INT64_MIN && \ 2340 | (int64_t)rs2 == -1) ? \ 2341 | 0 : (int64_t)rs1 % (int64_t)rs2) : rs1"); 2342 | } 2343 | (0b0000001, 0b111) => { 2344 | // REMU 2345 | get_reg!("auto rs1", inst.rs1); 2346 | get_reg!("auto rs2", inst.rs2); 2347 | set_reg!(inst.rd, "rs2 ? 
rs1 % rs2 : rs1") 2348 | } 2349 | _ => unreachable!(), 2350 | } 2351 | } 2352 | 0b0111011 => { 2353 | // We know it's an Rtype 2354 | let inst = Rtype::from(inst); 2355 | 2356 | match (inst.funct7, inst.funct3) { 2357 | (0b0000000, 0b000) => { 2358 | // ADDW 2359 | get_regw!("auto rs1", inst.rs1); 2360 | get_regw!("auto rs2", inst.rs2); 2361 | set_regw!(inst.rd, "rs1 + rs2"); 2362 | } 2363 | (0b0100000, 0b000) => { 2364 | // SUBW 2365 | get_regw!("auto rs1", inst.rs1); 2366 | get_regw!("auto rs2", inst.rs2); 2367 | set_regw!(inst.rd, "rs1 - rs2"); 2368 | } 2369 | (0b0000000, 0b001) => { 2370 | // SLLW 2371 | get_regw!("auto rs1", inst.rs1); 2372 | get_regw!("auto rs2", inst.rs2); 2373 | set_regw!(inst.rd, "rs1 << (rs2 & 0x1f)"); 2374 | } 2375 | (0b0000000, 0b101) => { 2376 | // SRLW 2377 | get_regw!("auto rs1", inst.rs1); 2378 | get_regw!("auto rs2", inst.rs2); 2379 | set_regw!(inst.rd, "rs1 >> (rs2 & 0x1f)"); 2380 | } 2381 | (0b0100000, 0b101) => { 2382 | // SRAW 2383 | get_regw!("auto rs1", inst.rs1); 2384 | get_regw!("auto rs2", inst.rs2); 2385 | set_regw!(inst.rd, 2386 | "(int32_t)rs1 >> ((int32_t)rs2 & 0x1f)"); 2387 | } 2388 | (0b0000001, 0b000) => { 2389 | // MULW 2390 | get_regw!("auto rs1", inst.rs1); 2391 | get_regw!("auto rs2", inst.rs2); 2392 | set_regw!(inst.rd, "rs1 * rs2"); 2393 | } 2394 | (0b0000001, 0b100) => { 2395 | // DIVW 2396 | get_regw!("auto rs1", inst.rs1); 2397 | get_regw!("auto rs2", inst.rs2); 2398 | set_regw!(inst.rd, 2399 | "rs2 ? (((int32_t)rs1 == INT32_MIN && \ 2400 | (int32_t)rs2 == -1) ? \ 2401 | INT32_MIN : (int32_t)rs1 / (int32_t)rs2)\ 2402 | : -1"); 2403 | } 2404 | (0b0000001, 0b101) => { 2405 | // DIVUW 2406 | get_regw!("auto rs1", inst.rs1); 2407 | get_regw!("auto rs2", inst.rs2); 2408 | set_regw!(inst.rd, "rs2 ? rs1 / rs2 : UINT32_MAX") 2409 | } 2410 | (0b0000001, 0b110) => { 2411 | // REMW 2412 | get_regw!("auto rs1", inst.rs1); 2413 | get_regw!("auto rs2", inst.rs2); 2414 | set_regw!(inst.rd, 2415 | "rs2 ? (((int32_t)rs1 == INT32_MIN && \ 2416 | (int32_t)rs2 == -1) ? \ 2417 | 0 : (int32_t)rs1 % (int32_t)rs2) : rs1"); 2418 | } 2419 | (0b0000001, 0b111) => { 2420 | // REMUW 2421 | get_regw!("auto rs1", inst.rs1); 2422 | get_regw!("auto rs2", inst.rs2); 2423 | set_regw!(inst.rd, "rs2 ? 
rs1 % rs2 : rs1") 2424 | } 2425 | _ => unreachable!(), 2426 | } 2427 | } 2428 | 0b0001111 => { 2429 | let inst = Itype::from(inst); 2430 | 2431 | match inst.funct3 { 2432 | 0b000 => { 2433 | // FENCE 2434 | } 2435 | _ => unreachable!(), 2436 | } 2437 | } 2438 | 0b1110011 => { 2439 | if inst == 0b00000000000000000000000001110011 { 2440 | // ECALL 2441 | program += &format!(r#" 2442 | state->exit_reason = Ecall; 2443 | state->reenter_pc = {:#x}ULL; 2444 | return; 2445 | "#, pc.0); 2446 | } else if inst == 0b00000000000100000000000001110011 { 2447 | // EBREAK 2448 | program += &format!(r#" 2449 | state->exit_reason = Ebreak; 2450 | state->reenter_pc = {:#x}ULL; 2451 | return; 2452 | "#, pc.0); 2453 | } else { 2454 | unreachable!(); 2455 | } 2456 | } 2457 | 0b0011011 => { 2458 | // We know it's an Itype 2459 | let inst = Itype::from(inst); 2460 | 2461 | match inst.funct3 { 2462 | 0b000 => { 2463 | // ADDIW 2464 | get_regw!("auto rs1", inst.rs1); 2465 | set_regw!(inst.rd, format!("rs1 + {}U", 2466 | inst.imm as i32 as u32)); 2467 | } 2468 | 0b001 => { 2469 | let mode = (inst.imm >> 5) & 0b1111111; 2470 | 2471 | match mode { 2472 | 0b0000000 => { 2473 | // SLLIW 2474 | let shamt = inst.imm & 0b11111; 2475 | get_regw!("auto rs1", inst.rs1); 2476 | set_regw!(inst.rd, 2477 | format!("rs1 << {}", 2478 | shamt)); 2479 | } 2480 | _ => unreachable!(), 2481 | } 2482 | } 2483 | 0b101 => { 2484 | let mode = (inst.imm >> 5) & 0b1111111; 2485 | 2486 | match mode { 2487 | 0b0000000 => { 2488 | // SRLIW 2489 | let shamt = inst.imm & 0b11111; 2490 | get_regw!("auto rs1", inst.rs1); 2491 | set_regw!(inst.rd, 2492 | format!("rs1 >> {}", 2493 | shamt)); 2494 | } 2495 | 0b0100000 => { 2496 | // SRAIW 2497 | let shamt = inst.imm & 0b11111; 2498 | get_regw!("auto rs1", inst.rs1); 2499 | set_regw!(inst.rd, 2500 | format!("(int32_t)rs1 >> {}", 2501 | shamt)); 2502 | } 2503 | _ => unreachable!(), 2504 | } 2505 | } 2506 | _ => unreachable!(), 2507 | } 2508 | } 2509 | _ => unimplemented!("Unhandled opcode {:#09b}\n", opcode), 2510 | } 2511 | 2512 | let next_inst = pc.0.wrapping_add(4); 2513 | program += &format!(" return inst_{:016x}(state);\n", next_inst); 2514 | program += "}\n"; 2515 | queued.push_back(VirtAddr(next_inst)); 2516 | } 2517 | 2518 | program = 2519 | format!(r#" 2520 | #include 2521 | #include 2522 | 2523 | enum _vmexit {{ 2524 | None, 2525 | IndirectBranch, 2526 | ReadFault, 2527 | WriteFault, 2528 | Ecall, 2529 | Ebreak, 2530 | Timeout, 2531 | Breakpoint, 2532 | InvalidOpcode, 2533 | Coverage, 2534 | CmpCoverage, 2535 | CallStackFull, 2536 | }}; 2537 | 2538 | struct _state {{ 2539 | enum _vmexit exit_reason; 2540 | uint64_t reenter_pc; 2541 | 2542 | uint64_t cov_from; 2543 | uint64_t cov_to; 2544 | 2545 | uint64_t regs[33]; 2546 | uint8_t *__restrict const memory; 2547 | uint8_t *__restrict const permissions; 2548 | uintptr_t *__restrict const dirty; 2549 | size_t dirty_idx; 2550 | uint64_t *__restrict const dirty_bitmap; 2551 | 2552 | uint64_t *__restrict const trace_buffer; 2553 | size_t trace_idx; 2554 | const size_t trace_len; 2555 | uint64_t (*const cov_table)[2]; 2556 | uint64_t instrs_execed; 2557 | const uint64_t timeout; 2558 | 2559 | uint64_t call_stack[{MAX_CALL_STACK}]; 2560 | uint64_t call_stack_ents; 2561 | uint64_t call_stack_hash; 2562 | 2563 | uint64_t path_hash; 2564 | 2565 | size_t *const blocks; 2566 | const size_t blocks_len; 2567 | const size_t revision; 2568 | }}; 2569 | 2570 | const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; 2571 | const uint64_t PRIME64_3 = 
0x165667B19E3779F9ULL; 2572 | 2573 | static uint64_t rotl64 (uint64_t x, uint64_t n) {{ 2574 | n &= 0x3f; 2575 | if(!n) return x; 2576 | return (x<<n) | (x>>(0x40-n)); 2577 | }} 2578 | 2579 | static uint64_t rotr64 (uint64_t x, uint64_t n) {{ 2580 | n &= 0x3f; 2581 | if(!n) return x; 2582 | return (x>>n) | (x<<(0x40-n)); 2583 | }} 2584 | 2585 | static int report_coverage(struct _state *__restrict const state, 2586 | enum _vmexit reason, 2587 | uint64_t from, uint64_t to, uint64_t pc) {{ 2588 | // Update the path hash 2589 | state->path_hash = 2590 | rotl64(state->path_hash, 7) ^ (to); 2591 | 2592 | // Compute the hash 2593 | uint64_t hash = from; 2594 | hash ^= hash >> 33; 2595 | hash *= PRIME64_2; 2596 | hash += to; 2597 | hash ^= hash >> 29; 2598 | hash *= PRIME64_3; 2599 | hash ^= hash >> 32; 2600 | 2601 | auto ct = state->cov_table; 2602 | 2603 | for( ; ; ) {{ 2604 | // Bounds the hash to the table 2605 | hash %= {cov_table_len}ULL; 2606 | 2607 | if(ct[hash][0] == {EMPTY}ULL && 2608 | __sync_val_compare_and_swap(&ct[hash][0], {EMPTY}ULL, 2609 | {PENDING}ULL) == {EMPTY}ULL) {{ 2610 | // We own the entry, fill it in 2611 | __atomic_store_n(&ct[hash][1], to, __ATOMIC_SEQ_CST); 2612 | __atomic_store_n(&ct[hash][0], from, __ATOMIC_SEQ_CST); 2613 | 2614 | state->exit_reason = reason; 2615 | state->cov_from = from; 2616 | state->cov_to = to; 2617 | state->reenter_pc = pc; 2618 | return 1; 2619 | }} else {{ 2620 | // We lost the race 2621 | 2622 | // Wait for the entry to be filled in 2623 | while(__atomic_load_n(&ct[hash][0], __ATOMIC_SEQ_CST) == 2624 | {PENDING}ULL) {{}} 2625 | 2626 | uint64_t a = __atomic_load_n(&ct[hash][0], __ATOMIC_SEQ_CST); 2627 | uint64_t b = __atomic_load_n(&ct[hash][1], __ATOMIC_SEQ_CST); 2628 | if(a == (from) && b == (to)) {{ 2629 | // Coverage already recorded 2630 | break; 2631 | }} 2632 | 2633 | // Go to the next 2634 | hash += 1; 2635 | }} 2636 | }} 2637 | 2638 | return 0; 2639 | }} 2640 | "#, MAX_CALL_STACK = MAX_CALL_STACK, 2641 | cov_table_len = corpus.coverage_table.len(), 2642 | EMPTY = COVERAGE_ENTRY_EMPTY, 2643 | PENDING = COVERAGE_ENTRY_PENDING) + &decls + "\n" + &program; 2644 | 2645 | // Hash the C++ file contents 2646 | let proghash = corpus.hasher.hash(program.as_bytes()); 2647 | 2648 | // Check if we're the first core to try to compile this 2649 | let first = { 2650 | let mut jobs = corpus.compile_jobs.lock().unwrap(); 2651 | jobs.insert(proghash) 2652 | }; 2653 | 2654 | // Create the jitcache folder 2655 | std::fs::create_dir_all("jitcache") 2656 | .expect("Failed to create jitcache directory"); 2657 | 2658 | // Create the cache name 2659 | let cachename = Path::new("jitcache") 2660 | .join(format!("{:032x}", proghash)); 2661 | 2662 | // If we aren't the first to access the cache, idle loop until the 2663 | // first person has compiled the code 2664 | if !first { 2665 | while !cachename.exists() { 2666 | std::thread::sleep(Duration::from_millis(100)); 2667 | } 2668 | } 2669 | 2670 | // If the cache exists, read the cache 2671 | if cachename.exists() { 2672 | // Read the cache 2673 | let cache = std::fs::read(&cachename).unwrap(); 2674 | let mut _ptr = &cache[..]; 2675 | 2676 | macro_rules! 
consume { 2677 | ($ty:ty) => {{ 2678 | const SOT: usize = core::mem::size_of::<$ty>(); 2679 | let mut buf = [0u8; SOT]; 2680 | buf.copy_from_slice(&_ptr[..SOT]); 2681 | _ptr = &_ptr[SOT..]; 2682 | <$ty>::from_ne_bytes(buf) 2683 | }} 2684 | } 2685 | 2686 | // Clear the existing instr offsets 2687 | inst_offsets.clear(); 2688 | 2689 | // Deserialize the metadata 2690 | let entries = consume!(u64); 2691 | for _ in 0..entries { 2692 | let gvaddr = VirtAddr(consume!(u64).try_into().unwrap()); 2693 | let offset: usize = consume!(u64).try_into().unwrap(); 2694 | inst_offsets.insert(gvaddr, offset); 2695 | } 2696 | 2697 | // Return out the cached info 2698 | return Ok((_ptr.into(), inst_offsets)); 2699 | } 2700 | 2701 | print!("Compiling cache for {:#018x} -> {:032x} {}\n", 2702 | pc.0, proghash, inst_offsets.len()); 2703 | 2704 | let cppfn = std::env::temp_dir().join( 2705 | format!("fwetmp_{:?}.cpp", 2706 | std::thread::current().id())); 2707 | let linkfn = std::env::temp_dir().join( 2708 | format!("fwetmp_{:?}.lunk", 2709 | std::thread::current().id())); 2710 | let binfn = std::env::temp_dir().join( 2711 | format!("fwetmp_{:?}.bin", 2712 | std::thread::current().id())); 2713 | 2714 | // Write out the test program 2715 | std::fs::write(&cppfn, program) 2716 | .expect("Failed to write program"); 2717 | 2718 | // Create the ELF 2719 | let res = Command::new("clang++").args(&[ 2720 | "-O3", "-Wall", 2721 | "-fno-asynchronous-unwind-tables", 2722 | "-Wno-unused-label", 2723 | "-Wno-unused-variable", 2724 | "-Wno-unused-function", 2725 | "-Wno-infinite-recursion", 2726 | "-Werror", 2727 | "-march=native", 2728 | "-fno-strict-aliasing", 2729 | "-static", "-nostdlib", "-ffreestanding", 2730 | "-Wl,-Tldscript.ld", "-Wl,--build-id=none", 2731 | "-o", linkfn.to_str().unwrap(), 2732 | cppfn.to_str().unwrap()]).status() 2733 | .expect("Failed to launch clang++"); 2734 | assert!(res.success(), "clang++ returned error"); 2735 | 2736 | // Convert the ELF to a binary 2737 | let res = Command::new("objcopy") 2738 | .args(&["-O", "binary", 2739 | "--set-section-flags", ".bss=contents,alloc,load", 2740 | linkfn.to_str().unwrap(), 2741 | binfn.to_str().unwrap()]).status() 2742 | .expect("Failed to launch objcopy"); 2743 | assert!(res.success(), "objcopy returned error"); 2744 | 2745 | // Get the `nm` output indicating where function entries are 2746 | let res = Command::new("nm") 2747 | .arg(linkfn.to_str().unwrap()) 2748 | .output().unwrap(); 2749 | assert!(res.status.success(), "nm returned error"); 2750 | let stdout = std::str::from_utf8(&res.stdout).unwrap(); 2751 | let mut nm_func_to_addr = BTreeMap::new(); 2752 | for line in stdout.lines() { 2753 | let mut spl = line.split(" T inst_"); 2754 | if spl.clone().count() != 2 { continue; } 2755 | 2756 | // Parse the JIT address and turn it into an offset 2757 | let jit_addr = 2758 | usize::from_str_radix(spl.next().unwrap(), 16).unwrap() - 2759 | 0x10000; 2760 | 2761 | // Insert the address to the function in our database 2762 | nm_func_to_addr.insert(spl.next().unwrap(), jit_addr); 2763 | } 2764 | 2765 | // Now, resolve the addresses 2766 | for (gvaddr, res) in inst_offsets.iter_mut() { 2767 | if let Some(&addr) = 2768 | nm_func_to_addr.get(format!("{:016x}", gvaddr.0).as_str()){ 2769 | *res = addr; 2770 | } else { 2771 | panic!("Could not resolve compiled function to jit addr?"); 2772 | } 2773 | } 2774 | 2775 | // Create the JIT binary file with the metadata of the sections 2776 | let mut jit = Vec::new(); 2777 | jit.extend_from_slice(&(inst_offsets.len() as 
u64).to_ne_bytes()); 2778 | for (&gvaddr, &res) in inst_offsets.iter() { 2779 | jit.extend_from_slice(&(gvaddr.0 as u64).to_ne_bytes()); 2780 | jit.extend_from_slice(&(res as u64).to_ne_bytes()); 2781 | } 2782 | let jitbytes = std::fs::read(&binfn).unwrap(); 2783 | jit.extend_from_slice(&jitbytes); 2784 | 2785 | // Write the JIT + metadata to the cache 2786 | std::fs::write(&cachename, jit) 2787 | .expect("Failed to write compiled JIT to cache file"); 2788 | 2789 | if inst_offsets.len() > 50 { 2790 | //std::process::exit(0); 2791 | } 2792 | 2793 | Ok((jitbytes, inst_offsets)) 2794 | } 2795 | } 2796 | 2797 | --------------------------------------------------------------------------------