├── samples ├── .gitignore └── check.sh ├── src ├── lib.rs ├── isa │ ├── mod.rs │ ├── i8051.rs │ ├── i8080.rs │ └── mips.rs ├── platform │ ├── unix.rs │ ├── mod.rs │ └── n64.rs ├── ir │ ├── context.rs │ └── build.rs ├── main.rs ├── nest.rs └── explore.rs ├── .gitignore ├── Cargo.toml └── Cargo.lock /samples/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !check.sh 4 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod explore; 2 | pub mod ir; 3 | pub mod isa; 4 | pub mod nest; 5 | pub mod platform; 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Cargo artifacts. 2 | target/ 3 | 4 | # IDE local settings. 5 | .vscode/ 6 | 7 | # Misc local dirs. 8 | \#* 9 | -------------------------------------------------------------------------------- /src/isa/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod i8051; 2 | pub mod i8080; 3 | pub mod mips; 4 | 5 | use crate::ir::{Const, Cx, Edge, Edges, IGlobal, State}; 6 | use crate::platform::Rom; 7 | 8 | pub trait Isa { 9 | fn mem_containing_rom(&self) -> IGlobal; 10 | 11 | // FIXME(eddyb) replace the `Result` with a dedicated enum. 
12 | fn lift_instr( 13 | &self, 14 | cx: &Cx, 15 | rom: &dyn Rom, 16 | pc: &mut Const, 17 | state: State, 18 | ) -> Result>; 19 | } 20 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sobek" 3 | version = "0.0.1" 4 | authors = ["Eduard-Mihai Burtescu "] 5 | edition = "2021" 6 | repository = "https://github.com/eddyb/sobek" 7 | license = "MIT/Apache-2.0" 8 | description = "Reverse engineering framework." 9 | 10 | [dependencies] 11 | scoped-tls = "0.1.2" 12 | elsa = "1.3.2" 13 | ctrlc = "3.1.3" 14 | itertools = "0.8.2" 15 | term = "0.6.1" 16 | paw = "1.0.0" 17 | structopt = { version = "0.3.7", features = [ "paw" ] } 18 | memmap2 = "0.5.3" 19 | object = "0.28.3" 20 | smallvec = { version = "1.8.0", features = [ "union" ] } 21 | 22 | [lib] 23 | doctest = false 24 | test = false 25 | -------------------------------------------------------------------------------- /src/platform/unix.rs: -------------------------------------------------------------------------------- 1 | //! UNIX userspace processes, loaded from executable "images" (e.g. ELF). 
2 | 3 | use crate::ir::{Const, MemSize, MemType}; 4 | use crate::platform::{RawRom, Rom, SimplePlatform, UnsupportedAddress}; 5 | use object::{Object, ObjectSegment}; 6 | use std::ops::Range; 7 | 8 | pub struct Executable<'a> { 9 | pub virtual_to_raw_file: Vec<(Range, RawRom<&'a [u8]>)>, 10 | pub virtual_entry: u64, 11 | } 12 | 13 | impl<'a> Executable<'a> { 14 | pub fn load_at_virtual_addr(raw_file: RawRom<&'a [u8]>, virtual_base_addr: u64) -> Self { 15 | let obj_file = object::File::parse(raw_file.0).unwrap(); 16 | let virtual_to_raw_file = obj_file 17 | .segments() 18 | .map(|segment| { 19 | let virtual_start = virtual_base_addr.checked_add(segment.address()).unwrap(); 20 | let (file_start, file_size) = segment.file_range(); 21 | let (file_start, file_size) = ( 22 | usize::try_from(file_start).unwrap(), 23 | usize::try_from(file_size).unwrap(), 24 | ); 25 | ( 26 | virtual_start..virtual_start.checked_add(segment.size()).unwrap(), 27 | RawRom(&raw_file.0[file_start..][..file_size]), 28 | ) 29 | }) 30 | .collect(); 31 | let virtual_entry = virtual_base_addr.checked_add(obj_file.entry()).unwrap(); 32 | 33 | Executable { 34 | virtual_to_raw_file, 35 | virtual_entry, 36 | } 37 | } 38 | } 39 | 40 | impl Rom for Executable<'_> { 41 | fn load( 42 | &self, 43 | mem_type: MemType, 44 | addr: Const, 45 | size: MemSize, 46 | ) -> Result { 47 | self.virtual_to_raw_file 48 | .iter() 49 | .find_map(|(virtual_range, raw_segment)| { 50 | let addr = addr.as_u64(); 51 | if virtual_range.contains(&addr) { 52 | let segment_offset = Const::new(mem_type.addr_size, addr - virtual_range.start); 53 | Some(raw_segment.load(mem_type, segment_offset, size)) 54 | } else { 55 | None 56 | } 57 | }) 58 | .ok_or(UnsupportedAddress(addr))? 
59 | } 60 | } 61 | 62 | pub type UnixProcess<'a, A> = SimplePlatform>; 63 | -------------------------------------------------------------------------------- /src/platform/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod n64; 2 | pub mod unix; 3 | 4 | use crate::ir::{Const, MemSize, MemType}; 5 | use crate::isa::Isa; 6 | use std::ops::Deref; 7 | 8 | #[derive(Debug)] 9 | pub struct UnsupportedAddress(pub Const); 10 | 11 | pub trait Rom { 12 | fn load( 13 | &self, 14 | mem_type: MemType, 15 | addr: Const, 16 | size: MemSize, 17 | ) -> Result; 18 | } 19 | 20 | pub struct RawRom>(pub R); 21 | 22 | impl RawRom { 23 | pub fn mmap_file(path: impl AsRef) -> std::io::Result { 24 | let file = std::fs::File::open(path)?; 25 | // FIXME(eddyb) is this safe? ideally "read-only CoW" would enforce that. 26 | let data = unsafe { memmap2::MmapOptions::new().map_copy_read_only(&file)? }; 27 | Ok(Self(data)) 28 | } 29 | } 30 | 31 | impl> Rom for RawRom { 32 | fn load( 33 | &self, 34 | mem_type: MemType, 35 | addr: Const, 36 | size: MemSize, 37 | ) -> Result { 38 | let err = UnsupportedAddress(addr); 39 | let addr = addr.as_u64(); 40 | let bytes = usize::try_from(addr) 41 | .ok() 42 | .and_then(|addr| self.0.get(addr..)?.get(..usize::from(size.bytes()))) 43 | .ok_or(err)?; 44 | 45 | macro_rules! 
from_bytes { 46 | ($uint:ty) => {{ 47 | let &bytes = bytes.try_into().unwrap(); 48 | 49 | if mem_type.big_endian { 50 | <$uint>::from_be_bytes(bytes) 51 | } else { 52 | <$uint>::from_le_bytes(bytes) 53 | } 54 | .into() 55 | }}; 56 | } 57 | Ok(Const::new( 58 | size.into(), 59 | match size { 60 | MemSize::M8 => bytes[0].into(), 61 | MemSize::M16 => from_bytes!(u16), 62 | MemSize::M32 => from_bytes!(u32), 63 | MemSize::M64 => from_bytes!(u64), 64 | }, 65 | )) 66 | } 67 | } 68 | 69 | pub trait Platform { 70 | fn isa(&self) -> &dyn Isa; 71 | fn rom(&self) -> &dyn Rom; 72 | } 73 | 74 | pub struct SimplePlatform { 75 | pub isa: A, 76 | pub rom: R, 77 | } 78 | 79 | impl Platform for SimplePlatform { 80 | fn isa(&self) -> &dyn Isa { 81 | &self.isa 82 | } 83 | fn rom(&self) -> &dyn Rom { 84 | &self.rom 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /samples/check.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Pre-build `sobek` and ensure the resulting executable runs. 6 | sobek="target/release/sobek" 7 | cargo build --release 8 | "$sobek" --help > /dev/null 9 | 10 | # Styles. 11 | Sreset=$(tput sgr0) 12 | Sbold=$(tput bold) 13 | Sunderline=$(tput smul) 14 | Serr=$(tput setaf 9) 15 | Swarn=$(tput setaf 11) 16 | 17 | sample() { 18 | local platform="$1" 19 | local rom_filename="$2" 20 | shift 2 21 | local flags=("$@") 22 | 23 | local name rom log 24 | name=$(echo "$rom_filename" | sed -E 's/\.[a-z0-9]+$//') 25 | rom="samples/$platform/$rom_filename" 26 | log="samples/$platform/$name.log" 27 | 28 | echo 29 | echo "$Sbold""$Sunderline""# $name ($platform)""$Sreset" 30 | 31 | # Compare log.new and log, and collapse them if they're identical. 32 | # 33 | # Defined here so it can also be attempted before running `sobek` 34 | # (if preexisting files exist, including from previous runs). 35 | check_log() { 36 | if [ -f "$log" ] && (! 
cmp --quiet "$log"{,.new}); then 37 | if [ -n "$BLESS_FIRST" ]; then 38 | mv --backup=numbered --verbose "$log"{,.old} 39 | unset BLESS_FIRST 40 | else 41 | echo "$Serr"" $log.new differs from $log""$Sreset" 42 | ([ -n "$DIFF_TOOL" ] && $DIFF_TOOL "$log"{,.new}) || true 43 | echo " Run \`BLESS_FIRST=1 $0\` to accept this change" 44 | exit 1 45 | fi 46 | fi 47 | 48 | mv "$log"{.new,} 49 | } 50 | 51 | if [ -f "$log".new ]; then 52 | echo "$Swarn"" Found preexisting $log.new, checking...""$Sreset" 53 | check_log 54 | fi 55 | 56 | # Only run `sobek` if anything has changed since the last time. 57 | if [ "$log" -nt "$sobek" ] && [ "$log" -nt samples/samples.sh ] && [ "$log" -nt "$rom" ]; then 58 | echo " Skipping ($log is fresh according to mtime)..." 59 | else 60 | command time -f " Completed in %E (%Us user / %MkiB max RSS)" \ 61 | "$sobek" -p "$platform" "$rom" "${flags[@]}" > "$log".new 62 | check_log 63 | fi 64 | } 65 | 66 | # All `sample` invocations are in a separate (`.gitignore`d) file. 67 | if ! 
[ -f samples/samples.sh ]; then 68 | echo "$Serr""Missing samples/samples.sh""$Sreset" 69 | echo " Create \`samples/samples.sh\` and add to it commands like this:" 70 | echo " sample n64 foo.rom" 71 | echo " to have \`sobek n64 samples/n64/foo.rom\` be tested by this script" 72 | exit 1 73 | fi 74 | source samples/samples.sh 75 | -------------------------------------------------------------------------------- /src/platform/n64.rs: -------------------------------------------------------------------------------- 1 | use crate::ir::{BitSize, Const, Cx, MemSize, MemType}; 2 | use crate::isa::mips::{AddrSpace, Mips}; 3 | use crate::isa::Isa; 4 | use crate::platform::{Platform, RawRom, Rom, UnsupportedAddress}; 5 | use std::ops::Deref; 6 | 7 | pub struct Cartridge> { 8 | pub raw: RawRom, 9 | pub base: Const, 10 | } 11 | 12 | impl> Cartridge { 13 | pub fn new(raw: RawRom) -> Self { 14 | let base = raw 15 | .load( 16 | MemType { 17 | addr_size: BitSize::B32, 18 | big_endian: true, 19 | }, 20 | Const::new(BitSize::B32, 8), 21 | MemSize::M32, 22 | ) 23 | .unwrap(); 24 | Cartridge { raw, base } 25 | } 26 | 27 | fn load_physical( 28 | &self, 29 | mem_type: MemType, 30 | addr: Const, 31 | size: MemSize, 32 | ) -> Result { 33 | // FIXME(eddyb) do this only once. 34 | // FIXME(eddyb) use `decode_virtual_addr64` when it becomes available. 35 | let (base_addr_space, base) = { 36 | let virtual_addr64 = self.base.sext(BitSize::B64); 37 | let virtual_addr32 = self.base.trunc(BitSize::B32); 38 | 39 | // FIXME(eddyb) support addresses other than the 32->64 compatibility 40 | // subset (i.e. a sign-extended 32-bit address). 41 | if virtual_addr32.sext(BitSize::B64) != virtual_addr64 { 42 | return Err(UnsupportedAddress(addr)); 43 | } 44 | 45 | Mips::decode_virtual_addr32(virtual_addr32.as_u32()) 46 | }; 47 | assert_eq!(base_addr_space, AddrSpace::Direct { cached: true }); 48 | 49 | match addr.as_u32() { 50 | // TODO(eddyb) make sure this is actually correct now. 
51 | addr @ 0..=0x003f_ffff if addr >= base => self.raw.load( 52 | mem_type, 53 | Const::new(BitSize::B32, (0x1000 + (addr - base)) as u64), 54 | size, 55 | ), 56 | _ => Err(UnsupportedAddress(addr)), 57 | } 58 | } 59 | } 60 | 61 | impl> Rom for Cartridge { 62 | fn load( 63 | &self, 64 | mem_type: MemType, 65 | addr: Const, 66 | size: MemSize, 67 | ) -> Result { 68 | let err = UnsupportedAddress(addr); 69 | 70 | // FIXME(eddyb) use `decode_virtual_addr64` when it becomes available. 71 | let (addr_space, addr) = { 72 | let virtual_addr64 = addr.sext(BitSize::B64); 73 | let virtual_addr32 = addr.trunc(BitSize::B32); 74 | 75 | // FIXME(eddyb) support addresses other than the 32->64 compatibility 76 | // subset (i.e. a sign-extended 32-bit address). 77 | if virtual_addr32.sext(BitSize::B64) != virtual_addr64 { 78 | return Err(UnsupportedAddress(addr)); 79 | } 80 | 81 | let (addr_space, addr) = Mips::decode_virtual_addr32(virtual_addr32.as_u32()); 82 | (addr_space, Const::new(BitSize::B32, addr as u64)) 83 | }; 84 | 85 | match addr_space { 86 | AddrSpace::Direct { .. } => self.load_physical(mem_type, addr, size).map_err(|_| err), 87 | AddrSpace::Mapped(_) => Err(err), 88 | } 89 | } 90 | } 91 | 92 | // FIXME(eddyb) this is only different from `SimplePlatform` in providing 93 | // a custom constructor. 94 | pub struct N64> { 95 | pub isa: Mips, 96 | pub rom: Cartridge, 97 | } 98 | 99 | impl> Platform for N64 { 100 | fn isa(&self) -> &dyn Isa { 101 | &self.isa 102 | } 103 | fn rom(&self) -> &dyn Rom { 104 | &self.rom 105 | } 106 | } 107 | 108 | impl> N64 { 109 | pub fn new(cx: &Cx, rom: Cartridge) -> Self { 110 | N64 { 111 | isa: Mips::new( 112 | cx, 113 | BitSize::B64, 114 | MemType { 115 | // HACK(eddyb) this will truncate addresses on access, but 116 | // the correct semantics would be more to require "canonical" 117 | // addresses (i.e. 
64-bit sign-extended from 32-bit, so the 118 | // top half is either all 0s or all 1s) - however, that's 119 | // unlikely to be a real option without a robust "assumptions" 120 | // system that assumes `x == sext_64(trunc_32(x))` by default 121 | // while also paying attention to any likely counterexamples. 122 | addr_size: BitSize::B32, 123 | big_endian: true, 124 | }, 125 | ), 126 | rom, 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /src/ir/context.rs: -------------------------------------------------------------------------------- 1 | use crate::ir::{Global, Node}; 2 | use elsa::FrozenVec; 3 | use std::cell::RefCell; 4 | use std::collections::HashMap; 5 | use std::convert::TryInto; 6 | use std::fmt; 7 | use std::hash::Hash; 8 | use std::rc::Rc; 9 | 10 | pub struct Cx { 11 | interners: Interners, 12 | } 13 | 14 | /// Dispatch helper, to allow implementing interning logic on 15 | /// the type passed to `cx.a(...)`. 16 | pub trait InternInCx { 17 | type Interned; 18 | 19 | fn intern_in_cx(self, cx: &Cx) -> Self::Interned; 20 | } 21 | 22 | impl Cx { 23 | pub fn new() -> Self { 24 | Cx { 25 | interners: Interners::default(), 26 | } 27 | } 28 | 29 | // FIXME(eddyb) rename this to `intern`. 30 | pub fn a(&self, x: T) -> T::Interned { 31 | x.intern_in_cx(self) 32 | } 33 | } 34 | 35 | struct Interner { 36 | // FIXME(Manishearth/elsa#6) switch to `FrozenIndexSet` when available. 
37 | map: RefCell, u32>>, 38 | vec: FrozenVec>, 39 | } 40 | 41 | impl Default for Interner { 42 | fn default() -> Self { 43 | Interner { 44 | map: RefCell::new(HashMap::default()), 45 | vec: FrozenVec::new(), 46 | } 47 | } 48 | } 49 | 50 | impl Interner { 51 | fn intern(&self, value: impl AsRef + Into>) -> u32 { 52 | if let Some(&i) = self.map.borrow().get(value.as_ref()) { 53 | return i; 54 | } 55 | let value = value.into(); 56 | let next = self.vec.len().try_into().unwrap(); 57 | self.map.borrow_mut().insert(value.clone(), next); 58 | self.vec.push(value); 59 | next 60 | } 61 | } 62 | 63 | macro_rules! interners { 64 | ($($name:ident => $ty:ty),* $(,)?) => { 65 | #[allow(non_snake_case)] 66 | #[derive(Default)] 67 | struct Interners { 68 | $($name: Interner<$ty>),* 69 | } 70 | 71 | $( 72 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 73 | pub struct $name(u32); 74 | 75 | impl std::ops::Index<$name> for Cx { 76 | type Output = $ty; 77 | 78 | fn index(&self, interned: $name) -> &Self::Output { 79 | &self.interners.$name.vec[interned.0 as usize] 80 | } 81 | } 82 | )* 83 | }; 84 | } 85 | 86 | interners! { 87 | IStr => str, 88 | IGlobal => Global, 89 | INode => Node, 90 | } 91 | 92 | impl InternInCx for &'_ str { 93 | type Interned = IStr; 94 | fn intern_in_cx(self, cx: &Cx) -> IStr { 95 | IStr(cx.interners.IStr.intern(self)) 96 | } 97 | } 98 | 99 | impl fmt::Display for IStr { 100 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 101 | if super::DBG_CX.is_set() { 102 | super::DBG_CX.with(|cx| write!(f, "{}", &cx[*self])) 103 | } else { 104 | write!(f, "str#{:x}", self.0) 105 | } 106 | } 107 | } 108 | 109 | impl fmt::Debug for IStr { 110 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 111 | if super::DBG_CX.is_set() { 112 | super::DBG_CX.with(|cx| write!(f, "{:?}", &cx[*self])) 113 | } else { 114 | write!(f, "str#{:x}", self.0) 115 | } 116 | } 117 | } 118 | 119 | // FIXME(eddyb) automate this away somehow. 
120 | impl AsRef for Global { 121 | fn as_ref(&self) -> &Self { 122 | self 123 | } 124 | } 125 | 126 | impl InternInCx for Global { 127 | type Interned = IGlobal; 128 | fn intern_in_cx(self, cx: &Cx) -> IGlobal { 129 | IGlobal(cx.interners.IGlobal.intern(self)) 130 | } 131 | } 132 | 133 | impl fmt::Debug for IGlobal { 134 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 135 | if super::DBG_CX.is_set() { 136 | super::DBG_CX.with(|cx| write!(f, "{:?}", &cx[*self])) 137 | } else { 138 | write!(f, "global#{:x}", self.0) 139 | } 140 | } 141 | } 142 | 143 | // FIXME(eddyb) automate this away somehow. 144 | impl AsRef for Node { 145 | fn as_ref(&self) -> &Self { 146 | self 147 | } 148 | } 149 | 150 | impl InternInCx for Node { 151 | type Interned = INode; 152 | fn intern_in_cx(self, cx: &Cx) -> INode { 153 | match self.normalize_for_interning(cx) { 154 | Ok(x) => INode(cx.interners.INode.intern(x)), 155 | Err(x) => x, 156 | } 157 | } 158 | } 159 | 160 | impl fmt::Debug for INode { 161 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 162 | let local = if super::DBG_LOCALS.is_set() { 163 | super::DBG_LOCALS.with(|locals| locals.get(self).copied()) 164 | } else { 165 | None 166 | }; 167 | match local { 168 | Some((prefix, i)) => write!(f, "{}{}", prefix, i), 169 | None => { 170 | if super::DBG_CX.is_set() { 171 | super::DBG_CX.with(|cx| write!(f, "{:?}", &cx[*self])) 172 | } else { 173 | write!(f, "node#{:x}", self.0) 174 | } 175 | } 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use sobek::explore::Explorer; 2 | use sobek::ir::{Const, Cx}; 3 | use sobek::isa::i8051::I8051; 4 | use sobek::isa::i8080::I8080; 5 | use sobek::isa::mips::Mips; 6 | use sobek::platform::{n64, unix, Platform}; 7 | use sobek::platform::{RawRom, SimplePlatform}; 8 | use std::ops::Range; 9 | use std::path::PathBuf; 10 | use 
std::sync::atomic::{AtomicBool, Ordering}; 11 | use std::sync::Arc; 12 | 13 | // FIXME(eddyb) better error types. 14 | fn parse_addr(s: &str) -> Result { 15 | if s.starts_with("0x") { 16 | let s = &s[2..]; 17 | // FIXME(eddyb) make this cheaper somehow. 18 | let s = &s.replace('_', ""); 19 | u64::from_str_radix(s, 16).map_err(|e| e.to_string()) 20 | } else { 21 | Err("addresses must start with `0x`".to_string()) 22 | } 23 | } 24 | 25 | // FIXME(eddyb) better error types. 26 | fn parse_addr_range(s: &str) -> Result, String> { 27 | let mut parts = s.split(".."); 28 | match (parts.next(), parts.next(), parts.next()) { 29 | (Some(start), Some(end), None) => Ok(parse_addr(start)?..parse_addr(end)?), 30 | _ => Err("address ranges must be `start..end`".to_string()), 31 | } 32 | } 33 | 34 | #[derive(structopt::StructOpt)] 35 | struct Args { 36 | /// Platform to analyze for. 37 | #[structopt(short, long, name = "PLATFORM")] 38 | platform: Option, 39 | 40 | /// Additional entrypoint. 41 | #[structopt(short, long, name = "ENTRY")] 42 | #[structopt(number_of_values(1), parse(try_from_str = parse_addr))] 43 | entry: Vec, 44 | 45 | /// Memory range to treat as an array. 46 | #[structopt(short, long, name = "ARRAY")] 47 | #[structopt(number_of_values(1), parse(try_from_str = parse_addr_range))] 48 | array: Vec>, 49 | 50 | /// ROM file. 
51 | #[structopt(parse(from_os_str), name = "ROM")] 52 | rom: PathBuf, 53 | } 54 | 55 | fn analyze_and_dump(mut args: Args, mk_platform: impl FnOnce(&Cx) -> P) { 56 | let cx = Cx::new(); 57 | let platform = mk_platform(&cx); 58 | 59 | let rom_addr_size = cx[platform.isa().mem_containing_rom()] 60 | .ty 61 | .mem() 62 | .unwrap() 63 | .addr_size; 64 | 65 | let cancel_token = Arc::new(AtomicBool::new(false)); 66 | let ctrcc_result = { 67 | let cancel_token = cancel_token.clone(); 68 | ctrlc::set_handler(move || { 69 | eprintln!(" (Ctrl-C: cancelling...)"); 70 | cancel_token.store(true, Ordering::SeqCst); 71 | }) 72 | }; 73 | match &ctrcc_result { 74 | Ok(()) => eprintln!("Press Ctrl-C at any time to cancel analysis"), 75 | Err(e) => eprintln!("warning: Ctrl-C not handled: {}", e), 76 | } 77 | let cancel_token = ctrcc_result.ok().map(|_| &*cancel_token); 78 | 79 | let mut explorer = Explorer::new(&cx, &platform, cancel_token); 80 | 81 | for array in &args.array { 82 | explorer 83 | .array_len 84 | .insert(array.start, array.end - array.start); 85 | } 86 | 87 | args.entry.sort(); 88 | for &entry_pc in &args.entry { 89 | explorer.explore_bbs(Const::new(rom_addr_size, entry_pc)); 90 | } 91 | 92 | explorer.split_overlapping_bbs(); 93 | 94 | let nester = sobek::nest::Nester::new(&explorer); 95 | 96 | let mut nested_pc = ..Const::new( 97 | rom_addr_size, 98 | explorer.blocks.keys().next().unwrap().entry_pc, 99 | ); 100 | let mut last_end = nested_pc.end.as_u64(); 101 | for (&bb, block) in &explorer.blocks { 102 | // Skip blocks in the last printed PC range, *unless* they overlap the 103 | // previous block (e.g. due to jumps into the middle of an instruction). 
104 | if bb.entry_pc >= nested_pc.end.as_u64() || last_end > bb.entry_pc { 105 | println!("{}", nester.nested_block_to_string(bb, &mut nested_pc)); 106 | } 107 | last_end = block.pc.end.as_u64(); 108 | } 109 | } 110 | 111 | #[paw::main] 112 | fn main(mut args: Args) -> std::io::Result<()> { 113 | let rom = RawRom::mmap_file(&args.rom)?; 114 | let rom = RawRom(&rom.0[..]); 115 | 116 | let platform = match &args.platform { 117 | Some(p) => &p[..], 118 | None => panic!("unable auto-detect platform (NYI)"), 119 | }; 120 | match platform { 121 | "8051" => { 122 | args.entry.push(0); 123 | analyze_and_dump(args, |cx| SimplePlatform { 124 | isa: I8051::new(cx), 125 | rom, 126 | }); 127 | } 128 | "8080" => { 129 | args.entry.push(0); 130 | analyze_and_dump(args, |cx| SimplePlatform { 131 | isa: I8080::new(cx), 132 | rom, 133 | }); 134 | } 135 | "gb" => { 136 | args.entry.push(0x100); 137 | args.entry.extend((0..5).map(|i| 0x40 + i * 8)); 138 | analyze_and_dump(args, |cx| SimplePlatform { 139 | isa: I8080::new_lr35902(cx), 140 | rom, 141 | }); 142 | } 143 | "n64" => { 144 | let rom = n64::Cartridge::new(rom); 145 | args.entry.push(rom.base.as_u64()); 146 | analyze_and_dump(args, |cx| n64::N64::new(cx, rom)); 147 | } 148 | "mipsel-linux" => { 149 | // FIXME(eddyb) symbolic load addresses would be ideal here, 150 | // arbitrarily recognizable constant used instead for now. 151 | let exe = unix::Executable::load_at_virtual_addr(rom, 0x7000_0000); 152 | args.entry.push(exe.virtual_entry); 153 | analyze_and_dump(args, |cx| SimplePlatform { 154 | isa: Mips::new_32le(cx), 155 | rom: exe, 156 | }); 157 | } 158 | "mips64el-linux" => { 159 | // FIXME(eddyb) symbolic load addresses would be ideal here, 160 | // arbitrarily recognizable constant used instead for now. 
161 | let exe = unix::Executable::load_at_virtual_addr(rom, 0x0070_0000_0000); 162 | args.entry.push(exe.virtual_entry); 163 | analyze_and_dump(args, |cx| SimplePlatform { 164 | isa: Mips::new_64le(cx), 165 | rom: exe, 166 | }); 167 | } 168 | _ => panic!("unsupported platform `{}`", platform), 169 | } 170 | 171 | Ok(()) 172 | } 173 | -------------------------------------------------------------------------------- /src/nest.rs: -------------------------------------------------------------------------------- 1 | use crate::explore::{BlockId, Explorer}; 2 | use crate::ir::{Const, Edges, Visit, Visitor}; 3 | use elsa::FrozenMap; 4 | use itertools::{Either, Itertools}; 5 | use std::collections::{BTreeMap, HashMap, HashSet}; 6 | use std::fmt::{self, Write}; 7 | use std::mem; 8 | use std::ops::RangeTo; 9 | 10 | struct NestedBlock { 11 | pc: RangeTo, 12 | 13 | per_edge_child: [Option; 2], 14 | children: Vec, 15 | static_exits: BTreeMap, 16 | } 17 | 18 | pub struct Nester<'a> { 19 | pub explorer: &'a Explorer<'a>, 20 | ref_counts: HashMap, 21 | function_like: HashSet, 22 | 23 | nested_block_cache: FrozenMap>, 24 | } 25 | 26 | impl<'a> Nester<'a> { 27 | pub fn new(explorer: &'a Explorer<'a>) -> Self { 28 | // HACK(eddyb) only consider blocks to be function entry points if the 29 | // presumed return continuation immediatelly follows the caller block. 
30 | let function_like = explorer 31 | .takes_static_continuation 32 | .iter() 33 | .filter(|&(&_, callers)| { 34 | callers.iter().any(|caller_bb| { 35 | explorer 36 | .eventual_static_continuation 37 | .get(&caller_bb) 38 | .map_or(false, |&ret_bb| { 39 | ret_bb == BlockId::from(explorer.blocks[&caller_bb].pc.end) 40 | }) 41 | }) 42 | }) 43 | .map(|(&callee_bb, _)| callee_bb) 44 | .collect(); 45 | 46 | let mut nester = Nester { 47 | explorer, 48 | ref_counts: HashMap::new(), 49 | function_like, 50 | nested_block_cache: FrozenMap::new(), 51 | }; 52 | 53 | let mut refcount_target = |target| { 54 | *nester.ref_counts.entry(target).or_default() += 1; 55 | }; 56 | for &bb in explorer.blocks.keys() { 57 | explorer.get_block_direct_targets(bb).map(|targets, _| { 58 | for target in targets.into_iter() { 59 | refcount_target(target); 60 | } 61 | }); 62 | 63 | if let Some(&target_bb) = explorer.eventual_static_continuation.get(&bb) { 64 | refcount_target(target_bb); 65 | } 66 | } 67 | 68 | nester 69 | } 70 | 71 | fn get_or_compute_nested_block(&self, bb: BlockId) -> &NestedBlock { 72 | if let Some(nested_block) = self.nested_block_cache.get(&bb) { 73 | return nested_block; 74 | } 75 | 76 | let block = &self.explorer.blocks[&bb]; 77 | 78 | let edge_targets = self 79 | .explorer 80 | .get_block_direct_targets(bb) 81 | .map(|targets, br_cond| targets.into_iter().map(move |target| (target, br_cond))); 82 | let edge_targets = match edge_targets { 83 | Edges::One(targets) => Either::Left(targets), 84 | Edges::Branch { t, e, .. } => Either::Right(t.merge(e)), 85 | }; 86 | 87 | let mut pc = block.pc; 88 | let mut children = vec![]; 89 | let mut per_edge_child = [None, None]; 90 | let mut static_exits = BTreeMap::new(); 91 | 92 | for (target, br_cond) in edge_targets { 93 | // Don't nest jumps to targets that look like functions, and don't 94 | // even include them in `static_exits`. 
95 | if self.function_like.contains(&target) { 96 | continue; 97 | } 98 | 99 | let next_bb = self 100 | .explorer 101 | .blocks 102 | .range(BlockId::from(pc.end)..) 103 | .map(|(&bb, _)| bb) 104 | .next(); 105 | 106 | if next_bb != Some(target) || self.ref_counts[&target] > 1 { 107 | *static_exits.entry(target).or_default() += 1; 108 | continue; 109 | } 110 | 111 | let child = self.get_or_compute_nested_block(target); 112 | pc.end = child.pc.end; 113 | for (&child_exit, &count) in &child.static_exits { 114 | *static_exits.entry(child_exit).or_default() += count; 115 | } 116 | 117 | if let Some(i) = br_cond { 118 | if per_edge_child[i as usize].is_none() { 119 | per_edge_child[i as usize] = Some(target); 120 | continue; 121 | } 122 | } 123 | 124 | children.push(target); 125 | } 126 | 127 | // Include any targets that could be the return from a call. 128 | if let Some(&target_bb) = self.explorer.eventual_static_continuation.get(&bb) { 129 | if target_bb > bb { 130 | *static_exits.entry(target_bb).or_default() += 1; 131 | } 132 | } 133 | 134 | // Also collect any merges (combined refcounts match total) as children. 135 | // This is done in two steps to allow non-loop jumps backwards within 136 | // a function (e.g. backwards goto, odd codegen, or handwritten asm). 137 | 138 | // Step 1: collect as many merges as possible, ignoring refcounts, 139 | // but accumulating a best-case version of `static_exits`. 140 | let mut merge_pc = pc; 141 | let mut merge_static_exits = static_exits.clone(); 142 | let mut merge_children = vec![]; 143 | while let Some((&next_bb, _)) = self 144 | .explorer 145 | .blocks 146 | .range(BlockId::from(merge_pc.end)..) 147 | .next() 148 | { 149 | // Only stop if we're past the last reachable block. 150 | // This allows some blocks to only be reached through later blocks 151 | // (this is properly checked in the second step). 152 | // FIXME(eddyb) perhaps optimize this? 
153 | if merge_static_exits.range(next_bb..).next().is_none() { 154 | break; 155 | } 156 | 157 | // Don't nest exit targets that look like functions. 158 | if self.function_like.contains(&next_bb) { 159 | break; 160 | } 161 | 162 | let child = self.get_or_compute_nested_block(next_bb); 163 | if merge_pc.end == child.pc.end { 164 | // HACK(eddyb) avoid infinite loops with 0-length children. 165 | break; 166 | } 167 | merge_pc.end = child.pc.end; 168 | for (&child_exit, &count) in &child.static_exits { 169 | *merge_static_exits.entry(child_exit).or_default() += count; 170 | } 171 | merge_children.push((next_bb, child)); 172 | } 173 | 174 | // Step 2: truncate `merge_children`, based on `merge_static_exits` 175 | // matching the refcount, and recompute `merge_static_exits` using the 176 | // truncated list. Keep repeating until `merge_children` stops changing. 177 | loop { 178 | let old_merge_children_count = merge_children.len(); 179 | let valid_merge_children = merge_children 180 | .iter() 181 | .take_while(|&(child_bb, _)| match merge_static_exits.get(&child_bb) { 182 | Some(&count) => count >= self.ref_counts.get(&child_bb).copied().unwrap_or(0), 183 | None => false, 184 | }) 185 | .count(); 186 | merge_children.truncate(valid_merge_children); 187 | 188 | if old_merge_children_count == merge_children.len() { 189 | break; 190 | } 191 | 192 | // FIXME(eddyb) perhaps make this less expensive? 193 | merge_static_exits = static_exits.clone(); 194 | for (_, child) in &merge_children { 195 | for (&child_exit, &count) in &child.static_exits { 196 | *merge_static_exits.entry(child_exit).or_default() += count; 197 | } 198 | } 199 | } 200 | 201 | // Combine the above merge children into this nested block. 
202 | if let Some((_, child)) = merge_children.last() { 203 | pc.end = child.pc.end; 204 | } 205 | children.extend(merge_children.into_iter().map(|(child_bb, _)| child_bb)); 206 | static_exits = merge_static_exits; 207 | 208 | // Don't include any `children` in `static_exits`. 209 | for &child_bb in &children { 210 | static_exits.remove(&child_bb); 211 | } 212 | 213 | self.nested_block_cache.insert( 214 | bb, 215 | Box::new(NestedBlock { 216 | pc, 217 | per_edge_child, 218 | children, 219 | static_exits, 220 | }), 221 | ) 222 | } 223 | 224 | // FIXME(eddyb) do this without allocating temporary `String`s. 225 | pub fn nested_block_to_string(&self, bb: BlockId, parent_pc: &mut RangeTo) -> String { 226 | struct WithSuffix(T, String); 227 | 228 | impl Visit for WithSuffix { 229 | fn walk(&self, visitor: &mut impl Visitor) { 230 | self.0.visit(visitor); 231 | } 232 | } 233 | 234 | impl fmt::Debug for WithSuffix { 235 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 236 | self.0.fmt(f)?; 237 | f.write_str(&self.1) 238 | } 239 | } 240 | 241 | let nested_block = self.get_or_compute_nested_block(bb); 242 | let block = &self.explorer.blocks[&bb]; 243 | 244 | // HACK(eddyb) sort branch edges if both have children. 
245 | let mut edges = block.edges.as_ref(); 246 | let mut per_edge_child = [ 247 | nested_block.per_edge_child[0], 248 | nested_block.per_edge_child[1], 249 | ]; 250 | if let [Some(e_child), Some(t_child)] = &mut per_edge_child { 251 | if t_child > e_child { 252 | if let Edges::Branch { cond, t, e } = &mut edges { 253 | mem::swap(t_child, e_child); 254 | mem::swap(t, e); 255 | *cond = self.explorer.cx.a(!*cond); 256 | } 257 | } 258 | } 259 | 260 | let mut pc = block.pc; 261 | 262 | let edges = edges.map(|e, br_cond| { 263 | let suffix = 264 | br_cond 265 | .and_then(|i| per_edge_child[i as usize]) 266 | .map_or(String::new(), |child| { 267 | format!( 268 | "\n {}", 269 | self.nested_block_to_string(child, &mut pc) 270 | .replace("\n", "\n ") 271 | ) 272 | }); 273 | (&e.state, WithSuffix(&e.effect, suffix)) 274 | }); 275 | 276 | let mut body = self 277 | .explorer 278 | .cx 279 | .pretty_print_with_states_on_edges(edges.as_ref().map(|e, _| (e.0, &e.1))) 280 | .to_string(); 281 | 282 | if !nested_block.children.is_empty() { 283 | // HACK(eddyb) "re-open" the outermost `{...}`, or create it if missing. 284 | if body.starts_with("{\n") && body.ends_with("\n}") { 285 | body.pop(); 286 | } else { 287 | body.insert_str(0, "{\n "); 288 | body += "\n"; 289 | } 290 | 291 | for &child in &nested_block.children { 292 | body += " "; 293 | body += &self 294 | .nested_block_to_string(child, &mut pc) 295 | .replace("\n", "\n "); 296 | body += "\n"; 297 | } 298 | 299 | // Close the outermost `{...}` back up. 300 | body += "}"; 301 | } 302 | 303 | let mut s = String::new(); 304 | 305 | if parent_pc.end.as_u64() < bb.entry_pc { 306 | let _ = writeln!(s, "{:?} {{", parent_pc.end); 307 | let _ = writeln!( 308 | s, 309 | " /* {} unanalyzed bytes */", 310 | bb.entry_pc - parent_pc.end.as_u64() 311 | ); 312 | let _ = writeln!(s, "}}"); 313 | } 314 | parent_pc.end = nested_block.pc.end; 315 | 316 | // FIXME(eddyb) this is wasteful, avoid copying the string around like that. 
317 | let _ = write!(s, "{:?} {}", bb, body); 318 | 319 | s 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "ansi_term" 13 | version = "0.12.1" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" 16 | dependencies = [ 17 | "winapi", 18 | ] 19 | 20 | [[package]] 21 | name = "atty" 22 | version = "0.2.14" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 25 | dependencies = [ 26 | "hermit-abi", 27 | "libc", 28 | "winapi", 29 | ] 30 | 31 | [[package]] 32 | name = "autocfg" 33 | version = "1.1.0" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 36 | 37 | [[package]] 38 | name = "bitflags" 39 | version = "1.3.2" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 42 | 43 | [[package]] 44 | name = "cc" 45 | version = "1.0.73" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" 48 | 49 | [[package]] 50 | name = "cfg-if" 51 | version = "0.1.10" 52 | source = "registry+https://github.com/rust-lang/crates.io-index" 53 | checksum = 
"4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" 54 | 55 | [[package]] 56 | name = "cfg-if" 57 | version = "1.0.0" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 60 | 61 | [[package]] 62 | name = "clap" 63 | version = "2.34.0" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" 66 | dependencies = [ 67 | "ansi_term", 68 | "atty", 69 | "bitflags", 70 | "strsim", 71 | "textwrap", 72 | "unicode-width", 73 | "vec_map", 74 | ] 75 | 76 | [[package]] 77 | name = "crc32fast" 78 | version = "1.3.2" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" 81 | dependencies = [ 82 | "cfg-if 1.0.0", 83 | ] 84 | 85 | [[package]] 86 | name = "ctrlc" 87 | version = "3.2.1" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "a19c6cedffdc8c03a3346d723eb20bd85a13362bb96dc2ac000842c6381ec7bf" 90 | dependencies = [ 91 | "nix", 92 | "winapi", 93 | ] 94 | 95 | [[package]] 96 | name = "dirs" 97 | version = "2.0.2" 98 | source = "registry+https://github.com/rust-lang/crates.io-index" 99 | checksum = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" 100 | dependencies = [ 101 | "cfg-if 0.1.10", 102 | "dirs-sys", 103 | ] 104 | 105 | [[package]] 106 | name = "dirs-sys" 107 | version = "0.3.7" 108 | source = "registry+https://github.com/rust-lang/crates.io-index" 109 | checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" 110 | dependencies = [ 111 | "libc", 112 | "redox_users", 113 | "winapi", 114 | ] 115 | 116 | [[package]] 117 | name = "either" 118 | version = "1.6.1" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = 
"e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" 121 | 122 | [[package]] 123 | name = "elsa" 124 | version = "1.7.0" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "2b4b5d23ed6b6948d68240aafa4ac98e568c9a020efd9d4201a6288bc3006e09" 127 | dependencies = [ 128 | "stable_deref_trait", 129 | ] 130 | 131 | [[package]] 132 | name = "flate2" 133 | version = "1.0.22" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" 136 | dependencies = [ 137 | "cfg-if 1.0.0", 138 | "crc32fast", 139 | "libc", 140 | "miniz_oxide", 141 | ] 142 | 143 | [[package]] 144 | name = "getrandom" 145 | version = "0.2.6" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" 148 | dependencies = [ 149 | "cfg-if 1.0.0", 150 | "libc", 151 | "wasi", 152 | ] 153 | 154 | [[package]] 155 | name = "heck" 156 | version = "0.3.3" 157 | source = "registry+https://github.com/rust-lang/crates.io-index" 158 | checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" 159 | dependencies = [ 160 | "unicode-segmentation", 161 | ] 162 | 163 | [[package]] 164 | name = "hermit-abi" 165 | version = "0.1.19" 166 | source = "registry+https://github.com/rust-lang/crates.io-index" 167 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 168 | dependencies = [ 169 | "libc", 170 | ] 171 | 172 | [[package]] 173 | name = "itertools" 174 | version = "0.8.2" 175 | source = "registry+https://github.com/rust-lang/crates.io-index" 176 | checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" 177 | dependencies = [ 178 | "either", 179 | ] 180 | 181 | [[package]] 182 | name = "lazy_static" 183 | version = "1.4.0" 184 | source = "registry+https://github.com/rust-lang/crates.io-index" 185 | checksum = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 186 | 187 | [[package]] 188 | name = "libc" 189 | version = "0.2.122" 190 | source = "registry+https://github.com/rust-lang/crates.io-index" 191 | checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259" 192 | 193 | [[package]] 194 | name = "memchr" 195 | version = "2.4.1" 196 | source = "registry+https://github.com/rust-lang/crates.io-index" 197 | checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" 198 | 199 | [[package]] 200 | name = "memmap2" 201 | version = "0.5.3" 202 | source = "registry+https://github.com/rust-lang/crates.io-index" 203 | checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" 204 | dependencies = [ 205 | "libc", 206 | ] 207 | 208 | [[package]] 209 | name = "memoffset" 210 | version = "0.6.5" 211 | source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" 213 | dependencies = [ 214 | "autocfg", 215 | ] 216 | 217 | [[package]] 218 | name = "miniz_oxide" 219 | version = "0.4.4" 220 | source = "registry+https://github.com/rust-lang/crates.io-index" 221 | checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" 222 | dependencies = [ 223 | "adler", 224 | "autocfg", 225 | ] 226 | 227 | [[package]] 228 | name = "nix" 229 | version = "0.23.1" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6" 232 | dependencies = [ 233 | "bitflags", 234 | "cc", 235 | "cfg-if 1.0.0", 236 | "libc", 237 | "memoffset", 238 | ] 239 | 240 | [[package]] 241 | name = "object" 242 | version = "0.28.3" 243 | source = "registry+https://github.com/rust-lang/crates.io-index" 244 | checksum = "40bec70ba014595f99f7aa110b84331ffe1ee9aece7fe6f387cc7e3ecda4d456" 245 | dependencies = [ 246 | "flate2", 247 | "memchr", 248 | ] 249 | 250 | 
[[package]] 251 | name = "paw" 252 | version = "1.0.0" 253 | source = "registry+https://github.com/rust-lang/crates.io-index" 254 | checksum = "09c0fc9b564dbc3dc2ed7c92c0c144f4de340aa94514ce2b446065417c4084e9" 255 | dependencies = [ 256 | "paw-attributes", 257 | "paw-raw", 258 | ] 259 | 260 | [[package]] 261 | name = "paw-attributes" 262 | version = "1.0.2" 263 | source = "registry+https://github.com/rust-lang/crates.io-index" 264 | checksum = "0f35583365be5d148e959284f42526841917b7bfa09e2d1a7ad5dde2cf0eaa39" 265 | dependencies = [ 266 | "proc-macro2", 267 | "quote", 268 | "syn", 269 | ] 270 | 271 | [[package]] 272 | name = "paw-raw" 273 | version = "1.0.0" 274 | source = "registry+https://github.com/rust-lang/crates.io-index" 275 | checksum = "7f0b59668fe80c5afe998f0c0bf93322bf2cd66cafeeb80581f291716f3467f2" 276 | 277 | [[package]] 278 | name = "proc-macro-error" 279 | version = "1.0.4" 280 | source = "registry+https://github.com/rust-lang/crates.io-index" 281 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 282 | dependencies = [ 283 | "proc-macro-error-attr", 284 | "proc-macro2", 285 | "quote", 286 | "syn", 287 | "version_check", 288 | ] 289 | 290 | [[package]] 291 | name = "proc-macro-error-attr" 292 | version = "1.0.4" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 295 | dependencies = [ 296 | "proc-macro2", 297 | "quote", 298 | "version_check", 299 | ] 300 | 301 | [[package]] 302 | name = "proc-macro2" 303 | version = "1.0.37" 304 | source = "registry+https://github.com/rust-lang/crates.io-index" 305 | checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1" 306 | dependencies = [ 307 | "unicode-xid", 308 | ] 309 | 310 | [[package]] 311 | name = "quote" 312 | version = "1.0.17" 313 | source = "registry+https://github.com/rust-lang/crates.io-index" 314 | checksum = 
"632d02bff7f874a36f33ea8bb416cd484b90cc66c1194b1a1110d067a7013f58" 315 | dependencies = [ 316 | "proc-macro2", 317 | ] 318 | 319 | [[package]] 320 | name = "redox_syscall" 321 | version = "0.2.13" 322 | source = "registry+https://github.com/rust-lang/crates.io-index" 323 | checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" 324 | dependencies = [ 325 | "bitflags", 326 | ] 327 | 328 | [[package]] 329 | name = "redox_users" 330 | version = "0.4.3" 331 | source = "registry+https://github.com/rust-lang/crates.io-index" 332 | checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" 333 | dependencies = [ 334 | "getrandom", 335 | "redox_syscall", 336 | "thiserror", 337 | ] 338 | 339 | [[package]] 340 | name = "scoped-tls" 341 | version = "0.1.2" 342 | source = "registry+https://github.com/rust-lang/crates.io-index" 343 | checksum = "332ffa32bf586782a3efaeb58f127980944bbc8c4d6913a86107ac2a5ab24b28" 344 | 345 | [[package]] 346 | name = "smallvec" 347 | version = "1.8.0" 348 | source = "registry+https://github.com/rust-lang/crates.io-index" 349 | checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" 350 | 351 | [[package]] 352 | name = "sobek" 353 | version = "0.0.1" 354 | dependencies = [ 355 | "ctrlc", 356 | "elsa", 357 | "itertools", 358 | "memmap2", 359 | "object", 360 | "paw", 361 | "scoped-tls", 362 | "smallvec", 363 | "structopt", 364 | "term", 365 | ] 366 | 367 | [[package]] 368 | name = "stable_deref_trait" 369 | version = "1.2.0" 370 | source = "registry+https://github.com/rust-lang/crates.io-index" 371 | checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" 372 | 373 | [[package]] 374 | name = "strsim" 375 | version = "0.8.0" 376 | source = "registry+https://github.com/rust-lang/crates.io-index" 377 | checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" 378 | 379 | [[package]] 380 | name = "structopt" 381 | version = "0.3.26" 382 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 383 | checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" 384 | dependencies = [ 385 | "clap", 386 | "lazy_static", 387 | "paw", 388 | "structopt-derive", 389 | ] 390 | 391 | [[package]] 392 | name = "structopt-derive" 393 | version = "0.4.18" 394 | source = "registry+https://github.com/rust-lang/crates.io-index" 395 | checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" 396 | dependencies = [ 397 | "heck", 398 | "proc-macro-error", 399 | "proc-macro2", 400 | "quote", 401 | "syn", 402 | ] 403 | 404 | [[package]] 405 | name = "syn" 406 | version = "1.0.91" 407 | source = "registry+https://github.com/rust-lang/crates.io-index" 408 | checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d" 409 | dependencies = [ 410 | "proc-macro2", 411 | "quote", 412 | "unicode-xid", 413 | ] 414 | 415 | [[package]] 416 | name = "term" 417 | version = "0.6.1" 418 | source = "registry+https://github.com/rust-lang/crates.io-index" 419 | checksum = "c0863a3345e70f61d613eab32ee046ccd1bcc5f9105fe402c61fcd0c13eeb8b5" 420 | dependencies = [ 421 | "dirs", 422 | "winapi", 423 | ] 424 | 425 | [[package]] 426 | name = "textwrap" 427 | version = "0.11.0" 428 | source = "registry+https://github.com/rust-lang/crates.io-index" 429 | checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 430 | dependencies = [ 431 | "unicode-width", 432 | ] 433 | 434 | [[package]] 435 | name = "thiserror" 436 | version = "1.0.30" 437 | source = "registry+https://github.com/rust-lang/crates.io-index" 438 | checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" 439 | dependencies = [ 440 | "thiserror-impl", 441 | ] 442 | 443 | [[package]] 444 | name = "thiserror-impl" 445 | version = "1.0.30" 446 | source = "registry+https://github.com/rust-lang/crates.io-index" 447 | checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" 448 
| dependencies = [ 449 | "proc-macro2", 450 | "quote", 451 | "syn", 452 | ] 453 | 454 | [[package]] 455 | name = "unicode-segmentation" 456 | version = "1.9.0" 457 | source = "registry+https://github.com/rust-lang/crates.io-index" 458 | checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" 459 | 460 | [[package]] 461 | name = "unicode-width" 462 | version = "0.1.9" 463 | source = "registry+https://github.com/rust-lang/crates.io-index" 464 | checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" 465 | 466 | [[package]] 467 | name = "unicode-xid" 468 | version = "0.2.2" 469 | source = "registry+https://github.com/rust-lang/crates.io-index" 470 | checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" 471 | 472 | [[package]] 473 | name = "vec_map" 474 | version = "0.8.2" 475 | source = "registry+https://github.com/rust-lang/crates.io-index" 476 | checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" 477 | 478 | [[package]] 479 | name = "version_check" 480 | version = "0.9.4" 481 | source = "registry+https://github.com/rust-lang/crates.io-index" 482 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 483 | 484 | [[package]] 485 | name = "wasi" 486 | version = "0.10.2+wasi-snapshot-preview1" 487 | source = "registry+https://github.com/rust-lang/crates.io-index" 488 | checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" 489 | 490 | [[package]] 491 | name = "winapi" 492 | version = "0.3.9" 493 | source = "registry+https://github.com/rust-lang/crates.io-index" 494 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 495 | dependencies = [ 496 | "winapi-i686-pc-windows-gnu", 497 | "winapi-x86_64-pc-windows-gnu", 498 | ] 499 | 500 | [[package]] 501 | name = "winapi-i686-pc-windows-gnu" 502 | version = "0.4.0" 503 | source = "registry+https://github.com/rust-lang/crates.io-index" 504 | checksum = 
"ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 505 | 506 | [[package]] 507 | name = "winapi-x86_64-pc-windows-gnu" 508 | version = "0.4.0" 509 | source = "registry+https://github.com/rust-lang/crates.io-index" 510 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 511 | -------------------------------------------------------------------------------- /src/ir/build.rs: -------------------------------------------------------------------------------- 1 | //! Builder abstraction allowing the use of overloaded operators and 2 | //! inherent methods to build IR subgraphs more conveniently. 3 | 4 | use crate::ir::{BitSize, Const, Cx, INode, IntOp, InternInCx, MemRef, Node, Type}; 5 | 6 | /// Newtype to simplify operator overloading by centralizing blanket impls. 7 | /// 8 | /// In general, `impl Add for T` doesn't pass coherence even 9 | /// when `Foo`/`Bar` are only implemented for a fixed set of type constructors, 10 | /// so instead one of these two approaches is needed: 11 | /// * expand the `impl` for every shape of `T`/`U` that it could possibly fit, 12 | /// as to not need the `Foo`/`Bar` bounds, at the cost of needing many `impl`s 13 | /// * use `impl Add for Builder` instead, at the const of 14 | /// additional wrapping/unwrapping (which may be abstracted away sometimes) 15 | /// 16 | /// The second approach is used here, hence this newtype. 
17 | #[derive(Copy, Clone)] 18 | pub struct Builder(T); 19 | 20 | impl InternInCx for Builder 21 | where 22 | Self: Build, 23 | { 24 | type Interned = INode; 25 | fn intern_in_cx(self, cx: &Cx) -> INode { 26 | Cx::built_to_interned(cx, self.build(cx)) 27 | } 28 | } 29 | 30 | pub trait Build: Sized { 31 | fn build(self, cx: &C) -> C::Built; 32 | } 33 | 34 | pub trait BuildCx: Sized { 35 | type Built: Copy; 36 | fn built_from_const(c: Const) -> Self::Built; 37 | fn intern_to_built(&self, node: Node) -> Self::Built; 38 | fn built_to_interned(&self, built: Self::Built) -> INode; 39 | fn built_ty(&self, built: Self::Built) -> Type; 40 | fn built_as_const(&self, built: Self::Built) -> Option; 41 | } 42 | 43 | impl Build for Const { 44 | #[inline(always)] 45 | fn build(self, _cx: &C) -> C::Built { 46 | C::built_from_const(self) 47 | } 48 | } 49 | 50 | impl BuildCx for Cx { 51 | type Built = Result; 52 | 53 | #[inline(always)] 54 | fn built_from_const(c: Const) -> Self::Built { 55 | Ok(c) 56 | } 57 | 58 | #[inline] 59 | fn intern_to_built(&self, node: Node) -> Self::Built { 60 | Err(self.a(node)) 61 | } 62 | 63 | #[inline] 64 | fn built_to_interned(&self, built: Self::Built) -> INode { 65 | match built { 66 | Ok(c) => self.a(Node::Const(c)), 67 | Err(x) => x, 68 | } 69 | } 70 | 71 | #[inline] 72 | fn built_ty(&self, built: Self::Built) -> Type { 73 | match built { 74 | Ok(c) => Node::Const(c).ty(self), 75 | Err(x) => self[x].ty(self), 76 | } 77 | } 78 | 79 | #[inline] 80 | fn built_as_const(&self, built: Self::Built) -> Option { 81 | match built { 82 | Ok(c) => Some(c), 83 | Err(x) => self[x].as_const(), 84 | } 85 | } 86 | } 87 | 88 | impl Build for INode { 89 | #[inline(always)] 90 | fn build(self, _cx: &Cx) -> ::Built { 91 | Err(self) 92 | } 93 | } 94 | 95 | // HACK(eddyb) rather than declaring everything with configurable macros, these 96 | // "higher-order macros" effectively describe lists that can be then "iterated" 97 | // (they invoke the macro name `$m` they get 
passed in, for each list item). 98 | macro_rules! with_binops { 99 | ($m:ident!($($prefix:tt)*)) => { 100 | // These map to a single `Node::Int(IntOp::Foo, ...)` each. 101 | // (`[failible]` is used to indicate `Const` cannot be supported because 102 | // for some values the operation can fail to evaluate, e.g. `x / 0`) 103 | $m!($($prefix)* Add::add = IntOp::Add); 104 | $m!($($prefix)* Mul::mul = IntOp::Mul); 105 | $m!($($prefix)* [failible] DivS::div_s = IntOp::DivS); 106 | $m!($($prefix)* [failible] DivU::div_u = IntOp::DivU); 107 | $m!($($prefix)* [failible] RemS::rem_s = IntOp::RemS); 108 | $m!($($prefix)* [failible] RemU::rem_u = IntOp::RemU); 109 | $m!($($prefix)* CmpEq::cmp_eq = IntOp::Eq); 110 | $m!($($prefix)* CmpLtS::cmp_lt_s = IntOp::LtS); 111 | $m!($($prefix)* CmpLtU::cmp_lt_u = IntOp::LtU); 112 | $m!($($prefix)* BitAnd::bitand = IntOp::And); 113 | $m!($($prefix)* BitOr::bitor = IntOp::Or); 114 | $m!($($prefix)* BitXor::bitxor = IntOp::Xor); 115 | $m!($($prefix)* Shl::shl = IntOp::Shl); 116 | $m!($($prefix)* ShrS::shr_s = IntOp::ShrS); 117 | $m!($($prefix)* ShrU::shr_u = IntOp::ShrU); 118 | 119 | // These are more "artificial". 120 | $m!($($prefix)* Sub::sub); 121 | // FIXME(eddyb) these names are a bit too short maybe? 122 | $m!($($prefix)* Rol::rol); 123 | $m!($($prefix)* Ror::ror); 124 | }; 125 | } 126 | macro_rules! with_binops_needing_inherent_proxy { 127 | ($m:ident!($($prefix:tt)*)) => { 128 | // These aren't actually part of the Rust language, and need inherent methods. 129 | $m!($($prefix)* DivS::div_s); 130 | $m!($($prefix)* DivU::div_u); 131 | $m!($($prefix)* RemS::rem_s); 132 | $m!($($prefix)* RemU::rem_u); 133 | $m!($($prefix)* CmpEq::cmp_eq); 134 | $m!($($prefix)* CmpLtS::cmp_lt_s); 135 | $m!($($prefix)* CmpLtU::cmp_lt_u); 136 | $m!($($prefix)* ShrS::shr_s); 137 | $m!($($prefix)* ShrU::shr_u); 138 | $m!($($prefix)* Rol::rol); 139 | $m!($($prefix)* Ror::ror); 140 | }; 141 | } 142 | macro_rules! 
with_binops_shiftlike { 143 | ($m:ident!($($prefix:tt)*)) => { 144 | // These are "shift-like", i.e. their RHS is a bit count. 145 | $m!($($prefix)* Shl::shl); 146 | $m!($($prefix)* ShrS::shr_s); 147 | $m!($($prefix)* ShrU::shr_u); 148 | $m!($($prefix)* Rol::rol); 149 | $m!($($prefix)* Ror::ror); 150 | }; 151 | } 152 | macro_rules! with_unops { 153 | ($m:ident!($($prefix:tt)*)) => { 154 | $m!($($prefix)* Neg::neg); 155 | $m!($($prefix)* Not::not); 156 | } 157 | } 158 | macro_rules! with_cast_ops { 159 | ($m:ident!($($prefix:tt)*)) => { 160 | $m!($($prefix)* Trunc => trunc); 161 | $m!($($prefix)* Sext => sext); 162 | $m!($($prefix)* Zext => zext); 163 | }; 164 | } 165 | 166 | /// Marker types used in parameterizing `Builder` (to specify an operation). 167 | mod tag { 168 | use crate::ir::IntOp; 169 | 170 | macro_rules! decl_binop { 171 | ($([failible])? $Trait:ident :: $method:ident $(= $int_op:expr)?) => { 172 | pub struct $Trait; 173 | $(impl Into for $Trait { 174 | #[inline(always)] 175 | fn into(self) -> IntOp { 176 | $int_op 177 | } 178 | })? 179 | }; 180 | } 181 | 182 | macro_rules! decl_unop { 183 | ($Trait:ident :: $method:ident) => { 184 | pub struct $Trait; 185 | }; 186 | } 187 | macro_rules! decl_cast_op { 188 | ($Op:ident => $method:ident) => { 189 | pub struct $Op; 190 | }; 191 | } 192 | with_binops!(decl_binop!()); 193 | with_unops!(decl_unop!()); 194 | with_cast_ops!(decl_cast_op!()); 195 | 196 | // HACK(eddyb) manual for now (not macro-integrated). 197 | pub struct Load; 198 | pub struct Store; 199 | } 200 | 201 | /// `std::ops` traits, and additional ones, all used for multi-dispatch. 202 | mod ops { 203 | pub use std::ops::{Add, BitAnd, BitOr, BitXor, Mul, Neg, Not, Shl, Sub}; 204 | 205 | macro_rules! 
decl_faux_binop { 206 | ($Trait:ident :: $method:ident) => { 207 | pub trait $Trait { 208 | type Output; 209 | fn $method(self, other: Other) -> Self::Output; 210 | } 211 | }; 212 | } 213 | with_binops_needing_inherent_proxy!(decl_faux_binop!()); 214 | } 215 | 216 | /// Marker trait used by any `Builder`-producing operators/methods, for params 217 | /// not limited by coherence (e.g. the RHS type of `std::ops` binops - the LHS 218 | /// in that case has to be a local type like `Builder<_>` / `INode`, instead). 219 | pub trait OpInput {} 220 | 221 | /// Like `OpInput`, but excluding `Const`, to avoid covering the case where both 222 | /// operands are `Const` (which is separately implemented to return `Const`). 223 | pub trait OpInputNoConst {} 224 | 225 | macro_rules! impl_op_input { 226 | ( 227 | OpInputTypes { 228 | $($In:ident $(<$($InG:ident),+>)?),+ $(,)? 229 | } 230 | OpInputNoConstTypes { 231 | $($InNoConst:ident $(<$($InNoConstG:ident),+>)?),+ $(,)? 232 | } 233 | ) => { 234 | $(impl<$($($InG,)+)?> OpInput for $In<$($($InG,)+)?> {})+ 235 | $(impl<$($($InNoConstG,)+)?> OpInputNoConst for $InNoConst<$($($InNoConstG,)+)?> {})+ 236 | }; 237 | } 238 | 239 | macro_rules! impl_binop { 240 | ( 241 | OpInputTypes { 242 | $($In:ident $(<$($InG:ident),+>)?),+ $(,)? 243 | } 244 | OpInputNoConstTypes { 245 | $($InNoConst:ident $(<$($InNoConstG:ident),+>)?),+ $(,)? 246 | } 247 | 248 | $([failible])? $Trait:ident :: $method:ident $(= $int_op:expr)? 249 | ) => { 250 | // All `(any OpInput, any OpInput)` combinations except `(Const, _)`. 251 | $(impl<$($($InNoConstG,)+)? Other: OpInput> 252 | ops::$Trait for $InNoConst<$($($InNoConstG,)+)?> 253 | { 254 | type Output = Builder<(tag::$Trait, Self, Other)>; 255 | #[inline(always)] 256 | fn $method(self, other: Other) -> Self::Output { 257 | Builder((tag::$Trait, self, other)) 258 | } 259 | })+ 260 | 261 | // All `(Const, any OpInput)` combinations except `(Const, Const)`. 
262 | impl ops::$Trait for Const { 263 | type Output = Builder<(tag::$Trait, Self, Other)>; 264 | #[inline(always)] 265 | fn $method(self, other: Other) -> Self::Output { 266 | Builder((tag::$Trait, self, other)) 267 | } 268 | } 269 | 270 | // The remaining `(Const, Const)` combination of operands is handled 271 | // elsewhere (see `impl_const_binop`). 272 | }; 273 | } 274 | macro_rules! impl_binop_inherent_proxy { 275 | ( 276 | OpInputTypes { 277 | $($In:ident $(<$($InG:ident),+>)?),+ $(,)? 278 | } 279 | OpInputNoConstTypes { 280 | $($InNoConst:ident $(<$($InNoConstG:ident),+>)?),+ $(,)? 281 | } 282 | 283 | $Trait:ident :: $method:ident 284 | ) => { 285 | $(impl<$($($InG,)+)?> $In<$($($InG,)+)?> { 286 | #[inline(always)] 287 | pub fn $method( 288 | self, 289 | other: Other, 290 | ) -> >::Output 291 | where Self: ops::$Trait 292 | { 293 | ops::$Trait::$method(self, other) 294 | } 295 | })+ 296 | }; 297 | } 298 | macro_rules! impl_binop_shiftlike { 299 | ( 300 | OpInputTypes { 301 | $($In:ident $(<$($InG:ident),+>)?),+ $(,)? 302 | } 303 | OpInputNoConstTypes { 304 | $($InNoConst:ident $(<$($InNoConstG:ident),+>)?),+ $(,)? 305 | } 306 | 307 | $Trait:ident :: $method:ident 308 | ) => { 309 | // FIXME(eddyb) more than one such impl makes integer literal inference 310 | // ambiguous, but it also can't be generic because it'd conflict. 311 | $(impl<$($($InG,)+)?> ops::$Trait for $In<$($($InG,)+)?> 312 | where Self: ops::$Trait 313 | { 314 | type Output = >::Output; 315 | #[inline(always)] 316 | fn $method(self, amount: u32) -> Self::Output { 317 | ops::$Trait::$method(self, Const::new(BitSize::B8, amount as u64)) 318 | } 319 | })+ 320 | }; 321 | } 322 | macro_rules! impl_unop { 323 | ( 324 | OpInputTypes { 325 | $($In:ident $(<$($InG:ident),+>)?),+ $(,)? 326 | } 327 | OpInputNoConstTypes { 328 | $($InNoConst:ident $(<$($InNoConstG:ident),+>)?),+ $(,)? 
329 | } 330 | 331 | $Trait:ident :: $method:ident 332 | ) => { 333 | $(impl<$($($InG,)+)?> ops::$Trait for $In<$($($InG,)+)?> { 334 | type Output = Builder<(tag::$Trait, Self)>; 335 | #[inline(always)] 336 | fn $method(self) -> Self::Output { 337 | Builder((tag::$Trait, self)) 338 | } 339 | })+ 340 | }; 341 | } 342 | macro_rules! impl_cast_op { 343 | ( 344 | OpInputTypes { 345 | $($In:ident $(<$($InG:ident),+>)?),+ $(,)? 346 | } 347 | OpInputNoConstTypes { 348 | $($InNoConst:ident $(<$($InNoConstG:ident),+>)?),+ $(,)? 349 | } 350 | 351 | $Op:ident => $method:ident 352 | ) => { 353 | $(impl<$($($InNoConstG,)+)?> $InNoConst<$($($InNoConstG,)+)?> { 354 | #[inline(always)] 355 | pub fn $method(self, size: BitSize) -> Builder<(tag::$Op, BitSize, Self)> { 356 | Builder((tag::$Op, size, self)) 357 | } 358 | })+ 359 | 360 | impl> Build for Builder<(tag::$Op, BitSize, T)> { 361 | #[inline] 362 | fn build(self, cx: &C) -> C::Built { 363 | let Builder((tag::$Op, size, x)) = self; 364 | let x = x.build(cx); 365 | 366 | let x_size = cx.built_ty(x).bit_size().unwrap(); 367 | 368 | // FIXME(eddyb) this duplicates some `Node::normalize_for_interning` simplification. 369 | 370 | // Simplify noops. 371 | if size == x_size { 372 | return x; 373 | } 374 | 375 | if let Some(c) = cx.built_as_const(x) { 376 | return C::built_from_const(c.$method(size)); 377 | } 378 | 379 | cx.intern_to_built(Node::$Op(size, cx.built_to_interned(x))) 380 | } 381 | } 382 | }; 383 | } 384 | macro_rules! impl_all { 385 | ($($globals:tt)*) => { 386 | impl_op_input!($($globals)*); 387 | with_binops!(impl_binop!($($globals)*)); 388 | with_binops_needing_inherent_proxy!(impl_binop_inherent_proxy!($($globals)*)); 389 | with_binops_shiftlike!(impl_binop_shiftlike!($($globals)*)); 390 | with_unops!(impl_unop!($($globals)*)); 391 | with_cast_ops!(impl_cast_op!($($globals)*)); 392 | } 393 | } 394 | impl_all! 
{ 395 | OpInputTypes { 396 | Builder, 397 | INode, 398 | Const, 399 | } 400 | OpInputNoConstTypes { 401 | Builder, 402 | INode, 403 | } 404 | } 405 | 406 | impl, A: Build, B: Build> Build for Builder<(O, A, B)> { 407 | #[inline] 408 | fn build(self, cx: &C) -> C::Built { 409 | let Builder((op, a, b)) = self; 410 | let op = op.into(); 411 | let (a, b) = (a.build(cx), b.build(cx)); 412 | 413 | // FIXME(eddyb) this duplicates some `Node::normalize_for_interning` simplification. 414 | let c_a = cx.built_as_const(a); 415 | let c_b = cx.built_as_const(b); 416 | if let Some(r) = op.simplify(c_a.ok_or(a), c_b.ok_or(b)) { 417 | return r.map(C::built_from_const).unwrap_or_else(|x| x); 418 | } 419 | 420 | let size = cx.built_ty(a).bit_size().unwrap(); 421 | 422 | cx.intern_to_built(Node::Int( 423 | op, 424 | size, 425 | cx.built_to_interned(a), 426 | cx.built_to_interned(b), 427 | )) 428 | } 429 | } 430 | 431 | // HACK(eddyb) this allows custom impls to reinput `C::Built` into operators. 432 | #[derive(Copy, Clone)] 433 | struct FromBuilt; 434 | impl Build for Builder<(FromBuilt, C::Built)> { 435 | #[inline(always)] 436 | fn build(self, _cx: &C) -> C::Built { 437 | let Builder((FromBuilt, x)) = self; 438 | x 439 | } 440 | } 441 | fn from_built(x: T) -> Builder<(FromBuilt, T)> { 442 | Builder((FromBuilt, x)) 443 | } 444 | 445 | // FIXME(eddyb) this could theoretically be evaluated when `Sub::sub` gets called, 446 | // and have it return `(Add, A, (Neg, B))` then. 
447 | impl, B: Build> Build for Builder<(tag::Sub, A, B)> { 448 | #[inline] 449 | fn build(self, cx: &C) -> C::Built { 450 | let Builder((tag::Sub, a, b)) = self; 451 | let (a, b) = (a.build(cx), b.build(cx)); 452 | 453 | let (a, b) = (from_built(a), from_built(b)); 454 | (a + (-b)).build(cx) 455 | } 456 | } 457 | 458 | impl, B: Build> Build for Builder<(tag::Rol, A, B)> { 459 | #[inline] 460 | fn build(self, cx: &C) -> C::Built { 461 | let Builder((tag::Rol, v, n)) = self; 462 | let (v, n) = (v.build(cx), n.build(cx)); 463 | 464 | let v_size = cx.built_ty(v).bit_size().unwrap(); 465 | let n_size = cx.built_ty(n).bit_size().unwrap(); 466 | 467 | let (v, n) = (from_built(v), from_built(n)); 468 | ((v << n) | v.shr_u(Const::new(n_size, v_size.bits() as u64) - n)).build(cx) 469 | } 470 | } 471 | 472 | impl, B: Build> Build for Builder<(tag::Ror, A, B)> { 473 | #[inline] 474 | fn build(self, cx: &C) -> C::Built { 475 | let Builder((tag::Ror, v, n)) = self; 476 | let (v, n) = (v.build(cx), n.build(cx)); 477 | 478 | let v_size = cx.built_ty(v).bit_size().unwrap(); 479 | let n_size = cx.built_ty(n).bit_size().unwrap(); 480 | 481 | let (v, n) = (from_built(v), from_built(n)); 482 | (v.shr_u(n) | (v << (Const::new(n_size, v_size.bits() as u64) - n))).build(cx) 483 | } 484 | } 485 | 486 | impl> Build for Builder<(tag::Neg, T)> { 487 | #[inline] 488 | fn build(self, cx: &C) -> C::Built { 489 | let Builder((tag::Neg, x)) = self; 490 | let x = x.build(cx); 491 | 492 | let size = cx.built_ty(x).bit_size().unwrap(); 493 | let minus_1 = Const::new(size, size.mask()); 494 | 495 | let x = from_built(x); 496 | (x * minus_1).build(cx) 497 | } 498 | } 499 | 500 | impl> Build for Builder<(tag::Not, T)> { 501 | #[inline] 502 | fn build(self, cx: &C) -> C::Built { 503 | let Builder((tag::Not, x)) = self; 504 | let x = x.build(cx); 505 | 506 | let size = cx.built_ty(x).bit_size().unwrap(); 507 | let all_ones = Const::new(size, size.mask()); 508 | 509 | let x = from_built(x); 510 | (x ^ 
all_ones).build(cx) 511 | } 512 | } 513 | 514 | impl MemRef { 515 | #[inline(always)] 516 | pub fn load(self) -> Builder<(tag::Load, Self)> { 517 | Builder((tag::Load, self)) 518 | } 519 | 520 | #[inline(always)] 521 | pub fn store(self, v: V) -> Builder<(tag::Store, Self, V)> { 522 | Builder((tag::Store, self, v)) 523 | } 524 | } 525 | 526 | impl> Build for Builder<(tag::Load, MemRef)> { 527 | #[inline] 528 | fn build(self, cx: &Cx) -> ::Built { 529 | let Builder((tag::Load, r)) = self; 530 | let r = r.map_addr(|a| a.build(cx)); 531 | 532 | cx.intern_to_built(Node::Load(r.map_addr(|a| cx.built_to_interned(a)))) 533 | } 534 | } 535 | impl, V: Build> Build for Builder<(tag::Store, MemRef, V)> { 536 | #[inline] 537 | fn build(self, cx: &Cx) -> ::Built { 538 | let Builder((tag::Store, r, v)) = self; 539 | let (r, v) = (r.map_addr(|a| a.build(cx)), v.build(cx)); 540 | 541 | cx.intern_to_built(Node::Store( 542 | r.map_addr(|a| cx.built_to_interned(a)), 543 | cx.built_to_interned(v), 544 | )) 545 | } 546 | } 547 | 548 | // `Const` overloaded ops. 549 | 550 | /// `BuildCx` implementor that only supports `Const`s. 551 | struct ConstCx; 552 | 553 | impl BuildCx for ConstCx { 554 | type Built = Const; 555 | 556 | #[inline(always)] 557 | fn built_from_const(c: Const) -> Self::Built { 558 | c 559 | } 560 | 561 | #[inline] 562 | #[track_caller] 563 | fn intern_to_built(&self, _: Node) -> Self::Built { 564 | unreachable!("`Const` operator overloading shouldn't need interning") 565 | } 566 | 567 | #[inline] 568 | #[track_caller] 569 | fn built_to_interned(&self, _: Self::Built) -> INode { 570 | unreachable!("`Const` operator overloading shouldn't need interning") 571 | } 572 | 573 | #[inline(always)] 574 | fn built_ty(&self, built: Self::Built) -> Type { 575 | Type::Bits(built.size) 576 | } 577 | 578 | #[inline(always)] 579 | fn built_as_const(&self, built: Self::Built) -> Option { 580 | Some(built) 581 | } 582 | } 583 | 584 | macro_rules! 
impl_const_binop { 585 | ($Trait:ident :: $method:ident $(= $int_op:expr)?) => { 586 | impl ops::$Trait for Const { 587 | type Output = Const; 588 | #[inline] 589 | fn $method(self, other: Const) -> Const { 590 | Builder((tag::$Trait, self, other)).build(&ConstCx) 591 | } 592 | } 593 | }; 594 | ([failible] $Trait:ident :: $method:ident $(= $int_op:expr)?) => { 595 | // Ignore `{Div,Rem}{U,S}`, as they can fail for specific values, 596 | // e.g. `x / 0`. 597 | }; 598 | } 599 | with_binops!(impl_const_binop!()); 600 | -------------------------------------------------------------------------------- /src/isa/i8051.rs: -------------------------------------------------------------------------------- 1 | use crate::ir::{ 2 | BitSize::*, Const, Cx, Edge, Edges, Effect, Global, IGlobal, INode, MemRef, MemSize, MemType, 3 | State, Type, 4 | }; 5 | use crate::isa::Isa; 6 | use crate::platform::Rom; 7 | use std::ops::Index; 8 | 9 | pub struct I8051 { 10 | mem: IGlobal, 11 | rom: IGlobal, 12 | ext: IGlobal, 13 | regs: Regs, 14 | } 15 | 16 | impl I8051 { 17 | const ROM_MEM_TYPE: MemType = MemType { 18 | addr_size: B16, 19 | big_endian: false, 20 | }; 21 | 22 | pub fn new(cx: &Cx) -> Self { 23 | I8051 { 24 | mem: cx.a(Global { 25 | ty: Type::Mem(MemType { 26 | addr_size: B8, 27 | big_endian: false, 28 | }), 29 | name: cx.a("m"), 30 | }), 31 | rom: cx.a(Global { 32 | ty: Type::Mem(Self::ROM_MEM_TYPE), 33 | name: cx.a("rom"), 34 | }), 35 | ext: cx.a(Global { 36 | ty: Type::Mem(MemType { 37 | addr_size: B16, 38 | big_endian: false, 39 | }), 40 | name: cx.a("ext"), 41 | }), 42 | regs: Regs::new(cx), 43 | } 44 | } 45 | } 46 | 47 | struct Regs { 48 | // FIXME(eddyb) use an array, or separate fields. 49 | // FIXME(eddyb) don't make every SFR a register, if reads are not 50 | // "pure", e.g. they interact with I/O, they should use special ops. 
51 | sfr: Vec, 52 | psw_c: IGlobal, 53 | } 54 | 55 | enum Sfr { 56 | SP = 0x01, 57 | DPL = 0x02, 58 | DPH = 0x03, 59 | PSW = 0x50, 60 | A = 0x60, 61 | B = 0x70, 62 | } 63 | 64 | impl Regs { 65 | fn new(cx: &Cx) -> Self { 66 | let reg = |size, name| { 67 | cx.a(Global { 68 | ty: Type::Bits(size), 69 | name, 70 | }) 71 | }; 72 | 73 | Regs { 74 | sfr: (0..0x80) 75 | .map(|i| { 76 | cx.a(if i == Sfr::SP as usize { 77 | "sp" 78 | } else if i == Sfr::DPL as usize { 79 | "dpl" 80 | } else if i == Sfr::DPH as usize { 81 | "dph" 82 | } else if i == Sfr::PSW as usize { 83 | "psw" 84 | } else if i == Sfr::A as usize { 85 | "a" 86 | } else if i == Sfr::B as usize { 87 | "b" 88 | } else { 89 | return cx.a(&format!("sfr_{:02x}", i)[..]); 90 | }) 91 | }) 92 | .map(|name| reg(B8, name)) 93 | .collect(), 94 | psw_c: reg(B1, cx.a("psw.c")), 95 | } 96 | } 97 | } 98 | 99 | impl Index for Regs { 100 | type Output = IGlobal; 101 | 102 | fn index(&self, r: Sfr) -> &IGlobal { 103 | &self.sfr[r as usize] 104 | } 105 | } 106 | 107 | impl Isa for I8051 { 108 | fn mem_containing_rom(&self) -> IGlobal { 109 | self.rom 110 | } 111 | 112 | fn lift_instr( 113 | &self, 114 | cx: &Cx, 115 | rom: &dyn Rom, 116 | pc: &mut Const, 117 | mut state: State, 118 | ) -> Result> { 119 | // FIXME(eddyb) make it possible to write this as `x + 1`. 120 | let add1 = |x: Const| x + Const::new(x.size, 1); 121 | 122 | macro_rules! error { 123 | ($($args:tt)*) => { 124 | return Err(Edges::One(Edge { 125 | state, 126 | effect: Effect::Error(format!($($args)*)), 127 | })) 128 | } 129 | } 130 | 131 | macro_rules! imm { 132 | (8) => {{ 133 | let v = match rom.load(Self::ROM_MEM_TYPE, *pc, MemSize::M8) { 134 | Ok(v) => v, 135 | Err(e) => error!("failed to read ROM: {:?}", e), 136 | }; 137 | *pc = add1(*pc); 138 | v 139 | }}; 140 | (16) => { 141 | (imm!(8).zext(B16) << 8) | imm!(8).zext(B16) 142 | }; 143 | } 144 | 145 | let op = imm!(8).as_u8(); 146 | 147 | macro_rules! 
mem_ref { 148 | ($addr:expr, $sz:ident) => { 149 | MemRef { 150 | mem: state.get(cx, self.mem), 151 | mem_type: cx[self.mem].ty.mem().unwrap(), 152 | addr: $addr, 153 | size: MemSize::$sz, 154 | } 155 | }; 156 | ($addr:expr) => { 157 | mem_ref!($addr, M8) 158 | }; 159 | } 160 | 161 | macro_rules! push { 162 | ($value:expr) => {{ 163 | let value = $value; 164 | let size = cx[value].ty(cx).bit_size().unwrap(); 165 | let sp = cx 166 | .a(state.get(cx, self.regs[Sfr::SP]) 167 | + Const::new(B8, (size.bits() / 8) as u64)); 168 | state.set(cx, self.regs[Sfr::SP], sp); 169 | let m = match size { 170 | B8 => mem_ref!(sp), 171 | B16 => mem_ref!(sp, M16), 172 | _ => unreachable!(), 173 | }; 174 | state.set(cx, self.mem, cx.a(m.store(value))); 175 | }}; 176 | } 177 | 178 | macro_rules! pop { 179 | ($sz:ident) => {{ 180 | let sp = state.get(cx, self.regs[Sfr::SP]); 181 | let value = cx.a(mem_ref!(sp, $sz).load()); 182 | state.set( 183 | cx, 184 | self.regs[Sfr::SP], 185 | cx.a(sp - Const::new(B8, MemSize::$sz.bytes() as u64)), 186 | ); 187 | value 188 | }}; 189 | } 190 | 191 | macro_rules! get_dptr { 192 | () => { 193 | cx.a((state.get(cx, self.regs[Sfr::DPH]).zext(B16) << 8) 194 | | state.get(cx, self.regs[Sfr::DPL]).zext(B16)) 195 | }; 196 | } 197 | 198 | macro_rules! jump { 199 | ($target:expr) => { 200 | Err(Edges::One(Edge { 201 | effect: Effect::Jump($target), 202 | state, 203 | })) 204 | }; 205 | } 206 | macro_rules! call { 207 | ($target:expr) => {{ 208 | let target = $target; 209 | push!(cx.a(*pc)); 210 | jump!(target) 211 | }}; 212 | } 213 | macro_rules! relative_target { 214 | () => {{ 215 | let offset = imm!(8).sext(B16); 216 | cx.a(*pc + offset) 217 | }}; 218 | } 219 | macro_rules! 
branch { 220 | ($cond:expr => $b:expr, $t:expr, $e:expr) => {{ 221 | let (cond, t, e) = ($cond, $t, $e); 222 | let (t, e) = if $b { 223 | (t, e) 224 | } else { 225 | (e, t) 226 | }; 227 | 228 | assert_eq!(cx[cond].ty(cx), Type::Bits(B1)); 229 | 230 | Err(Edges::Branch { 231 | cond, 232 | t: Edge { state: state.clone(), effect: Effect::Jump(t) }, 233 | e: Edge { state: state, effect: Effect::Jump(e) }, 234 | }) 235 | }}; 236 | ($cond:expr => $b:expr) => { 237 | branch!($cond => $b, relative_target!(), cx.a(*pc)) 238 | }; 239 | } 240 | 241 | if op == 0xa5 { 242 | error!("reserved opcode 0x{:x}", op); 243 | } 244 | 245 | if (op & 0xf) == 1 { 246 | let addr11 = (((op >> 5) as u16) << 8) | imm!(8).as_u16(); 247 | let target = cx.a(Const::new(B16, ((pc.as_u16() & 0xf800) | addr11) as u64)); 248 | if (op & 0x10) == 0 { 249 | return jump!(target); 250 | } else { 251 | return call!(target); 252 | } 253 | } 254 | 255 | enum Operand { 256 | Imm(Const), 257 | Sfr(u8), 258 | Mem(IGlobal, INode), 259 | } 260 | 261 | let operand; 262 | 263 | macro_rules! get { 264 | ($operand:expr) => { 265 | match $operand { 266 | Operand::Imm(x) => cx.a(x), 267 | Operand::Sfr(i) => { 268 | // FIXME(eddyb) emulate `PSW` reads by composing it out of bits. 269 | assert!(i != Sfr::PSW as u8); 270 | state.get(cx, self.regs.sfr[i as usize]) 271 | } 272 | Operand::Mem(mem, addr) => cx.a(MemRef { 273 | mem: state.get(cx, mem), 274 | mem_type: cx[mem].ty.mem().unwrap(), 275 | addr, 276 | size: MemSize::M8, 277 | } 278 | .load()), 279 | } 280 | }; 281 | () => { 282 | get!(operand) 283 | }; 284 | } 285 | macro_rules! set { 286 | ($operand:expr, $val:expr) => {{ 287 | let val = $val; 288 | match $operand { 289 | Operand::Imm(_) => unreachable!(), 290 | Operand::Sfr(i) => { 291 | // FIXME(eddyb) emulate `PSW` writes by splitting it into bits. 
292 | assert!(i != Sfr::PSW as u8); 293 | state.set(cx, self.regs.sfr[i as usize], val); 294 | } 295 | Operand::Mem(mem, addr) => state.set( 296 | cx, 297 | mem, 298 | cx.a(MemRef { 299 | mem: state.get(cx, mem), 300 | mem_type: cx[mem].ty.mem().unwrap(), 301 | addr, 302 | size: MemSize::M8, 303 | } 304 | .store(val)), 305 | ), 306 | } 307 | }}; 308 | ($val:expr) => { 309 | set!(operand, $val) 310 | }; 311 | } 312 | macro_rules! direct { 313 | () => {{ 314 | let addr = imm!(8); 315 | if addr.as_u8() > 0x80 { 316 | Operand::Sfr(addr.as_u8() & 0x7f) 317 | } else { 318 | Operand::Mem(self.mem, cx.a(addr)) 319 | } 320 | }}; 321 | } 322 | 323 | if (op & 0xf) >= 4 { 324 | operand = if (op & 0xf) == 4 { 325 | match op >> 4 { 326 | 0..=1 | 7..=8 | 0xa | 0xc..=0xf => Operand::Sfr(Sfr::A as u8), 327 | 2..=6 | 9 | 0xb => Operand::Imm(imm!(8)), 328 | _ => unreachable!(), 329 | } 330 | } else if (op & 0xf) == 5 { 331 | direct!() 332 | } else { 333 | Operand::Mem( 334 | self.mem, 335 | if (op & 0xf) < 8 { 336 | cx.a(mem_ref!(Const::new(B8, (op & 1) as u64)).load()) 337 | } else { 338 | cx.a(Const::new(B8, (op & 7) as u64)) 339 | }, 340 | ) 341 | }; 342 | 343 | match op >> 4 { 344 | 0 => { 345 | set!(cx.a(get!() + Const::new(B8, 1))); 346 | } 347 | 1 => { 348 | set!(cx.a(get!() - Const::new(B8, 1))); 349 | } 350 | 2 => { 351 | let (a, b) = (state.get(cx, self.regs[Sfr::A]), get!()); 352 | // HACK(eddyb) this computes the result & carry by 353 | // doing the operation with 16 bits instead of 8. 354 | let wide = cx.a(a.zext(B16) + b.zext(B16)); 355 | state.set(cx, self.regs[Sfr::A], cx.a(wide.trunc(B8))); 356 | state.set(cx, self.regs.psw_c, cx.a(wide.shr_u(8).trunc(B1))); 357 | } 358 | 3 => { 359 | let (a, b) = (state.get(cx, self.regs[Sfr::A]), get!()); 360 | // HACK(eddyb) this computes the result & carry by 361 | // doing the operation with 16 bits instead of 8. 
362 | let wide = 363 | cx.a(a.zext(B16) + b.zext(B16) + state.get(cx, self.regs.psw_c).zext(B16)); 364 | state.set(cx, self.regs[Sfr::A], cx.a(wide.trunc(B8))); 365 | state.set(cx, self.regs.psw_c, cx.a(wide.shr_u(8).trunc(B1))); 366 | } 367 | 4 => { 368 | state.set( 369 | cx, 370 | self.regs[Sfr::A], 371 | cx.a(state.get(cx, self.regs[Sfr::A]) | get!()), 372 | ); 373 | } 374 | 5 => { 375 | state.set( 376 | cx, 377 | self.regs[Sfr::A], 378 | cx.a(state.get(cx, self.regs[Sfr::A]) & get!()), 379 | ); 380 | } 381 | 6 => { 382 | state.set( 383 | cx, 384 | self.regs[Sfr::A], 385 | cx.a(state.get(cx, self.regs[Sfr::A]) ^ get!()), 386 | ); 387 | } 388 | 7 => set!(cx.a(imm!(8))), 389 | 8 if op == 0x84 => { 390 | let a = state.get(cx, self.regs[Sfr::A]); 391 | let b = state.get(cx, self.regs[Sfr::B]); 392 | let (a, b) = (cx.a(a.div_u(b)), cx.a(a.rem_u(b))); 393 | state.set(cx, self.regs[Sfr::A], a); 394 | state.set(cx, self.regs[Sfr::B], b); 395 | } 396 | 8 => set!(direct!(), get!()), 397 | 9 => { 398 | state.set( 399 | cx, 400 | self.regs[Sfr::A], 401 | cx.a(state.get(cx, self.regs[Sfr::A]) 402 | - get!() 403 | - state.get(cx, self.regs.psw_c).zext(B8)), 404 | ); 405 | // FIXME(eddyb) set the carry bit. 
406 | } 407 | 0xa if op == 0xa4 => { 408 | let a = state.get(cx, self.regs[Sfr::A]); 409 | let b = state.get(cx, self.regs[Sfr::B]); 410 | let (a, b) = ( 411 | cx.a(a * b), 412 | cx.a((a.zext(B16) * b.zext(B16)).shr_u(8).trunc(B8)), 413 | ); 414 | state.set(cx, self.regs[Sfr::A], a); 415 | state.set(cx, self.regs[Sfr::B], b); 416 | } 417 | 0xa => set!(get!(direct!())), 418 | 0xb if op == 0xb4 || op == 0xb5 => { 419 | return branch!(cx.a(get!().cmp_eq(state.get(cx, self.regs[Sfr::A]))) => false); 420 | } 421 | 0xb => return branch!(cx.a(get!().cmp_eq(imm!(8))) => false), 422 | 0xc if op == 0xc4 => set!(cx.a(get!().rol(4))), 423 | 0xc => { 424 | let a = state.get(cx, self.regs[Sfr::A]); 425 | state.set(cx, self.regs[Sfr::A], get!()); 426 | set!(a); 427 | } 428 | 0xd if op == 0xd4 => { 429 | error!("unimplemented decimal adjust"); 430 | } 431 | 0xd if op == 0xd6 || op == 0xd7 => { 432 | macro_rules! nibbles { 433 | ($v:expr) => {{ 434 | let v = $v; 435 | ( 436 | cx.a(v & Const::new(B8, 0xf0)), 437 | cx.a(v & Const::new(B8, 0x0f)), 438 | ) 439 | }}; 440 | } 441 | let (a_hi, a_lo) = nibbles!(state.get(cx, self.regs[Sfr::A])); 442 | let (v_hi, v_lo) = nibbles!(get!()); 443 | state.set(cx, self.regs[Sfr::A], cx.a(a_hi | v_lo)); 444 | set!(cx.a(v_hi | a_lo)); 445 | } 446 | 0xd => { 447 | let val = cx.a(get!() - Const::new(B8, 1)); 448 | set!(val); 449 | return branch!(cx.a(val.cmp_eq(Const::new(B8, 0))) => false); 450 | } 451 | 0xe if op == 0xe4 => set!(cx.a(Const::new(B8, 0))), 452 | 0xe => state.set(cx, self.regs[Sfr::A], get!()), 453 | 0xf if op == 0xf4 => set!(cx.a(!get!())), 454 | 0xf => set!(state.get(cx, self.regs[Sfr::A])), 455 | _ => unreachable!(), 456 | } 457 | } else { 458 | macro_rules! 
bit_addr {
            // Decode a bit-address operand byte: leaves the containing byte in
            // `operand` and evaluates to the bit index within it.
            // 0x00..=0x7F map into the bit-addressable RAM bytes 0x20..=0x2F;
            // 0x80..=0xFF map to bit-addressable SFRs (byte address = addr & 0xF8).
            () => {{
                let addr = imm!(8).as_u8();
                let byte = addr >> 3;
                let bit = addr & 7;
                // FIX: was `> 0x80`, which misrouted bit address 0x80
                // (P0.0, an SFR bit) into internal RAM.
                operand = if addr >= 0x80 {
                    Operand::Sfr((byte << 3) & 0x7f)
                } else {
                    Operand::Mem(self.mem, cx.a(Const::new(B8, (0x20 + byte) as u64)))
                };
                bit
            }};
        }

        match op {
            // NOP.
            0x00 => {}
            // LJMP addr16.
            0x02 => return jump!(cx.a(imm!(16))),
            // RR A / RRC A (carry for RRC is still a FIXME below).
            0x03 | 0x13 => {
                state.set(
                    cx,
                    self.regs[Sfr::A],
                    cx.a(state.get(cx, self.regs[Sfr::A]).ror(1)),
                );
                if op == 0x13 {
                    // FIXME(eddyb) set (and read) the flags.
                }
            }
            // JBC/JB/JNB bit, rel: branch on a bit (JBC also clears it first).
            0x10 | 0x20 | 0x30 => {
                let bit = bit_addr!();
                let val = get!();
                // Nonzero iff the addressed bit is set in the pre-clear value.
                let bit_was_set = cx.a(val & Const::new(B8, 1 << bit));
                if op == 0x10 {
                    set!(cx.a(val & Const::new(B8, (!(1u8 << bit)) as u64)));
                }
                return branch!(cx.a(bit_was_set.cmp_eq(Const::new(B8, 0))) => op == 0x30);
            }
            // LCALL addr16.
            0x12 => return call!(cx.a(imm!(16))),
            // RET / RETI (RETI's interrupt-acknowledge side is unimplemented).
            0x22 | 0x32 => {
                if op == 0x32 {
                    error!("unimplemented RETI");
                }
                return jump!(pop!(M16));
            }
            // RL A / RLC A (carry for RLC is still a FIXME below).
            0x23 | 0x33 => {
                state.set(
                    cx,
                    self.regs[Sfr::A],
                    cx.a(state.get(cx, self.regs[Sfr::A]).rol(1)),
                );
                if op == 0x33 {
                    // FIXME(eddyb) set (and read) the flags.
509 | } 510 | } 511 | 0x40 | 0x50 => { 512 | return branch!(cx.a(state.get(cx, self.regs.psw_c).cmp_eq(Const::new(B1, 0))) => op == 0x50); 513 | } 514 | 0x60 | 0x70 => { 515 | return branch!(cx.a(state.get(cx, self.regs[Sfr::A]).cmp_eq(Const::new(B8, 0))) => op == 0x60); 516 | } 517 | 0x42 | 0x43 | 0x52 | 0x53 | 0x62 | 0x63 => { 518 | operand = direct!(); 519 | let (a, b) = ( 520 | get!(), 521 | if (op & 0xf) == 2 { 522 | state.get(cx, self.regs[Sfr::A]) 523 | } else { 524 | cx.a(imm!(8)) 525 | }, 526 | ); 527 | set!(match op >> 4 { 528 | 4 => cx.a(a | b), 529 | 5 => cx.a(a & b), 530 | 6 => cx.a(a ^ b), 531 | _ => unreachable!(), 532 | }); 533 | } 534 | 0x72 | 0x82 | 0xa0 | 0xa2 | 0xb0 => { 535 | let bit = bit_addr!(); 536 | let val = cx.a(get!().shr_u(bit as u32).trunc(B1)); 537 | state.set( 538 | cx, 539 | self.regs.psw_c, 540 | match op { 541 | 0xa2 => val, 542 | 543 | 0x72 => cx.a(state.get(cx, self.regs.psw_c) | val), 544 | 0xa0 => cx.a(state.get(cx, self.regs.psw_c) | !val), 545 | 0xb0 => cx.a(state.get(cx, self.regs.psw_c) & !val), 546 | _ => unreachable!(), 547 | }, 548 | ) 549 | } 550 | 0x73 | 0x83 | 0x93 => { 551 | let base = if op == 0x83 { cx.a(*pc) } else { get_dptr!() }; 552 | let addr = cx.a(base + state.get(cx, self.regs[Sfr::A]).zext(B16)); 553 | if op == 0x73 { 554 | return jump!(addr); 555 | } else { 556 | state.set(cx, self.regs[Sfr::A], get!(Operand::Mem(self.rom, addr))); 557 | } 558 | } 559 | 0x80 => return jump!(relative_target!()), 560 | 0x90 => { 561 | state.set(cx, self.regs[Sfr::DPH], cx.a(imm!(8))); 562 | state.set(cx, self.regs[Sfr::DPL], cx.a(imm!(8))); 563 | } 564 | 0x92 => { 565 | let bit = bit_addr!(); 566 | set!(cx.a(get!() | (state.get(cx, self.regs.psw_c).zext(B8) << (bit as u32)))); 567 | } 568 | 0xa3 => { 569 | let v = cx.a(get_dptr!() + Const::new(B16, 1)); 570 | state.set(cx, self.regs[Sfr::DPL], cx.a(v.trunc(B8))); 571 | state.set(cx, self.regs[Sfr::DPH], cx.a(v.shr_u(8).trunc(B8))); 572 | } 573 | 0xb2 => { 574 | let bit = 
bit_addr!(); 575 | set!(cx.a(get!() ^ Const::new(B8, 1 << bit))); 576 | } 577 | 0xc0 => push!(get!(direct!())), 578 | 0xc2 => { 579 | let bit = bit_addr!(); 580 | set!(cx.a(get!() & Const::new(B8, (!(1u8 << bit)) as u64))); 581 | } 582 | 0xc3 | 0xd3 => state.set(cx, self.regs.psw_c, cx.a(Const::from(op == 0xd3))), 583 | 0xd0 => set!(direct!(), pop!(M8)), 584 | 0xd2 => { 585 | let bit = bit_addr!(); 586 | set!(cx.a(get!() | Const::new(B8, 1 << bit))); 587 | } 588 | 0xe0 | 0xe2 | 0xe3 | 0xf0 | 0xf2 | 0xf3 => { 589 | operand = Operand::Mem( 590 | self.ext, 591 | if (op & 0xf) == 0 { 592 | get_dptr!() 593 | } else { 594 | cx.a(mem_ref!(Const::new(B8, (op & 1) as u64)).load().zext(B16)) 595 | }, 596 | ); 597 | if (op & 0xf0) == 0xe0 { 598 | state.set(cx, self.regs[Sfr::A], get!()); 599 | } else { 600 | set!(state.get(cx, self.regs[Sfr::A])); 601 | } 602 | } 603 | 604 | _ => error!("unsupported opcode 0x{:x}", op), 605 | } 606 | } 607 | 608 | Ok(state) 609 | } 610 | } 611 | -------------------------------------------------------------------------------- /src/isa/i8080.rs: -------------------------------------------------------------------------------- 1 | use crate::ir::{ 2 | BitSize::*, Const, Cx, Edge, Edges, Effect, Global, IGlobal, MemRef, MemSize, MemType, State, 3 | Type, 4 | }; 5 | use crate::isa::Isa; 6 | use crate::platform::Rom; 7 | use std::ops::Index; 8 | 9 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] 10 | enum Flavor { 11 | Intel, 12 | LR35902, 13 | } 14 | 15 | pub struct I8080 { 16 | flavor: Flavor, 17 | mem: IGlobal, 18 | regs: Regs, 19 | } 20 | 21 | impl I8080 { 22 | const MEM_TYPE: MemType = MemType { 23 | addr_size: B16, 24 | big_endian: false, 25 | }; 26 | 27 | pub fn new(cx: &Cx) -> Self { 28 | I8080 { 29 | flavor: Flavor::Intel, 30 | mem: cx.a(Global { 31 | ty: Type::Mem(Self::MEM_TYPE), 32 | name: cx.a("m"), 33 | }), 34 | regs: Regs::new(cx), 35 | } 36 | } 37 | 38 | pub fn new_lr35902(cx: &Cx) -> Self { 39 | I8080 { 40 | flavor: Flavor::LR35902, 
41 | mem: cx.a(Global { 42 | ty: Type::Mem(Self::MEM_TYPE), 43 | name: cx.a("m"), 44 | }), 45 | regs: Regs::new(cx), 46 | } 47 | } 48 | } 49 | 50 | struct Regs { 51 | a: IGlobal, 52 | 53 | reg16: [IGlobal; 4], 54 | 55 | // Flag bits. 56 | f_c: IGlobal, 57 | f_h: IGlobal, // AC on i8080, H on LR35902. 58 | f_n: IGlobal, // Missing on i8080, N on LR35902. 59 | f_z: IGlobal, 60 | f_s: IGlobal, // S on i8080, missing on LR35902. 61 | f_p: IGlobal, // P on i8080, missing on LR35902. 62 | 63 | ie: IGlobal, // Interrupt Enable. 64 | } 65 | 66 | impl Regs { 67 | fn new(cx: &Cx) -> Self { 68 | let reg = |size, name| { 69 | cx.a(Global { 70 | ty: Type::Bits(size), 71 | name: cx.a(name), 72 | }) 73 | }; 74 | 75 | Regs { 76 | a: reg(B8, "a"), 77 | 78 | reg16: [ 79 | reg(B16, "bc"), 80 | reg(B16, "de"), 81 | reg(B16, "hl"), 82 | reg(B16, "sp"), 83 | ], 84 | 85 | // FIXME(eddyb) perhaps change names or even use different 86 | // sets of flags, depending on flavor. 87 | // FIXME(eddyb) avoid the repetition here (between field and name). 88 | f_c: reg(B1, "f.c"), 89 | f_h: reg(B1, "f.h"), 90 | f_n: reg(B1, "f.n"), 91 | f_z: reg(B1, "f.z"), 92 | f_s: reg(B1, "f.s"), 93 | f_p: reg(B1, "f.p"), 94 | 95 | ie: reg(B1, "ie"), 96 | } 97 | } 98 | } 99 | 100 | #[derive(Copy, Clone)] 101 | pub enum Reg16 { 102 | BC, 103 | DE, 104 | HL, 105 | 106 | SP, 107 | } 108 | 109 | impl Index for Regs { 110 | type Output = IGlobal; 111 | 112 | fn index(&self, r: Reg16) -> &IGlobal { 113 | &self.reg16[r as usize] 114 | } 115 | } 116 | 117 | impl I8080 { 118 | fn flags(&self) -> [Result; 8] { 119 | let Regs { 120 | f_c, 121 | f_h, 122 | f_n, 123 | f_z, 124 | f_s, 125 | f_p, 126 | .. 
127 | } = self.regs; 128 | 129 | match self.flavor { 130 | Flavor::Intel => [ 131 | Ok(f_c), 132 | Err(1), 133 | Ok(f_p), 134 | Err(0), 135 | Ok(f_h), 136 | Err(0), 137 | Ok(f_z), 138 | Ok(f_s), 139 | ], 140 | Flavor::LR35902 => [ 141 | Err(0), 142 | Err(0), 143 | Err(0), 144 | Err(0), 145 | Ok(f_c), 146 | Ok(f_h), 147 | Ok(f_n), 148 | Ok(f_z), 149 | ], 150 | } 151 | } 152 | } 153 | 154 | impl Isa for I8080 { 155 | fn mem_containing_rom(&self) -> IGlobal { 156 | self.mem 157 | } 158 | 159 | fn lift_instr( 160 | &self, 161 | cx: &Cx, 162 | rom: &dyn Rom, 163 | pc: &mut Const, 164 | mut state: State, 165 | ) -> Result> { 166 | let flavor = self.flavor; 167 | 168 | // FIXME(eddyb) make it possible to write this as `x + 1`. 169 | let add1 = |x: Const| x + Const::new(x.size, 1); 170 | 171 | macro_rules! error { 172 | ($($args:tt)*) => { 173 | return Err(Edges::One(Edge { 174 | state, 175 | effect: Effect::Error(format!($($args)*)), 176 | })) 177 | } 178 | } 179 | 180 | macro_rules! imm { 181 | (8) => {{ 182 | let v = match rom.load(Self::MEM_TYPE, *pc, MemSize::M8) { 183 | Ok(v) => v, 184 | Err(e) => error!("failed to read ROM: {:?}", e), 185 | }; 186 | *pc = add1(*pc); 187 | v 188 | }}; 189 | (16) => { 190 | imm!(8).zext(B16) | (imm!(8).zext(B16) << 8) 191 | }; 192 | } 193 | 194 | let op = imm!(8).as_u8(); 195 | 196 | macro_rules! mem_ref { 197 | ($addr:expr, $sz:ident) => { 198 | MemRef { 199 | mem: state.get(cx, self.mem), 200 | mem_type: Self::MEM_TYPE, 201 | addr: $addr, 202 | size: MemSize::$sz, 203 | } 204 | }; 205 | ($addr:expr) => { 206 | mem_ref!($addr, M8) 207 | }; 208 | } 209 | 210 | macro_rules! 
push { 211 | ($value:expr) => {{ 212 | let value = $value; 213 | let size = cx[value].ty(cx).bit_size().unwrap(); 214 | let sp = cx 215 | .a(state.get(cx, self.regs[Reg16::SP]) 216 | - Const::new(B16, (size.bits() / 8) as u64)); 217 | state.set(cx, self.regs[Reg16::SP], sp); 218 | let m = match size { 219 | B8 => mem_ref!(sp), 220 | B16 => mem_ref!(sp, M16), 221 | _ => unreachable!(), 222 | }; 223 | state.set(cx, self.mem, cx.a(m.store(value))); 224 | }}; 225 | } 226 | 227 | macro_rules! pop { 228 | ($sz:ident) => {{ 229 | let sp = state.get(cx, self.regs[Reg16::SP]); 230 | let value = cx.a(mem_ref!(sp, $sz).load()); 231 | state.set( 232 | cx, 233 | self.regs[Reg16::SP], 234 | cx.a(sp + Const::new(B16, MemSize::$sz.bytes() as u64)), 235 | ); 236 | value 237 | }}; 238 | } 239 | 240 | macro_rules! jump { 241 | ($target:expr) => { 242 | Err(Edges::One(Edge { 243 | effect: Effect::Jump($target), 244 | state, 245 | })) 246 | }; 247 | } 248 | macro_rules! relative_target { 249 | () => {{ 250 | let offset = imm!(8).sext(B16); 251 | cx.a(*pc + offset) 252 | }}; 253 | } 254 | macro_rules! condition_code { 255 | () => { 256 | // TODO(eddyb) actually implement this. 257 | cx.a(Const::new(B1, 0)) 258 | }; 259 | } 260 | // NOTE(eddyb) the expression inside `conditional!(...)` 261 | // *only* affects the state if the conditional is met 262 | // (i.e. on the "true" branch of the `Edges`). 263 | macro_rules! 
conditional { 264 | ($effect:expr) => {{ 265 | let cond = condition_code!(); 266 | let e_state = state.clone(); 267 | let t = Edge { 268 | effect: $effect, 269 | state, 270 | }; 271 | let e = Edge { 272 | state: e_state, 273 | effect: Effect::Jump(cx.a(*pc)), 274 | }; 275 | 276 | assert_eq!(cx[cond].ty(cx), Type::Bits(B1)); 277 | 278 | Err(Edges::Branch { cond, t, e }) 279 | }}; 280 | } 281 | 282 | let reserved = op == 0xdd 283 | || op == 0xed 284 | || op == 0xfd 285 | || match flavor { 286 | Flavor::Intel => (op & 0xc7) == 0 && op != 0 || op == 0xd9 || op == 0xcb, 287 | Flavor::LR35902 => { 288 | (op & 0xf7) == 0xd3 289 | || (op & 0xf7) == 0xe3 290 | || (op & 0xf7) == 0xe4 291 | || (op & 0xf7) == 0xf4 292 | } 293 | }; 294 | if reserved { 295 | error!("reserved opcode: 0x{:x}", op); 296 | } 297 | 298 | enum Operand { 299 | RegA, 300 | RegLo(Reg16), 301 | RegHi(Reg16), 302 | Mem, 303 | } 304 | impl Operand { 305 | fn decode(i: u8) -> Self { 306 | match i { 307 | 0 => Operand::RegHi(Reg16::BC), 308 | 1 => Operand::RegLo(Reg16::BC), 309 | 2 => Operand::RegHi(Reg16::DE), 310 | 3 => Operand::RegLo(Reg16::DE), 311 | 4 => Operand::RegHi(Reg16::HL), 312 | 5 => Operand::RegLo(Reg16::HL), 313 | 6 => Operand::Mem, 314 | 7 => Operand::RegA, 315 | _ => unreachable!(), 316 | } 317 | } 318 | } 319 | 320 | let mut dst = Operand::decode((op >> 3) & 7); 321 | let mut src = Operand::decode(op & 7); 322 | 323 | // FIXME(eddyb) move these macros into methods on helper types. 324 | macro_rules! get { 325 | ($operand:expr) => { 326 | match $operand { 327 | Operand::RegA => state.get(cx, self.regs.a), 328 | Operand::RegLo(r) => cx.a(state.get(cx, self.regs[r]).trunc(B8)), 329 | Operand::RegHi(r) => cx.a(state.get(cx, self.regs[r]).shr_u(8).trunc(B8)), 330 | Operand::Mem => cx.a(mem_ref!(state.get(cx, self.regs[Reg16::HL])).load()), 331 | } 332 | }; 333 | () => { 334 | get!(src) 335 | }; 336 | } 337 | macro_rules! 
set { 338 | ($operand:expr, $val:expr) => {{ 339 | let val = $val; 340 | match $operand { 341 | Operand::RegA => state.set(cx, self.regs.a, val), 342 | Operand::RegLo(r) => state.set( 343 | cx, 344 | self.regs[r], 345 | cx.a((state.get(cx, self.regs[r]) & Const::new(B16, 0xff00)) 346 | | (val.zext(B16))), 347 | ), 348 | Operand::RegHi(r) => state.set( 349 | cx, 350 | self.regs[r], 351 | cx.a((state.get(cx, self.regs[r]) & Const::new(B16, 0x00ff)) 352 | | (val.zext(B16) << 8)), 353 | ), 354 | Operand::Mem => state.set( 355 | cx, 356 | self.mem, 357 | cx.a(mem_ref!(state.get(cx, self.regs[Reg16::HL])).store(val)), 358 | ), 359 | } 360 | }}; 361 | ($val:expr) => { 362 | set!(dst, $val) 363 | }; 364 | } 365 | 366 | // FIXME(eddyb) clean up these hacks. 367 | macro_rules! get_src { 368 | () => { 369 | if (op & 0xc7) == 0x06 || (op & 0xc7) == 0xc6 { 370 | cx.a(imm!(8)) 371 | } else { 372 | get!() 373 | } 374 | }; 375 | } 376 | 377 | if flavor == Flavor::LR35902 { 378 | match op { 379 | 0x08 => { 380 | let m = mem_ref!(imm!(16), M16); 381 | state.set( 382 | cx, 383 | self.mem, 384 | cx.a(m.store(state.get(cx, self.regs[Reg16::SP]))), 385 | ); 386 | return Ok(state); 387 | } 388 | 0x18 => return jump!(relative_target!()), 389 | _ if (op & 0xe7) == 0x20 => { 390 | // FIXME(eddyb) fix the condition code decoding, 391 | // once implemented, to match these up correctly. 
392 | return conditional!(Effect::Jump(relative_target!())); 393 | } 394 | _ if (op & 0xe7) == 0x22 => { 395 | if (op & 0x0f) == 0x02 { 396 | set!(Operand::Mem, state.get(cx, self.regs.a)); 397 | } else { 398 | state.set(cx, self.regs.a, get!(Operand::Mem)); 399 | } 400 | let hl = state.get(cx, self.regs[Reg16::HL]); 401 | let hl = if (op & 0xf0) == 0x20 { 402 | cx.a(hl + Const::new(B16, 1)) 403 | } else { 404 | cx.a(hl - Const::new(B16, 1)) 405 | }; 406 | state.set(cx, self.regs[Reg16::HL], hl); 407 | return Ok(state); 408 | } 409 | _ if (op & 0xed) == 0xe0 || (op & 0xef) == 0xea => { 410 | let addr = if (op & 0x0f) == 0x0a { 411 | cx.a(imm!(16)) 412 | } else { 413 | cx.a(Const::new(B16, 0xff00) 414 | + if (op & 2) == 0 { 415 | cx.a(Const::new(B16, imm!(8).as_u64())) 416 | } else { 417 | cx.a(get!(Operand::RegLo(Reg16::BC)).zext(B16)) 418 | }) 419 | }; 420 | let m = mem_ref!(addr); 421 | if (op & 0xf0) == 0xe0 { 422 | state.set(cx, self.mem, cx.a(m.store(state.get(cx, self.regs.a)))); 423 | } else { 424 | state.set(cx, self.regs.a, cx.a(m.load())); 425 | } 426 | return Ok(state); 427 | } 428 | 0xcb => { 429 | let sub_op = imm!(8).as_u8(); 430 | dst = Operand::decode(sub_op & 7); 431 | src = Operand::decode(sub_op & 7); 432 | 433 | // NB: only used by 0x40..=0xFF (BIT, RES, SET). 434 | let bit_mask: u8 = 1 << ((sub_op >> 3) & 7); 435 | 436 | let val = get!(); 437 | let val = match sub_op & 0xf8 { 438 | 0x00 => { 439 | cx.a(val.rol(1)) 440 | // FIXME(eddyb) set the flags. 441 | } 442 | 0x08 => { 443 | cx.a(val.ror(1)) 444 | // FIXME(eddyb) set the flags. 445 | } 446 | 0x10 => { 447 | cx.a(val.rol(1)) 448 | // FIXME(eddyb) set (and read) the flags. 449 | } 450 | 0x18 => { 451 | cx.a(val.ror(1)) 452 | // FIXME(eddyb) set (and read) the flags. 453 | } 454 | 0x20 => { 455 | cx.a(val << 1) 456 | // FIXME(eddyb) set the flags. 457 | } 458 | 0x28 => { 459 | cx.a(val.shr_s(1)) 460 | // FIXME(eddyb) set the flags. 
461 | } 462 | 0x30 => { 463 | cx.a(val.rol(4)) 464 | // FIXME(eddyb) set the flags. 465 | } 466 | 0x38 => { 467 | cx.a(val.shr_u(1)) 468 | // FIXME(eddyb) set the flags. 469 | } 470 | 0x40..=0x78 => { 471 | state.set( 472 | cx, 473 | self.regs.f_z, 474 | cx.a((val & Const::new(B8, bit_mask as u64)) 475 | .cmp_eq(Const::new(B8, 0))), 476 | ); 477 | state.set(cx, self.regs.f_n, cx.a(Const::new(B1, 0))); 478 | state.set(cx, self.regs.f_h, cx.a(Const::new(B1, 1))); 479 | 480 | return Ok(state); 481 | } 482 | 0x80..=0xb8 => cx.a(val & Const::new(B8, !bit_mask as u64)), 483 | 0xc0..=0xf8 => cx.a(val | Const::new(B8, bit_mask as u64)), 484 | _ => unreachable!(), 485 | }; 486 | set!(val); 487 | return Ok(state); 488 | } 489 | 0xd9 => { 490 | state.set(cx, self.regs.ie, cx.a(Const::new(B1, 1))); 491 | return jump!(pop!(M16)); 492 | } 493 | 0x10 | 0xe8 | 0xf8 => { 494 | imm!(8); 495 | 496 | error!("unsupported LR35902 opcode 0x{:x}", op); 497 | } 498 | _ => {} 499 | } 500 | } 501 | 502 | match op { 503 | _ if (op & 0xc5) == 0x01 => { 504 | let mut i = Reg16::BC as usize + (op >> 4) as usize; 505 | let mut val = state.get(cx, self.regs.reg16[i]); 506 | val = match op & 0x0f { 507 | 0x01 => cx.a(imm!(16)), 508 | 0x03 => cx.a(val + Const::new(B16, 1)), 509 | 0x09 => { 510 | // HACK(eddyb) this allows reusing the rest of the code. 
511 | i = Reg16::HL as usize; 512 | 513 | cx.a(state.get(cx, self.regs[Reg16::HL]) + val) 514 | } 515 | 0x0b => cx.a(val - Const::new(B16, 1)), 516 | _ => unreachable!(), 517 | }; 518 | state.set(cx, self.regs.reg16[i], val); 519 | } 520 | _ if (op & 0xe7) == 0x02 => { 521 | let addr = state.get(cx, self.regs.reg16[Reg16::BC as usize + (op >> 4) as usize]); 522 | let m = mem_ref!(addr); 523 | if (op & 0x0f) == 0x02 { 524 | state.set(cx, self.mem, cx.a(m.store(state.get(cx, self.regs.a)))); 525 | } else { 526 | state.set(cx, self.regs.a, cx.a(m.load())); 527 | } 528 | return Ok(state); 529 | } 530 | _ if (op & 0xc0) == 0x40 || (op & 0xc7) == 0x06 => { 531 | set!(get_src!()); 532 | } 533 | _ if (op & 0xc7) == 4 => { 534 | set!(cx.a(get!(dst) + Const::new(B8, 1))); 535 | } 536 | _ if (op & 0xc7) == 5 => { 537 | set!(cx.a(get!(dst) - Const::new(B8, 1))); 538 | } 539 | _ if (op & 0xc0) == 0x80 || (op & 0xc7) == 0xc6 => { 540 | let operand = get_src!(); 541 | state.set( 542 | cx, 543 | self.regs.a, 544 | match op & 0xb8 { 545 | 0x80 => { 546 | cx.a(state.get(cx, self.regs.a) + operand) 547 | // FIXME(eddyb) set the flags. 548 | } 549 | 0x88 => { 550 | cx.a(state.get(cx, self.regs.a) 551 | + operand 552 | + state.get(cx, self.regs.f_c).zext(B8)) 553 | // FIXME(eddyb) set the flags. 554 | } 555 | 0x90 => { 556 | cx.a(state.get(cx, self.regs.a) - operand) 557 | // FIXME(eddyb) set the flags. 558 | } 559 | 0x98 => { 560 | cx.a(state.get(cx, self.regs.a) 561 | - operand 562 | - state.get(cx, self.regs.f_c).zext(B8)) 563 | // FIXME(eddyb) set the flags. 564 | } 565 | 0xa0 => cx.a(state.get(cx, self.regs.a) & operand), 566 | 0xa8 => cx.a(state.get(cx, self.regs.a) ^ operand), 567 | 0xb0 => cx.a(state.get(cx, self.regs.a) | operand), 568 | 0xb8 => { 569 | // TODO(eddyb) figure out the subtraction direction. 570 | cx.a(operand - state.get(cx, self.regs.a)); 571 | // FIXME(eddyb) set the flags. 
572 | return Ok(state); 573 | } 574 | _ => unreachable!(), 575 | }, 576 | ); 577 | } 578 | _ if (op & 0xc7) == 0xc0 => { 579 | return conditional!(Effect::Jump(pop!(M16))); 580 | } 581 | // HACK(eddyb) `push AF` / `pop AF` are special-cased because `AF` 582 | // is not a register (like `BC`/`DE`/`HL` are), as this is the only 583 | // place where flags are encoded/decoded into/from a byte. 584 | 0xf1 => { 585 | let flags = pop!(M8); 586 | for (i, &flag) in self.flags().iter().enumerate() { 587 | if let Ok(reg) = flag { 588 | state.set(cx, reg, cx.a(flags.shr_u(i as u32).trunc(B1))); 589 | } 590 | } 591 | 592 | let a = pop!(M8); 593 | state.set(cx, self.regs.a, a); 594 | } 595 | 0xf5 => { 596 | push!(state.get(cx, self.regs.a)); 597 | 598 | push!(self 599 | .flags() 600 | .iter() 601 | .map(|&flag| { 602 | match flag { 603 | Ok(reg) => state.get(cx, reg), 604 | Err(c) => cx.a(Const::new(B1, c as u64)), 605 | } 606 | }) 607 | .enumerate() 608 | .map(|(i, bit)| cx.a(bit.zext(B8) << (i as u32))) 609 | .fold(cx.a(Const::new(B8, 0)), |a, b| cx.a(a | b))); 610 | } 611 | _ if (op & 0xcb) == 0xc1 => { 612 | let i = (op >> 4) & 0x3; 613 | let i = Reg16::BC as usize + (i as usize); 614 | if (op & 4) == 0 { 615 | let v = pop!(M16); 616 | state.set(cx, self.regs.reg16[i], v); 617 | } else { 618 | push!(state.get(cx, self.regs.reg16[i])); 619 | } 620 | } 621 | _ if (op & 0xc7) == 0xc2 => { 622 | return conditional!(Effect::Jump(cx.a(imm!(16)))); 623 | } 624 | _ if (op & 0xc7) == 0xc4 => { 625 | return conditional!({ 626 | let target = cx.a(imm!(16)); 627 | push!(cx.a(*pc)); 628 | Effect::Jump(target) 629 | }); 630 | } 631 | _ if (op & 0xc7) == 0xc7 => { 632 | let i = (op >> 3) & 7; 633 | push!(cx.a(*pc)); 634 | return jump!(cx.a(Const::new(B16, ((i as u16) * 8) as u64))); 635 | } 636 | _ if op & 0xf7 == 0xf3 => { 637 | state.set( 638 | cx, 639 | self.regs.ie, 640 | cx.a(Const::new(B1, ((op >> 3) & 1) as u64)), 641 | ); 642 | } 643 | 644 | 0x00 => {} 645 | 0x07 => { 646 | 
state.set(cx, self.regs.a, cx.a(state.get(cx, self.regs.a).rol(1))); 647 | // FIXME(eddyb) set the flags. 648 | } 649 | 0x27 => { 650 | // FIXME(eddyb) actually implement. 651 | } 652 | 0x2f => { 653 | state.set(cx, self.regs.a, cx.a(!state.get(cx, self.regs.a))); 654 | } 655 | 0x32 => { 656 | state.set( 657 | cx, 658 | self.mem, 659 | cx.a(mem_ref!(imm!(16)).store(state.get(cx, self.regs.a))), 660 | ); 661 | } 662 | 0x3a => { 663 | state.set(cx, self.regs.a, cx.a(mem_ref!(imm!(16)).load())); 664 | } 665 | 0xc3 => return jump!(cx.a(imm!(16))), 666 | 0xc9 => return jump!(pop!(M16)), 667 | 0xcd => { 668 | let target = cx.a(imm!(16)); 669 | push!(cx.a(*pc)); 670 | return jump!(target); 671 | } 672 | 0xe9 => return jump!(state.get(cx, self.regs[Reg16::HL])), 673 | 0xeb => { 674 | let de = state.get(cx, self.regs[Reg16::DE]); 675 | let hl = state.get(cx, self.regs[Reg16::HL]); 676 | state.set(cx, self.regs[Reg16::DE], hl); 677 | state.set(cx, self.regs[Reg16::HL], de); 678 | } 679 | 680 | 0xd3 | 0xd8 | 0x22 | 0x2a => { 681 | assert_eq!(flavor, Flavor::Intel); 682 | error!("unsupported opcode 0x{:x} requires immediate", op); 683 | } 684 | 685 | _ => error!("unsupported opcode 0x{:x}", op), 686 | } 687 | 688 | Ok(state) 689 | } 690 | } 691 | -------------------------------------------------------------------------------- /src/isa/mips.rs: -------------------------------------------------------------------------------- 1 | use crate::ir::{ 2 | BitSize::{self, *}, 3 | Const, Cx, Edge, Edges, Effect, Global, IGlobal, INode, IntOp, MemRef, MemSize, MemType, Node, 4 | State, Type, 5 | }; 6 | use crate::isa::Isa; 7 | use crate::platform::Rom; 8 | use core::fmt; 9 | use std::{convert::Infallible, num::NonZeroU8, ops::Index}; 10 | 11 | pub struct Mips { 12 | mem: IGlobal, 13 | mem_type: MemType, 14 | regs: Regs, 15 | } 16 | 17 | impl Mips { 18 | pub fn new_32le(cx: &Cx) -> Self { 19 | Self::new_le(cx, B32) 20 | } 21 | pub fn new_64le(cx: &Cx) -> Self { 22 | Self::new_le(cx, B64) 
23 | } 24 | fn new_le(cx: &Cx, reg_size: BitSize) -> Self { 25 | Self::new( 26 | cx, 27 | reg_size, 28 | MemType { 29 | addr_size: reg_size, 30 | big_endian: false, 31 | }, 32 | ) 33 | } 34 | 35 | pub fn new(cx: &Cx, reg_size: BitSize, mem_type: MemType) -> Self { 36 | // Only 32-bit and 64-bit variants/modes of MIPS are supported. 37 | assert!(matches!(reg_size, B32 | B64)); 38 | assert!(matches!(mem_type.addr_size, B32 | B64)); 39 | 40 | // Memory accesses can only at most truncate addresses, not widen them. 41 | assert!(mem_type.addr_size <= reg_size); 42 | 43 | Mips { 44 | mem: cx.a(Global { 45 | ty: Type::Mem(mem_type), 46 | name: cx.a("m"), 47 | }), 48 | mem_type, 49 | regs: Regs::new(cx, reg_size), 50 | } 51 | } 52 | } 53 | 54 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] 55 | pub enum Mode { 56 | Kernel, 57 | Supervisor, 58 | User, 59 | } 60 | 61 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] 62 | pub enum AddrSpace { 63 | Direct { cached: bool }, 64 | Mapped(Mode), 65 | } 66 | 67 | impl Mips { 68 | // FIXME(eddyb) extend this to 64-bit virtual addresses, and redefine the 69 | // 32-bit virtual address decoding as `decode64(sext_64(addr32))`. 70 | pub fn decode_virtual_addr32(addr: u32) -> (AddrSpace, u32) { 71 | let addr_space = match addr { 72 | 0x0000_0000..=0x7fff_ffff => return (AddrSpace::Mapped(Mode::User), addr), 73 | 74 | 0x8000_0000..=0x9fff_ffff => AddrSpace::Direct { cached: true }, 75 | 0xa000_0000..=0xbfff_ffff => AddrSpace::Direct { cached: false }, 76 | 0xc000_0000..=0xdfff_ffff => AddrSpace::Mapped(Mode::Supervisor), 77 | 0xe000_0000..=0xffff_ffff => AddrSpace::Mapped(Mode::Kernel), 78 | }; 79 | (addr_space, addr & 0x1fff_ffff) 80 | } 81 | } 82 | 83 | struct Regs { 84 | /// Cached register `BitSize`, to avoid looking it up in a register's type. 
85 | size: BitSize, 86 | gpr_without_zero: [IGlobal; 31], 87 | lo: IGlobal, 88 | hi: IGlobal, 89 | } 90 | 91 | impl Regs { 92 | fn new(cx: &Cx, size: BitSize) -> Self { 93 | let reg = |name| { 94 | cx.a(Global { 95 | ty: Type::Bits(size), 96 | name: cx.a(name), 97 | }) 98 | }; 99 | 100 | macro_rules! reg_array { 101 | ($($name:ident)*) => { 102 | [$(reg(stringify!($name))),*] 103 | } 104 | } 105 | 106 | Regs { 107 | size, 108 | gpr_without_zero: reg_array![ 109 | // `zero` register omitted. 110 | at 111 | rv0 rv1 112 | a0 a1 a2 a3 113 | t0 t1 t2 t3 t4 t5 t6 t7 114 | s0 s1 s2 s3 s4 s5 s6 s7 115 | t8 t9 116 | k0 k1 117 | gp 118 | sp 119 | fp 120 | ra 121 | ], 122 | lo: reg("lo"), 123 | hi: reg("hi"), 124 | } 125 | } 126 | } 127 | 128 | #[derive(Copy, Clone)] 129 | enum Reg { 130 | Gpr(NonZeroU8), 131 | Lo, 132 | Hi, 133 | } 134 | 135 | /// Error type for attempting to refer to the `zero` register through `Reg`. 136 | struct ZeroReg; 137 | 138 | impl From for ZeroReg { 139 | fn from(never: Infallible) -> Self { 140 | match never {} 141 | } 142 | } 143 | 144 | impl TryFrom for Reg { 145 | type Error = ZeroReg; 146 | fn try_from(i: u32) -> Result { 147 | assert!(matches!(i, 0..=31)); 148 | Ok(Reg::Gpr(NonZeroU8::new(i as u8).ok_or(ZeroReg)?)) 149 | } 150 | } 151 | 152 | impl Index for Regs { 153 | type Output = IGlobal; 154 | 155 | fn index(&self, r: Reg) -> &IGlobal { 156 | match r { 157 | Reg::Gpr(i) => &self.gpr_without_zero[i.get() as usize - 1], 158 | Reg::Lo => &self.lo, 159 | Reg::Hi => &self.hi, 160 | } 161 | } 162 | } 163 | 164 | /// Error type for attempting to use a 64-bit operation (presumably from MIPS64) 165 | /// on MIPS32. 166 | // FIXME(eddyb) actually check opcodes instead of just reacting to `B64` values. 
167 | #[derive(Debug)] 168 | enum Mips64OpNotSupportedOnMips32<'a> { 169 | RegRead { name: &'a str }, 170 | RegWrite { name: &'a str }, 171 | } 172 | 173 | impl fmt::Display for Mips64OpNotSupportedOnMips32<'_> { 174 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 175 | match self { 176 | Self::RegRead { name } => write!(f, "64-bit read from `{}` on MIPS32", name), 177 | Self::RegWrite { name } => write!(f, "64-bit write to `{}` on MIPS32", name), 178 | } 179 | } 180 | } 181 | 182 | // FIXME(eddyb) make a `State` wrapper to contain these helpers. 183 | impl State { 184 | fn mips_get_reg_with_explicit_size<'a>( 185 | &self, 186 | isa: &Mips, 187 | cx: &'a Cx, 188 | r: impl TryInto>, 189 | explicit_size: BitSize, 190 | ) -> Result> { 191 | let r = r.try_into().map_err(|e| e.into()); 192 | let v = match r { 193 | Ok(r) => self.get(cx, isa.regs[r]), 194 | Err(ZeroReg) => cx.a(Const::new(isa.regs.size, 0)), 195 | }; 196 | let v = match (isa.regs.size, explicit_size) { 197 | (B32, B32) | (B64, B64) => v, 198 | (B64, B32) => { 199 | // 32-bit register read on MIPS64, likely ALU. 200 | // FIXME(eddyb) need to encode the fact that the 64-bit `v` must 201 | // be equal to `sext_64(trunc_32(v))`, or otherwise this is UB. 
202 | cx.a(Node::Trunc(B32, v)) 203 | } 204 | (B32, B64) => { 205 | return Err(Mips64OpNotSupportedOnMips32::RegRead { 206 | name: match r { 207 | Ok(r) => &cx[cx[isa.regs[r]].name], 208 | Err(ZeroReg) => "zero", 209 | }, 210 | }) 211 | } 212 | _ => unreachable!(), 213 | }; 214 | Ok(v) 215 | } 216 | 217 | fn mips_set_reg_with_explicit_size<'a>( 218 | &mut self, 219 | isa: &Mips, 220 | cx: &'a Cx, 221 | r: impl TryInto>, 222 | v: INode, 223 | explicit_size: BitSize, 224 | ) -> Result<(), Mips64OpNotSupportedOnMips32<'a>> { 225 | assert_eq!(explicit_size, cx[v].ty(cx).bit_size().unwrap()); 226 | 227 | let r = r.try_into().map_err(|e| e.into()); 228 | let v = match (isa.regs.size, explicit_size) { 229 | (B32, B32) | (B64, B64) => v, 230 | (B64, B32) => { 231 | // 32-bit register write on MIPS64, likely ALU or (small) loads. 232 | cx.a(Node::Sext(B64, v)) 233 | } 234 | (B32, B64) => { 235 | return Err(Mips64OpNotSupportedOnMips32::RegWrite { 236 | name: match r { 237 | Ok(r) => &cx[cx[isa.regs[r]].name], 238 | Err(ZeroReg) => "zero", 239 | }, 240 | }) 241 | } 242 | _ => unreachable!(), 243 | }; 244 | match r { 245 | Ok(r) => self.set(cx, isa.regs[r], v), 246 | // Writes to the zero register are noops. 247 | Err(ZeroReg) => {} 248 | } 249 | Ok(()) 250 | } 251 | } 252 | 253 | impl Isa for Mips { 254 | fn mem_containing_rom(&self) -> IGlobal { 255 | self.mem 256 | } 257 | 258 | fn lift_instr( 259 | &self, 260 | cx: &Cx, 261 | rom: &dyn Rom, 262 | pc: &mut Const, 263 | mut state: State, 264 | ) -> Result> { 265 | macro_rules! error { 266 | ($($args:tt)*) => { 267 | return Err(Edges::One(Edge { 268 | state, 269 | effect: Effect::Error(format!($($args)*)), 270 | })) 271 | } 272 | } 273 | 274 | let instr = match rom.load(self.mem_type, *pc, MemSize::M32) { 275 | Ok(x) => x, 276 | Err(e) => error!("failed to read ROM: {:?}", e), 277 | }; 278 | // FIXME(eddyb) make it possible to write this as `x + 4`. 
279 | let add4 = |x: Const| x + Const::new(x.size, 4); 280 | *pc = add4(*pc); 281 | 282 | let field = |i, w| (instr.as_u32() >> i) & ((1u32 << w) - 1u32); 283 | 284 | let op = field(26, 6); 285 | let (rs, rt, rd) = { 286 | let r = |i| field(11 + 5 * i, 5); 287 | (r(2), r(1), r(0)) 288 | }; 289 | let imm16 = instr.trunc(B16); 290 | 291 | // Read the full width of a register. 292 | macro_rules! get_reg_native { 293 | ($r:expr) => { 294 | state 295 | .mips_get_reg_with_explicit_size(self, cx, $r, self.regs.size) 296 | .expect("get_reg_native forces `regs.size`, should always work") 297 | }; 298 | } 299 | // Write the width width of a register. 300 | macro_rules! set_reg_native { 301 | ($r:expr, $val:expr) => { 302 | state 303 | .mips_set_reg_with_explicit_size(self, cx, $r, $val, self.regs.size) 304 | .expect("set_reg_native forces `regs.size`, should always work") 305 | }; 306 | } 307 | 308 | // Read a register as a memory address (may be smaller than register size). 309 | macro_rules! get_reg_mem_addr { 310 | ($r:expr) => { 311 | state 312 | .mips_get_reg_with_explicit_size(self, cx, $r, self.mem_type.addr_size) 313 | .expect("get_reg_mem_addr forces `addr_size`, should always work") 314 | }; 315 | } 316 | 317 | macro_rules! link { 318 | ($r:expr) => { 319 | set_reg_native!($r, cx.a(add4(*pc).sext(self.regs.size))) 320 | }; 321 | () => { 322 | link!(31) 323 | }; 324 | } 325 | macro_rules! jump { 326 | ($target:expr) => {{ 327 | let target = $target; 328 | // Process delay slot. 329 | match self.lift_instr(cx, rom, pc, state) { 330 | Ok(state) => Err(Edges::One(Edge { 331 | state, 332 | effect: Effect::Jump(target), 333 | })), 334 | Err(Edges::One(Edge { 335 | state, 336 | effect: Effect::Opaque { call, next_pc: _ }, 337 | })) => { 338 | // HACK(eddyb) replace the `next_pc` but reuse the `Opaque`. 
339 | Err(Edges::One(Edge { 340 | state, 341 | effect: Effect::Opaque { 342 | call, 343 | next_pc: target, 344 | }, 345 | })) 346 | } 347 | Err(edges) => { 348 | let effect = cx 349 | .pretty_print_on_edges(edges.as_ref().map(|e, _| &e.effect)) 350 | .to_string(); 351 | // HACK(eddyb) extract some `state` for `error!`. 352 | state = edges.map(|e, _| e.state).merge(|x, _| x); 353 | error!("jump delay slot had effect: {}", effect); 354 | } 355 | } 356 | }}; 357 | } 358 | macro_rules! branch_target { 359 | () => { 360 | cx.a(*pc + (imm16.sext(B32) << 2).sext(self.mem_type.addr_size)) 361 | }; 362 | } 363 | macro_rules! branch { 364 | ($cond:expr => $b:expr, $t:expr, $e:expr) => {{ 365 | let (cond, t, e) = ($cond, $t, $e); 366 | let (t, e) = if $b { 367 | (t, e) 368 | } else { 369 | (e, t) 370 | }; 371 | 372 | assert_eq!(cx[cond].ty(cx), Type::Bits(B1)); 373 | 374 | // Process delay slot. 375 | match self.lift_instr(cx, rom, pc, state) { 376 | Ok(state) => Err(Edges::Branch { 377 | cond, 378 | t: Edge { state: state.clone(), effect: Effect::Jump(t) }, 379 | e: Edge { state, effect: Effect::Jump(e) }, 380 | }), 381 | Err(Edges::One(Edge { 382 | state, 383 | effect: Effect::Opaque { call, next_pc: _ }, 384 | })) => { 385 | // HACK(eddyb) replace the `next_pc` but reuse the `Opaque`. 386 | // NOTE(eddyb) this is even worse than the `jump!` one, 387 | // because it duplicates the `Opaque`. 388 | Err(Edges::Branch { 389 | cond, 390 | t: Edge { 391 | state: state.clone(), 392 | effect: Effect::Opaque { 393 | call: call.clone(), 394 | next_pc: t, 395 | }, 396 | }, 397 | e: Edge { 398 | state, 399 | effect: Effect::Opaque { 400 | call, 401 | next_pc: e, 402 | }, 403 | }, 404 | }) 405 | } 406 | Err(edges) => { 407 | let effect = cx.pretty_print_on_edges( 408 | edges.as_ref().map(|e, _| &e.effect), 409 | ).to_string(); 410 | // HACK(eddyb) extract some `state` for `error!`. 
411 | state = edges.map(|e, _| e.state).merge(|x, _| x); 412 | error!("branch delay slot had effect: {}", effect); 413 | } 414 | } 415 | }}; 416 | ($cond:expr => $b:expr) => { 417 | branch!($cond => $b, branch_target!(), cx.a(add4(*pc))) 418 | }; 419 | } 420 | 421 | // FIXME(eddyb) audit everything that ever interacts with this. 422 | let mut alu_size = B32; 423 | macro_rules! get_reg_alu_input { 424 | ($r:expr) => { 425 | match state.mips_get_reg_with_explicit_size(self, cx, $r, alu_size) { 426 | Ok(v) => v, 427 | Err(e) => error!("attempted {}", e), 428 | } 429 | }; 430 | } 431 | macro_rules! set_reg_alu_output { 432 | ($r:expr, $val:expr) => { 433 | match state.mips_set_reg_with_explicit_size(self, cx, $r, $val, alu_size) { 434 | Ok(()) => {} 435 | Err(e) => error!("attempted {}", e), 436 | } 437 | }; 438 | } 439 | 440 | if op == 0 { 441 | // SPECIAL (R format and syscall/break). 442 | let funct = field(0, 6); 443 | match funct { 444 | 12 | 13 => { 445 | return Err(Edges::One(Edge { 446 | state, 447 | effect: Effect::Opaque { 448 | call: format!( 449 | "{}(code={})", 450 | if funct == 12 { "syscall" } else { "break" }, 451 | field(6, 20) 452 | ), 453 | next_pc: cx.a(*pc), 454 | }, 455 | })); 456 | } 457 | _ => {} 458 | } 459 | 460 | if let 20..=23 | 28..=31 | 44..=47 | 56 | 58..=60 | 62 | 63 = funct { 461 | // HACK(eddyb) force `{get,set}_reg_alu_{input,output}` below into 64-bit mode. 462 | alu_size = B64; 463 | } 464 | 465 | if let 16..=19 | 36..=39 | 42 | 43 = funct { 466 | // HACK(eddyb) force `{get,set}_reg_alu_{input,output}` below into "native" mode. 467 | alu_size = self.regs.size; 468 | } 469 | 470 | if let 8 | 9 = funct { 471 | // HACK(eddyb) force `get_reg_alu_input` below into "memory address" mode. 
472 | alu_size = self.mem_type.addr_size; 473 | } 474 | 475 | let rs = get_reg_alu_input!(rs); 476 | let rt = get_reg_alu_input!(rt); 477 | let sa = field(6, 5); 478 | let v = match funct { 479 | 0 | 56 => cx.a(rt << sa), 480 | 2 | 58 => cx.a(rt.shr_u(sa)), 481 | 3 | 59 => cx.a(rt.shr_s(sa)), 482 | 483 | 8 => return jump!(rs), 484 | 9 => { 485 | link!(rd); 486 | return jump!(rs); 487 | } 488 | 489 | 16 => get_reg_native!(Reg::Hi), 490 | 17 => { 491 | set_reg_native!(Reg::Hi, rs); 492 | return Ok(state); 493 | } 494 | 18 => get_reg_native!(Reg::Lo), 495 | 19 => { 496 | set_reg_native!(Reg::Lo, rs); 497 | return Ok(state); 498 | } 499 | 500 | 26 | 30 => { 501 | set_reg_alu_output!(Reg::Lo, cx.a(rs.div_s(rt))); 502 | set_reg_alu_output!(Reg::Hi, cx.a(rs.rem_s(rt))); 503 | return Ok(state); 504 | } 505 | 506 | 27 | 31 => { 507 | set_reg_alu_output!(Reg::Lo, cx.a(rs.div_u(rt))); 508 | set_reg_alu_output!(Reg::Hi, cx.a(rs.rem_u(rt))); 509 | return Ok(state); 510 | } 511 | 512 | 28 | 29 => { 513 | // FIXME(eddyb) perform actual 128-bit multiplies, using 514 | // `Sext(B128, ...)` for `funct=28`, and `Zext(B128, ...)` 515 | // for `funct=29`, or emulate it using 64-bit operations only. 516 | let result = cx.a(rs * rt); 517 | set_reg_alu_output!(Reg::Lo, result); 518 | set_reg_alu_output!(Reg::Hi, cx.a(Const::new(B64, 0))); 519 | return Ok(state); 520 | } 521 | 522 | 32 | 33 | 44 | 45 => cx.a(rs + rt), 523 | 34 | 35 | 46 | 47 => cx.a(rs - rt), 524 | 36 => cx.a(rs & rt), 525 | 37 => cx.a(rs | rt), 526 | 38 => cx.a(rs ^ rt), 527 | 39 => cx.a(!(rs | rt)), 528 | 42 => cx.a(rs.cmp_lt_s(rt).zext(self.regs.size)), 529 | 43 => cx.a(rs.cmp_lt_u(rt).zext(self.regs.size)), 530 | 531 | 60 => cx.a(rt << (sa + 32)), 532 | 63 => cx.a(rt.shr_u(sa + 32)), 533 | 534 | _ => error!("unknown SPECIAL funct={} (0b{0:06b} / 0x{0:02x})", funct), 535 | }; 536 | set_reg_alu_output!(rd, v); 537 | } else if op == 1 { 538 | // REGIMM (I format w/o rt). 
539 | let rs_was_zero = rs == 0; 540 | let rs = get_reg_native!(rs); 541 | match rt { 542 | 0 | 16 => { 543 | if (rt & 16) != 0 { 544 | link!(); 545 | } 546 | if rs_was_zero { 547 | // Special-case `zero < zero` branches to noops - in the 548 | // case of `BLTZAL $zero`, the "And Link" (`link!()`) 549 | // effect may be the only reason the instruction is used. 550 | // HACK(eddyb) this is done here to avoid const-folding 551 | // away control-flow in the general case. 552 | return Ok(state); 553 | } 554 | return branch!(cx.a(rs.cmp_lt_s(Const::new(self.regs.size, 0))) => true); 555 | } 556 | 1 | 17 => { 557 | if (rt & 16) != 0 { 558 | link!(); 559 | } 560 | if rs_was_zero { 561 | // Special-case `zero >= zero` branches to jumps. 562 | // HACK(eddyb) this is done here to avoid const-folding 563 | // away control-flow in the general case. 564 | return jump!(branch_target!()); 565 | } 566 | return branch!(cx.a(rs.cmp_lt_s(Const::new(self.regs.size, 0))) => false); 567 | } 568 | _ => error!("unknown REGIMM rt={} (0b{0:06b} / 0x{0:02x})", rt), 569 | } 570 | } else if op == 2 || op == 3 { 571 | // J format. 572 | if op == 3 { 573 | link!(); 574 | } 575 | return jump!(cx.a(Const::new( 576 | self.mem_type.addr_size, 577 | (pc.as_u64() & !0x3fff_ffff) | ((field(0, 26) << 2) as u64) 578 | ))); 579 | } else if (op, rs, rt) == (4, 0, 0) { 580 | // Special-case `zero == zero` branches to jumps. 581 | // HACK(eddyb) this is done here to avoid const-folding 582 | // away control-flow in the general case. 583 | return jump!(branch_target!()); 584 | } else if op == 16 || op == 17 || op == 18 { 585 | // COPz. 586 | let cp = op - 16; 587 | let funct = field(0, 6); 588 | // FIXME(eddyb) implement basic floating-point instructions. 
589 | if cp == 1 { 590 | return Err(Edges::One(Edge { 591 | state, 592 | effect: Effect::Opaque { 593 | call: format!("COP1_FPU(rs={}, rt={}, rd={}, funct={})", rs, rt, rd, funct), 594 | next_pc: cx.a(*pc), 595 | }, 596 | })); 597 | } 598 | 599 | // FIXME(eddyb) ssupport EPC, moves to/from it, and ERET. 600 | if cp == 0 && rs == 16 && funct == 24 { 601 | error!("ERET"); 602 | } 603 | 604 | return Err(Edges::One(Edge { 605 | state, 606 | effect: Effect::Opaque { 607 | call: format!( 608 | "COP{}(rs={}, rt={}, rd={}, funct={})", 609 | cp, rs, rt, rd, funct, 610 | ), 611 | next_pc: cx.a(*pc), 612 | }, 613 | })); 614 | } else if op == 28 { 615 | // SPECIAL2. 616 | let funct = field(0, 6); 617 | match funct { 618 | _ => error!("unknown SPECIAL2 funct={} (0b{0:06b} / 0x{0:02x})", funct), 619 | } 620 | } else if op == 31 { 621 | // SPECIAL3. 622 | let funct = field(0, 6); 623 | 624 | if let 1..=3 | 5..=7 | 36 | 39 | 55 = funct { 625 | // HACK(eddyb) force `{get,set}_reg_alu_{input,output}` below into 64-bit mode. 626 | alu_size = B64; 627 | } 628 | 629 | let rs = get_reg_alu_input!(rs); 630 | 631 | let v = match funct { 632 | 2 | 3 => { 633 | let lsb = field(6, 5) 634 | + match funct { 635 | 2 => 32, 636 | 3 => 0, 637 | _ => unreachable!(), 638 | }; 639 | let msbd = rd; 640 | let size = msbd + 1; 641 | let mask = !0u64 >> (64 - size); 642 | cx.a(rs.shr_u(lsb as u32) & Const::new(alu_size, mask)) 643 | } 644 | _ => error!("unknown SPECIAL3 funct={} (0b{0:06b} / 0x{0:02x})", funct), 645 | }; 646 | set_reg_alu_output!(rt, v); 647 | } else { 648 | // I format. 649 | 650 | if let 24..=27 | 39 | 44 | 45 | 52 | 55 | 60 | 63 = op { 651 | // HACK(eddyb) force `{get,set}_reg_alu_{input,output}` below into 64-bit mode. 652 | alu_size = B64; 653 | } 654 | 655 | if let 4..=7 | 10..=14 | 20..=23 = op { 656 | // HACK(eddyb) force `{get,set}_reg_alu_{input,output}` below into "native" mode. 
657 | alu_size = self.regs.size; 658 | } 659 | 660 | let rd = rt; 661 | let rt = get_reg_alu_input!(rt); 662 | 663 | macro_rules! mem_ref { 664 | ($sz:ident) => { 665 | MemRef { 666 | mem: state.get(cx, self.mem), 667 | mem_type: self.mem_type, 668 | addr: get_reg_mem_addr!(rs) + imm16.sext(self.mem_type.addr_size), 669 | size: MemSize::$sz, 670 | } 671 | }; 672 | } 673 | 674 | match op { 675 | 4..=7 | 20..=23 => { 676 | let rs = get_reg_native!(rs); 677 | 678 | // FIXME(eddyb) for 20..=23, only execute the delay slot when 679 | // the branch is taken (the specs talk about "nullification"). 680 | let _is_likely = matches!(op, 20..=23); 681 | 682 | let (cond, negate) = match op { 683 | 4 | 5 | 20 | 21 => (cx.a(rs.cmp_eq(rt)), false), 684 | 6 | 7 | 22 | 23 => (cx.a(Const::new(self.regs.size, 0).cmp_lt_s(rs)), true), 685 | _ => unreachable!(), 686 | }; 687 | let negate = match op { 688 | 4 | 6 | 20 | 22 => negate, 689 | 5 | 7 | 21 | 23 => !negate, 690 | _ => unreachable!(), 691 | }; 692 | 693 | return branch!(cond => !negate); 694 | } 695 | 696 | 8..=14 | 24 | 25 => { 697 | let op = match op { 698 | 8 | 9 | 24 | 25 => IntOp::Add, 699 | 10 => IntOp::LtS, 700 | 11 => IntOp::LtU, 701 | 12 => IntOp::And, 702 | 13 => IntOp::Or, 703 | 14 => IntOp::Xor, 704 | 705 | _ => unreachable!(), 706 | }; 707 | // HACK(eddyb) pick sign- or zero-extension based on op. 708 | let imm32 = match op { 709 | IntOp::And | IntOp::Or | IntOp::Xor => imm16.zext(B32), 710 | _ => imm16.sext(B32), 711 | }; 712 | let imm = imm32.sext(alu_size); 713 | 714 | let rs = get_reg_alu_input!(rs); 715 | let mut v = cx.a(Node::Int(op, alu_size, rs, cx.a(imm))); 716 | if cx[v].ty(cx) == Type::Bits(B1) { 717 | v = cx.a(v.zext(alu_size)); 718 | } 719 | set_reg_alu_output!(rd, v); 720 | } 721 | 15 => set_reg_alu_output!(rd, cx.a(imm16.zext(B32) << 16)), 722 | 723 | // FIXME(eddyb) should `M32` loads also be `sext`ing? 724 | // (also, should `B32` be replaced by `alu_size` in all loads?) 
725 | 32 => set_reg_alu_output!(rd, cx.a(mem_ref!(M8).load().sext(B32))), 726 | 33 => set_reg_alu_output!(rd, cx.a(mem_ref!(M16).load().sext(B32))), 727 | 35 => set_reg_alu_output!(rd, cx.a(mem_ref!(M32).load())), 728 | 36 => set_reg_alu_output!(rd, cx.a(mem_ref!(M8).load().zext(B32))), 729 | 37 => set_reg_alu_output!(rd, cx.a(mem_ref!(M16).load().zext(B32))), 730 | 731 | 40 => state.set(cx, self.mem, cx.a(mem_ref!(M8).store(rt.trunc(B8)))), 732 | 41 => state.set(cx, self.mem, cx.a(mem_ref!(M16).store(rt.trunc(B16)))), 733 | 43 => state.set(cx, self.mem, cx.a(mem_ref!(M32).store(rt))), 734 | 735 | 47 => { 736 | // FIXME(eddyb) use the result of rs+imm as an argument. 737 | return Err(Edges::One(Edge { 738 | state, 739 | effect: Effect::Opaque { 740 | call: format!( 741 | "CACHE(op={}, base={}, imm={:?})", 742 | field(16, 5), 743 | Reg::try_from(field(21, 5)) 744 | .map(|r| &cx[cx[self.regs[r]].name]) 745 | .unwrap_or_else(|ZeroReg| "zero"), 746 | imm16, 747 | ), 748 | next_pc: cx.a(*pc), 749 | }, 750 | })); 751 | } 752 | 753 | // FIXME(eddyb) implement basic floating-point instructions. 754 | 49 | 53 | 57 | 61 => { 755 | // FIXME(eddyb) use the result of rs+imm as an argument. 
756 | return Err(Edges::One(Edge { 757 | state, 758 | effect: Effect::Opaque { 759 | call: format!( 760 | "{}{}C1_FPU(f{}, base={}, imm={:?})", 761 | if (op & 8) == 0 { 'L' } else { 'S' }, 762 | if (op & 4) == 0 { 'W' } else { 'D' }, 763 | rd, 764 | Reg::try_from(field(21, 5)) 765 | .map(|r| &cx[cx[self.regs[r]].name]) 766 | .unwrap_or_else(|ZeroReg| "zero"), 767 | imm16, 768 | ), 769 | next_pc: cx.a(*pc), 770 | }, 771 | })); 772 | } 773 | 774 | 55 => set_reg_alu_output!(rd, cx.a(mem_ref!(M64).load())), 775 | 63 => state.set(cx, self.mem, cx.a(mem_ref!(M64).store(rt))), 776 | 777 | _ => error!("unknown opcode {} (0b{0:06b} / 0x{0:02x})", op), 778 | } 779 | } 780 | 781 | Ok(state) 782 | } 783 | } 784 | -------------------------------------------------------------------------------- /src/explore.rs: -------------------------------------------------------------------------------- 1 | use crate::ir::{ 2 | BitSize, Block, Const, Cx, Edge, Edges, Effect, INode, IntOp, MemRef, MemSize, Node, State, 3 | Visit, Visitor, 4 | }; 5 | use crate::platform::Platform; 6 | use itertools::{Either, Itertools}; 7 | use smallvec::SmallVec; 8 | use std::collections::hash_map::Entry; 9 | use std::collections::{BTreeMap, BTreeSet, HashMap}; 10 | use std::io::Write; 11 | use std::mem; 12 | use std::sync::atomic::{AtomicBool, Ordering}; 13 | use std::{fmt, iter}; 14 | 15 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] 16 | enum Set1 { 17 | Empty, 18 | One(T), 19 | Many, 20 | } 21 | 22 | impl Visit for Set1 { 23 | fn walk(&self, visitor: &mut impl Visitor) { 24 | match self { 25 | Set1::Empty | Set1::Many => {} 26 | Set1::One(x) => x.visit(visitor), 27 | } 28 | } 29 | } 30 | 31 | impl Set1 { 32 | fn insert(&mut self, value: T) { 33 | match *self { 34 | Set1::Empty => *self = Set1::One(value), 35 | Set1::One(ref prev) if *prev == value => {} 36 | _ => *self = Set1::Many, 37 | } 38 | } 39 | 40 | fn union(mut self, other: Self) -> Self { 41 | match other { 42 | Set1::Empty => {} 43 | 
Set1::One(x) => self.insert(x), 44 | Set1::Many => return Set1::Many, 45 | } 46 | self 47 | } 48 | 49 | fn map(self, f: impl FnOnce(T) -> U) -> Set1 { 50 | match self { 51 | Set1::Empty => Set1::Empty, 52 | Set1::One(x) => Set1::One(f(x)), 53 | Set1::Many => Set1::Many, 54 | } 55 | } 56 | 57 | fn flat_map(self, f: impl FnOnce(T) -> Set1) -> Set1 { 58 | match self { 59 | Set1::Empty => Set1::Empty, 60 | Set1::One(x) => f(x), 61 | Set1::Many => Set1::Many, 62 | } 63 | } 64 | } 65 | 66 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 67 | pub struct BlockId { 68 | pub entry_pc: u64, 69 | } 70 | 71 | impl fmt::Debug for BlockId { 72 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 73 | Const::new(BitSize::B64, self.entry_pc).fmt(f) 74 | } 75 | } 76 | 77 | impl From for BlockId { 78 | fn from(entry_pc: Const) -> Self { 79 | BlockId { 80 | entry_pc: entry_pc.as_u64(), 81 | } 82 | } 83 | } 84 | 85 | trait MaybeSet: From { 86 | fn maybe_set>(_f: impl FnOnce() -> I) -> Option { 87 | None 88 | } 89 | fn flat_map(self, f: impl FnMut(T) -> Self) -> Self; 90 | } 91 | 92 | impl MaybeSet for T { 93 | fn flat_map(self, mut f: impl FnMut(T) -> Self) -> Self { 94 | f(self) 95 | } 96 | } 97 | 98 | // FIXME(eddyb) maybe use a library for this? 
/// A set representation optimized for the common cases of zero or one
/// element; a heap-allocated `BTreeSet` is only created once a second
/// distinct value is inserted.
// NOTE(review): generic parameters appear to have been stripped from this
// text by extraction (e.g. `SmallSet` was presumably `SmallSet<T>`,
// `BTreeSet` presumably `BTreeSet<T>`) — the code below preserves the
// text as found; confirm against the original source before building.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SmallSet {
    Empty,
    One(T),
    Many(BTreeSet),
}

impl From for SmallSet {
    // A single value converts to the `One` representation.
    fn from(x: T) -> Self {
        SmallSet::One(x)
    }
}

impl SmallSet {
    /// Iterates the elements by reference; the `Empty`/`One` cases go
    /// through an `Option` iterator so no allocation is involved.
    pub fn iter<'a>(&'a self) -> impl Iterator {
        match self {
            SmallSet::Empty => Either::Left(None.into_iter()),
            SmallSet::One(x) => Either::Left(Some(x).into_iter()),
            SmallSet::Many(set) => Either::Right(set.iter()),
        }
    }

    /// Consumes the set and iterates its elements by value.
    pub fn into_iter(self) -> impl Iterator {
        match self {
            SmallSet::Empty => Either::Left(None.into_iter()),
            SmallSet::One(x) => Either::Left(Some(x).into_iter()),
            SmallSet::Many(set) => Either::Right(set.into_iter()),
        }
    }

    /// Inserts `value`, upgrading the representation as needed:
    /// `Empty` becomes `One`, and `One` becomes `Many` (carrying the
    /// previous element over into the new `BTreeSet`). Re-inserting the
    /// element already held by `One` is a no-op.
    fn insert(&mut self, value: T) {
        // First pass: handle the cheap cases and, when upgrading from
        // `One` to `Many`, extract the previously-held element.
        let prev = match self {
            SmallSet::Empty => {
                *self = SmallSet::One(value);
                return;
            }
            SmallSet::One(prev) if *prev == value => return,
            SmallSet::One(_) => match mem::replace(self, SmallSet::Many(BTreeSet::new())) {
                SmallSet::One(prev) => Some(prev),
                _ => unreachable!(),
            },
            SmallSet::Many(_) => None,
        };
        // Second pass: `self` is now guaranteed to be `Many`; add both
        // the carried-over element (if any) and the new value.
        match self {
            SmallSet::Many(set) => {
                set.extend(prev);
                set.insert(value);
            }
            _ => unreachable!(),
        }
    }
}

impl MaybeSet for SmallSet {
    // Unlike the default trait method (which returns `None`), this impl
    // actually collects the iterator produced by `f` into a `SmallSet`.
    fn maybe_set>(f: impl FnOnce() -> I) -> Option {
        let mut set = SmallSet::Empty;
        for x in f().into_iter() {
            set.insert(x);
        }
        Some(set)
    }

    // Set-valued `flat_map`: applies `f` to every element and unions
    // all resulting sets together.
    fn flat_map(self, mut f: impl FnMut(T) -> Self) -> Self {
        let mut result = SmallSet::Empty;
        for x in self.into_iter().map(|x| f(x).into_iter()).flatten() {
            result.insert(x);
        }
        result
    }
}

impl INode {
    /// Resolve a load of (`addr`, `size`) against this memory value:
    /// substitutes globals from `base`, looks through `Store` chains for
    /// a store to the same address/size, and for the ROM-backed memory
    /// global attempts to read the value straight out of the platform
    /// ROM; otherwise a `Load` node is reconstructed.
    // HACK(eddyb) try to get the last stored value.
    fn subst_reduce_load>(
        self,
        explorer: &Explorer<'_>,
        base: Option<&State>,
        addr: INode,
        size: MemSize,
    ) -> S {
        let cx = explorer.cx;
        match cx[self] {
            Node::GlobalIn(g) => {
                // If `base` binds this memory global, keep resolving the
                // load through the bound value instead.
                if let Some(m) = base.and_then(|base| base.globals.get(&g).copied()) {
                    return m.subst_reduce_load(explorer, None, addr, size);
                }

                if g == explorer.platform.isa().mem_containing_rom() {
                    let mem_type = cx[g].ty.mem().unwrap();

                    // HACK(eddyb) assume it's from the ROM, if in range of it.
                    if let Some(addr) = cx[addr].as_const() {
                        if let Ok(v) = explorer.platform.rom().load(mem_type, addr, size) {
                            return cx.a(v).into();
                        }
                    }

                    // HACK(eddyb) assume that an array-like address has a
                    // first element (and notify the user about it).
                    if let Node::Int(IntOp::Add, _, index, base_addr) = cx[addr] {
                        if let Some(base_addr) = cx[base_addr].as_const() {
                            if let Some(&byte_len) = explorer.array_len.get(&base_addr.as_u64()) {
                                // User-declared array: when `S` supports sets
                                // (`SmallSet`), enumerate every element the
                                // load could produce.
                                // NOTE(review): element addresses step by 1
                                // byte (`base + i`), not by `size.bytes()`,
                                // while `len` is `byte_len / size.bytes()` —
                                // confirm this stride is intentional.
                                let len = byte_len / size.bytes() as u64;
                                let maybe_set = S::maybe_set(|| {
                                    (0..len).map(|i| {
                                        cx.a(explorer
                                            .platform
                                            .rom()
                                            .load(
                                                mem_type,
                                                Const::new(base_addr.size, base_addr.as_u64() + i),
                                                size,
                                            )
                                            .unwrap())
                                        .into()
                                    })
                                });
                                if let Some(set) = maybe_set {
                                    return set;
                                }
                            } else {
                                // HACK(eddyb) try to guess when what we're assuming to
                                // be the index can't realistically be a pointer itself.
                                let index_may_be_pointer = match cx[index] {
                                    // FIXME(eddyb) actually check the parameters here.
                                    Node::Int(IntOp::Shl, ..)
                                    | Node::Int(IntOp::ShrU, ..)
                                    | Node::Zext(..) => false,

                                    _ => true,
                                };

                                if !index_may_be_pointer {
                                    if let Ok(v) =
                                        explorer.platform.rom().load(mem_type, base_addr, size)
                                    {
                                        println!(
                                            "explore: possible array indexing with base {:?}, \
                                             assuming index ({}) is 0 and ignoring other values",
                                            base_addr,
                                            cx.pretty_print(&index),
                                        );
                                        println!(
                                            " help: you can indicate the array address range \
                                             with e.g. `-a {:?}..{:?}` for a length of 1",
                                            base_addr,
                                            Const::new(
                                                base_addr.size,
                                                base_addr.as_u64() + size.bytes() as u64
                                            ),
                                        );
                                        return cx.a(v).into();
                                    }
                                }
                            }
                        }
                    }
                }

                // Couldn't resolve the load: rebuild it as a `Load` node.
                cx.a(Node::Load(MemRef {
                    mem: self,
                    mem_type: cx[self].ty(cx).mem().unwrap(),
                    addr,
                    size,
                }))
                .into()
            }

            // A store to the exact same (addr, size) supplies the loaded
            // value; any other store is skipped over.
            // NOTE(review): overlapping-but-not-identical accesses are
            // looked through — presumably assumed disjoint; confirm.
            Node::Store(r, v) => r.addr.subst_reduce::(explorer, base).flat_map(|r_addr| {
                if r_addr == addr && r.size == size {
                    v.subst_reduce(explorer, base)
                } else {
                    r.mem.subst_reduce_load(explorer, base, addr, size)
                }
            }),

            // Memory values are only ever `GlobalIn` or `Store` here.
            _ => unreachable!(),
        }
    }

    /// Recursively substitutes `GlobalIn` values from `base` and rebuilds
    /// the node, threading intermediate results through `S::flat_map` so
    /// that a set-valued `S` (`SmallSet`) enumerates all combinations.
    // FIXME(eddyb) introduce a more general "folder" abstraction.
    fn subst_reduce>(self, explorer: &Explorer<'_>, base: Option<&State>) -> S {
        let subst_reduce = |x: INode| x.subst_reduce::(explorer, base);
        let cx = explorer.cx;
        match cx[self] {
            // Inputs bound by `base` are replaced (and further reduced
            // without `base`, to avoid re-substituting).
            Node::GlobalIn(g) => base
                .and_then(|base| base.globals.get(&g).copied())
                .map_or(self.into(), |node| node.subst_reduce(explorer, None)),

            Node::Const(_) => self.into(),

            Node::Int(op, size, a, b) => subst_reduce(a)
                .flat_map(|a| subst_reduce(b).flat_map(|b| cx.a(Node::Int(op, size, a, b)).into())),
            Node::Trunc(size, x) => subst_reduce(x).flat_map(|x| cx.a(Node::Trunc(size, x)).into()),
            Node::Sext(size, x) => subst_reduce(x).flat_map(|x| cx.a(Node::Sext(size, x)).into()),
            Node::Zext(size, x) => subst_reduce(x).flat_map(|x| cx.a(Node::Zext(size, x)).into()),
            // Loads go through `subst_reduce_load` to try resolving the
            // value from stores / ROM.
            Node::Load(r) => subst_reduce(r.addr)
                .flat_map(|addr| r.mem.subst_reduce_load(explorer, base, addr, r.size)),

            Node::Store(r, x) => subst_reduce(r.mem).flat_map(|mem| {
                subst_reduce(r.addr).flat_map(|addr| {
                    subst_reduce(x).flat_map(|x| {
                        cx.a(Node::Store(
                            MemRef {
                                mem,
                                mem_type: r.mem_type,
                                addr,
                                size: r.size,
                            },
                            x,
                        ))
                        .into()
                    })
                })
            }),
        }
    }
}

/// Options for handling an exit "continuation".
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
struct ExitOptions {
    /// Argument values for the exit "continuation".
    /// These will be back-propagated through jumps, to the exit "continuation".
    // FIXME(eddyb) `SmallVec` doesn't seem to be faster than `Vec`?!
    args_values: SmallVec<[INode; 4]>,
}

struct Partial {
    /// Set to `true` when a cached partial value is used.
    observed: bool,
}

/// The cumulative exit "continuation" of a (sub-)CFG,
/// computed from all the jumps that don't resolve to
/// a static target.
// NOTE(review): several field types below read as bare `Set1`, `BTreeMap`,
// `HashMap`, `Option>` etc. — their `<...>` parameters were stripped by text
// extraction (most are presumably over `INode`/`BlockId`, e.g.
// `Set1<INode>`, `BTreeMap<BlockId, Block>`); restore from upstream history.
struct Exit {
    /// Set of non-constant jump destination values.
    /// An empty set indicates the (sub-)CFG diverges, by
    /// eventually reaching infinite loops and/or traps.
    targets: Set1,

    /// One set of values per "continuation argument".
    /// Must be the same length as the `args_values` in `ExitOptions`.
    // TODO(eddyb) should this be per `targets` value?
    // FIXME(eddyb) `SmallVec` doesn't seem to be faster than `Vec`?!
    args_values: SmallVec<[Set1; 4]>,

    /// Indicates whether this (sub-)CFG contains unresolved
    /// cycles, which may have resulted in the computed exit
    /// being different from the eventual fixpoint.
    partial: Option,
}

impl Exit {
    /// Combines two `Exit`s (e.g. from the two sides of a branch) by
    /// unioning targets and per-argument value sets; warns (without
    /// failing) when two distinct single targets are merged into `Many`.
    fn merge(self, cx: &Cx, bb: BlockId, other: Self) -> Self {
        if let (Set1::One(a), Set1::One(b)) = (self.targets, other.targets) {
            if a != b {
                println!(
                    "explore: {:?}: ambiguous targets: {} vs {}",
                    bb,
                    cx.pretty_print(&a),
                    cx.pretty_print(&b)
                );
            }
        }
        Exit {
            targets: self.targets.union(other.targets),
            // `zip_eq` (itertools) panics on length mismatch — both sides
            // must honor the `ExitOptions::args_values` length invariant.
            args_values: self
                .args_values
                .into_iter()
                .zip_eq(other.args_values)
                .map(|(a, b)| a.union(b))
                .collect(),
            partial: self.partial.or(other.partial),
        }
    }
}

pub struct Explorer<'a> {
    pub cx: &'a Cx,
    pub platform: &'a dyn Platform,
    // NOTE(review): key/value parameters stripped — presumably
    // `BTreeMap<BlockId, Block>` given `get_or_lift_block`'s usage.
    pub blocks: BTreeMap,

    /// Analysis input indicating the length of an array, in bytes.
    // NOTE(review): parameters stripped — keyed by `base_addr.as_u64()`
    // and divided as `u64`, so presumably `HashMap<u64, u64>`.
    pub array_len: HashMap,

    /// Analysis output indicating that a block takes a "continuation" which is
    /// static in some ancestors, e.g. callees taking the return "continuation".
    /// The values are the ancestors (see also `eventual_static_continuation`).
    // NOTE(review): parameters stripped; values are a set of `BlockId`
    // (`.or_default().insert(bb)` in `find_exit_uncached_on_edge`).
    pub takes_static_continuation: HashMap>,

    /// Analysis output indicating that a block will eventually reach another
    /// block by going through some sub-CFG that takes a "continuation",
    /// e.g. calls reaching the return "continuation".
    // NOTE(review): presumably `HashMap<BlockId, BlockId>` (see `.insert(bb, target_bb)`).
    pub eventual_static_continuation: HashMap,

    /// When set, `find_exit` bails out early once this flag is raised
    /// (wired to Ctrl-C via the `ctrlc` crate, per `Cargo.toml`).
    cancel_token: Option<&'a AtomicBool>,

    /// Progress-line terminal from `term::stderr()`; `None` when stderr
    /// is not a terminal. NOTE(review): inner type stripped — `term::stderr`
    /// returns `Option<Box<term::StderrTerminal>>`.
    status_term: Option>,

    /// Memoization for `find_exit`, keyed by block and options.
    exit_cache: HashMap<(BlockId, ExitOptions), Exit>,
}

impl Drop for Explorer<'_> {
    /// Leave the status line cleanly terminated (reset colors, newline)
    /// when the explorer goes away, ignoring any terminal errors.
    fn drop(&mut self) {
        if let Some(term) = &mut self.status_term {
            let _ = term.reset();
            let _ = writeln!(term.get_mut(), "");
            let _ = term.get_mut().flush();
        }
    }
}

impl<'a> Explorer<'a> {
    /// Creates an empty explorer over `cx`/`platform`; analysis state is
    /// populated lazily by `get_or_lift_block` / `find_exit`.
    pub fn new(
        cx: &'a Cx,
        platform: &'a dyn Platform,
        cancel_token: Option<&'a AtomicBool>,
    ) -> Self {
        Explorer {
            cx,
            platform,
            blocks: BTreeMap::new(),
            array_len: HashMap::new(),
            takes_static_continuation: HashMap::new(),
            eventual_static_continuation: HashMap::new(),
            cancel_token,
            status_term: term::stderr(),
            exit_cache: HashMap::new(),
        }
    }

    /// Returns the lifted `Block` for `bb`, lifting instructions one at a
    /// time from `bb.entry_pc` until the ISA reports edges, on first demand.
    pub fn get_or_lift_block(&mut self, bb: BlockId) -> &Block {
        // FIXME(eddyb) clean this up whenever NLL/Polonius can do the
        // efficient check (`if let Some(x) = map.get(k) { return x; }`).
        while !self.blocks.contains_key(&bb) {
            let mut state = State::default();
            // The PC width comes from the address size of the ROM-bearing
            // memory's type.
            let mut pc = Const::new(
                self.cx[self.platform.isa().mem_containing_rom()]
                    .ty
                    .mem()
                    .unwrap()
                    .addr_size,
                bb.entry_pc,
            );
            let edges = loop {
                match self
                    .platform
                    .isa()
                    .lift_instr(self.cx, self.platform.rom(), &mut pc, state)
                {
                    // `Ok` means "keep going in the same block" with the
                    // updated state; `Err` carries the block-ending edges.
                    Ok(new_state) => state = new_state,
                    Err(edges) => break edges,
                }

                // Prevent blocks from overlapping where possible.
                if self.blocks.contains_key(&BlockId::from(pc)) {
                    break Edges::One(Edge {
                        state,
                        effect: Effect::Jump(self.cx.a(pc)),
                    });
                }
            };

            // HACK(eddyb) detect the simplest self-loops, and split the block.
            // Lifting a constant backward-jump target inside this block first
            // causes the overlap check above to split on the retry.
            let retry = edges
                .as_ref()
                .map(|e, _| match e.effect {
                    Effect::Jump(target)
                    | Effect::Opaque {
                        next_pc: target, ..
                    } => self.cx[target]
                        .as_const()
                        .map(BlockId::from)
                        .filter(|&target| bb < target && target.entry_pc < pc.as_u64())
                        .map(|target| self.get_or_lift_block(target))
                        .is_some(),
                    Effect::Error(_) => false,
                })
                .merge(|a, b| a | b);
            if retry {
                continue;
            }

            self.blocks.insert(bb, Block { pc: ..pc, edges });

            // Best-effort progress line on stderr; all errors ignored.
            if let Some(term) = &mut self.status_term {
                if term.carriage_return().is_ok() && term.delete_line().is_ok() {
                    let _ = write!(
                        term.get_mut(),
                        "Last lifted block: {:?} | Total found blocks: {}",
                        bb,
                        self.blocks.len()
                    );
                    let _ = term.get_mut().flush();
                }
            }

            break;
        }

        &self.blocks[&bb]
    }

    /// Per-edge sets of (substituted/reduced) jump-target values of `bb`.
    /// Requires `bb` to already be lifted (indexes `self.blocks` directly).
    // NOTE(review): return type reads `Edges>` and the turbofish
    // `subst_reduce::>` — both lost a stripped `SmallSet<INode>`-like
    // parameter to extraction.
    fn get_block_targets(&self, bb: BlockId) -> Edges> {
        self.blocks[&bb].edges.as_ref().map(|e, _| match e.effect {
            Effect::Jump(target)
            | Effect::Opaque {
                next_pc: target, ..
            } => target.subst_reduce::>(self, None),
            Effect::Error(_) => SmallSet::Empty,
        })
    }

    /// Get the constant jump targets of a block, as seen by the analysis.
    // HACK(eddyb) this shouldn't be needed, but jump table support
    // (via `-a`/`--array`) isn't baked into the `Block`s themselves.
    // (Doc comment for this method precedes it in the file.)
    // NOTE(review): return type reads `Edges>` — a `SmallSet<BlockId>`-like
    // parameter was stripped by text extraction.
    pub fn get_block_direct_targets(&self, bb: BlockId) -> Edges> {
        self.get_block_targets(bb).map(|targets, _| {
            // Keep only targets that reduce to constants, as `BlockId`s.
            let mut direct_targets = SmallSet::Empty;
            for target in targets.into_iter() {
                if let Some(target_bb) = self.cx[target].as_const().map(BlockId::from) {
                    direct_targets.insert(target_bb);
                }
            }
            direct_targets
        })
    }

    /// Split any blocks that overlap the block following them.
    /// Warning: this may invalidate analyses sensitive to the distinction.
    pub fn split_overlapping_bbs(&mut self) {
        // First `entry..end` block range strictly after `start`
        // (or the very first block when `start` is `None`).
        // NOTE(review): `start: Option` lost its `<BlockId>`-like
        // parameter to extraction.
        let bb_range_after = |this: &Self, start: Option| {
            use std::ops::Bound::*;
            this.blocks
                .range((start.map_or(Unbounded, Excluded), Unbounded))
                .map(|(&bb, block)| bb..BlockId::from(block.pc.end))
                .next()
        };

        if let Some(mut bb) = bb_range_after(self, None) {
            while let Some(next) = bb_range_after(self, Some(bb.start)) {
                // Split overlapping blocks by discarding and re-lifting them.
                // (Re-lifting stops early at `next.start` because that block
                // now exists — see the overlap check in `get_or_lift_block`.)
                if bb.contains(&next.start) {
                    self.blocks.remove(&bb.start);
                    self.eventual_static_continuation.remove(&bb.start);
                    self.get_or_lift_block(bb.start);
                }

                bb = next;
            }
        }
    }

    /// Explores the whole (sub-)CFG reachable from `entry_pc`, reporting
    /// any exit targets that could not be resolved statically.
    pub fn explore_bbs(&mut self, entry_pc: Const) {
        let entry_bb = BlockId::from(entry_pc);

        let exit = self.find_exit(entry_bb, &ExitOptions::default());
        exit.targets.map(|target| {
            println!(
                "explore: entry {:?} reaches unknown exit target {}",
                entry_bb,
                self.cx.pretty_print(&target)
            );
        });
    }

    /// Computes the `Exit` of `bb` along one specific out-edge
    /// (`br_cond` selects the edge, `direct_target` is its target value),
    /// chasing chains of statically-resolved continuations.
    // NOTE(review): `br_cond: Option,` lost its parameter to extraction
    // (it is used to index `edges.as_ref()[br_cond]`, so presumably
    // `Option<bool>` with a matching `Index` impl on `Edges`).
    fn find_exit_uncached_on_edge(
        &mut self,
        bb: BlockId,
        options: &ExitOptions,
        br_cond: Option,
        direct_target: INode,
    ) -> Exit {
        let mut exit =
            Exit {
                targets: Set1::One(direct_target),

                // FIXME(eddyb) compute this lazily? (it may not be used)
                // Each requested argument value, substituted through this
                // edge's state.
                args_values: options
                    .args_values
                    .iter()
                    .map(|&arg_value| {
                        Set1::One(arg_value.subst_reduce(
                            self,
                            Some(&self.blocks[&bb].edges.as_ref()[br_cond].state),
                        ))
                    })
                    .collect(),

                partial: None,
            };

        // HACK(eddyb) this uses a stack of targets to be able to handle a chain
        // of exit continuations, all resolved by `bb` simultaneously.
        // This can happen when e.g. there is a call in between a jump table
        // and a constant input to the jump table: the call will be the first
        // entry in the stack, followed by the jump table.
        let mut stack = vec![];
        loop {
            // Stop as soon as the current exit target isn't a single constant.
            let target_bb = match exit
                .targets
                .map(|target| self.cx[target].as_const().map(BlockId::from))
            {
                Set1::One(Some(target_bb)) => target_bb,
                _ => return exit,
            };

            // HACK(eddyb) detect trivial fixpoints/cycles.
            // Jumping straight back to the block we just resolved, with only
            // constant argument values left, means the chain diverges here.
            if stack.last() == Some(&target_bb) {
                let all_args_values_are_const =
                    exit.args_values.iter().all(|&arg_values| match arg_values {
                        Set1::Empty | Set1::Many => true,
                        Set1::One(value) => self.cx[value].as_const().is_some(),
                    });
                if all_args_values_are_const {
                    exit.targets = Set1::Empty;
                    return exit;
                }
            }

            // Record "bb statically resolves prev_target_bb's continuation"
            // only for the simple one-deep case.
            if let [prev_target_bb] = stack[..] {
                self.takes_static_continuation
                    .entry(prev_target_bb)
                    .or_default()
                    .insert(bb);
                // HACK(eddyb) save the observed value without accounting
                // for multiple possible values etc.
                self.eventual_static_continuation.insert(bb, target_bb);
            }

            // Recurse on the current target.
            let target_exit = self.find_exit(target_bb, options);
            // FIXME(eddyb) abstract composing `partial`s better.
            exit.partial = exit.partial.or(target_exit.partial);
            // Resolves a batch of value slots through every frame on the
            // stack (innermost last), then substitutes through this edge.
            // NOTE(review): `SmallVec<[Set1; 4]>` and `&Set1` below lost
            // their `<INode>`-like parameters to extraction.
            let mut resolve_values = |all_values: &mut SmallVec<[Set1; 4]>| {
                // FIXME(eddyb) disabled because of potential overhead, must
                // measure before turning it back on! (especially as it's doing
                // some really inefficient searches... should build a map!)
                if false {
                    // Reuse the already computed `args_values` where possible.
                    let all_const_or_in_args_values =
                        all_values.iter().all(|&values| match values {
                            Set1::Empty | Set1::Many => true,
                            Set1::One(v) => {
                                self.cx[v].as_const().is_some() || options.args_values.contains(&v)
                            }
                        });
                    if all_const_or_in_args_values {
                        for values in all_values {
                            *values = (*values).flat_map(|value| {
                                exit.args_values[options
                                    .args_values
                                    .iter()
                                    .position(|&a| a == value)
                                    .unwrap()]
                            });
                        }
                        return;
                    }
                }

                for &frame_bb in stack.iter().rev() {
                    // Common closure to avoid mismatching when filtering down
                    // all the `Set1` to just the `INode`s, to resolve.
                    let resolvable_all_values_slot = |&slot: &Set1| match slot {
                        Set1::Empty | Set1::Many => None,
                        Set1::One(v) => {
                            // Constants don't need any propagation work.
                            if self.cx[v].as_const().is_some() {
                                return None;
                            }

                            Some(v)
                        }
                    };
                    let resolvable_values: SmallVec<[INode; 4]> = all_values
                        .iter()
                        .filter_map(resolvable_all_values_slot)
                        .collect();
                    let resolvable_values_mut = all_values
                        .iter_mut()
                        .filter(|slot| resolvable_all_values_slot(slot).is_some());
                    if !resolvable_values.is_empty() {
                        // Ask the frame's sub-CFG what these values become
                        // at its own exit.
                        let frame_exit = self.find_exit(
                            frame_bb,
                            &ExitOptions {
                                args_values: resolvable_values,
                            },
                        );
                        exit.partial = exit.partial.take().or(frame_exit.partial);

                        // `zip_eq` is safe: both sides are filtered by the
                        // same `resolvable_all_values_slot` predicate.
                        for (slot, frame_resolved) in
                            resolvable_values_mut.zip_eq(frame_exit.args_values)
                        {
                            *slot = frame_resolved;
                        }
                    }
                }
                // Finally substitute everything through this edge's state.
                for values in all_values {
                    *values = values.map(|value| {
                        value.subst_reduce(
                            self,
                            Some(&self.blocks[&bb].edges.as_ref()[br_cond].state),
                        )
                    });
                }
            };

            // Resolve the target's argument values and its own exit targets
            // in one batch (targets appended last, popped right back off).
            let mut args_values_and_targets = target_exit
                .args_values
                .iter()
                .copied()
                .chain([target_exit.targets])
                .collect();
            resolve_values(&mut args_values_and_targets);

            let targets = args_values_and_targets.pop().unwrap();
            let args_values = args_values_and_targets;

            exit.targets = targets;
            exit.args_values = args_values;

            stack.push(target_bb);
        }
    }

    /// Computes `bb`'s `Exit` without consulting the cache: lifts the block,
    /// then merges the per-edge results of `find_exit_uncached_on_edge`.
    fn find_exit_uncached(&mut self, bb: BlockId, options: &ExitOptions) -> Exit {
        let cx = self.cx;
        self.get_or_lift_block(bb);

        // TODO(eddyb) avoid duplicating work between all the possible targets,
        // inside `exit_from_target`, when they converge early.
        let edge_targets = self.get_block_targets(bb);

        // HACK(eddyb) work around `get_or_lift_block` not splitting existing
        // blocks, and there being no mechanism to avoid overlapping blocks,
        // by eagerly lifting the branch target that has a higher address.
        if let Edges::Branch { .. } = edge_targets {
            edge_targets
                .as_ref()
                .map(|targets, _| match targets {
                    SmallSet::One(target) => self.cx[*target].as_const().map(BlockId::from),
                    _ => None,
                })
                .merge(|t, e| Some(t?.max(e?)))
                .map(|max_target_bb| self.get_or_lift_block(max_target_bb));
        }

        edge_targets
            .map(|targets, br_cond| {
                // Fold all of one edge's possible targets into a single
                // `Exit`, starting from the neutral (empty) element.
                targets
                    .into_iter()
                    .map(|target| self.find_exit_uncached_on_edge(bb, options, br_cond, target))
                    .fold(
                        Exit {
                            targets: Set1::Empty,
                            // FIXME(eddyb) avoid preallocating here somehow.
                            args_values: iter::repeat(Set1::Empty)
                                .take(options.args_values.len())
                                .collect(),
                            partial: None,
                        },
                        |a, b| a.merge(cx, bb, b),
                    )
            })
            .merge(|t, e| t.merge(cx, bb, e))
    }

    // FIXME(eddyb) reuse cached value when it doesn't interact with `options`.
    /// Cached fixpoint wrapper around `find_exit_uncached`: seeds the cache
    /// with an empty `Partial` entry (so cycles observe *something*), then
    /// recomputes until no stale cached value was observed mid-computation.
    fn find_exit(&mut self, bb: BlockId, options: &ExitOptions) -> Exit {
        // FIXME(eddyb) avoid cloning here, perhaps by allocating `ExitOptions`
        // in an `elsa::FrozenVec`, since it's kept alive by the cache anyway.
        match self.exit_cache.entry((bb, options.clone())) {
            Entry::Occupied(mut entry) => {
                let cached = entry.get_mut();
                // Mark the cached (possibly partial) value as observed, but
                // hand the caller a fresh, unobserved `Partial`.
                return Exit {
                    targets: cached.targets,
                    // FIXME(eddyb) avoid cloning here (keep it in an `Rc`?).
                    args_values: cached.args_values.clone(),
                    partial: cached.partial.as_mut().map(|partial| {
                        partial.observed = true;
                        Partial { observed: false }
                    }),
                };
            }
            Entry::Vacant(entry) => {
                // Seed an empty partial entry so recursive lookups of this
                // same (bb, options) key terminate.
                entry.insert(Exit {
                    targets: Set1::Empty,
                    // FIXME(eddyb) avoid preallocating here somehow.
                    args_values: iter::repeat(Set1::Empty)
                        .take(options.args_values.len())
                        .collect(),
                    partial: Some(Partial { observed: false }),
                });
            }
        }

        // TODO(eddyb) actually show that retrying `find_exit_uncached`
        // has *any* effect on the overall results!
        // It *might* be the case that not caching a partial value
        // (i.e. the `entry.remove()` call) has a similar effect?
        loop {
            let mut exit = self.find_exit_uncached(bb, &options);

            // HACK(eddyb) find a more principled place to stick this in.
            if let Some(cancel_token) = self.cancel_token {
                if cancel_token.load(Ordering::Relaxed) {
                    return exit;
                }
            }

            // Cycles are irrelevant if we're already fully general.
            if let Set1::Many = exit.targets {
                exit.partial = None;
            }

            // FIXME(eddyb) avoid cloning here, perhaps by allocating `ExitOptions`
            // in an `elsa::FrozenVec`, since it's kept alive by the cache anyway.
            let mut entry = match self.exit_cache.entry((bb, options.clone())) {
                Entry::Occupied(entry) => entry,
                Entry::Vacant(_) => unreachable!(),
            };
            let cached = entry.get_mut();
            // Publish the new result, remembering what was there before and
            // whether anything read the stale value during this iteration.
            let old_targets = mem::replace(&mut cached.targets, exit.targets);
            // FIXME(eddyb) avoid cloning here (keep it in an `Rc`?).
            let old_args_values = mem::replace(&mut cached.args_values, exit.args_values.clone());
            let old_observed = mem::replace(&mut cached.partial.as_mut().unwrap().observed, false);

            // Keep retrying as long as a now-obsolete `targets` / `arg_values` were observed.
            // TODO(eddyb) how should fixpoint be detected?
            // Can't assume that a certain `targets` set is final,
            // as there could be outer cycles blocking progress.
            // NOTE(review): `|old: Set1, new: Set1|` lost its `<INode>`-like
            // parameters to extraction.
            let cx = self.cx;
            let progress = |old: Set1, new: Set1| match (old, new) {
                (Set1::One(old), Set1::One(new)) => {
                    if old != new {
                        println!(
                            "explore: {:?} changed a value from {} to {}",
                            bb,
                            cx.pretty_print(&old),
                            cx.pretty_print(&new)
                        )
                    }
                    false
                }
                (Set1::Empty, Set1::Empty) | (Set1::Many, Set1::Many) => false,
                (Set1::Empty, _) | (_, Set1::Many) => true,
                // Values may only ever grow Empty -> One -> Many.
                (_, Set1::Empty) | (Set1::Many, _) => unreachable!(),
            };
            // Always check for progress, to ensure the sanity checks run.
            let progress = progress(old_targets, exit.targets)
                | old_args_values
                    .iter()
                    .zip_eq(&exit.args_values)
                    .any(|(&old, &new)| progress(old, new));
            if old_observed && progress {
                continue;
            }

            // Only cache final results.
            if let Some(partial) = &exit.partial {
                // The `observed` flag should only ever be set for
                // the `Exit` inside the cache, but nothing else.
                assert_eq!(partial.observed, false);

                entry.remove();
            } else {
                cached.partial.take();
            }

            return exit;
        }
    }
}