├── .cargo └── config.toml ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── assets ├── iconx256.icns ├── iconx256.ico ├── iconx256.png ├── iconx64.png ├── logo_dark.png ├── logo_light.png ├── logo_text_dark.svg ├── logo_text_light.svg ├── screenshot.png └── split_source.png ├── binformat ├── Cargo.toml └── src │ ├── elf.rs │ ├── lib.rs │ ├── macho.rs │ └── pe.rs ├── build.rs ├── bundle.sh ├── commands ├── Cargo.toml └── src │ ├── cli.rs │ ├── debug.rs │ ├── gui.rs │ └── lib.rs ├── config ├── Cargo.toml └── src │ └── lib.rs ├── debugvault ├── Cargo.toml ├── bin │ ├── dsymutil_aarch64 │ └── dsymutil_x86_64 └── src │ ├── demangler.rs │ ├── dwarf.rs │ ├── error.rs │ ├── intern.rs │ ├── itanium │ ├── ast.rs │ ├── error.rs │ ├── index_str.rs │ ├── mod.rs │ ├── subs.rs │ └── tests.rs │ ├── lib.rs │ ├── msvc │ ├── context.rs │ ├── mod.rs │ └── tests.rs │ ├── pdb.rs │ ├── prefix.rs │ ├── rust │ ├── mod.rs │ └── tests.rs │ └── rust_legacy │ ├── mod.rs │ └── tests.rs ├── decoder-arm ├── Cargo.toml ├── src │ ├── armv7.rs │ ├── armv7 │ │ └── thumb.rs │ ├── armv8 │ │ ├── a64.rs │ │ └── mod.rs │ └── lib.rs └── tests │ ├── armv7 │ ├── mod.rs │ └── thumb.rs │ ├── armv8 │ ├── a64.rs │ └── mod.rs │ └── test.rs ├── decoder-mips ├── Cargo.toml └── src │ ├── lib.rs │ └── tests.rs ├── decoder-riscv ├── Cargo.toml └── src │ ├── lib.rs │ └── tests.rs ├── decoder-x86_64 ├── Cargo.toml └── src │ ├── lib.rs │ ├── long_mode │ ├── display.rs │ ├── evex.rs │ ├── mod.rs │ ├── tests │ │ ├── evex_generated.rs │ │ ├── mod.rs │ │ ├── opcode.rs │ │ ├── operand.rs │ │ └── regspec.rs │ ├── uarch.rs │ └── vex.rs │ ├── protected_mode │ ├── display.rs │ ├── evex.rs │ ├── mod.rs │ ├── tests │ │ ├── evex_generated.rs │ │ ├── mod.rs │ │ ├── opcode.rs │ │ ├── operand.rs │ │ └── regspec.rs │ ├── uarch.rs │ └── vex.rs │ └── safer_unchecked.rs ├── decoder ├── Cargo.toml └── src │ └── lib.rs ├── example_config.yaml ├── gui ├── Cargo.toml ├── fonts │ ├── Hack-Regular.ttf │ ├── IcoMoon.ttf │ └── LigaSFMonoNerdFont-Regular.ttf └── src │ ├── common.rs │ ├── fmt.rs │ ├── icon.rs │ ├── interp.rs │ ├── lib.rs │ ├── panes │ ├── functions.rs │ ├── listing.rs │ ├── mod.rs │ └── source_code.rs │ ├── style.rs │ ├── unix.rs │ ├── wgpu_backend │ ├── egui.rs │ ├── egui.wgsl │ └── mod.rs │ ├── widgets │ ├── donut.rs │ ├── mod.rs │ ├── terminal.rs │ ├── text_edit.rs │ └── text_select.rs │ ├── windows.rs │ └── winit_backend.rs ├── infinite_scroll ├── Cargo.toml └── src │ ├── egui_inbox.rs │ ├── egui_virtual_list.rs │ └── lib.rs ├── log ├── Cargo.toml └── src │ ├── lib.rs │ └── progress.rs ├── processor ├── Cargo.toml └── src │ ├── blocks.rs │ ├── fmt.rs │ └── lib.rs ├── processor_shared ├── Cargo.toml └── src │ └── lib.rs ├── rustfmt.toml ├── src ├── main.rs └── wayland.rs └── tokenizing ├── Cargo.toml └── src └── lib.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(all())'] 2 | rustflags = [ 3 | "-Aclippy::unusual_byte_groupings", 4 | "-Aclippy::upper_case_acronyms", 5 | "-Aclippy::needless_range_loop", 6 | "-Aclippy::new_without_default", 7 | "-Ctarget-cpu=native", 8 | ] 9 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | - pull_request 4 | - push 5 | env: 6 | CARGO_TERM_COLOR: always 7 | jobs: 8 | build-ubuntu: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checking out repo 12 | uses: actions/checkout@v4 13 | - name: Checking out rust toolchain 14 | uses: dtolnay/rust-toolchain@stable 15 | with: 16 | toolchain: stable 17 | components: rustfmt 18 | - name: Caching cargo 19 | uses: Swatinem/rust-cache@v2 20 | - name: Install dependencies 21 | run: sudo apt-get install libgtk-3-dev 22 | - name: Build 23 | run: cargo build --release 24 | - name: Upload binary 25 | uses: actions/upload-artifact@v4 26 | with: 27 | name: Linux build 28 | path: ./target/release/bite 29 | build-macos: 30 | runs-on: macos-latest 31 | steps: 32 | - name: Checking out repo 33 | uses: actions/checkout@v4 34 | - name: Checking out rust toolchain 35 | uses: dtolnay/rust-toolchain@stable 36 | with: 37 | toolchain: stable 38 | components: rustfmt 39 | - name: Caching cargo 40 | uses: Swatinem/rust-cache@v2 41 | - name: Build 42 | run: cargo build --release 43 | - name: Upload binary 44 | uses: actions/upload-artifact@v4 45 | with: 46 | name: MacOS build 47 | path: ./target/release/bite 48 | build-windows: 49 | runs-on: windows-latest 50 | steps: 51 | - name: Checking out repo 52 | uses: actions/checkout@v4 53 | - name: Checking out rust toolchain 54 | uses: dtolnay/rust-toolchain@stable 55 | with: 56 | toolchain: stable 57 | components: rustfmt 58 | - name: Caching cargo 59 | uses: Swatinem/rust-cache@v2 60 | - name: Build 61 | run: cargo build --release 62 | - name: Upload binary 63 | uses: actions/upload-artifact@v4 64 | with: 65 | name: Windows build 66 | path: ./target/release/bite.exe 67 | test-ubuntu: 68 | runs-on: ubuntu-latest 69 | steps: 70 | - name: Checking out repo 71 | uses: actions/checkout@v4 72 | - name: Checking out rust toolchain 73 | uses: dtolnay/rust-toolchain@stable 74 | with: 75 | toolchain: stable 76 | targets: riscv64gc-unknown-none-elf 77 | - name: Caching cargo 78 | uses: Swatinem/rust-cache@v2 79 | - name: Install dependencies 80 | run: sudo apt-get install libgtk-3-dev 81 | - name: Test 82 | run: cargo test --workspace --lib 83 | test-macos: 84 | runs-on: macos-latest 85 | steps: 86 | - name: Checking out repo 87 | uses: actions/checkout@v4 88 | - name: Checking out rust toolchain 89 | uses: dtolnay/rust-toolchain@stable 90 | with: 91 | toolchain: stable 92 | targets: riscv64gc-unknown-none-elf 93 | - name: Caching cargo 94 | uses: Swatinem/rust-cache@v2 95 | - name: Test 96 | run: cargo test --workspace --lib 97 | test-windows: 98 | runs-on: windows-latest 99 | steps: 100 | - name: Checking out repo 101 | uses: actions/checkout@v4 102 | - name: Checking out rust toolchain 103 | uses: dtolnay/rust-toolchain@stable 104 | with: 105 | toolchain: stable 106 | targets: riscv64gc-unknown-none-elf 107 | - name: Caching cargo 108 | uses: Swatinem/rust-cache@v2 109 | - name: Test 110 | run: cargo test --workspace --lib 111 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.vs 3 | *.swp 4 | *~ 5 | .vscode/settings.json 6 | .DS_Store 7 | #Jetbrains IntelliJ/RustRover 8 | .idea/ 9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bite" 3 | description = "Disassembler" 4 | version = "0.3.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | nix = { workspace = true } 9 | object = { workspace = true } 10 | commands = { path = "./commands" } 11 | log = { path = "./log" } 12 | gui = { path = "./gui" } 13 | debugvault = { path = "./debugvault" } 14 | 15 | [profile.release] 16 | lto = 'thin' 17 | 18 | [target.'cfg(windows)'.build-dependencies] 19 | winres = "0.1" 20 | 21 | [workspace] 22 | resolver = "2" 23 | members = [ 24 | "log", 25 | "gui", 26 | "commands", 27 | "tokenizing", 28 | "decoder", 29 | "decoder-x86_64", 30 | "decoder-arm", 31 | "decoder-riscv", 32 | "decoder-mips", 33 | "debugvault", 34 | "processor", 35 | "processor_shared", 36 | "infinite_scroll", 37 | "binformat", 38 | "config" 39 | ] 40 | 41 | [workspace.dependencies] 42 | egui = { version = "0.27", features = ["bytemuck"], default-features = false } 43 | rfd = "0.14" 44 | crossbeam-queue = "0.3" 45 | object = "0.32" 46 | gimli = "0.28" 47 | pdb = { git = "https://github.com/WINSDK/pdb-rs" } 48 | once_cell = "1.18" 49 | nix = { git = "https://github.com/mbyzhang/nix" } 50 | memmap2 = "0.9" 51 | dirs = "5" 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Nicolas Mazzon 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |

11 | 12 |

Ever wanted to inspect every bite of your binary.

13 | 14 | [![Test](https://github.com/WINSDK/bite/actions/workflows/ci.yml/badge.svg)](https://github.com/WINSDK/bite/actions/workflows/ci.yml) 15 | ![](https://img.shields.io/github/license/WINSDK/bite) 16 | 17 | `BiTE` is a platform-agnostic executable analysis tool. It aims to offer an 18 | environment for inspecting the content of binaries and their debug info. While it is 19 | still in early development, various architectures are supported. 20 | 21 | ## Showcase 22 | 23 | Here is an example of the assembly listing viewing. 24 | 25 | ![Assembly listing](./assets/screenshot.png) 26 | 27 | The ability to view a binary's disassembly and the associated source code. 28 | 29 | ![Source Code](./assets/split_source.png) 30 | 31 | ## Installation 32 | 33 | Building from source. 34 | ``` 35 | cargo install --path . 36 | ``` 37 | 38 | ## Features yet to be implemented 39 | 40 | Whenever I have time this year I'll try implementing most of these. \ 41 | If you feel like it, submit a pull request and I'll have a look at it! 42 | 43 | - [x] Port GUI to wgpu + winit 44 | - [x] Header with buttons and options 45 | - [x] Assembly listing exploration 46 | - [x] Interactive terminal 47 | - [ ] Assembly instruction byte patching 48 | - [x] Hex binary viewer 49 | - [ ] Debugging front-end's 50 | - [ ] [GDB](https://www.sourceware.org/gdb) 51 | - [ ] [LLDB](https://lldb.llvm.org) 52 | - [ ] [WinDbg](https://windbg.org) 53 | - [x] X86-64 support 54 | - [x] AArch64/Armv7 support 55 | - [x] Riscv64gc/Riscv32gc support 56 | - [x] MIPS-V support 57 | - [x] Demangling support for most targets 58 | - [x] MSVC 59 | - [x] Itanium 60 | - [x] Rust 61 | - [x] Decoding datastructures depending on each section 62 | - [ ] Assembly listing lifting 63 | - [x] Resolving addresses 64 | - [x] Interpreting non-code data 65 | - [ ] Creating labels for relative jumps 66 | -------------------------------------------------------------------------------- /assets/iconx256.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WINSDK/bite/20b4f68252c7f6c36c831ea925c78fd7819f5c17/assets/iconx256.icns -------------------------------------------------------------------------------- /assets/iconx256.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WINSDK/bite/20b4f68252c7f6c36c831ea925c78fd7819f5c17/assets/iconx256.ico -------------------------------------------------------------------------------- /assets/iconx256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WINSDK/bite/20b4f68252c7f6c36c831ea925c78fd7819f5c17/assets/iconx256.png -------------------------------------------------------------------------------- /assets/iconx64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WINSDK/bite/20b4f68252c7f6c36c831ea925c78fd7819f5c17/assets/iconx64.png -------------------------------------------------------------------------------- /assets/logo_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WINSDK/bite/20b4f68252c7f6c36c831ea925c78fd7819f5c17/assets/logo_dark.png -------------------------------------------------------------------------------- /assets/logo_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WINSDK/bite/20b4f68252c7f6c36c831ea925c78fd7819f5c17/assets/logo_light.png -------------------------------------------------------------------------------- /assets/logo_text_dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /assets/logo_text_light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /assets/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WINSDK/bite/20b4f68252c7f6c36c831ea925c78fd7819f5c17/assets/screenshot.png -------------------------------------------------------------------------------- /assets/split_source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WINSDK/bite/20b4f68252c7f6c36c831ea925c78fd7819f5c17/assets/split_source.png -------------------------------------------------------------------------------- /binformat/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "binformat" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | processor_shared = { path = "../processor_shared" } 8 | log = { path = "../log" } 9 | object = { workspace = true } 10 | -------------------------------------------------------------------------------- /binformat/src/lib.rs: -------------------------------------------------------------------------------- 1 | use object::{Object, ObjectSection, ObjectSymbol}; 2 | use processor_shared::{AddressMap, Addressed}; 3 | 4 | pub mod elf; 5 | pub mod macho; 6 | pub mod pe; 7 | 8 | pub struct RawSymbol<'data> { 9 | pub name: &'data str, 10 | pub module: Option<&'data str>, 11 | } 12 | 13 | fn parse_symbol_table<'data, Obj: Object<'data, 'data>>( 14 | obj: &'data Obj, 15 | ) -> AddressMap> { 16 | let mut syms = AddressMap::default(); 17 | for sym in obj.symbols() { 18 | match sym.name() { 19 | Ok(name) => syms.push(Addressed { 20 | addr: sym.address() as usize, 21 | item: RawSymbol { name, module: None }, 22 | }), 23 | Err(err) => { 24 | log::complex!( 25 | w "[parse_symbol_table] ", 26 | y err.to_string(), 27 | y "." 28 | ); 29 | continue; 30 | } 31 | } 32 | } 33 | syms 34 | } 35 | 36 | fn parse_section_generics<'data, Obj: ObjectSection<'data>>( 37 | section: &'data Obj, 38 | ) -> (String, &'static [u8], usize, usize) { 39 | let name = match section.name() { 40 | Ok(name) => name, 41 | Err(_) => { 42 | log::complex!( 43 | w "[macho::parse_sections] ", 44 | y "Failed to read name.", 45 | ); 46 | "unknown" 47 | } 48 | }; 49 | 50 | let bytes: &'static [u8] = match section.data() { 51 | // The file is memory mapped so only the bytes are of lifetime &'static [u8]. 52 | Ok(data) => unsafe { std::mem::transmute(data) }, 53 | Err(..) => { 54 | log::complex!( 55 | w "[macho::parse_sections] ", 56 | y "Failed to read section ", 57 | b name, 58 | y "." 59 | ); 60 | &[] 61 | } 62 | }; 63 | 64 | let start = section.address() as usize; 65 | let end = start + section.size() as usize; 66 | 67 | (name.to_string(), bytes, start, end) 68 | } 69 | 70 | pub struct Datastructure { 71 | pub ident: &'static str, 72 | pub fields: Vec<(usize, &'static str, &'static str, String)>, 73 | } 74 | 75 | pub trait ToData { 76 | fn to_fields(&self, addr: usize) -> Datastructure; 77 | } 78 | 79 | // FIXME: This assumes little endianness. 80 | #[macro_export] 81 | macro_rules! datastructure { 82 | ( 83 | pub struct $name:ident { 84 | $($field:ident: $ftype:ty,)* 85 | } 86 | ) => { 87 | // Apply attributes to the struct 88 | #[repr(C)] 89 | #[derive(Copy, Clone, Debug)] 90 | pub struct $name { 91 | pub $($field: $ftype),* 92 | } 93 | 94 | impl $crate::ToData for $name { 95 | fn to_fields(&self, mut addr: usize) -> $crate::Datastructure { 96 | let mut fields = Vec::new(); 97 | $( 98 | fields.push(( 99 | addr, 100 | stringify!($field), 101 | stringify!($ftype), 102 | format!("{:#x}", self.$field) 103 | )); 104 | #[allow(unused_assignments)] 105 | { addr += ::std::mem::size_of::<$ftype>(); } 106 | )* 107 | $crate::Datastructure { 108 | ident: stringify!($name), 109 | fields, 110 | } 111 | } 112 | } 113 | 114 | unsafe impl object::Pod for $name {} 115 | }; 116 | } 117 | -------------------------------------------------------------------------------- /binformat/src/pe.rs: -------------------------------------------------------------------------------- 1 | use crate::{datastructure, RawSymbol}; 2 | use processor_shared::{AddressMap, Addressed, Section, SectionKind}; 3 | use object::pe; 4 | use object::read::pe::{ImageNtHeaders, ImageThunkData, PeFile}; 5 | use object::LittleEndian as LE; 6 | use object::Object; 7 | use std::mem::size_of; 8 | 9 | datastructure! { 10 | pub struct ExceptionDirectoryEntry { 11 | begin_addr: u32, 12 | end_addr: u32, 13 | unwind_info: u32, 14 | } 15 | } 16 | 17 | pub struct PeDebugInfo<'data, Pe: ImageNtHeaders> { 18 | /// Parsed PE32/64 header. 19 | obj: &'data PeFile<'data, Pe>, 20 | /// Parsed sections with extra metadata. 21 | pub sections: Vec
, 22 | /// Any parsed but not yet relocated symbols. 23 | pub syms: AddressMap>, 24 | } 25 | 26 | impl<'data, Pe: ImageNtHeaders> PeDebugInfo<'data, Pe> { 27 | pub fn parse(obj: &'data PeFile<'data, Pe>) -> Result { 28 | let mut this = Self { 29 | obj, 30 | syms: AddressMap::default(), 31 | sections: Vec::new(), 32 | }; 33 | this.sections = parse_sections(obj); 34 | this.parse_symbols(); 35 | this.parse_imports()?; 36 | Ok(this) 37 | } 38 | 39 | pub fn parse_imports(&mut self) -> Result<(), object::Error> { 40 | let import_table = match self.obj.import_table()? { 41 | Some(table) => table, 42 | None => return Ok(()), 43 | }; 44 | 45 | let mut import_descs = import_table.descriptors()?; 46 | while let Some(import_desc) = import_descs.next()? { 47 | let module = import_table.name(import_desc.name.get(LE))?; 48 | let first_thunk = import_desc.first_thunk.get(LE); 49 | let original_first_thunk = import_desc.original_first_thunk.get(LE); 50 | 51 | let thunk = if first_thunk == 0 { 52 | original_first_thunk 53 | } else { 54 | first_thunk 55 | }; 56 | 57 | let mut import_addr_table = import_table.thunks(thunk)?; 58 | let mut func_rva = first_thunk; 59 | while let Some(func) = import_addr_table.next::()? { 60 | if !func.is_ordinal() { 61 | let (hint, name) = match import_table.hint_name(func.address()) { 62 | Ok(val) => val, 63 | Err(..) => { 64 | // skip over an entry 65 | func_rva += size_of::() as u32; 66 | continue; 67 | } 68 | }; 69 | 70 | let name = match std::str::from_utf8(name) { 71 | Ok(name) => name, 72 | Err(..) => { 73 | // skip over an entry 74 | func_rva += size_of::() as u32; 75 | continue; 76 | } 77 | }; 78 | 79 | // `original_first_thunk` uses a `hint` into the export 80 | // table whilst iterating thourhg regular `thunk`'s is 81 | // a simple offset into the symbol export table 82 | let addr = if thunk == original_first_thunk { 83 | hint as u64 + self.obj.relative_address_base() 84 | } else { 85 | func_rva as u64 + self.obj.relative_address_base() 86 | }; 87 | 88 | let module = 89 | std::str::from_utf8(module).ok().and_then(|x| x.strip_suffix(".dll")); 90 | self.syms.push(Addressed { 91 | addr: addr as usize, 92 | item: RawSymbol { name, module }, 93 | }); 94 | } 95 | 96 | // skip over an entry 97 | func_rva += size_of::() as u32; 98 | } 99 | } 100 | 101 | Ok(()) 102 | } 103 | 104 | pub fn parse_symbols(&mut self) { 105 | self.syms.extend(crate::parse_symbol_table(self.obj)); 106 | self.syms.push(Addressed { 107 | addr: self.obj.entry() as usize, 108 | item: RawSymbol { 109 | name: "entry", 110 | module: None, 111 | }, 112 | }); 113 | } 114 | } 115 | 116 | /// Common ELF dwarf section names I've found so far. 117 | const DWARF_SECTIONS: [&str; 20] = [ 118 | ".debug_abbrev", 119 | ".debug_addr", 120 | ".debug_aranges", 121 | ".debug_cu_index", 122 | ".debug_frame", 123 | ".debug_info", 124 | ".debug_line", 125 | ".debug_line_str", 126 | ".debug_loc", 127 | ".debug_loclists", 128 | ".debug_macinfo", 129 | ".debug_macro", 130 | ".debug_pubnames", 131 | ".debug_pubtypes", 132 | ".debug_ranges", 133 | ".debug_rnglists", 134 | ".debug_str", 135 | ".debug_str_offsets", 136 | ".debug_tu_index", 137 | ".debug_types", 138 | ]; 139 | 140 | fn parse_sections<'data, Pe: ImageNtHeaders>(obj: &'data PeFile<'data, Pe>) -> Vec
{ 141 | let mut sections = Vec::new(); 142 | 143 | // Re-parsing all this data isn't amazing, all this just for getting the section headers. 144 | let data = obj.data(); 145 | let dos_header = object::pe::ImageDosHeader::parse(data).unwrap(); 146 | let mut offset = dos_header.nt_headers_offset().into(); 147 | let (nt_headers, _) = Pe::parse(data, &mut offset).unwrap(); 148 | let section_headers = nt_headers.sections(data, offset).unwrap(); 149 | 150 | for (header, section) in section_headers.iter().zip(obj.sections()) { 151 | let (name, bytes, start, end) = crate::parse_section_generics(§ion); 152 | 153 | let characteristics = header.characteristics.get(LE); 154 | let (mut kind, ident) = (SectionKind::Raw, "UNKNOWN"); 155 | 156 | // Section contains code. 157 | if characteristics & pe::IMAGE_SCN_CNT_CODE != 0 { 158 | kind = SectionKind::Code; 159 | } 160 | 161 | // ExceptionDirectoryEntry's. 162 | if name == ".pdata" { 163 | kind = SectionKind::ExceptionDirEntry; 164 | } 165 | 166 | // Section contains DWARF debug info. 167 | if DWARF_SECTIONS.contains(&name.as_str()) { 168 | kind = SectionKind::Debug; 169 | } 170 | 171 | sections.push(Section::new( 172 | name, 173 | ident, 174 | kind, 175 | bytes, 176 | start, 177 | end 178 | )); 179 | } 180 | 181 | sections 182 | } 183 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | #[cfg(target_family = "windows")] 3 | winres::WindowsResource::new() 4 | .set_icon("./assets/iconx256.ico") 5 | .compile() 6 | .unwrap(); 7 | } 8 | -------------------------------------------------------------------------------- /bundle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Creates macos bundle in ./target/bite.app. 3 | 4 | set -e 5 | mkdir -p ./target/bite.app/Contents/MacOS 6 | mkdir -p ./target/bite.app/Contents/Resources 7 | cp ./target/release/bite ./target/bite.app/Contents/MacOS/bite 8 | cp ./assets/iconx256.icns ./target/bite.app/Contents/Resources/shortcut.icns 9 | cat > ./target/bite.app/Contents/Info.plist < 11 | 12 | 13 | 14 | CFBundleExecutable 15 | bite 16 | CFBundleIconFile 17 | shortcut.icns 18 | CFBundleInfoDictionaryVersion 19 | 1.0 20 | CFBundlePackageType 21 | APPL 22 | CFBundleSignature 23 | ???? 24 | CFBundleVersion 25 | 1.0 26 | 27 | 28 | EOF 29 | -------------------------------------------------------------------------------- /commands/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "commands" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | triple_accel = "0.4" 8 | debugvault = { path = "../debugvault" } 9 | log = { path = "../log" } 10 | dirs = { workspace = true } 11 | once_cell = { workspace = true } 12 | egui = { workspace = true } 13 | -------------------------------------------------------------------------------- /commands/src/cli.rs: -------------------------------------------------------------------------------- 1 | use std::path::{Path, PathBuf}; 2 | 3 | macro_rules! exit { 4 | ($code:expr => $($arg:tt)*) => {{ 5 | eprintln!($($arg)*); 6 | std::process::exit($code); 7 | }}; 8 | } 9 | 10 | const HELP: &str = "OVERVIEW: Debugger/Decompilation tool 11 | 12 | USAGE: bite [options] 13 | 14 | OPTIONS: 15 | -H, --help Print usage information 16 | -D, --disassemble Path to object you're disassembling 17 | -C, --config Path to config used for disassembling 18 | -B, --debug Enable verbose internal info"; 19 | 20 | const ABBRV: &[&str] = &["-H", "-D", "-C", "-B"]; 21 | const NAMES: &[&str] = &[ 22 | "--help", 23 | "--disassemble", 24 | "--config", 25 | "--debug", 26 | ]; 27 | 28 | #[derive(Default, Debug, Clone)] 29 | pub struct Cli { 30 | /// Path to symbol being disassembled. 31 | pub path: PathBuf, 32 | 33 | /// Optional path to config. 34 | pub config: Option, 35 | 36 | /// Show egui debug overlay. 37 | pub debug: bool, 38 | } 39 | 40 | impl Cli { 41 | pub fn parse() -> Self { 42 | let mut cli = Cli::default(); 43 | let mut args = std::env::args().skip(1).peekable(); 44 | 45 | while let Some(arg) = args.next() { 46 | match arg.as_str() { 47 | "-H" | "--help" => exit!(0 => "{HELP}"), 48 | "-D" | "--disassemble" => { 49 | if let Some(path) = args.next().as_deref() { 50 | if !NAMES.contains(&path) && !ABBRV.contains(&path) { 51 | if cli.path != Path::new("") { 52 | exit!(1 => "Path to object already given."); 53 | } 54 | cli.path = PathBuf::from(path); 55 | } 56 | } 57 | }, 58 | "-C" | "--config" => { 59 | if let Some(path) = args.next().as_deref() { 60 | if !NAMES.contains(&path) && !ABBRV.contains(&path) { 61 | if cli.config.is_some() { 62 | exit!(1 => "Path to config already given."); 63 | } 64 | cli.config = Some(PathBuf::from(path)); 65 | } 66 | } 67 | }, 68 | "-B" | "--debug" => { 69 | if cli.debug { 70 | exit!(1 => "Debug flag already set."); 71 | } 72 | cli.debug = true 73 | } 74 | unknown => { 75 | let mut distance = u32::MAX; 76 | let mut best_guess = ""; 77 | for name in NAMES { 78 | let d = triple_accel::levenshtein_exp(unknown.as_bytes(), name.as_bytes()); 79 | if d < distance { 80 | distance = d; 81 | best_guess = name; 82 | } 83 | } 84 | 85 | // A guess that's less than 3 `steps` away from a correct arg. 86 | if distance < 4 { 87 | exit!(1 => "Unknown cmd arg '{unknown}' did you mean '{best_guess}'?") 88 | } else { 89 | exit!(1 => "Unknown cmd arg '{unknown}' was entered."); 90 | } 91 | } 92 | } 93 | } 94 | 95 | cli.validate_args(); 96 | cli 97 | } 98 | 99 | fn validate_args(&mut self) { 100 | if self.path == Path::new("") { 101 | // exit!(1 => "You must provide a path to disassemble."); 102 | return; 103 | } 104 | 105 | if !self.path.exists() { 106 | exit!(1 => "Object {:?} does not exist.", self.path); 107 | } 108 | 109 | if let Some(ref cfg) = self.config { 110 | if !cfg.exists() { 111 | exit!(1 => "Config {cfg:?} does not exist."); 112 | } 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /commands/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod cli; 2 | mod debug; 3 | mod gui; 4 | 5 | pub use cli::Cli; 6 | pub use gui::{Command, Error as CommandError, HELP as CMD_HELP}; 7 | use once_cell::sync::Lazy; 8 | 9 | pub static ARGS: Lazy = Lazy::new(cli::Cli::parse); 10 | -------------------------------------------------------------------------------- /config/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "config" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | once_cell = { workspace = true } 8 | egui = { workspace = true } 9 | dirs = { workspace = true } 10 | log = { path = "../log" } 11 | serde = { version = "1.0", features = ["derive"] } 12 | serde_yaml = "0.9" 13 | -------------------------------------------------------------------------------- /debugvault/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "debugvault" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | log = { path = "../log" } 8 | binformat = { path = "../binformat" } 9 | config = { path = "../config" } 10 | tokenizing = { path = "../tokenizing" } 11 | processor_shared = { path = "../processor_shared" } 12 | bitflags = "2" 13 | gimli = { workspace = true } 14 | pdb = { workspace = true } 15 | object = { workspace = true } 16 | crossbeam-queue = { workspace = true } 17 | memmap2 = { workspace = true } 18 | dashmap = "5.5" 19 | rustc-hash = "1.1" 20 | typed-arena = "2.0.2" 21 | -------------------------------------------------------------------------------- /debugvault/bin/dsymutil_aarch64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WINSDK/bite/20b4f68252c7f6c36c831ea925c78fd7819f5c17/debugvault/bin/dsymutil_aarch64 -------------------------------------------------------------------------------- /debugvault/bin/dsymutil_x86_64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WINSDK/bite/20b4f68252c7f6c36c831ea925c78fd7819f5c17/debugvault/bin/dsymutil_x86_64 -------------------------------------------------------------------------------- /debugvault/src/demangler.rs: -------------------------------------------------------------------------------- 1 | //! Symbol demangler for common mangling schemes. 2 | 3 | use tokenizing::{Token, Color32}; 4 | use config::CONFIG; 5 | 6 | pub fn parse(s: &str) -> TokenStream { 7 | // symbols without leading underscores are accepted as 8 | // dbghelp in windows strips them away 9 | 10 | let s = s.strip_suffix("$got").unwrap_or(s); 11 | let s = s.strip_suffix("$plt").unwrap_or(s); 12 | let s = s.strip_suffix("$pltgot").unwrap_or(s); 13 | 14 | // parse rust symbols 15 | if let Some(s) = crate::rust_legacy::parse(s) { 16 | return s; 17 | } 18 | 19 | // parse gnu/llvm/C/C++ symbols 20 | if let Some(s) = crate::itanium::parse(s) { 21 | return s; 22 | } 23 | 24 | // parse rust symbols that match the v0 mangling scheme 25 | if let Some(s) = crate::rust::parse(s) { 26 | return s; 27 | } 28 | 29 | // parse windows msvc C/C++ symbols 30 | if let Some(s) = crate::msvc::parse(s) { 31 | return s; 32 | } 33 | 34 | // return the original mangled symbol on failure 35 | TokenStream::simple(s) 36 | } 37 | 38 | #[derive(Debug)] 39 | pub struct TokenStream { 40 | /// Unmovable string which the [Token]'s have a pointer to. 41 | inner: std::pin::Pin, 42 | 43 | /// Internal token representation which is unsafe to access outside of calling [Self::tokens]. 44 | tokens: Vec, 45 | } 46 | 47 | impl TokenStream { 48 | pub fn new(s: &str) -> Self { 49 | Self { 50 | inner: std::pin::Pin::new(s.to_string()), 51 | tokens: Vec::new(), 52 | } 53 | } 54 | 55 | pub fn simple(s: &str) -> Self { 56 | let mut this = Self { 57 | inner: std::pin::Pin::new(s.to_string()), 58 | tokens: Vec::with_capacity(1), 59 | }; 60 | 61 | this.tokens.push(Token::from_string(s.to_string(), CONFIG.colors.asm.component)); 62 | this 63 | } 64 | 65 | /// SAFETY: must downcast &'static str to a lifetime that matches the lifetime of self. 66 | #[inline] 67 | pub fn inner<'a>(&self) -> &'a str { 68 | unsafe { std::mem::transmute(self.inner.as_ref()) } 69 | } 70 | 71 | #[inline] 72 | pub fn push(&mut self, text: &'static str, color: Color32) { 73 | self.tokens.push(Token::from_str(text, color)); 74 | } 75 | 76 | #[inline] 77 | pub fn push_string(&mut self, text: String, color: Color32) { 78 | self.tokens.push(Token::from_string(text, color)); 79 | } 80 | 81 | #[inline] 82 | pub fn tokens(&self) -> &[Token] { 83 | self.tokens.as_slice() 84 | } 85 | } 86 | 87 | impl PartialEq for TokenStream { 88 | fn eq(&self, other: &Self) -> bool { 89 | self.inner == other.inner 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /debugvault/src/error.rs: -------------------------------------------------------------------------------- 1 | use super::Error; 2 | use std::fmt; 3 | 4 | impl fmt::Display for Error { 5 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 6 | match self { 7 | Self::Object(err) => { 8 | f.write_fmt(format_args!("Failed to parse object (symbols): '{err}'.")) 9 | } 10 | Self::Dwarf(err) => f.write_fmt(format_args!("Failed to parse dwarf info: '{err:?}'.")), 11 | Self::Pdb(err) => f.write_fmt(format_args!("Failed to parse pdb info: '{err}'.")), 12 | Self::Imports(err) => f.write_fmt(format_args!("Failed to parse imports: '{err}'.")), 13 | } 14 | } 15 | } 16 | 17 | impl From for Error { 18 | fn from(error: object::Error) -> Self { 19 | Error::Object(error) 20 | } 21 | } 22 | 23 | impl From for Error { 24 | fn from(error: pdb::Error) -> Self { 25 | Error::Pdb(error) 26 | } 27 | } 28 | 29 | impl From for Error { 30 | fn from(error: crate::dwarf::Error) -> Self { 31 | Error::Dwarf(error) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /debugvault/src/intern.rs: -------------------------------------------------------------------------------- 1 | use std::hash::{BuildHasherDefault, Hash}; 2 | use std::sync::Arc; 3 | use dashmap::DashMap; 4 | use rustc_hash::FxHasher; 5 | 6 | pub struct InternMap { 7 | map: DashMap, BuildHasherDefault>, 8 | } 9 | 10 | impl InternMap { 11 | pub fn new() -> Self { 12 | Self { 13 | map: DashMap::with_hasher(BuildHasherDefault::default()), 14 | } 15 | } 16 | 17 | pub fn add(&self, key: K, value: &V) -> Arc 18 | where 19 | for<'a> &'a V: Into>, 20 | { 21 | let value = value.into(); 22 | self.map.insert(key, Arc::clone(&value)); 23 | value 24 | } 25 | 26 | pub fn get(&self, key: &K) -> Option> { 27 | self.map.get(key).map(|v| v.clone()) 28 | } 29 | 30 | pub fn len(&self) -> usize { 31 | self.map.len() 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /debugvault/src/itanium/error.rs: -------------------------------------------------------------------------------- 1 | //! Custom `Error` and `Result` types for the `cpp_demangle` crate. 2 | 3 | use core::fmt; 4 | 5 | /// Errors that can occur while demangling a symbol. 6 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 7 | pub enum Error { 8 | /// The mangled symbol ends abruptly. 9 | UnexpectedEnd, 10 | 11 | /// The mangled symbol is not well-formed. 12 | UnexpectedText, 13 | 14 | /// Found a back reference that is out-of-bounds of the substitution 15 | /// table. 16 | BadBackReference, 17 | 18 | /// Found a reference to a template arg that is either out-of-bounds, or in 19 | /// a context without template args. 20 | BadTemplateArgReference, 21 | 22 | /// Found a reference to a template arg from within the arg itself (or from 23 | /// within an earlier arg). 24 | ForwardTemplateArgReference, 25 | 26 | /// Found a reference to a leaf name in a context where there is no current 27 | /// leaf name. 28 | BadLeafNameReference, 29 | 30 | /// An overflow or underflow would occur when parsing an integer in a 31 | /// mangled symbol. 32 | Overflow, 33 | 34 | /// Encountered too much recursion when demangling symbol. 35 | TooMuchRecursion, 36 | } 37 | 38 | #[test] 39 | fn size_of_error() { 40 | assert_eq!( 41 | core::mem::size_of::(), 42 | 1, 43 | "We should keep the size of our Error type in check" 44 | ); 45 | } 46 | 47 | impl fmt::Display for Error { 48 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 49 | match *self { 50 | Error::UnexpectedEnd => write!(f, "mangled symbol ends abruptly"), 51 | Error::UnexpectedText => write!(f, "mangled symbol is not well-formed"), 52 | Error::BadBackReference => { 53 | write!( 54 | f, 55 | "back reference that is out-of-bounds of the substitution table" 56 | ) 57 | } 58 | Error::BadTemplateArgReference => write!( 59 | f, 60 | "reference to a template arg that is either out-of-bounds, or in a context \ 61 | without template args" 62 | ), 63 | Error::ForwardTemplateArgReference => { 64 | write!( 65 | f, 66 | "reference to a template arg from itself or a later template arg" 67 | ) 68 | } 69 | Error::BadLeafNameReference => write!( 70 | f, 71 | "reference to a leaf name in a context where there is no current leaf name" 72 | ), 73 | Error::Overflow => write!( 74 | f, 75 | "an overflow or underflow would occur when parsing an integer in a mangled \ 76 | symbol" 77 | ), 78 | Error::TooMuchRecursion => { 79 | write!(f, "encountered too much recursion when demangling symbol") 80 | } 81 | } 82 | } 83 | } 84 | 85 | impl std::error::Error for Error { 86 | fn description(&self) -> &str { 87 | match *self { 88 | Error::UnexpectedEnd => "mangled symbol ends abruptly", 89 | Error::UnexpectedText => "mangled symbol is not well-formed", 90 | Error::BadBackReference => { 91 | "back reference that is out-of-bounds of the substitution table" 92 | } 93 | Error::BadTemplateArgReference => { 94 | "reference to a template arg that is either out-of-bounds, or in a context \ 95 | without template args" 96 | } 97 | Error::ForwardTemplateArgReference => { 98 | "reference to a template arg from itself or a later template arg" 99 | } 100 | Error::BadLeafNameReference => { 101 | "reference to a leaf name in a context where there is no current leaf name" 102 | } 103 | Error::Overflow => { 104 | "an overflow or underflow would occur when parsing an integer in a mangled symbol" 105 | } 106 | Error::TooMuchRecursion => "encountered too much recursion when demangling symbol", 107 | } 108 | } 109 | } 110 | 111 | /// A demangling result of `T` or a `cpp_demangle::error::Error`. 112 | pub type Result = ::core::result::Result; 113 | -------------------------------------------------------------------------------- /debugvault/src/itanium/index_str.rs: -------------------------------------------------------------------------------- 1 | //! Provides the `IndexStr` type to keep track of a substring's index into its 2 | //! original string is. 3 | 4 | use std::fmt; 5 | use std::ops::{RangeFrom, RangeTo}; 6 | 7 | /// The `IndexStr` type allows us to take substrings from an original input and 8 | /// keep track of what index the substring is at in the original input. 9 | #[derive(Clone, Copy, PartialEq, Eq)] 10 | pub struct IndexStr<'a> { 11 | idx: usize, 12 | string: &'a [u8], 13 | } 14 | 15 | impl<'a> IndexStr<'a> { 16 | /// Construct a new `IndexStr` (with `index == 0`) from the given input. 17 | #[inline] 18 | pub fn new(string: &'a [u8]) -> IndexStr<'a> { 19 | IndexStr { idx: 0, string } 20 | } 21 | 22 | /// Return the length of the string. 23 | #[inline] 24 | pub fn len(&self) -> usize { 25 | self.string.len() 26 | } 27 | 28 | /// Return true if the string is empty, false otherwise. 29 | #[inline] 30 | pub fn is_empty(&self) -> bool { 31 | self.string.is_empty() 32 | } 33 | 34 | /// Get the index into the original input that this `IndexStr` is at. 35 | #[inline] 36 | pub fn index(&self) -> usize { 37 | self.idx 38 | } 39 | 40 | /// Peek at the next byte in this `IndexStr`. 41 | #[inline] 42 | pub fn peek(&self) -> Option { 43 | self.as_ref().first().copied() 44 | } 45 | 46 | /// Peek at the second next byte in this `IndexStr`. 47 | #[inline] 48 | pub fn peek_second(&self) -> Option { 49 | self.as_ref().first().copied() 50 | } 51 | 52 | /// Split the string in two at the given index, resulting in the tuple where 53 | /// the first item has range `[0, idx)`, and the second has range `[idx, 54 | /// len)`. 55 | /// 56 | /// Panics if the index is out of bounds. 57 | #[inline] 58 | pub fn split_at(&self, idx: usize) -> (IndexStr<'a>, IndexStr<'a>) { 59 | (self.range_to(..idx), self.range_from(idx..)) 60 | } 61 | 62 | /// The same as `split_at`, but returns a `Result` rather than panicking 63 | /// when the index is out of bounds. 64 | #[inline] 65 | pub fn try_split_at(&self, idx: usize) -> Option<(IndexStr<'a>, IndexStr<'a>)> { 66 | if idx > self.len() { 67 | None 68 | } else { 69 | Some(self.split_at(idx)) 70 | } 71 | } 72 | 73 | /// Pop the next byte off the front of this string, returning it and the new 74 | /// tail string, or `None` if this string is empty. 75 | #[inline] 76 | pub fn next(&self) -> Option<(u8, IndexStr<'a>)> { 77 | if self.is_empty() { 78 | None 79 | } else { 80 | let byte = self.string[0]; 81 | Some((byte, self.range_from(1..))) 82 | } 83 | } 84 | 85 | /// Pop the next byte off the front of this string, returning it and the new 86 | /// tail string, or the given error if this string is empty. 87 | #[inline] 88 | pub fn next_or(&self, error: E) -> Result<(u8, IndexStr<'a>), E> { 89 | self.next().ok_or(error) 90 | } 91 | } 92 | 93 | /// # Range Methods 94 | /// 95 | /// Unfortunately, `std::ops::Index` *must* return a reference, so we can't 96 | /// implement `Index>` to return a new `IndexStr` the way we would 97 | /// like to. Instead, we abandon fancy indexing operators and have these plain 98 | /// old methods. 99 | /// 100 | /// All of these methods panic on an out-of-bounds index. 101 | impl<'a> IndexStr<'a> { 102 | /// Take the given `start..` range of the underlying string and return a new 103 | /// `IndexStr`. 104 | #[inline] 105 | pub fn range_from(&self, idx: RangeFrom) -> IndexStr<'a> { 106 | IndexStr { 107 | idx: self.idx + idx.start, 108 | string: &self.string[idx], 109 | } 110 | } 111 | 112 | /// Take the given `..end` range of the underlying string and return a new 113 | /// `IndexStr`. 114 | #[inline] 115 | pub fn range_to(&self, idx: RangeTo) -> IndexStr<'a> { 116 | IndexStr { 117 | idx: self.idx, 118 | string: &self.string[idx], 119 | } 120 | } 121 | } 122 | 123 | impl<'a> AsRef<[u8]> for IndexStr<'a> { 124 | #[inline] 125 | fn as_ref(&self) -> &[u8] { 126 | self.string 127 | } 128 | } 129 | 130 | impl<'a> From<&'a [u8]> for IndexStr<'a> { 131 | fn from(s: &[u8]) -> IndexStr { 132 | IndexStr::new(s) 133 | } 134 | } 135 | 136 | impl<'a> From> for &'a [u8] { 137 | fn from(val: IndexStr<'a>) -> &'a [u8] { 138 | val.string 139 | } 140 | } 141 | 142 | impl<'a, 'b> PartialEq<&'a [u8]> for IndexStr<'b> { 143 | fn eq(&self, rhs: &&[u8]) -> bool { 144 | self.string == *rhs 145 | } 146 | } 147 | 148 | impl<'a> fmt::Debug for IndexStr<'a> { 149 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 150 | write!( 151 | f, 152 | "IndexStr {{ idx: {}, string: \"{}\" }}", 153 | self.idx, 154 | String::from_utf8_lossy(self.as_ref()) 155 | ) 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /debugvault/src/itanium/mod.rs: -------------------------------------------------------------------------------- 1 | //! This crate can parse a C++ “mangled” linker symbol name into a Rust value 2 | //! describing what the name refers to: a variable, a function, a virtual table, 3 | //! etc. The description type implements `Display`, producing human-readable 4 | //! text describing the mangled name. Debuggers and profilers can use this crate 5 | //! to provide more meaningful output. 6 | //! 7 | //! C++ requires the compiler to choose names for linker symbols consistently 8 | //! across compilation units, so that two compilation units that have seen the 9 | //! same declarations can pair up definitions in one unit with references in 10 | //! another. Almost all platforms other than Microsoft Windows follow the 11 | //! [Itanium C++ ABI][itanium]'s rules for this. 12 | //! 13 | //! [itanium]: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling 14 | //! 15 | //! For example, suppose a C++ compilation unit has the definition: 16 | //! 17 | //! ```c++ 18 | //! namespace space { 19 | //! int foo(int x, int y) { return x+y; } 20 | //! } 21 | //! ``` 22 | //! 23 | //! The Itanium C++ ABI specifies that the linker symbol for that function must 24 | //! be named `_ZN5space3fooEii`. This crate can parse that name into a Rust 25 | //! value representing its structure. Formatting the value with the `format!` 26 | //! macro or the `std::string::ToString::to_string` trait method yields the 27 | //! string `space::foo(int, int)`, which is more meaningful to the C++ 28 | //! developer. 29 | #![allow(rustdoc::invalid_html_tags, rustdoc::broken_intra_doc_links)] 30 | 31 | mod ast; 32 | mod error; 33 | mod index_str; 34 | mod subs; 35 | mod tests; 36 | 37 | use crate::TokenStream; 38 | use ast::{Demangle, Parse, ParseContext}; 39 | use error::{Error, Result}; 40 | use index_str::IndexStr; 41 | 42 | pub fn parse(s: &str) -> Option { 43 | let sym = Symbol::new(s).ok()?; 44 | Some(sym.demangle()) 45 | } 46 | 47 | /// A mangled symbol that has been parsed into an AST. 48 | /// 49 | /// This is generic over some storage type `T` which can be either owned or 50 | /// borrowed. See the `OwnedSymbol` and `BorrowedSymbol` type aliases. 51 | #[derive(Clone, Debug, PartialEq)] 52 | struct Symbol<'a> { 53 | raw: &'a str, 54 | substitutions: subs::SubstitutionTable, 55 | parsed: ast::MangledName, 56 | } 57 | 58 | impl Symbol<'_> { 59 | /// Given some raw storage, parse the mangled symbol from it with the default 60 | /// options. 61 | #[inline] 62 | fn new(raw: &str) -> Result { 63 | let mut substitutions = subs::SubstitutionTable::new(); 64 | 65 | let parsed = { 66 | let ctx = ParseContext::new(); 67 | let input = IndexStr::new(raw.as_bytes()); 68 | 69 | let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, input)?; 70 | 71 | if tail.is_empty() { 72 | parsed 73 | } else { 74 | return Err(Error::UnexpectedText); 75 | } 76 | }; 77 | 78 | Ok(Symbol { 79 | raw, 80 | substitutions, 81 | parsed, 82 | }) 83 | } 84 | 85 | /// Demangle the symbol and return it as a String. 86 | /// 87 | /// Unlike the `ToString` implementation, this function allows options to 88 | /// be specified. 89 | #[inline] 90 | fn demangle(&self) -> TokenStream { 91 | let mut ctx = ast::DemangleContext::new(&self.substitutions, self.raw); 92 | self.parsed.demangle(&mut ctx, None); 93 | ctx.stream 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /debugvault/src/itanium/subs.rs: -------------------------------------------------------------------------------- 1 | //! Types dealing with the substitutions table. 2 | 3 | use super::ast; 4 | use core::fmt; 5 | use core::iter::FromIterator; 6 | use core::ops::Deref; 7 | 8 | /// An enumeration of all of the types that can end up in the substitution 9 | /// table. 10 | #[doc(hidden)] 11 | #[derive(Clone, Debug, PartialEq, Eq)] 12 | #[allow(clippy::large_enum_variant)] 13 | pub(crate) enum Substitutable { 14 | /// An `` production. 15 | UnscopedTemplateName(ast::UnscopedTemplateName), 16 | 17 | /// A `` production. 18 | Type(ast::Type), 19 | 20 | /// A `` production. 21 | TemplateTemplateParam(ast::TemplateTemplateParam), 22 | 23 | /// An `` production. 24 | UnresolvedType(ast::UnresolvedType), 25 | 26 | /// A `` production. 27 | Prefix(ast::Prefix), 28 | } 29 | 30 | impl<'subs> ast::Demangle<'subs> for Substitutable { 31 | fn demangle<'prev, 'ctx>( 32 | &'subs self, 33 | ctx: &'ctx mut ast::DemangleContext<'subs>, 34 | scope: Option>, 35 | ) { 36 | match *self { 37 | Substitutable::UnscopedTemplateName(ref name) => name.demangle(ctx, scope), 38 | Substitutable::Type(ref ty) => ty.demangle(ctx, scope), 39 | Substitutable::TemplateTemplateParam(ref ttp) => ttp.demangle(ctx, scope), 40 | Substitutable::UnresolvedType(ref ty) => ty.demangle(ctx, scope), 41 | Substitutable::Prefix(ref prefix) => prefix.demangle(ctx, scope), 42 | } 43 | } 44 | } 45 | 46 | impl<'a> ast::GetLeafName<'a> for Substitutable { 47 | fn get_leaf_name(&'a self, subs: &'a SubstitutionTable) -> Option> { 48 | match *self { 49 | Substitutable::UnscopedTemplateName(ref name) => name.get_leaf_name(subs), 50 | Substitutable::Prefix(ref prefix) => prefix.get_leaf_name(subs), 51 | Substitutable::Type(ref ty) => ty.get_leaf_name(subs), 52 | _ => None, 53 | } 54 | } 55 | } 56 | 57 | impl ast::IsCtorDtorConversion for Substitutable { 58 | fn is_ctor_dtor_conversion(&self, subs: &SubstitutionTable) -> bool { 59 | match *self { 60 | Substitutable::Prefix(ref prefix) => prefix.is_ctor_dtor_conversion(subs), 61 | _ => false, 62 | } 63 | } 64 | } 65 | 66 | /// The table of substitutable components that we have parsed thus far, and for 67 | /// which there are potential back-references. 68 | #[doc(hidden)] 69 | #[derive(Clone, Default, PartialEq, Eq)] 70 | pub(crate) struct SubstitutionTable { 71 | substitutions: Vec, 72 | // There are components which are typically candidates for substitution, but 73 | // in some particular circumstances are not. Instances of such components 74 | // which are not candidates for substitution end up in this part of the 75 | // table. See `` parsing for further details. 76 | non_substitutions: Vec, 77 | } 78 | 79 | impl fmt::Debug for SubstitutionTable { 80 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 81 | f.pad("SubstitutionTable ")?; 82 | f.debug_map().entries(self.substitutions.iter().enumerate()).finish()?; 83 | f.pad("non_substitutions ")?; 84 | f.debug_map().entries(self.non_substitutions.iter().enumerate()).finish() 85 | } 86 | } 87 | 88 | impl SubstitutionTable { 89 | /// Construct a new `SubstitutionTable`. 90 | pub(crate) fn new() -> SubstitutionTable { 91 | Default::default() 92 | } 93 | 94 | /// Insert a freshly-parsed substitutable component into the table and 95 | /// return the index at which it now lives. 96 | pub(crate) fn insert(&mut self, entity: Substitutable) -> usize { 97 | let idx = self.substitutions.len(); 98 | self.substitutions.push(entity); 99 | idx 100 | } 101 | 102 | /// Insert a an entity into the table that is not a candidate for 103 | /// substitution. 104 | pub(crate) fn insert_non_substitution(&mut self, entity: Substitutable) -> usize { 105 | let idx = self.non_substitutions.len(); 106 | self.non_substitutions.push(entity); 107 | idx 108 | } 109 | 110 | /// Does this substitution table contain a component at the given index? 111 | pub(crate) fn contains(&self, idx: usize) -> bool { 112 | idx < self.substitutions.len() 113 | } 114 | 115 | /// Get the type referenced by the given handle, or None if there is no such 116 | /// entry, or there is an entry that is not a type. 117 | pub(crate) fn get_type(&self, handle: &ast::TypeHandle) -> Option<&ast::Type> { 118 | if let ast::TypeHandle::BackReference(idx) = *handle { 119 | self.substitutions.get(idx).and_then(|s| match *s { 120 | Substitutable::Type(ref ty) => Some(ty), 121 | _ => None, 122 | }) 123 | } else { 124 | None 125 | } 126 | } 127 | 128 | /// Get the `idx`th entity that is not a candidate for substitution. Panics 129 | /// if `idx` is out of bounds. 130 | pub(crate) fn non_substitution(&self, idx: usize) -> &Substitutable { 131 | &self.non_substitutions[idx] 132 | } 133 | 134 | /// Get the `idx`th entity that is not a candidate for substitution. Returns 135 | /// `None` if `idx` is out of bounds. 136 | pub(crate) fn get_non_substitution(&self, idx: usize) -> Option<&Substitutable> { 137 | self.non_substitutions.get(idx) 138 | } 139 | } 140 | 141 | impl FromIterator for SubstitutionTable { 142 | fn from_iter>(iter: I) -> Self { 143 | SubstitutionTable { 144 | substitutions: Vec::from_iter(iter), 145 | non_substitutions: vec![], 146 | } 147 | } 148 | } 149 | 150 | impl Deref for SubstitutionTable { 151 | type Target = [Substitutable]; 152 | 153 | fn deref(&self) -> &Self::Target { 154 | &self.substitutions[..] 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /debugvault/src/msvc/context.rs: -------------------------------------------------------------------------------- 1 | use super::{Literal, Modifiers, NestedPath, Scope, Type}; 2 | 3 | use crate::TokenStream; 4 | use tokenizing::Color32; 5 | 6 | /// Max recursion depth 7 | const MAX_DEPTH: usize = 256; 8 | 9 | #[derive(Debug)] 10 | pub(super) struct Backrefs { 11 | /// Up to 10 idents can be memorized for lookup using backref's: ?0, ?1, .. 12 | memorized: [NestedPath; 10], 13 | 14 | /// Number of so far memorized idents. 15 | memorized_count: usize, 16 | 17 | /// A max of 10 function parameters is supported. 18 | params: [Type; 10], 19 | 20 | /// Number of so far encountered function parameters. 21 | param_count: usize, 22 | } 23 | 24 | impl Backrefs { 25 | pub fn new() -> Self { 26 | const NO_PATH: NestedPath = NestedPath::Anonymous; 27 | const NO_TYPE: Type = Type::Unit; 28 | 29 | Self { 30 | memorized: [NO_PATH; 10], 31 | memorized_count: 0, 32 | params: [NO_TYPE; 10], 33 | param_count: 0, 34 | } 35 | } 36 | 37 | pub fn memorize_path<'b>(&'b mut self, path: &'b NestedPath) { 38 | let memorized = &self.memorized[..self.memorized_count]; 39 | 40 | if !memorized.contains(path) && self.memorized_count != 10 { 41 | self.memorized[self.memorized_count] = path.clone(); 42 | self.memorized_count += 1; 43 | } 44 | } 45 | 46 | pub fn get_memorized_path(&mut self, idx: usize) -> Option { 47 | if idx >= self.memorized_count { 48 | return None; 49 | } 50 | 51 | Some(self.memorized[idx].clone()) 52 | } 53 | 54 | // TODO: change interface to not be cloning a type 55 | pub fn memorize_param<'b>(&'b mut self, tipe: &'b Type) { 56 | let memorized = &self.params[..self.param_count]; 57 | 58 | if !memorized.contains(tipe) && self.param_count != 10 { 59 | self.params[self.param_count] = tipe.clone(); 60 | self.param_count += 1; 61 | } 62 | } 63 | 64 | // TODO: change interface to not be cloning a type 65 | pub fn get_memorized_param(&self, idx: usize) -> Option { 66 | if idx >= self.param_count { 67 | return None; 68 | } 69 | 70 | Some(self.params[idx].clone()) 71 | } 72 | } 73 | 74 | /// State that needs to be shared whilst traversing nodes of the AST. 75 | #[derive(Debug)] 76 | pub(super) struct Context<'a> { 77 | pub stream: TokenStream, 78 | pub offset: usize, 79 | pub parsing_qualifiers: bool, 80 | pub memorizing: bool, 81 | pub scope: &'a Scope, 82 | modifiers_in_use: Modifiers, 83 | depth: usize, 84 | } 85 | 86 | impl Context<'_> { 87 | /// Create an initialized parser that hasn't started parsing yet. 88 | pub fn new(s: &str) -> Self { 89 | static NO_SCOPE: Scope = Scope(Vec::new()); 90 | 91 | Self { 92 | stream: TokenStream::new(s), 93 | offset: 0, 94 | memorizing: true, 95 | parsing_qualifiers: true, 96 | scope: &NO_SCOPE, 97 | modifiers_in_use: Modifiers::empty(), 98 | depth: 0, 99 | } 100 | } 101 | 102 | /// Pushes a [`Literal`] to the [`TokenStream`], resolving any indexing within a literal. 103 | pub fn push_literal(&mut self, literal: &Literal, color: Color32) { 104 | let literal = &self.stream.inner()[literal.start..literal.end]; 105 | self.stream.push(literal, color); 106 | } 107 | 108 | /// Create a reference to the underlying pinned string that holds the mangled symbol. 109 | #[inline] 110 | pub fn src<'b>(&self) -> &'b str { 111 | &self.stream.inner()[self.offset..] 112 | } 113 | 114 | /// View the current byte in the mangled symbol without incrementing the offset. 115 | pub fn peek(&self) -> Option { 116 | self.src().bytes().next() 117 | } 118 | 119 | /// View a slice in the mangled symbol without incrementing the offset. 120 | pub fn peek_slice<'b>(&self, range: std::ops::RangeTo) -> Option<&'b [u8]> { 121 | self.src().as_bytes().get(range) 122 | } 123 | 124 | /// View the current byte in the mangled symbol, incrementing the offset. 125 | pub fn take(&mut self) -> Option { 126 | self.src().bytes().next().map(|byte| { 127 | self.offset += 1; 128 | byte 129 | }) 130 | } 131 | 132 | /// Increment the offset if the current byte equals the byte given. 133 | pub fn consume(&mut self, byte: u8) -> Option<()> { 134 | if self.src().bytes().next() == Some(byte) { 135 | self.offset += 1; 136 | return Some(()); 137 | } 138 | 139 | None 140 | } 141 | 142 | /// Increment the offset if the current byte equals the byte given. 143 | pub fn eat(&mut self, byte: u8) -> bool { 144 | let matches = self.src().bytes().next() == Some(byte); 145 | self.offset += matches as usize; 146 | matches 147 | } 148 | 149 | /// Increment the offset if the slices match. 150 | pub fn eat_slice(&mut self, slice: &[u8]) -> bool { 151 | let matches = self.src().as_bytes().get(..slice.len()) == Some(slice); 152 | self.offset += slice.len() * (matches as usize); 153 | matches 154 | } 155 | 156 | /// Parses a base10 number, incrementing the offset. 157 | pub fn base10(&mut self) -> Option { 158 | let n = match self.peek()? { 159 | c @ b'0'..=b'9' => (c - b'0') as usize, 160 | _ => return None, 161 | }; 162 | 163 | self.offset += 1; 164 | Some(n) 165 | } 166 | 167 | /// Parses a base16 number that's either in lowercase or upcase, incrementing the offset. 168 | pub fn base16(&mut self) -> Option { 169 | let n = match self.peek()? { 170 | c @ b'0'..=b'9' => (c - b'0') as usize, 171 | c @ b'a'..=b'f' => (c - b'a') as usize, 172 | c @ b'A'..=b'F' => (c - b'A') as usize, 173 | _ => return None, 174 | }; 175 | 176 | self.offset += 1; 177 | Some(n) 178 | } 179 | 180 | /// Parses a generic number (positive, negative, hex or decimal). 181 | /// 182 | /// ```text 183 | /// = 184 | /// | 185 | /// | 186 | /// 187 | /// = 1..=9 188 | /// = ? 189 | /// = A..=P 190 | /// | {} @ 191 | /// ``` 192 | pub fn number(&mut self) -> Option { 193 | let negative = self.eat(b'?'); 194 | 195 | if let Some(digit) = self.base10() { 196 | let mut digit = digit as isize + 1; 197 | 198 | if negative { 199 | digit = -digit; 200 | } 201 | return Some(digit); 202 | } 203 | 204 | let mut n = 0isize; 205 | loop { 206 | match self.take()? { 207 | chr @ b'A'..=b'P' => { 208 | n = n.checked_mul(16)?; 209 | n = n.checked_add((chr - b'A') as isize)?; 210 | } 211 | b'@' => { 212 | if negative { 213 | n = -n; 214 | } 215 | break Some(n); 216 | } 217 | _ => break None, 218 | } 219 | } 220 | } 221 | 222 | /// Parses a series of characters up to the character '@', incrementing the offset 223 | /// by the amount of characters parsed + the terminator. 224 | pub fn ident(&mut self) -> Option { 225 | let start = self.offset; 226 | let len = self.src().bytes().position(|c| c == b'@')?; 227 | self.offset += len + 1; 228 | Some(Literal { 229 | start, 230 | end: start + len, 231 | }) 232 | } 233 | 234 | /// Sets modifiers for children to pop and annotate their type with. 235 | #[inline] 236 | pub fn push_modifiers(&mut self, modifiers: Modifiers) { 237 | self.modifiers_in_use = modifiers; 238 | } 239 | 240 | /// Clears out any modifiers set and returns what was set. 241 | #[inline] 242 | pub fn pop_modifiers(&mut self) -> Modifiers { 243 | let modifiers = self.modifiers_in_use; 244 | self.modifiers_in_use = Modifiers::empty(); 245 | modifiers 246 | } 247 | 248 | /// Increments the depth of the current parser, failing when the depth surpassed [`MAX_DEPTH`]. 249 | #[inline] 250 | pub fn descent(&mut self) -> Option<()> { 251 | self.depth += 1; 252 | 253 | if self.depth > MAX_DEPTH { 254 | return None; 255 | } 256 | 257 | Some(()) 258 | } 259 | 260 | #[inline] 261 | /// Decrements the depth of the current parser, panics on negative depths. 262 | pub fn ascent(&mut self) { 263 | self.depth -= 1; 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /debugvault/src/prefix.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Range; 2 | use std::{cmp::Ordering, sync::Arc}; 3 | 4 | use crate::Symbol; 5 | 6 | #[inline] 7 | fn sort_cmp(a: &str, b: &str) -> Ordering { 8 | for (x, y) in a.chars().zip(b.chars()) { 9 | if x < y { 10 | return Ordering::Less; 11 | } else if x > y { 12 | return Ordering::Greater; 13 | } 14 | } 15 | 16 | a.len().cmp(&b.len()) 17 | } 18 | 19 | #[inline] 20 | fn find_cmp(a: &str, b: &str) -> Ordering { 21 | for (x, y) in a.chars().zip(b.chars()) { 22 | if x < y { 23 | return Ordering::Less; 24 | } else if x > y { 25 | return Ordering::Greater; 26 | } 27 | } 28 | 29 | Ordering::Equal 30 | } 31 | 32 | /// Datastructure for efficient string match searching. 33 | #[derive(Default, Debug)] 34 | pub struct PrefixMatcher { 35 | items: Vec>, 36 | } 37 | 38 | impl PrefixMatcher { 39 | /// Insert an item, doesn't ensure the items are sorted. 40 | pub fn insert(&mut self, s: &Arc) { 41 | self.items.push(s.clone()); 42 | } 43 | 44 | /// Sorts elements to allow for searching. 45 | pub fn reorder(&mut self) { 46 | self.items.sort_unstable_by(|a, b| sort_cmp(a.as_str(), b.as_str())); 47 | self.items.shrink_to_fit(); 48 | } 49 | 50 | /// Find some given prefix and return a range back into the items. 51 | /// Must call [`PrefixMatch::reorder`] before calling this. 52 | pub fn find(&self, prefix: &str) -> Match { 53 | // This works as cmp() will return Ordering::Equal if the prefix matches. 54 | let Ok(mid) = self.items.binary_search_by(|item| find_cmp(item.as_str(), prefix)) else { 55 | return Match { range: 0..0 }; 56 | }; 57 | 58 | // Look left and try to find more matching prefixes 59 | let mut start = mid; 60 | while start > 0 && self.items[start - 1].as_str().starts_with(prefix) { 61 | start -= 1; 62 | } 63 | 64 | // Look right and try to find more matching prefixes 65 | let mut end = mid; 66 | while end + 1 < self.items.len() && self.items[end + 1].as_str().starts_with(prefix) { 67 | end += 1; 68 | } 69 | 70 | Match { 71 | range: start..end + 1, 72 | } 73 | } 74 | } 75 | 76 | /// Storage mechanism for [`PrefixMatch::find`]. 77 | #[derive(Debug)] 78 | pub struct Match { 79 | range: Range, 80 | } 81 | 82 | impl Match { 83 | /// Iterate through all items that match. 84 | pub fn iter<'s>(&self, tree: &'s PrefixMatcher) -> impl Iterator> { 85 | tree.items[self.range.clone()].iter() 86 | } 87 | } 88 | 89 | #[cfg(test)] 90 | mod test { 91 | use crate::demangler::TokenStream; 92 | use super::*; 93 | 94 | fn symbol(s: &str) -> Arc { 95 | Arc::new(Symbol { 96 | name_as_str: Arc::from(s), 97 | name: TokenStream::simple(s), 98 | module: None, 99 | is_intrinsics: false 100 | }) 101 | } 102 | 103 | #[test] 104 | fn insert() { 105 | let mut tree = PrefixMatcher::default(); 106 | tree.insert(&symbol("file")); 107 | tree.insert(&symbol("file_name")); 108 | tree.insert(&symbol("file::name")); 109 | tree.insert(&symbol("file::no")); 110 | tree.reorder(); 111 | let expected = [ 112 | "file", 113 | "file::name", 114 | "file::no", 115 | "file_name", 116 | ]; 117 | assert_eq!(tree.items.len(), expected.len(), "Mismatched length"); 118 | for (x, y) in tree.items.iter().zip(expected.iter()) { 119 | assert_eq!(&x.as_str(), y, "Mismatch"); 120 | } 121 | } 122 | 123 | #[test] 124 | fn find() { 125 | let mut tree = PrefixMatcher::default(); 126 | tree.insert(&symbol("file")); 127 | tree.insert(&symbol("file_name")); 128 | tree.insert(&symbol("file::name")); 129 | tree.insert(&symbol("file::no")); 130 | tree.reorder(); 131 | let expected = [ 132 | "file::name", 133 | "file::no", 134 | ]; 135 | assert_eq!(tree.find("file::").range.len(), expected.len(), "Mismatched length"); 136 | for (x, y) in tree.find("file::").iter(&tree).zip(expected.iter()) { 137 | assert_eq!(&x.as_str(), y, "Mismatch"); 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /debugvault/src/rust/tests.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | use super::*; 4 | 5 | macro_rules! eq { 6 | ($mangled:literal => $demangled:literal) => { 7 | let symbol = parse($mangled).expect(&format!("Formatting '{}' failed.", $mangled)); 8 | 9 | assert_eq!( 10 | String::from_iter(symbol.tokens().iter().map(|t| &t.text[..])), 11 | $demangled 12 | ); 13 | }; 14 | } 15 | 16 | #[test] 17 | fn crate_ident() { 18 | eq!("_RC8demangle" => "demangle"); 19 | } 20 | 21 | #[test] 22 | fn generics() { 23 | eq!("_RINvNvC3std3mem8align_ofjdE" => "std::mem::align_of::"); 24 | eq!("_RINvNtC3std3mem8align_ofINtC3wow6HolderpEE" => 25 | "std::mem::align_of::>"); 26 | } 27 | 28 | #[test] 29 | fn namespaces() { 30 | eq!("_RNvC4bite6decode" => "bite::decode"); 31 | eq!("_RNvNvC4bite6decode6x86_64" => "bite::decode::x86_64"); 32 | eq!("_RINvNvC4bite6decode6x86_64NvC3lol4damnE" => 33 | "bite::decode::x86_64::"); 34 | } 35 | 36 | #[test] 37 | fn methods() { 38 | eq!("_RNvNvXs2_C7mycrateINtC7mycrate3FoopEINtNtC3std7convert4FrompE4from3MSG" => 39 | " as std::convert::From<_>>::from::MSG"); 40 | } 41 | 42 | #[test] 43 | fn pointers() { 44 | eq!("_RINvC4bite6decodeRL_eE" => "bite::decode::<&str>"); 45 | eq!("_RINvC4bite6decodeRL0_eE" => "bite::decode::<&'a str>"); 46 | 47 | eq!("_RINvC4bite6decodeQL_eE" => "bite::decode::<&mut str>"); 48 | eq!("_RINvC4bite6decodeQL0_eE" => "bite::decode::<&'a mut str>"); 49 | 50 | eq!("_RINvC4bite6decodePeE" => "bite::decode::<*const str>"); 51 | eq!("_RINvC4bite6decodeOeE" => "bite::decode::<*mut str>"); 52 | } 53 | 54 | #[test] 55 | fn arrays() { 56 | eq!("_RINvC4bite6decodeANtNvC3std5array5Arrayjf_E" => 57 | "bite::decode::<[std::array::Array; _]>"); 58 | } 59 | 60 | #[test] 61 | fn tupples() { 62 | eq!("_RINvNtC3std3mem8align_ofjTddNvC4core3ptrEE" => 63 | "std::mem::align_of::"); 64 | } 65 | 66 | #[test] 67 | fn backref() { 68 | eq!("_RNvMs1_NtNtCs9ltgdHTiPiY_4core3ptr8non_nullINtB5_7NonNullReE6as_ptrCslWKjbRFJPpS_3log" => 69 | ">::as_ptr"); 70 | } 71 | 72 | #[test] 73 | fn constants() { 74 | eq!("_RNvMNtCs9ltgdHTiPiY_4core5sliceSRe4iterCslWKjbRFJPpS_3log" => "<[&str]>::iter"); 75 | } 76 | 77 | #[test] 78 | fn fn_signature() { 79 | eq!("_RINvNtC3std3mem8align_ofFUdddEoE" => 80 | "std::mem::align_of:: u128>"); 81 | 82 | eq!("_RINvNtC3std3mem8align_ofFKCdddEoE" => 83 | "std::mem::align_of:: u128>"); 84 | 85 | eq!("_RINvNtC3std3mem8align_ofFdddEoE" => 86 | "std::mem::align_of:: u128>"); 87 | } 88 | 89 | #[test] 90 | fn dyn_traits() { 91 | eq!("_RINvNtC4core4simd3mulDNvNtC4core3mem4Readp4ItemReEL_E" => 92 | "core::simd::mul::>"); 93 | 94 | eq!("_RINvNtC4core4simd3mulDNvNtC4core3mem4ReadEL0_E" => 95 | "core::simd::mul::"); 96 | 97 | eq!("_RINvNtC4core4simd3mulDNvNtC4core3mem4ReadEL_E" => 98 | "core::simd::mul::"); 99 | } 100 | 101 | #[test] 102 | fn closures() { 103 | eq!("_RNCNvC4bite6decode0" => "bite::decode::{closure}"); 104 | eq!("_RNCNvC4bite6decodes_0" => "bite::decode::{closure#0}"); 105 | eq!("_RNCNvC4bite6decodes0_3wow" => "bite::decode::{closure:wow#1}"); 106 | } 107 | 108 | #[test] 109 | fn complex() { 110 | eq!("_RNvXs5_NtCsd4VYFwevHkG_4bite6decodeINtB5_5ArrayNtNtB5_6x86_646PrefixKj4_EINtNtNtCs9ltgdHTiPiY_4core3ops5index8IndexMutjE9index_mutB7_" => 111 | " as core::ops::index::IndexMut>::index_mut"); 112 | } 113 | 114 | #[test] 115 | #[should_panic] 116 | fn too_many_arguements() { 117 | parse( 118 | "IC3stdbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbE", 119 | ) 120 | .unwrap(); 121 | } 122 | -------------------------------------------------------------------------------- /debugvault/src/rust_legacy/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::TokenStream; 2 | use config::CONFIG; 3 | 4 | mod tests; 5 | 6 | pub fn parse(s: &str) -> Option { 7 | // macOS prefixes symbols with an extra underscore therefore '__S' is allowed 8 | let s = s.strip_prefix("ZN").or(s.strip_prefix("_ZN")).or(s.strip_prefix("__ZN"))?; 9 | 10 | // paths have to be ascii 11 | if !s.bytes().all(|c| c.is_ascii()) { 12 | return None; 13 | } 14 | 15 | let mut stream = TokenStream::new(s); 16 | let s = stream.inner(); 17 | 18 | let mut unparsed = s; 19 | let mut in_first_part = true; 20 | 21 | loop { 22 | // it's not valid to not have a closing 'E' character 23 | unparsed.as_bytes().first()?; 24 | 25 | // break on finding closing character 26 | if let Some(b'E') = unparsed.as_bytes().first() { 27 | break; 28 | } 29 | 30 | // there must be a length 31 | if !unparsed.as_bytes().first()?.is_ascii_digit() { 32 | return None; 33 | } 34 | 35 | // length of path component 36 | let mut len = 0usize; 37 | while let Some(digit @ b'0'..=b'9') = unparsed.as_bytes().first() { 38 | len = len.checked_mul(10)?.checked_add((digit - b'0') as usize)?; 39 | unparsed = &unparsed[1..]; 40 | } 41 | 42 | let mut part = unparsed.get(..len)?; 43 | unparsed = unparsed.get(len..)?; 44 | 45 | if is_rust_hash(part) { 46 | break; 47 | } 48 | 49 | if part.starts_with("_$") { 50 | part = &part[1..]; 51 | } 52 | 53 | if !in_first_part { 54 | stream.push("::", CONFIG.colors.delimiter); 55 | } 56 | 57 | loop { 58 | if part.starts_with('.') { 59 | if part[1..].starts_with('.') { 60 | stream.push("::", CONFIG.colors.delimiter); 61 | part = &part[2..]; 62 | } else { 63 | stream.push(".", CONFIG.colors.comment); 64 | part = &part[1..]; 65 | } 66 | } else if part.starts_with('$') { 67 | let (escape, after_escape) = match part[1..].find('$') { 68 | Some(end) => (&part[1..=end], &part[end + 2..]), 69 | None => break, 70 | }; 71 | 72 | // source: compiler/rustc_symbol_mangling/src/legacy.rs 73 | match escape { 74 | "SP" => stream.push("@", CONFIG.colors.comment), 75 | "BP" => stream.push("*", CONFIG.colors.asm.pointer), 76 | "RF" => stream.push("&", CONFIG.colors.asm.pointer), 77 | "LT" => stream.push("<", CONFIG.colors.asm.annotation), 78 | "GT" => stream.push(">", CONFIG.colors.asm.annotation), 79 | "LP" => stream.push("(", CONFIG.colors.asm.label), 80 | "RP" => stream.push(")", CONFIG.colors.asm.label), 81 | "C" => { 82 | // if the next character is a space don't print one 83 | // 84 | // this is to allow for a space between comma separated items 85 | if let Some(b"$u20$") = after_escape.as_bytes().get(..5) { 86 | stream.push(",", CONFIG.colors.asm.expr); 87 | } else { 88 | stream.push(", ", CONFIG.colors.asm.expr); 89 | } 90 | } 91 | _ => { 92 | if let Some(stripped) = escape.strip_prefix('u') { 93 | let digits = stripped; 94 | let all_lower_hex = 95 | digits.chars().all(|c| matches!(c, '0'..='9' | 'a'..='f')); 96 | 97 | let chr = u32::from_str_radix(digits, 16).ok().and_then(char::from_u32); 98 | 99 | if let (true, Some(chr)) = (all_lower_hex, chr) { 100 | if !chr.is_control() { 101 | let color = CONFIG.colors.asm.component; 102 | stream.push_string(chr.to_string(), color); 103 | part = after_escape; 104 | continue; 105 | } 106 | } 107 | } 108 | 109 | break; 110 | } 111 | } 112 | 113 | part = after_escape; 114 | } else if let Some(idx) = part.find(|c| c == '$' || c == '.') { 115 | let ident = &part[..idx]; 116 | stream.push(ident, CONFIG.colors.asm.component); 117 | part = &part[idx..]; 118 | } else { 119 | break; 120 | } 121 | } 122 | 123 | stream.push(part, CONFIG.colors.asm.component); 124 | in_first_part = false; 125 | } 126 | 127 | Some(stream) 128 | } 129 | 130 | fn is_rust_hash(s: &str) -> bool { 131 | s.starts_with('h') && s[1..].chars().all(|c| c.is_ascii_hexdigit()) 132 | } 133 | -------------------------------------------------------------------------------- /debugvault/src/rust_legacy/tests.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | use super::*; 4 | 5 | macro_rules! none { 6 | ($mangled:literal) => { 7 | if parse($mangled).is_some() { 8 | panic!( 9 | "Formatting '{}' succeeded when it wasn't supposed to.", 10 | $mangled 11 | ); 12 | } 13 | }; 14 | } 15 | 16 | macro_rules! eq { 17 | ($mangled:literal => $demangled:literal) => { 18 | let symbol = parse($mangled).expect(&format!("Formatting '{}' failed.", $mangled)); 19 | 20 | assert_eq!( 21 | String::from_iter(symbol.tokens().iter().map(|t| &t.text[..])), 22 | $demangled 23 | ); 24 | }; 25 | } 26 | 27 | // tests come from https://github.com/rust-lang/rustc-demangle/blob/main/src/legacy.rs 28 | 29 | #[test] 30 | fn basic() { 31 | none!("test"); 32 | eq!("_ZN4testE" => "test"); 33 | none!("_ZN4test"); 34 | eq!("_ZN4test1a2bcE" => "test::a::bc"); 35 | } 36 | 37 | #[test] 38 | fn dollars() { 39 | eq!("_ZN4$RP$E" => ")"); 40 | eq!("_ZN8$RF$testE" => "&test"); 41 | eq!("_ZN8$BP$test4foobE" => "*test::foob"); 42 | eq!("_ZN9$u20$test4foobE" => " test::foob"); 43 | eq!("_ZN35Bar$LT$$u5b$u32$u3b$$u20$4$u5d$$GT$E" => "Bar<[u32; 4]>"); 44 | } 45 | 46 | #[test] 47 | fn many_dollars() { 48 | eq!("_ZN13test$u20$test4foobE" => "test test::foob"); 49 | eq!("_ZN12test$BP$test4foobE" => "test*test::foob"); 50 | } 51 | 52 | #[test] 53 | fn osx() { 54 | eq!("__ZN5alloc9allocator6Layout9for_value17h02a996811f781011E" => 55 | "alloc::allocator::Layout::for_value"); 56 | 57 | eq!("__ZN38_$LT$core..option..Option$LT$T$GT$$GT$6unwrap18_MSG_FILE_LINE_COL17haf7cb8d5824ee659E" => 58 | ">::unwrap::_MSG_FILE_LINE_COL"); 59 | eq!("__ZN4core5slice89_$LT$impl$u20$core..iter..traits..IntoIterator$u20$for$u20$$RF$$u27$a$u20$$u5b$T$u5d$$GT$9into_iter17h450e234d27262170E" => 60 | "core::slice::::into_iter"); 61 | } 62 | 63 | #[test] 64 | fn windows() { 65 | eq!("ZN4testE" => "test"); 66 | eq!("ZN13test$u20$test4foobE" => "test test::foob"); 67 | eq!("ZN12test$RF$test4foobE" => "test&test::foob"); 68 | } 69 | 70 | #[test] 71 | fn elements_beginning_with_underscore() { 72 | eq!("_ZN13_$LT$test$GT$E" => ""); 73 | eq!("_ZN28_$u7b$$u7b$closure$u7d$$u7d$E" => "{{closure}}"); 74 | eq!("_ZN15__STATIC_FMTSTRE" => "__STATIC_FMTSTR"); 75 | } 76 | 77 | #[test] 78 | fn trait_impls() { 79 | eq!("_ZN71_$LT$Test$u20$$u2b$$u20$$u27$static$u20$as$u20$foo..Bar$LT$Test$GT$$GT$3barE" => 80 | ">::bar"); 81 | } 82 | 83 | #[test] 84 | fn without_hash() { 85 | eq!("_ZN3foo17h05af221e174051e9E" => "foo"); 86 | } 87 | 88 | #[test] 89 | fn without_hash_edgecases() { 90 | // One element, no hash. 91 | eq!("_ZN3fooE" => "foo"); 92 | // Two elements, no hash. 93 | eq!("_ZN3foo3barE" => "foo::bar"); 94 | } 95 | 96 | #[test] 97 | fn thinlto() { 98 | // One element, no hash. 99 | eq!("_ZN3fooE.llvm.9D1C9369" => "foo"); 100 | eq!("_ZN3fooE.llvm.9D1C9369@@16" => "foo"); 101 | } 102 | 103 | #[test] 104 | fn llvm_ir_branch_labels() { 105 | eq!("_ZN4core5slice77_$LT$impl$u20$core..ops..index..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17haf9727c2edfbc47bE.exit.i.i" => 106 | "core::slice:: for [T]>::index_mut"); 107 | } 108 | 109 | #[test] 110 | fn invalid_no_chop() { 111 | none!("_ZNfooE"); 112 | } 113 | 114 | #[test] 115 | fn handle_assoc_types() { 116 | eq!("_ZN151_$LT$alloc..boxed..Box$LT$alloc..boxed..FnBox$LT$A$C$$u20$Output$u3d$R$GT$$u20$$u2b$$u20$$u27$a$GT$$u20$as$u20$core..ops..function..FnOnce$LT$A$GT$$GT$9call_once17h69e8f44b3723e1caE" => 117 | " + 'a> as core::ops::function::FnOnce>::call_once"); 118 | } 119 | 120 | #[test] 121 | fn handle_bang() { 122 | eq!( 123 | "_ZN88_$LT$core..result..Result$LT$$u21$$C$$u20$E$GT$$u20$as$u20$std..process..Termination$GT$6report17hfc41d0da4a40b3e8E" => 124 | " as std::process::Termination>::report" 125 | ); 126 | } 127 | -------------------------------------------------------------------------------- /decoder-arm/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "arm" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | decoder = { path = "../decoder" } 8 | tokenizing = { path = "../tokenizing" } 9 | debugvault = { path = "../debugvault" } 10 | config = { path = "../config" } 11 | bitvec = "0.19" # update this really old dep 12 | -------------------------------------------------------------------------------- /decoder-arm/src/armv8/mod.rs: -------------------------------------------------------------------------------- 1 | /// `yaxpeax-arm`'s `ARMv8/aarch64` decoder and `Arch` implementation. 2 | pub mod a64; 3 | -------------------------------------------------------------------------------- /decoder-arm/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # `yaxpeax-arm`, a decoder for `arm` instruction sets. 2 | //! 3 | //! `yaxpeax-arm` provides `armv7` (and below) decoders, including `thumb` support, as well a 4 | //! decoder for `armv8`/`a64`. 5 | 6 | /// `yaxpeax-arm`'s `ARMv7` decoder and `Arch` implementation. 7 | pub mod armv7; 8 | /// `yaxpeax-arm`'s `ARMv8` decoder and `Arch` implementation. 9 | pub mod armv8; 10 | -------------------------------------------------------------------------------- /decoder-arm/tests/armv8/mod.rs: -------------------------------------------------------------------------------- 1 | mod a64; 2 | -------------------------------------------------------------------------------- /decoder-arm/tests/test.rs: -------------------------------------------------------------------------------- 1 | mod armv7; 2 | mod armv8; 3 | 4 | use decoder::{Decodable, Reader, ToTokens}; 5 | use debugvault::Index; 6 | use tokenizing::TokenStream; 7 | 8 | fn test_range(decoder: &A, start: u64, end: u64) { 9 | let mut stream = TokenStream::new(); 10 | let symbols = Index::default(); 11 | 12 | for i in start..=end { 13 | if i & 0x01_ff_ff_ff == 0 { 14 | eprintln!("case {:08x}", i); 15 | } 16 | let i = i as u32; 17 | let bytes = i.to_le_bytes(); 18 | if let Ok(inst) = decoder.decode(&mut Reader::new(&bytes)) { 19 | stream.clear(); 20 | inst.tokenize(&mut stream, &symbols); 21 | } 22 | } 23 | } 24 | 25 | fn par_test_u32(test_range: fn(u64, u64)) { 26 | const NR_THREADS: u64 = 512; 27 | 28 | const RANGE_SIZE: u64 = (u32::MAX as u64 + 1) / NR_THREADS; 29 | 30 | let mut handles = Vec::new(); 31 | 32 | for i in 0..NR_THREADS { 33 | let handle = std::thread::spawn(move || test_range(i * RANGE_SIZE, (i + 1) * RANGE_SIZE)); 34 | handles.push(handle); 35 | } 36 | 37 | while let Some(handle) = handles.pop() { 38 | handle.join().unwrap(); 39 | } 40 | } 41 | 42 | #[test] 43 | #[ignore] 44 | fn test_armv7_does_not_panic() { 45 | par_test_u32(|start, end| { 46 | let armv7 = arm::armv7::Decoder::default(); 47 | test_range(&armv7, start, end); 48 | }); 49 | } 50 | #[test] 51 | #[ignore] 52 | fn test_armv7_thumb_does_not_panic() { 53 | par_test_u32(|start, end| { 54 | let mut armv7_t = arm::armv7::Decoder::default(); 55 | armv7_t.set_thumb_mode(true); 56 | 57 | test_range(&armv7_t, start, end); 58 | }); 59 | } 60 | 61 | #[test] 62 | #[ignore] 63 | fn test_armv8_does_not_panic() { 64 | par_test_u32(|start, end| { 65 | let armv8 = arm::armv8::a64::Decoder::default(); 66 | 67 | test_range(&armv8, start, end); 68 | }); 69 | } 70 | -------------------------------------------------------------------------------- /decoder-mips/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mips" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | decoder = { path = "../decoder" } 8 | tokenizing = { path = "../tokenizing" } 9 | debugvault = { path = "../debugvault" } 10 | config = { path = "../config" } 11 | -------------------------------------------------------------------------------- /decoder-mips/src/tests.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | use decoder::{ToTokens, Decodable}; 4 | 5 | fn test_display(bytes: &[u8], str: &str) { 6 | let mut reader = decoder::Reader::new(bytes); 7 | let mut line = tokenizing::TokenStream::new(); 8 | let symbols = debugvault::Index::default(); 9 | let decoder = crate::Decoder::default(); 10 | 11 | let decoded = match decoder.decode(&mut reader) { 12 | Ok(inst) => { 13 | inst.tokenize(&mut line, &symbols); 14 | line.to_string() 15 | } 16 | Err(err) => format!("{err:?}"), 17 | }; 18 | 19 | assert_eq!(decoded, str); 20 | } 21 | 22 | #[test] 23 | fn jump() { 24 | test_display(&[0x9, 0, 0, 0], "j 0x0"); 25 | } 26 | 27 | #[test] 28 | fn beq() { 29 | test_display(&[0x11, 0x2a, 0x10, 0x0], "beq t1, t2, 0x1000"); 30 | } 31 | 32 | #[test] 33 | fn sll() { 34 | test_display(&[0x0, 0xa, 0x4c, 0x80], "sll t1, t2, 0x12"); 35 | } 36 | 37 | #[test] 38 | fn sllv() { 39 | test_display(&[0x1, 0x49, 0x48, 0x4], "sllv t1, t1, t2"); 40 | } 41 | 42 | #[test] 43 | fn lb() { 44 | test_display(&[0x81, 0x49, 0x0, 0x10], "lb t1, t2, 0x10"); 45 | } 46 | -------------------------------------------------------------------------------- /decoder-riscv/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "riscv" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | decoder = { path = "../decoder" } 8 | tokenizing = { path = "../tokenizing" } 9 | debugvault = { path = "../debugvault" } 10 | config = { path = "../config" } 11 | once_cell = { workspace = true } 12 | 13 | [dev-dependencies] 14 | crc = "3.0" 15 | object = { workspace = true } 16 | -------------------------------------------------------------------------------- /decoder-x86_64/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "x86_64" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | decoder = { path = "../decoder" } 8 | tokenizing = { path = "../tokenizing" } 9 | debugvault = { path = "../debugvault" } 10 | config = { path = "../config" } 11 | -------------------------------------------------------------------------------- /decoder-x86_64/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # `yaxpeax-x86`, a decoder for x86-family instruction sets 2 | //! 3 | //! `yaxpeax-x86` provides x86 decoders, for 64-bit and 32-bit modes. 4 | //! 5 | //! instructions, operands, registers, and generally all decoding structures, are in their mode's 6 | //! respective submodule: 7 | //! * `x86_64`/`amd64` decoding is under [`long_mode`] 8 | //! * `x86_32`/`x86` decoding is under [`protected_mode`] 9 | 10 | pub mod long_mode; 11 | pub mod protected_mode; 12 | mod safer_unchecked; 13 | 14 | use debugvault::Index; 15 | use tokenizing::TokenStream; 16 | use config::CONFIG; 17 | 18 | const MEM_SIZE_STRINGS: [&str; 64] = [ 19 | "byte ", "word ", "BUG ", "dword ", "ptr ", "far ", "BUG ", "qword ", "BUG ", "mword ", "BUG ", 20 | "BUG ", "BUG ", "BUG ", "BUG ", "xmmword ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", 21 | "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "ymmword ", "BUG ", 22 | "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", 23 | "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", 24 | "BUG ", "BUG ", "BUG ", "BUG ", "BUG ", "ptr ", "zmmword ", 25 | ]; 26 | 27 | struct Number(i32); 28 | 29 | impl decoder::ToTokens for Number { 30 | fn tokenize(&self, stream: &mut TokenStream, _: &Index) { 31 | if self.0 == i32::MIN { 32 | stream.push(" - ", CONFIG.colors.asm.expr); 33 | stream.push("0x7fffffff", CONFIG.colors.asm.immediate); 34 | } else if self.0 < 0 { 35 | stream.push(" - ", CONFIG.colors.asm.expr); 36 | stream.push_owned(decoder::encode_hex(-self.0 as i64), CONFIG.colors.asm.immediate); 37 | } else { 38 | stream.push(" + ", CONFIG.colors.asm.expr); 39 | stream.push_owned(decoder::encode_hex(self.0 as i64), CONFIG.colors.asm.immediate); 40 | } 41 | } 42 | } 43 | 44 | pub struct MemoryAccessSize { 45 | size: u8, 46 | } 47 | 48 | impl MemoryAccessSize { 49 | /// return the number of bytes referenced by this memory access. 50 | /// 51 | /// if the number of bytes cannot be confidently known by the instruction in isolation (as is 52 | /// the case for `xsave`/`xrstor`-style "operate on all processor state" instructions), this 53 | /// function will return `None`. 54 | pub fn bytes_size(&self) -> Option { 55 | if self.size == 63 { 56 | None 57 | } else { 58 | Some(self.size) 59 | } 60 | } 61 | 62 | /// a human-friendly label for the number of bytes this memory access references. 63 | /// 64 | /// there are some differences from size names that may be expected elsewhere; `yaxpeax-x86` 65 | /// prefers to use consistent names for a width even if the way those bytes are used varies. 66 | /// 67 | /// the sizes `yaxpeax-x86` knows are as follows: 68 | /// | size (bytes) | name | 69 | /// |--------------|------------| 70 | /// | 1 | `byte` | 71 | /// | 2 | `word` | 72 | /// | 4 | `dword` | 73 | /// | 6 | `far` | 74 | /// | 8 | `qword` | 75 | /// | 10 | `mword` | 76 | /// | 16 | `xmmword` | 77 | /// | 32 | `ymmword` | 78 | /// | 64 | `zmmword` | 79 | /// | variable | `ptr` | 80 | /// 81 | /// "mword" refers to an mmx-sized access - 80 bits, or 10 bytes. `mword` is also used for 82 | /// 64-bit far calls, because they reference a contiguous ten bytes; two bytes of segment 83 | /// selector and eight bytes of address. 84 | /// 85 | /// "variable" accesses access a number of bytes dependent on the physical processor and its 86 | /// operating mode. this is particularly relevant for `xsave`/`xrstor`-style instructions. 87 | pub fn size_name(&self) -> &'static str { 88 | MEM_SIZE_STRINGS[self.size as usize - 1] 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /decoder-x86_64/src/long_mode/tests/opcode.rs: -------------------------------------------------------------------------------- 1 | use crate::long_mode::{ConditionCode, Opcode}; 2 | 3 | #[test] 4 | fn conditional_instructions() { 5 | const JCC: &'static [(Opcode, ConditionCode); 16] = &[ 6 | (Opcode::JO, ConditionCode::O), 7 | (Opcode::JNO, ConditionCode::NO), 8 | (Opcode::JB, ConditionCode::B), 9 | (Opcode::JNB, ConditionCode::AE), 10 | (Opcode::JZ, ConditionCode::Z), 11 | (Opcode::JNZ, ConditionCode::NZ), 12 | (Opcode::JA, ConditionCode::A), 13 | (Opcode::JNA, ConditionCode::BE), 14 | (Opcode::JS, ConditionCode::S), 15 | (Opcode::JNS, ConditionCode::NS), 16 | (Opcode::JP, ConditionCode::P), 17 | (Opcode::JNP, ConditionCode::NP), 18 | (Opcode::JL, ConditionCode::L), 19 | (Opcode::JGE, ConditionCode::GE), 20 | (Opcode::JG, ConditionCode::G), 21 | (Opcode::JLE, ConditionCode::LE), 22 | ]; 23 | for (opc, cond) in JCC.iter() { 24 | assert!(opc.is_jcc()); 25 | assert!(!opc.is_setcc()); 26 | assert!(!opc.is_cmovcc()); 27 | assert_eq!(opc.condition(), Some(*cond)); 28 | } 29 | 30 | const SETCC: &'static [(Opcode, ConditionCode); 16] = &[ 31 | (Opcode::SETO, ConditionCode::O), 32 | (Opcode::SETNO, ConditionCode::NO), 33 | (Opcode::SETB, ConditionCode::B), 34 | (Opcode::SETAE, ConditionCode::AE), 35 | (Opcode::SETZ, ConditionCode::Z), 36 | (Opcode::SETNZ, ConditionCode::NZ), 37 | (Opcode::SETA, ConditionCode::A), 38 | (Opcode::SETBE, ConditionCode::BE), 39 | (Opcode::SETS, ConditionCode::S), 40 | (Opcode::SETNS, ConditionCode::NS), 41 | (Opcode::SETP, ConditionCode::P), 42 | (Opcode::SETNP, ConditionCode::NP), 43 | (Opcode::SETL, ConditionCode::L), 44 | (Opcode::SETGE, ConditionCode::GE), 45 | (Opcode::SETG, ConditionCode::G), 46 | (Opcode::SETLE, ConditionCode::LE), 47 | ]; 48 | for (opc, cond) in SETCC.iter() { 49 | assert!(!opc.is_jcc()); 50 | assert!(opc.is_setcc()); 51 | assert!(!opc.is_cmovcc()); 52 | assert_eq!(opc.condition(), Some(*cond)); 53 | } 54 | 55 | const CMOVCC: &'static [(Opcode, ConditionCode); 16] = &[ 56 | (Opcode::CMOVO, ConditionCode::O), 57 | (Opcode::CMOVNO, ConditionCode::NO), 58 | (Opcode::CMOVB, ConditionCode::B), 59 | (Opcode::CMOVNB, ConditionCode::AE), 60 | (Opcode::CMOVZ, ConditionCode::Z), 61 | (Opcode::CMOVNZ, ConditionCode::NZ), 62 | (Opcode::CMOVA, ConditionCode::A), 63 | (Opcode::CMOVNA, ConditionCode::BE), 64 | (Opcode::CMOVS, ConditionCode::S), 65 | (Opcode::CMOVNS, ConditionCode::NS), 66 | (Opcode::CMOVP, ConditionCode::P), 67 | (Opcode::CMOVNP, ConditionCode::NP), 68 | (Opcode::CMOVL, ConditionCode::L), 69 | (Opcode::CMOVGE, ConditionCode::GE), 70 | (Opcode::CMOVG, ConditionCode::G), 71 | (Opcode::CMOVLE, ConditionCode::LE), 72 | ]; 73 | for (opc, cond) in CMOVCC.iter() { 74 | assert!(!opc.is_jcc()); 75 | assert!(!opc.is_setcc()); 76 | assert!(opc.is_cmovcc()); 77 | assert_eq!(opc.condition(), Some(*cond)); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /decoder-x86_64/src/long_mode/tests/operand.rs: -------------------------------------------------------------------------------- 1 | use crate::long_mode::{Decoder, Operand, RegSpec}; 2 | use crate::MemoryAccessSize; 3 | 4 | #[test] 5 | fn register_widths() { 6 | assert_eq!(Operand::Register(RegSpec::rsp()).width(), Some(8)); 7 | assert_eq!(Operand::Register(RegSpec::esp()).width(), Some(4)); 8 | assert_eq!(Operand::Register(RegSpec::sp()).width(), Some(2)); 9 | assert_eq!(Operand::Register(RegSpec::cl()).width(), Some(1)); 10 | assert_eq!(Operand::Register(RegSpec::ch()).width(), Some(1)); 11 | assert_eq!(Operand::Register(RegSpec::gs()).width(), Some(2)); 12 | } 13 | 14 | #[test] 15 | fn memory_widths() { 16 | // the register operand directly doesn't report a size - it comes from the `Instruction` for 17 | // which this is an operand. 18 | assert_eq!(Operand::RegDeref(RegSpec::rsp()).width(), None); 19 | 20 | fn mem_size_of(data: &[u8]) -> MemoryAccessSize { 21 | let decoder = Decoder::default(); 22 | decoder.decode_slice(data).unwrap().mem_size().unwrap() 23 | } 24 | 25 | // and checking the memory size direcly reports correct names 26 | assert_eq!(mem_size_of(&[0x32, 0x00]).size_name(), "byte "); 27 | assert_eq!(mem_size_of(&[0x66, 0x33, 0x00]).size_name(), "word "); 28 | assert_eq!(mem_size_of(&[0x33, 0x00]).size_name(), "dword "); 29 | assert_eq!(mem_size_of(&[0x48, 0x33, 0x00]).size_name(), "qword "); 30 | } 31 | 32 | #[test] 33 | fn test_implied_memory_width() { 34 | fn mem_size_of(data: &[u8]) -> Option { 35 | let decoder = Decoder::default(); 36 | decoder.decode_slice(data).unwrap().mem_size().unwrap().bytes_size() 37 | } 38 | 39 | // test push, pop, call, and ret 40 | assert_eq!(mem_size_of(&[0xc3]), Some(8)); 41 | assert_eq!(mem_size_of(&[0xe8, 0x11, 0x22, 0x33, 0x44]), Some(8)); 42 | assert_eq!(mem_size_of(&[0x50]), Some(8)); 43 | assert_eq!(mem_size_of(&[0x58]), Some(8)); 44 | assert_eq!(mem_size_of(&[0x66, 0x50]), Some(8)); 45 | assert_eq!(mem_size_of(&[0x66, 0x58]), Some(8)); 46 | assert_eq!(mem_size_of(&[0xff, 0xf0]), Some(8)); 47 | assert_eq!(mem_size_of(&[0x66, 0xff, 0xf0]), Some(2)); 48 | // operand-size prefixed call and jump still reads 8 bytes (prefix ignored) 49 | assert_eq!(mem_size_of(&[0x66, 0xff, 0x10]), Some(8)); 50 | assert_eq!(mem_size_of(&[0x66, 0xff, 0x20]), Some(8)); 51 | } 52 | -------------------------------------------------------------------------------- /decoder-x86_64/src/long_mode/tests/regspec.rs: -------------------------------------------------------------------------------- 1 | use crate::long_mode::{register_class, RegSpec}; 2 | use std::collections::{BTreeMap, HashMap}; 3 | 4 | #[test] 5 | fn test_ord() { 6 | let _: BTreeMap = BTreeMap::new(); 7 | } 8 | 9 | #[test] 10 | fn test_hash() { 11 | let _: HashMap = HashMap::new(); 12 | } 13 | 14 | #[test] 15 | fn test_labels() { 16 | assert_eq!(RegSpec::rip().name(), "rip"); 17 | assert_eq!(RegSpec::eip().name(), "eip"); 18 | assert_eq!(RegSpec::rflags().name(), "rflags"); 19 | assert_eq!(RegSpec::rbp().name(), "rbp"); 20 | assert_eq!(RegSpec::gs().name(), "gs"); 21 | assert_eq!(RegSpec::al().name(), "al"); 22 | } 23 | 24 | #[test] 25 | fn test_bank_names() { 26 | assert_eq!(RegSpec::al().class().name(), "byte"); 27 | assert_eq!(RegSpec::r8b().class().name(), "rex-byte"); 28 | assert_eq!(RegSpec::ax().class().name(), "word"); 29 | assert_eq!(RegSpec::eax().class().name(), "dword"); 30 | assert_eq!(RegSpec::rax().class().name(), "qword"); 31 | assert_eq!(RegSpec::fs().class().name(), "segment"); 32 | assert_eq!(RegSpec::eflags().class().name(), "eflags"); 33 | assert_eq!(RegSpec::rflags().class().name(), "rflags"); 34 | assert_eq!(RegSpec::eip().class().name(), "eip"); 35 | assert_eq!(RegSpec::rip().class().name(), "rip"); 36 | assert_eq!(RegSpec::st0().class().name(), "x87-stack"); 37 | assert_eq!(RegSpec::mm0().class().name(), "mmx"); 38 | assert_eq!(RegSpec::xmm0().class().name(), "xmm"); 39 | assert_eq!(RegSpec::ymm0().class().name(), "ymm"); 40 | assert_eq!(RegSpec::zmm0().class().name(), "zmm"); 41 | } 42 | 43 | // this should compile. 44 | #[test] 45 | fn match_bank_kind() { 46 | match RegSpec::al().class() { 47 | register_class::X => { 48 | panic!("al is an xmm register? don't think so"); 49 | } 50 | register_class::B => { 51 | println!("al is a byte register"); 52 | } 53 | other => { 54 | panic!("unknown register kind: {:?}", other); 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /decoder-x86_64/src/long_mode/uarch.rs: -------------------------------------------------------------------------------- 1 | pub mod amd { 2 | //! most information about instruction set extensions for microarchitectures here was sourced 3 | //! from 4 | //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://docs.rs/yaxpeax-x86/0.0.12/x86_64/protected_mode/uarch/intel/index.html) 5 | //! and 6 | //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://docs.rs/yaxpeax-x86/0.0.12/x86_64/protected_mode/uarch/intel/index.html). 7 | //! these mappings are best-effort but fairly unused, so a critical eye should be kept towards 8 | //! these decoders rejecting instructions they should not, or incorrectly accepting 9 | //! instructions. 10 | //! 11 | //! microarchitectures as defined here are with respect to flags reported by CPUID. notably, 12 | //! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension 13 | //! reportedly function correctly (agner p217). 14 | //! 15 | //! [agner](https://www.agner.org/optimize/microarchitecture.pdf) 16 | //! as retrieved 2020 may 19, 17 | //! `sha256: 87ff152ae18c017dcbfb9f7ee6e88a9f971f6250fd15a70a3dd87c3546323bd5` 18 | 19 | use crate::long_mode::Decoder; 20 | 21 | /// `k8` was the first AMD microarchitecture to implement x86_64, launched in 2003. while later 22 | /// `k8`-based processors supported SSE3, these predefined decoders pick the lower end of 23 | /// support - SSE2 and no later. 24 | pub fn k8() -> Decoder { 25 | Decoder::minimal() 26 | } 27 | 28 | /// `k10` was the successor to `k8`, launched in 2007. `k10` cores extended SSE support through 29 | /// to SSE4.2a, as well as consistent `cmov` support, among other features. 30 | pub fn k10() -> Decoder { 31 | k8().with_cmov() 32 | .with_cmpxchg16b() 33 | .with_svm() 34 | .with_abm() 35 | .with_lahfsahf() 36 | .with_sse3() 37 | .with_ssse3() 38 | .with_sse4() 39 | .with_sse4_2() 40 | .with_sse4a() 41 | } 42 | 43 | /// `Bulldozer` was the successor to `K10`, launched in 2011. `Bulldozer` cores include AVX 44 | /// support among other extensions, and are notable for including `AESNI`. 45 | pub fn bulldozer() -> Decoder { 46 | k10() 47 | .with_bmi1() 48 | .with_aesni() 49 | .with_pclmulqdq() 50 | .with_f16c() 51 | .with_avx() 52 | .with_fma4() 53 | .with_xop() 54 | } 55 | 56 | /// `Piledriver` was the successor to `Bulldozer`, launched in 2012. 57 | pub fn piledriver() -> Decoder { 58 | bulldozer().with_tbm().with_fma3().with_fma4() 59 | } 60 | 61 | /// `Steamroller` was the successor to `Piledriver`, launched in 2014. unlike `Piledriver` 62 | /// cores, these cores do not support `TBM` or `FMA3`. 63 | pub fn steamroller() -> Decoder { 64 | bulldozer() 65 | } 66 | 67 | /// `Excavator` was the successor to `Steamroller`, launched in 2015. 68 | pub fn excavator() -> Decoder { 69 | steamroller() 70 | .with_movbe() 71 | .with_bmi2() 72 | .with_rdrand() 73 | .with_avx() 74 | .with_xop() 75 | .with_bmi2() 76 | .with_sha() 77 | .with_rdrand() 78 | .with_avx2() 79 | } 80 | 81 | /// `Zen` was the successor to `Excavator`, launched in 2017. `Zen` cores extend SIMD 82 | /// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX, 83 | /// SHA, RDSEED, and other extensions. 84 | pub fn zen() -> Decoder { 85 | k10() 86 | .with_avx() 87 | .with_avx2() 88 | .with_bmi1() 89 | .with_aesni() 90 | .with_pclmulqdq() 91 | .with_f16c() 92 | .with_movbe() 93 | .with_bmi2() 94 | .with_rdrand() 95 | .with_adx() 96 | .with_sha() 97 | .with_rdseed() 98 | .with_fma3() 99 | // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO? 100 | } 101 | } 102 | 103 | pub mod intel { 104 | //! sourced by walking wikipedia pages. seriously! this stuff is kinda hard to figure out! 105 | 106 | use crate::long_mode::Decoder; 107 | 108 | /// `Netburst` was the first Intel microarchitecture to implement x86_64, beginning with the 109 | /// `Prescott` family launched in 2004. while the wider `Netburst` family launched in 2000 110 | /// with only SSE2, the first `x86_64`-supporting incarnation was `Prescott` which indeed 111 | /// included SSE3. 112 | pub fn netburst() -> Decoder { 113 | Decoder::minimal().with_cmov().with_cmpxchg16b().with_sse3() 114 | } 115 | 116 | /// `Core` was the successor to `Netburst`, launched in 2006. it included up to SSE4, with 117 | /// processors using this architecture shipped under the names "Merom", "Conroe", and 118 | /// "Woodcrest", for mobile, desktop, and server processors respectively. not to be confused 119 | /// with the later `Nehalem` microarchitecture that introduced the `Core i*` product lines, 120 | /// `Core 2 *` processors used the `Core` architecture. 121 | pub fn core() -> Decoder { 122 | netburst().with_ssse3().with_sse4() 123 | } 124 | 125 | /// `Penryn` was the successor to `Core`, launched in early 2008. it added SSE4.1, along with 126 | /// virtualization extensions. 127 | pub fn penryn() -> Decoder { 128 | core().with_sse4_1() 129 | } 130 | 131 | /// `Nehalem` was the successor to `Penryn`, launched in late 2008. not to be confused with the 132 | /// earlier `Core` microarchitecture, the `Core i*` products were based on `Nehalem` cores. 133 | /// `Nehalem` added SSE4.2 extensions, along with the `POPCNT` instruction. 134 | pub fn nehalem() -> Decoder { 135 | penryn().with_sse4_2().with_popcnt() 136 | } 137 | 138 | /// `Westmere` was the successor to `Nehalem`, launched in 2010. it added AES-NI and CLMUL 139 | /// extensions. 140 | pub fn westmere() -> Decoder { 141 | nehalem().with_aesni().with_pclmulqdq() 142 | } 143 | 144 | /// `Sandy Bridge` was the successor to `Westmere`, launched in 2011. it added AVX 145 | /// instructions. 146 | pub fn sandybridge() -> Decoder { 147 | westmere().with_avx() 148 | } 149 | 150 | /// `Ivy Bridge` was the successor to `Sandy Bridge`, launched in 2012. it added F16C 151 | /// extensions for 16-bit floating point conversion, and the RDRAND instruction. 152 | pub fn ivybridge() -> Decoder { 153 | sandybridge().with_f16c().with_rdrand() 154 | } 155 | 156 | /// `Haswell` was the successor to `Ivy Bridge`, launched in 2013. it added several instruction 157 | /// set extensions: AVX2, BMI1, BMI2, ABM, and FMA3. 158 | pub fn haswell() -> Decoder { 159 | ivybridge().with_bmi1().with_bmi2().with_abm().with_fma3().with_avx2() 160 | } 161 | 162 | /// `Haswell-EX` was a variant of `Haswell` launched in 2015 with functional TSX. these cores 163 | /// were shipped as `E7-48xx/E7-88xx v3` models of processors. 164 | pub fn haswell_ex() -> Decoder { 165 | haswell().with_tsx() 166 | } 167 | 168 | /// `Broadwell` was the successor to `Haswell`, launched in late 2014. it added ADX, RDSEED, 169 | /// and PREFETCHW, as well as broadly rolling out TSX. TSX is enabled on this decoder because 170 | /// some chips of this microarchitecture rolled out with TSX, and lack of TSX seems to be 171 | /// reported as an errata (for example, the `Broadwell-Y` line of parts). 172 | pub fn broadwell() -> Decoder { 173 | haswell_ex().with_adx().with_rdseed().with_prefetchw() 174 | } 175 | 176 | /// `Skylake` was the successor to `Broadwell`, launched in mid 2015. it added MPX and SGX 177 | /// extensions, as well as a mixed rollout of AVX512 in different subsets for different product 178 | /// lines. 179 | /// 180 | /// AVX512 is not enabled on this decoder by default because there doesn't seem to be a lowest 181 | /// common denominator: if you want a `Skylake` decoder with AVX512, something like the 182 | /// following: 183 | /// ``` 184 | /// x86_64::long_mode::uarch::intel::skylake() 185 | /// .with_avx512_f() 186 | /// .with_avx512_dq(); 187 | /// ``` 188 | /// is likely your best option. 189 | pub fn skylake() -> Decoder { 190 | broadwell().with_mpx().with_sgx() 191 | } 192 | 193 | /// `Kaby Lake` was the successor to `Sky Lake`, launched in 2016. it adds no extensions to 194 | /// x86_64 implementation beyond `skylake`. 195 | pub fn kabylake() -> Decoder { 196 | skylake() 197 | } 198 | // ice lake is shipping so that should probably be included... 199 | } 200 | -------------------------------------------------------------------------------- /decoder-x86_64/src/protected_mode/tests/opcode.rs: -------------------------------------------------------------------------------- 1 | use crate::protected_mode::{ConditionCode, Opcode}; 2 | 3 | #[test] 4 | fn conditional_instructions() { 5 | const JCC: &'static [(Opcode, ConditionCode); 16] = &[ 6 | (Opcode::JO, ConditionCode::O), 7 | (Opcode::JNO, ConditionCode::NO), 8 | (Opcode::JB, ConditionCode::B), 9 | (Opcode::JNB, ConditionCode::AE), 10 | (Opcode::JZ, ConditionCode::Z), 11 | (Opcode::JNZ, ConditionCode::NZ), 12 | (Opcode::JA, ConditionCode::A), 13 | (Opcode::JNA, ConditionCode::BE), 14 | (Opcode::JS, ConditionCode::S), 15 | (Opcode::JNS, ConditionCode::NS), 16 | (Opcode::JP, ConditionCode::P), 17 | (Opcode::JNP, ConditionCode::NP), 18 | (Opcode::JL, ConditionCode::L), 19 | (Opcode::JGE, ConditionCode::GE), 20 | (Opcode::JG, ConditionCode::G), 21 | (Opcode::JLE, ConditionCode::LE), 22 | ]; 23 | for (opc, cond) in JCC.iter() { 24 | assert!(opc.is_jcc()); 25 | assert!(!opc.is_setcc()); 26 | assert!(!opc.is_cmovcc()); 27 | assert_eq!(opc.condition(), Some(*cond)); 28 | } 29 | 30 | const SETCC: &'static [(Opcode, ConditionCode); 16] = &[ 31 | (Opcode::SETO, ConditionCode::O), 32 | (Opcode::SETNO, ConditionCode::NO), 33 | (Opcode::SETB, ConditionCode::B), 34 | (Opcode::SETAE, ConditionCode::AE), 35 | (Opcode::SETZ, ConditionCode::Z), 36 | (Opcode::SETNZ, ConditionCode::NZ), 37 | (Opcode::SETA, ConditionCode::A), 38 | (Opcode::SETBE, ConditionCode::BE), 39 | (Opcode::SETS, ConditionCode::S), 40 | (Opcode::SETNS, ConditionCode::NS), 41 | (Opcode::SETP, ConditionCode::P), 42 | (Opcode::SETNP, ConditionCode::NP), 43 | (Opcode::SETL, ConditionCode::L), 44 | (Opcode::SETGE, ConditionCode::GE), 45 | (Opcode::SETG, ConditionCode::G), 46 | (Opcode::SETLE, ConditionCode::LE), 47 | ]; 48 | for (opc, cond) in SETCC.iter() { 49 | assert!(!opc.is_jcc()); 50 | assert!(opc.is_setcc()); 51 | assert!(!opc.is_cmovcc()); 52 | assert_eq!(opc.condition(), Some(*cond)); 53 | } 54 | 55 | const CMOVCC: &'static [(Opcode, ConditionCode); 16] = &[ 56 | (Opcode::CMOVO, ConditionCode::O), 57 | (Opcode::CMOVNO, ConditionCode::NO), 58 | (Opcode::CMOVB, ConditionCode::B), 59 | (Opcode::CMOVNB, ConditionCode::AE), 60 | (Opcode::CMOVZ, ConditionCode::Z), 61 | (Opcode::CMOVNZ, ConditionCode::NZ), 62 | (Opcode::CMOVA, ConditionCode::A), 63 | (Opcode::CMOVNA, ConditionCode::BE), 64 | (Opcode::CMOVS, ConditionCode::S), 65 | (Opcode::CMOVNS, ConditionCode::NS), 66 | (Opcode::CMOVP, ConditionCode::P), 67 | (Opcode::CMOVNP, ConditionCode::NP), 68 | (Opcode::CMOVL, ConditionCode::L), 69 | (Opcode::CMOVGE, ConditionCode::GE), 70 | (Opcode::CMOVG, ConditionCode::G), 71 | (Opcode::CMOVLE, ConditionCode::LE), 72 | ]; 73 | for (opc, cond) in CMOVCC.iter() { 74 | assert!(!opc.is_jcc()); 75 | assert!(!opc.is_setcc()); 76 | assert!(opc.is_cmovcc()); 77 | assert_eq!(opc.condition(), Some(*cond)); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /decoder-x86_64/src/protected_mode/tests/operand.rs: -------------------------------------------------------------------------------- 1 | use crate::protected_mode::{Decoder, Operand, RegSpec}; 2 | use crate::MemoryAccessSize; 3 | 4 | #[test] 5 | fn register_widths() { 6 | assert_eq!(Operand::Register(RegSpec::esp()).width(), Some(4)); 7 | assert_eq!(Operand::Register(RegSpec::sp()).width(), Some(2)); 8 | assert_eq!(Operand::Register(RegSpec::cl()).width(), Some(1)); 9 | assert_eq!(Operand::Register(RegSpec::ch()).width(), Some(1)); 10 | assert_eq!(Operand::Register(RegSpec::gs()).width(), Some(2)); 11 | } 12 | 13 | #[test] 14 | fn memory_widths() { 15 | // the register operand directly doesn't report a size - it comes from the `Instruction` for 16 | // which this is an operand. 17 | assert_eq!(Operand::RegDeref(RegSpec::esp()).width(), None); 18 | 19 | fn mem_size_of(data: &[u8]) -> MemoryAccessSize { 20 | let decoder = Decoder::default(); 21 | decoder.decode_slice(data).unwrap().mem_size().unwrap() 22 | } 23 | 24 | // and checking the memory size direcly reports correct names 25 | assert_eq!(mem_size_of(&[0x32, 0x00]).size_name(), "byte "); 26 | assert_eq!(mem_size_of(&[0x66, 0x33, 0x00]).size_name(), "word "); 27 | assert_eq!(mem_size_of(&[0x33, 0x00]).size_name(), "dword "); 28 | } 29 | 30 | #[test] 31 | fn test_implied_memory_width() { 32 | fn mem_size_of(data: &[u8]) -> Option { 33 | let decoder = Decoder::default(); 34 | decoder.decode_slice(data).unwrap().mem_size().unwrap().bytes_size() 35 | } 36 | 37 | // test push, pop, call, and ret 38 | assert_eq!(mem_size_of(&[0xc3]), Some(4)); 39 | assert_eq!(mem_size_of(&[0xe8, 0x11, 0x22, 0x33, 0x44]), Some(4)); 40 | assert_eq!(mem_size_of(&[0x50]), Some(4)); 41 | assert_eq!(mem_size_of(&[0x58]), Some(4)); 42 | assert_eq!(mem_size_of(&[0x66, 0x50]), Some(4)); 43 | assert_eq!(mem_size_of(&[0x66, 0x58]), Some(4)); 44 | assert_eq!(mem_size_of(&[0xff, 0xf0]), Some(4)); 45 | assert_eq!(mem_size_of(&[0x66, 0xff, 0xf0]), Some(2)); 46 | // unlike 64-bit mode, operand-size prefixed call and jump do have a different size: they read 47 | // two bytes. 48 | assert_eq!(mem_size_of(&[0x66, 0xff, 0x10]), Some(2)); 49 | assert_eq!(mem_size_of(&[0x66, 0xff, 0x20]), Some(2)); 50 | } 51 | -------------------------------------------------------------------------------- /decoder-x86_64/src/protected_mode/tests/regspec.rs: -------------------------------------------------------------------------------- 1 | use crate::protected_mode::{register_class, RegSpec}; 2 | use std::collections::{BTreeMap, HashMap}; 3 | 4 | #[test] 5 | fn test_ord() { 6 | let _: BTreeMap = BTreeMap::new(); 7 | } 8 | 9 | #[test] 10 | fn test_hash() { 11 | let _: HashMap = HashMap::new(); 12 | } 13 | 14 | #[test] 15 | fn test_labels() { 16 | assert_eq!(RegSpec::eip().name(), "eip"); 17 | assert_eq!(RegSpec::ebp().name(), "ebp"); 18 | assert_eq!(RegSpec::gs().name(), "gs"); 19 | assert_eq!(RegSpec::al().name(), "al"); 20 | } 21 | 22 | #[test] 23 | fn test_bank_names() { 24 | assert_eq!(RegSpec::al().class().name(), "byte"); 25 | assert_eq!(RegSpec::ax().class().name(), "word"); 26 | assert_eq!(RegSpec::eax().class().name(), "dword"); 27 | assert_eq!(RegSpec::fs().class().name(), "segment"); 28 | assert_eq!(RegSpec::eflags().class().name(), "eflags"); 29 | assert_eq!(RegSpec::eip().class().name(), "eip"); 30 | assert_eq!(RegSpec::st0().class().name(), "x87-stack"); 31 | assert_eq!(RegSpec::mm0().class().name(), "mmx"); 32 | assert_eq!(RegSpec::xmm0().class().name(), "xmm"); 33 | assert_eq!(RegSpec::ymm0().class().name(), "ymm"); 34 | assert_eq!(RegSpec::zmm0().class().name(), "zmm"); 35 | } 36 | 37 | // this should compile. 38 | #[test] 39 | fn match_bank_kind() { 40 | match RegSpec::al().class() { 41 | register_class::X => { 42 | panic!("al is an xmm register? don't think so"); 43 | } 44 | register_class::B => { 45 | println!("al is a byte register"); 46 | } 47 | other => { 48 | panic!("unknown register kind: {:?}", other); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /decoder-x86_64/src/protected_mode/uarch.rs: -------------------------------------------------------------------------------- 1 | pub mod amd { 2 | //! most information about instruction set extensions for microarchitectures here was sourced 3 | //! from 4 | //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://docs.rs/yaxpeax-x86/0.0.12/x86_64/protected_mode/uarch/intel/index.html) 5 | //! and 6 | //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://docs.rs/yaxpeax-x86/0.0.12/x86_64/protected_mode/uarch/intel/index.html). 7 | //! these mappings are best-effort but fairly unused, so a critical eye should be kept towards 8 | //! these decoders rejecting instructions they should not, or incorrectly accepting 9 | //! instructions. 10 | //! 11 | //! microarchitectures as defined here are with respect to flags reported by CPUID. notably, 12 | //! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension 13 | //! reportedly function correctly (agner p217). 14 | //! 15 | //! [agner](https://www.agner.org/optimize/microarchitecture.pdf) 16 | //! as retrieved 2020 may 19, 17 | //! `sha256: 87ff152ae18c017dcbfb9f7ee6e88a9f971f6250fd15a70a3dd87c3546323bd5` 18 | 19 | use crate::protected_mode::Decoder; 20 | 21 | /// `k8` was the first AMD microarchitecture to implement x86_64, launched in 2003. while later 22 | /// `k8`-based processors supported SSE3, these predefined decoders pick the lower end of 23 | /// support - SSE2 and no later. 24 | pub fn k8() -> Decoder { 25 | Decoder::minimal() 26 | } 27 | 28 | /// `k10` was the successor to `k8`, launched in 2007. `k10` cores extended SSE support through 29 | /// to SSE4.2a, as well as consistent `cmov` support, among other features. 30 | pub fn k10() -> Decoder { 31 | k8().with_cmov() 32 | .with_cmpxchg16b() 33 | .with_svm() 34 | .with_abm() 35 | .with_lahfsahf() 36 | .with_sse3() 37 | .with_ssse3() 38 | .with_sse4() 39 | .with_sse4_2() 40 | .with_sse4a() 41 | } 42 | 43 | /// `Bulldozer` was the successor to `K10`, launched in 2011. `Bulldozer` cores include AVX 44 | /// support among other extensions, and are notable for including `AESNI`. 45 | pub fn bulldozer() -> Decoder { 46 | k10() 47 | .with_bmi1() 48 | .with_aesni() 49 | .with_pclmulqdq() 50 | .with_f16c() 51 | .with_avx() 52 | .with_fma4() 53 | .with_xop() 54 | } 55 | 56 | /// `Piledriver` was the successor to `Bulldozer`, launched in 2012. 57 | pub fn piledriver() -> Decoder { 58 | bulldozer().with_tbm().with_fma3().with_fma4() 59 | } 60 | 61 | /// `Steamroller` was the successor to `Piledriver`, launched in 2014. unlike `Piledriver` 62 | /// cores, these cores do not support `TBM` or `FMA3`. 63 | pub fn steamroller() -> Decoder { 64 | bulldozer() 65 | } 66 | 67 | /// `Excavator` was the successor to `Steamroller`, launched in 2015. 68 | pub fn excavator() -> Decoder { 69 | steamroller() 70 | .with_movbe() 71 | .with_bmi2() 72 | .with_rdrand() 73 | .with_avx() 74 | .with_xop() 75 | .with_bmi2() 76 | .with_sha() 77 | .with_rdrand() 78 | .with_avx2() 79 | } 80 | 81 | /// `Zen` was the successor to `Excavator`, launched in 2017. `Zen` cores extend SIMD 82 | /// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX, 83 | /// SHA, RDSEED, and other extensions. 84 | pub fn zen() -> Decoder { 85 | k10() 86 | .with_avx() 87 | .with_avx2() 88 | .with_bmi1() 89 | .with_aesni() 90 | .with_pclmulqdq() 91 | .with_f16c() 92 | .with_movbe() 93 | .with_bmi2() 94 | .with_rdrand() 95 | .with_adx() 96 | .with_sha() 97 | .with_rdseed() 98 | .with_fma3() 99 | // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO? 100 | } 101 | } 102 | 103 | pub mod intel { 104 | //! sourced by walking wikipedia pages. seriously! this stuff is kinda hard to figure out! 105 | 106 | use crate::protected_mode::Decoder; 107 | 108 | /// `Netburst` was the first Intel microarchitecture to implement x86_64, beginning with the 109 | /// `Prescott` family launched in 2004. while the wider `Netburst` family launched in 2000 110 | /// with only SSE2, the first `x86_64`-supporting incarnation was `Prescott` which indeed 111 | /// included SSE3. 112 | pub fn netburst() -> Decoder { 113 | Decoder::minimal().with_cmov().with_sse3() 114 | } 115 | 116 | /// `Core` was the successor to `Netburst`, launched in 2006. it included up to SSE4, with 117 | /// processors using this architecture shipped under the names "Merom", "Conroe", and 118 | /// "Woodcrest", for mobile, desktop, and server processors respectively. not to be confused 119 | /// with the later `Nehalem` microarchitecture that introduced the `Core i*` product lines, 120 | /// `Core 2 *` processors used the `Core` architecture. 121 | pub fn core() -> Decoder { 122 | netburst().with_ssse3().with_sse4() 123 | } 124 | 125 | /// `Penryn` was the successor to `Core`, launched in early 2008. it added SSE4.1, along with 126 | /// virtualization extensions. 127 | pub fn penryn() -> Decoder { 128 | core().with_sse4_1() 129 | } 130 | 131 | /// `Nehalem` was the successor to `Penryn`, launched in late 2008. not to be confused with the 132 | /// earlier `Core` microarchitecture, the `Core i*` products were based on `Nehalem` cores. 133 | /// `Nehalem` added SSE4.2 extensions, along with the `POPCNT` instruction. 134 | pub fn nehalem() -> Decoder { 135 | penryn().with_sse4_2().with_popcnt() 136 | } 137 | 138 | /// `Westmere` was the successor to `Nehalem`, launched in 2010. it added AES-NI and CLMUL 139 | /// extensions. 140 | pub fn westmere() -> Decoder { 141 | nehalem().with_aesni().with_pclmulqdq() 142 | } 143 | 144 | /// `Sandy Bridge` was the successor to `Westmere`, launched in 2011. it added AVX 145 | /// instructions. 146 | pub fn sandybridge() -> Decoder { 147 | westmere().with_avx() 148 | } 149 | 150 | /// `Ivy Bridge` was the successor to `Sandy Bridge`, launched in 2012. it added F16C 151 | /// extensions for 16-bit floating point conversion, and the RDRAND instruction. 152 | pub fn ivybridge() -> Decoder { 153 | sandybridge().with_f16c().with_rdrand() 154 | } 155 | 156 | /// `Haswell` was the successor to `Ivy Bridge`, launched in 2013. it added several instruction 157 | /// set extensions: AVX2, BMI1, BMI2, ABM, and FMA3. 158 | pub fn haswell() -> Decoder { 159 | ivybridge().with_bmi1().with_bmi2().with_abm().with_fma3().with_avx2() 160 | } 161 | 162 | /// `Haswell-EX` was a variant of `Haswell` launched in 2015 with functional TSX. these cores 163 | /// were shipped as `E7-48xx/E7-88xx v3` models of processors. 164 | pub fn haswell_ex() -> Decoder { 165 | haswell().with_tsx() 166 | } 167 | 168 | /// `Broadwell` was the successor to `Haswell`, launched in late 2014. it added ADX, RDSEED, 169 | /// and PREFETCHW, as well as broadly rolling out TSX. TSX is enabled on this decoder because 170 | /// some chips of this microarchitecture rolled out with TSX, and lack of TSX seems to be 171 | /// reported as an errata (for example, the `Broadwell-Y` line of parts). 172 | pub fn broadwell() -> Decoder { 173 | haswell_ex().with_adx().with_rdseed().with_prefetchw() 174 | } 175 | 176 | /// `Skylake` was the successor to `Broadwell`, launched in mid 2015. it added MPX and SGX 177 | /// extensions, as well as a mixed rollout of AVX512 in different subsets for different product 178 | /// lines. 179 | /// 180 | /// AVX512 is not enabled on this decoder by default because there doesn't seem to be a lowest 181 | /// common denominator: if you want a `Skylake` decoder with AVX512, something like the 182 | /// following: 183 | /// ``` 184 | /// x86_64::protected_mode::uarch::intel::skylake() 185 | /// .with_avx512_f() 186 | /// .with_avx512_dq(); 187 | /// ``` 188 | /// is likely your best option. 189 | pub fn skylake() -> Decoder { 190 | broadwell().with_mpx().with_sgx() 191 | } 192 | 193 | /// `Kaby Lake` was the successor to `Sky Lake`, launched in 2016. it adds no extensions to 194 | /// x86_64 implementaiton beyond `skylake`. 195 | pub fn kabylake() -> Decoder { 196 | skylake() 197 | } 198 | // ice lake is shipping so that should probably be included... 199 | } 200 | -------------------------------------------------------------------------------- /decoder-x86_64/src/safer_unchecked.rs: -------------------------------------------------------------------------------- 1 | use std::slice::SliceIndex; 2 | 3 | pub trait GetSaferUnchecked { 4 | unsafe fn get_kinda_unchecked(&self, index: I) -> &>::Output 5 | where 6 | I: SliceIndex<[T]>; 7 | } 8 | 9 | impl GetSaferUnchecked for [T] { 10 | #[inline(always)] 11 | unsafe fn get_kinda_unchecked(&self, index: I) -> &>::Output 12 | where 13 | I: SliceIndex<[T]>, 14 | { 15 | if cfg!(debug_assertions) { 16 | &self[index] 17 | } else { 18 | self.get_unchecked(index) 19 | } 20 | } 21 | } 22 | 23 | #[inline(always)] 24 | pub unsafe fn unreachable_kinda_unchecked() -> ! { 25 | if cfg!(debug_assertions) { 26 | panic!("UB: Unreachable unchecked was executed") 27 | } else { 28 | std::hint::unreachable_unchecked() 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /decoder/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "decoder" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | tokenizing = { path = "../tokenizing" } 8 | debugvault = { path = "../debugvault" } 9 | -------------------------------------------------------------------------------- /decoder/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Shared behaviour required between decoder crates. 2 | 3 | use std::fmt::Debug; 4 | use debugvault::Index; 5 | use tokenizing::{TokenStream, Token}; 6 | 7 | #[derive(Debug, PartialEq, Eq, Copy, Clone)] 8 | pub struct Error { 9 | /// What kind of error happened in decoding an instruction. 10 | pub kind: ErrorKind, 11 | 12 | /// How many bytes in the stream did the invalid instruction consume. 13 | size: u8, 14 | } 15 | 16 | impl Error { 17 | pub fn new(kind: ErrorKind, size: usize) -> Self { 18 | Self { 19 | kind, 20 | size: size as u8, 21 | } 22 | } 23 | 24 | pub fn size(&self) -> usize { 25 | self.size as usize 26 | } 27 | } 28 | 29 | #[derive(Debug, PartialEq, Eq, Copy, Clone)] 30 | pub enum ErrorKind { 31 | /// Opcode in instruction is impossible/unknown. 32 | InvalidOpcode, 33 | /// Operand in instruction is impossible/unknown. 34 | InvalidOperand, 35 | /// Prefix in instruction is impossible/unknown. 36 | InvalidPrefixes, 37 | /// Register in instruction is impossible/unknown. 38 | InvalidRegister, 39 | /// There weren't any bytes left in the stream to decode. 40 | ExhaustedInput, 41 | /// Impossibly long instruction (x86/64 specific). 42 | TooLong, 43 | /// Some unknown variation of errors happened. 44 | IncompleteDecoder, 45 | /// `decoder-arm` doesn't know how to decode this, but it may be a valid instruction. the 46 | /// instruction decoder is not complete, sorry. :( 47 | /// 48 | /// In practice this typically indicates some kinds of coprocessor instruction, or `ARMv7` SIMD 49 | /// instruction. 50 | Incomplete, 51 | /// the instruction includes reserved bits that were not set as required. 52 | Nonconforming, 53 | /// the input encodes an instruction that is explicitly undefined. 54 | Undefined, 55 | /// the input encodes an instruction with unpredictable behavior. 56 | Unpredictable, 57 | } 58 | 59 | pub trait ToTokens { 60 | fn tokenize(&self, stream: &mut TokenStream, symbols: &Index); 61 | } 62 | 63 | pub trait Decoded: ToTokens { 64 | fn width(&self) -> usize; 65 | fn tokens(&self, symbols: &Index) -> Vec { 66 | let mut stream = TokenStream::new(); 67 | self.tokenize(&mut stream, symbols); 68 | stream.inner 69 | } 70 | fn update_rel_addrs(&mut self, addr: usize, prev_inst: Option<&Self>); 71 | } 72 | 73 | pub trait Decodable { 74 | type Instruction: Decoded; 75 | 76 | fn decode(&self, reader: &mut Reader) -> Result; 77 | fn max_width(&self) -> usize; 78 | } 79 | 80 | pub struct Reader<'data> { 81 | start: *const u8, 82 | position: *const u8, 83 | end: *const u8, 84 | mark: *const u8, 85 | _marker: core::marker::PhantomData<&'data [u8]>, 86 | } 87 | 88 | impl<'data> Reader<'data> { 89 | pub fn new(data: &'data [u8]) -> Self { 90 | Self { 91 | start: data.as_ptr(), 92 | position: data.as_ptr(), 93 | end: unsafe { data.as_ptr().add(data.len()) }, 94 | mark: data.as_ptr(), 95 | _marker: core::marker::PhantomData, 96 | } 97 | } 98 | 99 | #[inline] 100 | pub fn as_ptr(&self) -> *const u8 { 101 | self.position 102 | } 103 | 104 | #[inline] 105 | #[allow(clippy::should_implement_trait)] 106 | pub fn next(&mut self) -> Option { 107 | let width = self.end as usize - self.position as usize; 108 | 109 | if width == 0 { 110 | return None; 111 | } 112 | 113 | unsafe { 114 | let byte = self.position.read(); 115 | self.position = self.position.add(1); 116 | Some(byte) 117 | } 118 | } 119 | 120 | /// read `buf`-many items from this reader in bulk. if `Reader` cannot read `buf`-many items, 121 | /// return [`ErrorKind::ExhaustedInput`]. 122 | #[inline] 123 | pub fn next_n(&mut self, buf: &mut [u8]) -> Option<()> { 124 | let width = self.end as usize - self.position as usize; 125 | 126 | if buf.len() > width { 127 | return None; 128 | } 129 | 130 | unsafe { 131 | core::ptr::copy_nonoverlapping(self.position, buf.as_mut_ptr(), buf.len()); 132 | 133 | self.position = self.position.add(buf.len()); 134 | Some(()) 135 | } 136 | } 137 | 138 | /// mark the current position as where to measure `offset` against. 139 | #[inline] 140 | pub fn mark(&mut self) { 141 | self.mark = self.position; 142 | } 143 | 144 | /// the difference, between the current [`Reader`] position and its last `mark`. 145 | /// when created, a [`Reader`]'s initial position is `mark`ed, so creating a [`Reader`] and 146 | /// immediately calling [`Reader::offset`] must return 0. 147 | #[inline] 148 | pub fn offset(&mut self) -> usize { 149 | self.position as usize - self.mark as usize 150 | } 151 | 152 | /// the difference, between the current [`Reader`] position and the initial offset 153 | /// when constructed. 154 | #[inline] 155 | pub fn total_offset(&mut self) -> usize { 156 | self.position as usize - self.start as usize 157 | } 158 | } 159 | 160 | const HEX_NUGGET: [u8; 16] = *b"0123456789abcdef"; 161 | 162 | #[inline] 163 | #[cold] 164 | fn cold() {} 165 | 166 | #[inline] 167 | fn likely(b: bool) -> bool { 168 | if !b { 169 | cold() 170 | } 171 | b 172 | } 173 | 174 | #[inline] 175 | fn unlikely(b: bool) -> bool { 176 | if b { 177 | cold() 178 | } 179 | b 180 | } 181 | 182 | /// Encode 64-bit signed integer with a leading '0x' and in lowercase. 183 | pub fn encode_hex(mut imm: i64) -> String { 184 | unsafe { 185 | let mut buffer = Vec::with_capacity(19); 186 | let slice = buffer.spare_capacity_mut(); 187 | let slice = std::mem::transmute::<_, &mut [u8]>(slice); 188 | let mut idx = 0; 189 | 190 | if imm.is_negative() { 191 | *slice.get_unchecked_mut(idx) = b'-'; 192 | idx += 1; 193 | imm = imm.wrapping_neg() 194 | } 195 | 196 | *slice.get_unchecked_mut(idx) = b'0'; 197 | idx += 1; 198 | *slice.get_unchecked_mut(idx) = b'x'; 199 | idx += 1; 200 | 201 | if unlikely(imm == 0) { 202 | *slice.get_unchecked_mut(idx) = b'0'; 203 | idx += 1; 204 | buffer.set_len(idx); 205 | return String::from_utf8_unchecked(buffer); 206 | } 207 | 208 | // imm is already checked to not be zero, therefore this can't fail 209 | let len = imm.checked_ilog(16).unwrap_unchecked() as usize + 1; 210 | let mut jdx = idx + len; 211 | 212 | while likely(jdx != idx) { 213 | let digit = imm & 0b1111; 214 | let chr = HEX_NUGGET[digit as usize]; 215 | 216 | imm >>= 4; 217 | jdx -= 1; 218 | 219 | *slice.get_unchecked_mut(jdx) = chr; 220 | } 221 | 222 | buffer.set_len(idx + len); 223 | String::from_utf8_unchecked(buffer) 224 | } 225 | } 226 | 227 | /// Encode 64-bit unsigned integer with a leading '0x' and in lowercase. 228 | pub fn encode_uhex(mut imm: u64) -> String { 229 | unsafe { 230 | let mut buffer = Vec::with_capacity(19); 231 | let slice = buffer.spare_capacity_mut(); 232 | let slice = std::mem::transmute::<_, &mut [u8]>(slice); 233 | let mut idx = 0; 234 | 235 | *slice.get_unchecked_mut(idx) = b'0'; 236 | idx += 1; 237 | *slice.get_unchecked_mut(idx) = b'x'; 238 | idx += 1; 239 | 240 | if unlikely(imm == 0) { 241 | *slice.get_unchecked_mut(idx) = b'0'; 242 | idx += 1; 243 | buffer.set_len(idx); 244 | return String::from_utf8_unchecked(buffer); 245 | } 246 | 247 | // imm is already checked to not be zero, therefore this can't fail 248 | let len = imm.checked_ilog(16).unwrap_unchecked() as usize + 1; 249 | let mut jdx = idx + len; 250 | 251 | while likely(jdx != idx) { 252 | let digit = imm & 0b1111; 253 | let chr = HEX_NUGGET[digit as usize]; 254 | 255 | imm >>= 4; 256 | jdx -= 1; 257 | 258 | *slice.get_unchecked_mut(jdx) = chr; 259 | } 260 | 261 | buffer.set_len(idx + len); 262 | String::from_utf8_unchecked(buffer) 263 | } 264 | } 265 | 266 | #[cfg(test)] 267 | mod tests { 268 | #[test] 269 | fn encode_hex() { 270 | assert_eq!(super::encode_hex(0x123123), "0x123123"); 271 | assert_eq!(super::encode_hex(-0x123123), "-0x123123"); 272 | assert_eq!(super::encode_hex(-0x48848), "-0x48848"); 273 | 274 | assert_eq!(super::encode_hex(0x0), "0x0"); 275 | assert_eq!(super::encode_hex(-0x800000000000000), "-0x800000000000000"); 276 | assert_eq!(super::encode_hex(0x7fffffffffffffff), "0x7fffffffffffffff"); 277 | } 278 | 279 | #[test] 280 | fn encode_uhex() { 281 | assert_eq!(super::encode_uhex(0x123123), "0x123123"); 282 | assert_eq!(super::encode_uhex(0x0), "0x0"); 283 | assert_eq!(super::encode_uhex(0x7fffffffffffffff), "0x7fffffffffffffff"); 284 | } 285 | } 286 | -------------------------------------------------------------------------------- /example_config.yaml: -------------------------------------------------------------------------------- 1 | # linux/windows: $HOME/.local/share/bite/config.yaml 2 | # macos: $HOME/Library/Application Support/bite/config.yaml 3 | 4 | colors: 5 | src: 6 | keyword: "#ff5900" 7 | tipe: "#faa51b" 8 | field: "#288cc7" 9 | function: "#02ed6e" 10 | operator: "#ffa500" 11 | string: "#02ed6e" 12 | variable: "#d46ccb" 13 | constant: "#9b51c2" 14 | highlight: "#ff6400" 15 | asm: 16 | section: "#3ebce6" 17 | opcode: "#ffffff" 18 | # component1::component2::component3 or just component for function names without mangling. 19 | # Also used for section type identifiers. 20 | component: "#f56281" 21 | register: "#f56281" 22 | #