├── .gitignore ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── Readme.md ├── examples ├── unwindinfodump.rs └── unwindinfolookup.rs ├── fixtures ├── arm64 │ ├── fp │ │ ├── libmozglue-fat.dylib │ │ ├── query-api │ │ └── query-api.__unwind_info │ └── nofp │ │ ├── rustup │ │ └── rustup.__unwind_info └── x86_64 │ ├── fp │ └── libmozglue.dylib │ └── nofp │ └── libmozglue.dylib └── src ├── error.rs ├── lib.rs ├── num_display.rs ├── opcodes ├── arm64.rs ├── bitfield.rs ├── mod.rs ├── permutation.rs ├── x86.rs └── x86_64.rs ├── raw ├── compressed_function.rs ├── consts.rs ├── format.rs ├── impls.rs ├── mod.rs └── unaligned.rs └── reader.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | .DS_Store 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "macho-unwind-info" 3 | version = "0.5.0" 4 | edition = "2021" 5 | authors = ["Markus Stange "] 6 | categories = ["development-tools::debugging"] 7 | description = "A parser for Apple's Compact Unwinding Format, which is used in the __unwind_info section of mach-O binaries." 
8 | keywords = ["unwinding", "exception", "apple", "object", "parser"] 9 | repository = "https://github.com/mstange/macho-unwind-info" 10 | license = "MIT/Apache-2.0" 11 | readme = "Readme.md" 12 | exclude = ["/.github", "/tests", "/fixtures"] 13 | 14 | [dependencies] 15 | thiserror = "2" 16 | zerocopy = "0.8" 17 | zerocopy-derive = "0.8" 18 | 19 | [dev-dependencies] 20 | object = "0.36" 21 | 22 | [[example]] 23 | name = "unwindinfodump" 24 | 25 | [[example]] 26 | name = "unwindinfolookup" 27 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Markus Stange 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | [![crates.io page](https://img.shields.io/crates/v/macho-unwind-info.svg)](https://crates.io/crates/macho-unwind-info) 2 | [![docs.rs page](https://docs.rs/macho-unwind-info/badge.svg)](https://docs.rs/macho-unwind-info/) 3 | 4 | # macho-unwind-info 5 | 6 | A zero-copy parser for the contents of the `__unwind_info` section of a 7 | mach-O binary. 8 | 9 | Quickly look up the unwinding opcode for an address. Then parse the opcode to find 10 | out how to recover the return address and the caller frame's register values. 11 | 12 | This crate is intended to be fast enough to be used in a sampling profiler. 13 | Re-parsing from scratch is cheap and can be done on every sample. 14 | 15 | For the full unwinding experience, both `__unwind_info` and `__eh_frame` may need 16 | to be consulted. The two sections are complementary: `__unwind_info` handles the 17 | easy cases, and refers to an `__eh_frame` FDE for the hard cases. Conversely, 18 | `__eh_frame` only includes FDEs for functions whose unwinding info cannot be 19 | represented in `__unwind_info`. 20 | 21 | On x86 and x86_64, `__unwind_info` can represent most functions regardless of 22 | whether they were compiled with framepointers or without. 23 | 24 | On arm64, compiling without framepointers is strongly discouraged, and 25 | `__unwind_info` can only represent functions which have framepointers or 26 | which don't need to restore any registers. 
As a result, if you have an arm64 27 | binary without framepointers (rare!), then the `__unwind_info` basically just 28 | acts as an index for `__eh_frame`, similarly to `.eh_frame_hdr` for ELF. 29 | 30 | In clang's default configuration for arm64, non-leaf functions have framepointers 31 | and leaf functions without stored registers on the stack don't have framepointers. 32 | For leaf functions, the return address is kept in the `lr` register for the entire 33 | duration of the function. And the unwind info lets you discern between these two 34 | types of functions ("frame-based" and "frameless"). 35 | 36 | ## Example 37 | 38 | ```rust 39 | use macho_unwind_info::UnwindInfo; 40 | use macho_unwind_info::opcodes::OpcodeX86_64; 41 | 42 | let unwind_info = UnwindInfo::parse(data)?; 43 | 44 | if let Some(function) = unwind_info.lookup(0x1234)? { 45 | println!("Found function entry covering the address 0x1234:"); 46 | let opcode = OpcodeX86_64::parse(function.opcode); 47 | println!("0x{:08x}..0x{:08x}: {}", function.start_address, function.end_address, opcode); 48 | } 49 | ``` 50 | 51 | ## Command-line usage 52 | 53 | This repository also contains two CLI executables. You can install them like so: 54 | 55 | ``` 56 | % cargo install --examples macho-unwind-info 57 | ``` 58 | 59 | ## Acknowledgements 60 | 61 | Thanks a ton to [**@Gankra**](https://github.com/Gankra/) for documenting this format at https://gankra.github.io/blah/compact-unwinding/. 62 | 63 | ## License 64 | 65 | Licensed under either of 66 | 67 | * Apache License, Version 2.0 ([`LICENSE-APACHE`](./LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 68 | * MIT license ([`LICENSE-MIT`](./LICENSE-MIT) or http://opensource.org/licenses/MIT) 69 | 70 | at your option. 
71 | 72 | Unless you explicitly state otherwise, any contribution intentionally submitted 73 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall be 74 | dual licensed as above, without any additional terms or conditions. 75 | -------------------------------------------------------------------------------- /examples/unwindinfodump.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Display, fs::File, io::Read}; 2 | 3 | use macho_unwind_info::opcodes::{OpcodeArm64, OpcodeX86, OpcodeX86_64}; 4 | use macho_unwind_info::UnwindInfo; 5 | use object::{Architecture, ObjectSection}; 6 | 7 | fn main() { 8 | let mut args = std::env::args_os().skip(1); 9 | if args.len() < 1 { 10 | eprintln!("Usage: {} <path>", std::env::args().next().unwrap()); 11 | std::process::exit(1); 12 | } 13 | let path = args.next().unwrap(); 14 | 15 | let mut data = Vec::new(); 16 | let mut file = File::open(path).unwrap(); 17 | file.read_to_end(&mut data).unwrap(); 18 | let data = &data[..]; 19 | 20 | let file = object::File::parse(data).expect("Could not parse object file"); 21 | use object::Object; 22 | let unwind_info_data_section = file 23 | .section_by_name_bytes(b"__unwind_info") 24 | .expect("Could not find __unwind_info section"); 25 | let data = unwind_info_data_section.data().unwrap(); 26 | let arch = file.architecture(); 27 | 28 | let info = UnwindInfo::parse(data).unwrap(); 29 | let address_range = info.address_range(); 30 | println!( 31 | "Unwind info for address range 0x{:08x}-0x{:08x}", 32 | address_range.start, address_range.end 33 | ); 34 | println!(); 35 | let mut function_iter = info.functions(); 36 | while let Some(function) = function_iter.next().unwrap() { 37 | print_entry(function.start_address, function.opcode, arch); 38 | } 39 | } 40 | 41 | fn print_entry(address: u32, opcode: u32, arch: Architecture) { 42 | match arch { 43 | Architecture::I386 => print_entry_impl(address, OpcodeX86::parse(opcode)), 44 |
Architecture::X86_64 => print_entry_impl(address, OpcodeX86_64::parse(opcode)), 45 | Architecture::Aarch64 => print_entry_impl(address, OpcodeArm64::parse(opcode)), 46 | _ => {} 47 | } 48 | } 49 | 50 | fn print_entry_impl(address: u32, opcode: impl Display) { 51 | println!("0x{:08x}: {}", address, opcode); 52 | } 53 | -------------------------------------------------------------------------------- /examples/unwindinfolookup.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Display, fs::File, io::Read}; 2 | 3 | use macho_unwind_info::opcodes::{OpcodeArm64, OpcodeX86, OpcodeX86_64}; 4 | use macho_unwind_info::UnwindInfo; 5 | use object::{Architecture, ObjectSection}; 6 | 7 | fn main() { 8 | let mut args = std::env::args().skip(1); 9 | if args.len() < 2 { /* both <path> and <pc> are required; print the usage text instead of panicking on a missing <pc> */ 10 | eprintln!("Usage: {} <path> <pc>", std::env::args().next().unwrap()); 11 | std::process::exit(1); 12 | } 13 | let path = args.next().unwrap(); 14 | let pc = args.next().unwrap(); 15 | let pc: u32 = if let Some(hexstr) = pc.strip_prefix("0x") { 16 | u32::from_str_radix(hexstr, 16).unwrap() 17 | } else { 18 | pc.parse().unwrap() 19 | }; 20 | 21 | let mut data = Vec::new(); 22 | let mut file = File::open(path).unwrap(); 23 | file.read_to_end(&mut data).unwrap(); 24 | let data = &data[..]; 25 | 26 | let file = object::File::parse(data).expect("Could not parse object file"); 27 | use object::Object; 28 | let unwind_info_data_section = file 29 | .section_by_name_bytes(b"__unwind_info") 30 | .expect("Could not find __unwind_info section"); 31 | let data = unwind_info_data_section.data().unwrap(); 32 | let arch = file.architecture(); 33 | 34 | let unwind_info = UnwindInfo::parse(data).unwrap(); 35 | let function = match unwind_info.lookup(pc) { 36 | Ok(Some(f)) => f, 37 | Ok(None) => { 38 | println!("No entry was found for address 0x{:x}", pc); 39 | std::process::exit(1); 40 | } 41 | Err(e) => { 42 | println!( 43 | "There was an error when looking up address 0x{:x}: {}", 44 |
pc, e 45 | ); 46 | std::process::exit(1); 47 | } 48 | }; 49 | print_entry(function.start_address, function.opcode, arch); 50 | } 51 | 52 | fn print_entry(address: u32, opcode: u32, arch: Architecture) { 53 | match arch { 54 | Architecture::I386 => print_entry_impl(address, OpcodeX86::parse(opcode)), 55 | Architecture::X86_64 => print_entry_impl(address, OpcodeX86_64::parse(opcode)), 56 | Architecture::Aarch64 => print_entry_impl(address, OpcodeArm64::parse(opcode)), 57 | _ => {} 58 | } 59 | } 60 | 61 | fn print_entry_impl(address: u32, opcode: impl Display) { 62 | println!( 63 | "Found entry with function address 0x{:08x} and opcode {}", 64 | address, opcode 65 | ); 66 | } 67 | -------------------------------------------------------------------------------- /fixtures/arm64/fp/libmozglue-fat.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstange/macho-unwind-info/1e8dddc8fee60b94657b1b6255ed65cc0331de8a/fixtures/arm64/fp/libmozglue-fat.dylib -------------------------------------------------------------------------------- /fixtures/arm64/fp/query-api: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstange/macho-unwind-info/1e8dddc8fee60b94657b1b6255ed65cc0331de8a/fixtures/arm64/fp/query-api -------------------------------------------------------------------------------- /fixtures/arm64/fp/query-api.__unwind_info: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstange/macho-unwind-info/1e8dddc8fee60b94657b1b6255ed65cc0331de8a/fixtures/arm64/fp/query-api.__unwind_info -------------------------------------------------------------------------------- /fixtures/arm64/nofp/rustup: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mstange/macho-unwind-info/1e8dddc8fee60b94657b1b6255ed65cc0331de8a/fixtures/arm64/nofp/rustup -------------------------------------------------------------------------------- /fixtures/arm64/nofp/rustup.__unwind_info: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstange/macho-unwind-info/1e8dddc8fee60b94657b1b6255ed65cc0331de8a/fixtures/arm64/nofp/rustup.__unwind_info -------------------------------------------------------------------------------- /fixtures/x86_64/fp/libmozglue.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstange/macho-unwind-info/1e8dddc8fee60b94657b1b6255ed65cc0331de8a/fixtures/x86_64/fp/libmozglue.dylib -------------------------------------------------------------------------------- /fixtures/x86_64/nofp/libmozglue.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstange/macho-unwind-info/1e8dddc8fee60b94657b1b6255ed65cc0331de8a/fixtures/x86_64/nofp/libmozglue.dylib -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | /// The error type used in this crate. 2 | #[derive(thiserror::Error, Debug, Clone, Copy, PartialEq, Eq)] 3 | pub enum Error { 4 | /// The data slice was not big enough to read the struct, or we 5 | /// were trying to follow an invalid offset to somewhere outside 6 | /// of the data bounds. 7 | #[error("Read error: {0}")] 8 | ReadError(#[from] ReadError), 9 | 10 | /// Each page has a first_address which is supposed to match the 11 | /// start address of its first function entry. 
If the two addresses 12 | /// don't match, then the lookup will fail for addresses which fall 13 | /// in the gap between the page start address and the page's first 14 | /// function's start address. 15 | #[error("The page entry's first_address didn't match the address of its first function")] 16 | InvalidPageEntryFirstAddress, 17 | 18 | /// The page kind was set to an unrecognized value. 19 | #[error("Invalid page kind")] 20 | InvalidPageKind, 21 | 22 | /// There is only supposed to be one sentinel page, at the very end 23 | /// of the pages list - its first_address gives the end address of 24 | /// the unwind info address range. If a sentinel page is encountered 25 | /// somewhere else, this error is thrown. 26 | #[error("Unexpected sentinel page")] 27 | UnexpectedSentinelPage, 28 | } 29 | 30 | /// This error indicates that the data slice was not large enough to 31 | /// read the respective item. 32 | #[derive(thiserror::Error, Debug, Clone, Copy, PartialEq, Eq)] 33 | pub enum ReadError { 34 | #[error("Could not read CompactUnwindInfoHeader")] 35 | Header, 36 | 37 | #[error("Could not read global opcodes")] 38 | GlobalOpcodes, 39 | 40 | #[error("Could not read pages")] 41 | Pages, 42 | 43 | #[error("Could not read RegularPage")] 44 | RegularPage, 45 | 46 | #[error("Could not read RegularPage functions")] 47 | RegularPageFunctions, 48 | 49 | #[error("Could not read CompressedPage")] 50 | CompressedPage, 51 | 52 | #[error("Could not read CompressedPage functions")] 53 | CompressedPageFunctions, 54 | 55 | #[error("Could not read local opcodes")] 56 | LocalOpcodes, 57 | 58 | #[error("Could not read page kind")] 59 | PageKind, 60 | } 61 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A zero-copy parser for the contents of the `__unwind_info` section of a 2 | //! mach-O binary. 3 | //! 4 | //! 
Quickly look up the unwinding opcode for an address. Then parse the opcode to find 5 | //! out how to recover the return address and the caller frame's register values. 6 | //! 7 | //! This crate is intended to be fast enough to be used in a sampling profiler. 8 | //! Re-parsing from scratch is cheap and can be done on every sample. 9 | //! 10 | //! For the full unwinding experience, both `__unwind_info` and `__eh_frame` may need 11 | //! to be consulted. The two sections are complementary: `__unwind_info` handles the 12 | //! easy cases, and refers to an `__eh_frame` FDE for the hard cases. Conversely, 13 | //! `__eh_frame` only includes FDEs for functions whose unwinding info cannot be 14 | //! represented in `__unwind_info`. 15 | //! 16 | //! On x86 and x86_64, `__unwind_info` can represent most functions regardless of 17 | //! whether they were compiled with framepointers or without. 18 | //! 19 | //! On arm64, compiling without framepointers is strongly discouraged, and 20 | //! `__unwind_info` can only represent functions which have framepointers or 21 | //! which don't need to restore any registers. As a result, if you have an arm64 22 | //! binary without framepointers (rare!), then the `__unwind_info` basically just 23 | //! acts as an index for `__eh_frame`, similarly to `.eh_frame_hdr` for ELF. 24 | //! 25 | //! In clang's default configuration for arm64, non-leaf functions have framepointers 26 | //! and leaf functions without stored registers on the stack don't have framepointers. 27 | //! For leaf functions, the return address is kept in the `lr` register for the entire 28 | //! duration of the function. And the unwind info lets you discern between these two 29 | //! types of functions ("frame-based" and "frameless"). 30 | //! 31 | //! # Example 32 | //! 33 | //! ```rust 34 | //! use macho_unwind_info::UnwindInfo; 35 | //! use macho_unwind_info::opcodes::OpcodeX86_64; 36 | //! 37 | //! 
# fn example(data: &[u8]) -> Result<(), macho_unwind_info::Error> { 38 | //! let unwind_info = UnwindInfo::parse(data)?; 39 | //! 40 | //! if let Some(function) = unwind_info.lookup(0x1234)? { 41 | //! println!("Found function entry covering the address 0x1234:"); 42 | //! let opcode = OpcodeX86_64::parse(function.opcode); 43 | //! println!("0x{:08x}..0x{:08x}: {}", function.start_address, function.end_address, opcode); 44 | //! } 45 | //! # Ok(()) 46 | //! # } 47 | //! ``` 48 | 49 | mod error; 50 | mod num_display; 51 | 52 | /// Provides architecture-specific opcode parsing. 53 | pub mod opcodes; 54 | /// Lower-level structs for interpreting the format data. Can be used if the convenience APIs are too limiting. 55 | pub mod raw; 56 | 57 | mod reader; 58 | 59 | pub use error::*; 60 | use raw::*; 61 | 62 | /// A parsed representation of the unwind info. 63 | /// 64 | /// The UnwindInfo contains a list of pages, each of which contain a list of 65 | /// function entries. 66 | pub struct UnwindInfo<'a> { 67 | /// The full __unwind_info section data. 68 | data: &'a [u8], 69 | 70 | /// The list of global opcodes. 71 | global_opcodes: &'a [Opcode], 72 | 73 | /// The list of page entries in this UnwindInfo. 74 | pages: &'a [PageEntry], 75 | } 76 | 77 | /// The information about a single function in the UnwindInfo. 78 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] 79 | pub struct Function { 80 | /// The address where this function starts. 81 | pub start_address: u32, 82 | 83 | /// The address where this function ends. Includes the padding at the end of 84 | /// the function. In reality, this is the address of the *next* function 85 | /// entry, or for the last function this is the address of the sentinel page 86 | /// entry. 87 | pub end_address: u32, 88 | 89 | /// The opcode which describes the unwinding information for this function. 90 | /// This opcode needs to be parsed in an architecture-specific manner. 91 | /// See the [opcodes] module for the facilities to do so. 
92 | pub opcode: u32, 93 | } 94 | 95 | impl<'a> UnwindInfo<'a> { 96 | /// Create an [UnwindInfo] instance which wraps the raw bytes of a mach-O binary's 97 | /// `__unwind_info` section. The data can have arbitrary alignment. The parsing done 98 | /// in this function is minimal; it's basically just three bounds checks. 99 | pub fn parse(data: &'a [u8]) -> Result<Self, Error> { 100 | let header = CompactUnwindInfoHeader::parse(data)?; 101 | let global_opcodes = header.global_opcodes(data)?; 102 | let pages = header.pages(data)?; 103 | Ok(Self { 104 | data, 105 | global_opcodes, 106 | pages, 107 | }) 108 | } 109 | 110 | /// Returns an iterator over all the functions in this UnwindInfo. 111 | pub fn functions(&self) -> FunctionIter<'a> { 112 | FunctionIter { 113 | data: self.data, 114 | global_opcodes: self.global_opcodes, 115 | pages: self.pages, 116 | cur_page: None, 117 | } 118 | } 119 | 120 | /// Returns the range of addresses covered by unwind information. 121 | pub fn address_range(&self) -> core::ops::Range<u32> { 122 | if self.pages.is_empty() { 123 | return 0..0; 124 | } 125 | let first_page = self.pages.first().unwrap(); 126 | let last_page = self.pages.last().unwrap(); 127 | first_page.first_address()..last_page.first_address() 128 | } 129 | 130 | /// Looks up the unwind information for the function that covers the given address. 131 | /// Returns `Ok(Some(function))` if a function was found. 132 | /// Returns `Ok(None)` if the address was outside of the range of addresses covered 133 | /// by the unwind info. 134 | /// Returns `Err(error)` if there was a problem with the format of the `__unwind_info` 135 | /// data. 136 | /// 137 | /// This lookup is architecture agnostic. The opcode is returned as a u32. 138 | /// To actually perform unwinding, the opcode needs to be parsed in an 139 | /// architecture-specific manner. 140 | /// 141 | /// The design of the compact unwinding format makes this lookup extremely cheap.
142 | /// It's just two binary searches: First to find the right page, end then to find 143 | /// the right function within a page. The search happens inside the wrapped data, 144 | /// with no extra copies. 145 | pub fn lookup(&self, pc: u32) -> Result, Error> { 146 | let Self { 147 | pages, 148 | data, 149 | global_opcodes, 150 | } = self; 151 | let page_index = match pages.binary_search_by_key(&pc, PageEntry::first_address) { 152 | Ok(i) => i, 153 | Err(insertion_index) => { 154 | if insertion_index == 0 { 155 | return Ok(None); 156 | } 157 | insertion_index - 1 158 | } 159 | }; 160 | if page_index == pages.len() - 1 { 161 | // We found the sentinel last page, which just marks the end of the range. 162 | // So the looked up address is at or after the end address, i.e. outside the 163 | // range of addresses covered by this UnwindInfo. 164 | return Ok(None); 165 | } 166 | let page_entry = &pages[page_index]; 167 | let next_page_entry = &pages[page_index + 1]; 168 | let page_offset = page_entry.page_offset(); 169 | match page_entry.page_kind(data)? 
{ 170 | consts::PAGE_KIND_REGULAR => { 171 | let page = RegularPage::parse(data, page_offset.into())?; 172 | let functions = page.functions(data, page_offset)?; 173 | let function_index = 174 | match functions.binary_search_by_key(&pc, RegularFunctionEntry::address) { 175 | Ok(i) => i, 176 | Err(insertion_index) => { 177 | if insertion_index == 0 { 178 | return Err(Error::InvalidPageEntryFirstAddress); 179 | } 180 | insertion_index - 1 181 | } 182 | }; 183 | let entry = &functions[function_index]; 184 | let fun_address = entry.address(); 185 | let next_fun_address = if let Some(next_entry) = functions.get(function_index + 1) { 186 | next_entry.address() 187 | } else { 188 | next_page_entry.first_address() 189 | }; 190 | Ok(Some(Function { 191 | start_address: fun_address, 192 | end_address: next_fun_address, 193 | opcode: entry.opcode(), 194 | })) 195 | } 196 | consts::PAGE_KIND_COMPRESSED => { 197 | let page = CompressedPage::parse(data, page_offset.into())?; 198 | let functions = page.functions(data, page_offset)?; 199 | let page_address = page_entry.first_address(); 200 | let rel_pc = pc - page_address; 201 | let function_index = match functions.binary_search_by_key(&rel_pc, |&entry| { 202 | CompressedFunctionEntry::new(entry.into()).relative_address() 203 | }) { 204 | Ok(i) => i, 205 | Err(insertion_index) => { 206 | if insertion_index == 0 { 207 | return Err(Error::InvalidPageEntryFirstAddress); 208 | } 209 | insertion_index - 1 210 | } 211 | }; 212 | 213 | let entry = CompressedFunctionEntry::new(functions[function_index].into()); 214 | let fun_address = page_address + entry.relative_address(); 215 | let next_fun_address = if let Some(next_entry) = functions.get(function_index + 1) { 216 | let next_entry = CompressedFunctionEntry::new((*next_entry).into()); 217 | page_address + next_entry.relative_address() 218 | } else { 219 | next_page_entry.first_address() 220 | }; 221 | 222 | let opcode_index: usize = entry.opcode_index().into(); 223 | let opcode = if 
opcode_index < global_opcodes.len() { 224 | global_opcodes[opcode_index].opcode() 225 | } else { 226 | let local_opcodes = page.local_opcodes(data, page_offset)?; 227 | let local_index = opcode_index - global_opcodes.len(); 228 | local_opcodes[local_index].opcode() 229 | }; 230 | Ok(Some(Function { 231 | start_address: fun_address, 232 | end_address: next_fun_address, 233 | opcode, 234 | })) 235 | } 236 | consts::PAGE_KIND_SENTINEL => { 237 | // Only the last page should be a sentinel page, and we've already checked earlier 238 | // that we're not in the last page. 239 | Err(Error::UnexpectedSentinelPage) 240 | } 241 | _ => Err(Error::InvalidPageKind), 242 | } 243 | } 244 | } 245 | 246 | /// An iterator over the functions in an UnwindInfo page. 247 | pub struct FunctionIter<'a> { 248 | /// The full __unwind_info section data. 249 | data: &'a [u8], 250 | 251 | /// The list of global opcodes. 252 | global_opcodes: &'a [Opcode], 253 | 254 | /// The slice of the remaining to-be-iterated-over pages. 255 | pages: &'a [PageEntry], 256 | 257 | /// The page whose functions we're iterating over at the moment. 258 | cur_page: Option>, 259 | } 260 | 261 | /// The current page of the function iterator. 262 | /// The functions field is the slice of the remaining to-be-iterated-over functions. 263 | #[derive(Clone, Copy)] 264 | enum PageWithPartialFunctions<'a> { 265 | Regular { 266 | next_page_address: u32, 267 | functions: &'a [RegularFunctionEntry], 268 | }, 269 | Compressed { 270 | page_address: u32, 271 | next_page_address: u32, 272 | local_opcodes: &'a [Opcode], 273 | functions: &'a [U32], 274 | }, 275 | } 276 | 277 | impl<'a> FunctionIter<'a> { 278 | #[allow(clippy::should_implement_trait)] 279 | pub fn next(&mut self) -> Result, Error> { 280 | loop { 281 | let cur_page = if let Some(cur_page) = self.cur_page.as_mut() { 282 | cur_page 283 | } else { 284 | let cur_page = match self.next_page()? 
{ 285 | Some(page) => page, 286 | None => return Ok(None), 287 | }; 288 | self.cur_page.insert(cur_page) 289 | }; 290 | 291 | match cur_page { 292 | PageWithPartialFunctions::Regular { 293 | next_page_address, 294 | functions, 295 | } => { 296 | if let Some((entry, remainder)) = functions.split_first() { 297 | *functions = remainder; 298 | let start_address = entry.address(); 299 | let end_address = remainder 300 | .first() 301 | .map(RegularFunctionEntry::address) 302 | .unwrap_or(*next_page_address); 303 | return Ok(Some(Function { 304 | start_address, 305 | end_address, 306 | opcode: entry.opcode(), 307 | })); 308 | } 309 | } 310 | PageWithPartialFunctions::Compressed { 311 | page_address, 312 | functions, 313 | next_page_address, 314 | local_opcodes, 315 | } => { 316 | if let Some((entry, remainder)) = functions.split_first() { 317 | *functions = remainder; 318 | let entry = CompressedFunctionEntry::new((*entry).into()); 319 | let start_address = *page_address + entry.relative_address(); 320 | let end_address = match remainder.first() { 321 | Some(next_entry) => { 322 | let next_entry = CompressedFunctionEntry::new((*next_entry).into()); 323 | *page_address + next_entry.relative_address() 324 | } 325 | None => *next_page_address, 326 | }; 327 | let opcode_index: usize = entry.opcode_index().into(); 328 | let opcode = if opcode_index < self.global_opcodes.len() { 329 | self.global_opcodes[opcode_index].opcode() 330 | } else { 331 | let local_index = opcode_index - self.global_opcodes.len(); 332 | local_opcodes[local_index].opcode() 333 | }; 334 | return Ok(Some(Function { 335 | start_address, 336 | end_address, 337 | opcode, 338 | })); 339 | } 340 | } 341 | } 342 | self.cur_page = None; 343 | } 344 | } 345 | 346 | fn next_page(&mut self) -> Result>, Error> { 347 | let (page_entry, remainder) = match self.pages.split_first() { 348 | Some(split) => split, 349 | None => return Ok(None), 350 | }; 351 | 352 | self.pages = remainder; 353 | 354 | let next_page_entry = 
/// Wrapper whose `Debug` output is the wrapped value formatted as lowercase
/// hexadecimal. Formatter flags (such as `#`) are passed through unchanged.
pub struct HexNum<N>(pub N);

impl<N: LowerHex> Debug for HexNum<N> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        LowerHex::fmt(&self.0, f)
    }
}

/// Wrapper whose `Debug` output is the wrapped value formatted as binary.
/// Formatter flags (such as `#`) are passed through unchanged.
pub struct BinNum<N>(pub N);

impl<N: Binary> Debug for BinNum<N> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        Binary::fmt(&self.0, f)
    }
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)] 7 | pub enum OpcodeArm64 { 8 | Null, 9 | Frameless { 10 | stack_size_in_bytes: u16, 11 | }, 12 | Dwarf { 13 | eh_frame_fde: u32, 14 | }, 15 | FrameBased { 16 | saved_reg_pair_count: u8, 17 | 18 | // Whether each register pair was pushed 19 | d14_and_d15_saved: bool, 20 | d12_and_d13_saved: bool, 21 | d10_and_d11_saved: bool, 22 | d8_and_d9_saved: bool, 23 | 24 | x27_and_x28_saved: bool, 25 | x25_and_x26_saved: bool, 26 | x23_and_x24_saved: bool, 27 | x21_and_x22_saved: bool, 28 | x19_and_x20_saved: bool, 29 | }, 30 | UnrecognizedKind(u8), 31 | } 32 | 33 | impl OpcodeArm64 { 34 | pub fn parse(opcode: u32) -> Self { 35 | match OpcodeBitfield::new(opcode).kind() { 36 | OPCODE_KIND_NULL => OpcodeArm64::Null, 37 | OPCODE_KIND_ARM64_FRAMELESS => OpcodeArm64::Frameless { 38 | stack_size_in_bytes: (((opcode >> 12) & 0b1111_1111_1111) as u16) * 16, 39 | }, 40 | OPCODE_KIND_ARM64_DWARF => OpcodeArm64::Dwarf { 41 | eh_frame_fde: (opcode & 0xffffff), 42 | }, 43 | OPCODE_KIND_ARM64_FRAMEBASED => { 44 | let saved_reg_pair_count = (opcode & 0b1_1111_1111).count_ones() as u8; 45 | OpcodeArm64::FrameBased { 46 | saved_reg_pair_count, 47 | d14_and_d15_saved: ((opcode >> 8) & 1) == 1, 48 | d12_and_d13_saved: ((opcode >> 7) & 1) == 1, 49 | d10_and_d11_saved: ((opcode >> 6) & 1) == 1, 50 | d8_and_d9_saved: ((opcode >> 5) & 1) == 1, 51 | x27_and_x28_saved: ((opcode >> 4) & 1) == 1, 52 | x25_and_x26_saved: ((opcode >> 3) & 1) == 1, 53 | x23_and_x24_saved: ((opcode >> 2) & 1) == 1, 54 | x21_and_x22_saved: ((opcode >> 1) & 1) == 1, 55 | x19_and_x20_saved: (opcode & 1) == 1, 56 | } 57 | } 58 | kind => OpcodeArm64::UnrecognizedKind(kind), 59 | } 60 | } 61 | } 62 | 63 | impl Display for OpcodeArm64 { 64 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 65 | match self { 66 | OpcodeArm64::Null => { 67 | write!(f, "(uncovered)")?; 68 | } 69 | OpcodeArm64::Frameless { 70 | stack_size_in_bytes, 71 | } => { 72 | if 
*stack_size_in_bytes == 0 { 73 | write!(f, "CFA=reg31")?; 74 | } else { 75 | write!(f, "CFA=reg31+{}", stack_size_in_bytes)?; 76 | } 77 | } 78 | OpcodeArm64::Dwarf { eh_frame_fde } => { 79 | write!(f, "(check eh_frame FDE 0x{:x})", eh_frame_fde)?; 80 | } 81 | OpcodeArm64::FrameBased { 82 | d14_and_d15_saved, 83 | d12_and_d13_saved, 84 | d10_and_d11_saved, 85 | d8_and_d9_saved, 86 | x27_and_x28_saved, 87 | x25_and_x26_saved, 88 | x23_and_x24_saved, 89 | x21_and_x22_saved, 90 | x19_and_x20_saved, 91 | .. 92 | } => { 93 | write!(f, "CFA=reg29+16: reg29=[CFA-16], reg30=[CFA-8]")?; 94 | let mut offset = 32; 95 | let mut next_pair = |pair_saved, a, b| { 96 | if pair_saved { 97 | let r = write!(f, ", {}=[CFA-{}], {}=[CFA-{}]", a, offset, b, offset + 8); 98 | offset += 16; 99 | r 100 | } else { 101 | Ok(()) 102 | } 103 | }; 104 | next_pair(*d14_and_d15_saved, "reg14", "reg15")?; 105 | next_pair(*d12_and_d13_saved, "reg12", "reg13")?; 106 | next_pair(*d10_and_d11_saved, "reg10", "reg11")?; 107 | next_pair(*d8_and_d9_saved, "reg8", "reg9")?; 108 | next_pair(*x27_and_x28_saved, "reg27", "reg28")?; 109 | next_pair(*x25_and_x26_saved, "reg25", "reg26")?; 110 | next_pair(*x23_and_x24_saved, "reg23", "reg24")?; 111 | next_pair(*x21_and_x22_saved, "reg21", "reg22")?; 112 | next_pair(*x19_and_x20_saved, "reg19", "reg20")?; 113 | } 114 | OpcodeArm64::UnrecognizedKind(kind) => { 115 | write!(f, "!! Unrecognized kind {}", kind)?; 116 | } 117 | } 118 | Ok(()) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/opcodes/bitfield.rs: -------------------------------------------------------------------------------- 1 | use crate::num_display::BinNum; 2 | use std::fmt::Debug; 3 | 4 | pub struct OpcodeBitfield(pub u32); 5 | 6 | impl OpcodeBitfield { 7 | pub fn new(value: u32) -> Self { 8 | Self(value) 9 | } 10 | 11 | /// Whether this instruction is the start of a function. 
/// Unpacks up to six register numbers from a packed permutation value.
///
/// `encoding` is a mixed-radix number. With `count` registers, the digit for
/// position 0 is the most significant one, and position `p` (for `p >= 1`) has
/// radix `6 - p`; positions at or beyond `count` are not encoded. For example
/// with three registers `(a, b, c)` the value is `c + 4 * (b + 5 * a)`.
///
/// Each digit is an index into the list of registers `1..=6` that have not yet
/// been picked by an earlier position, in ascending order.
///
/// Returns the chosen register numbers (`1..=6`) in the first `count` slots;
/// the remaining slots are `0`.
///
/// # Errors
///
/// Returns `Err(())` if `count` is greater than 6, or if the leading digit is
/// 6 or more (the encoding is too large for the given `count`).
pub fn decode_permutation_6(count: u32, mut encoding: u32) -> std::result::Result<[u8; 6], ()> {
    if count > 6 {
        return Err(());
    }
    let count = count as usize;

    // Peel off the digits from least significant to most significant.
    // Position 5 never carries information (only one register is left), so the
    // lowest encoded position is 4.
    let mut digits = [0u32; 6];
    for position in (1..count.min(5)).rev() {
        let radix = 6 - position as u32;
        digits[position] = encoding % radix;
        encoding /= radix;
    }
    if count > 0 {
        digits[0] = encoding;
    }

    // Whatever is left over is the leading digit; it must select one of six registers.
    if digits[0] >= 6 {
        return Err(());
    }

    // `pool[..pool_len]` holds the registers that have not been picked yet,
    // in ascending order.
    let mut pool = [1u8, 2, 3, 4, 5, 6];
    let mut pool_len = pool.len();
    let mut registers = [0u8; 6];
    for (slot, &digit) in registers.iter_mut().zip(digits.iter()).take(count) {
        let digit = digit as usize;
        debug_assert!(digit < pool_len);
        *slot = pool[digit];
        // Close the gap left by the register we just took.
        pool.copy_within(digit + 1..pool_len, digit);
        pool_len -= 1;
    }
    Ok(registers)
}
=> None, 27 | } 28 | } 29 | 30 | pub fn dwarf_name(&self) -> &'static str { 31 | match self { 32 | RegisterNameX86::Ebx => "reg3", 33 | RegisterNameX86::Ecx => "reg1", 34 | RegisterNameX86::Edx => "reg2", 35 | RegisterNameX86::Edi => "reg7", 36 | RegisterNameX86::Esi => "reg6", 37 | RegisterNameX86::Ebp => "reg5", 38 | } 39 | } 40 | } 41 | 42 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] 43 | pub enum OpcodeX86 { 44 | Null, 45 | FrameBased { 46 | stack_offset_in_bytes: u16, 47 | saved_regs: [Option; 5], 48 | }, 49 | FramelessImmediate { 50 | stack_size_in_bytes: u16, 51 | saved_regs: [Option; 6], 52 | }, 53 | FramelessIndirect { 54 | /// Offset from the start of the function into the middle of a `sub` 55 | /// instruction, pointing right at the instruction's "immediate" which 56 | /// is a u32 value with the offset we need. (NOTE: not divided by anything!) 57 | immediate_offset_from_function_start: u8, 58 | 59 | /// An offset to add to the loaded stack size. 60 | /// This allows the stack size to differ slightly from the `sub`, to 61 | /// compensate for any function prologue that pushes a bunch of 62 | /// pointer-sized registers. This adjust value includes the return 63 | /// address on the stack. For example, if the function begins with six push 64 | /// instructions, followed by a sub instruction, then stack_adjust_in_bytes 65 | /// is 28: 4 bytes for the return address + 6 * 4 for each pushed register. 66 | stack_adjust_in_bytes: u8, 67 | 68 | /// The registers, in the order that they need to be popped in when 69 | /// returning / unwinding from this function. (Reverse order from 70 | /// function prologue!) 71 | /// Can have leading `None`s. 
72 | saved_regs: [Option; 6], 73 | }, 74 | Dwarf { 75 | eh_frame_fde: u32, 76 | }, 77 | InvalidFrameless, 78 | UnrecognizedKind(u8), 79 | } 80 | 81 | impl OpcodeX86 { 82 | pub fn parse(opcode: u32) -> Self { 83 | match OpcodeBitfield::new(opcode).kind() { 84 | OPCODE_KIND_NULL => OpcodeX86::Null, 85 | OPCODE_KIND_X86_FRAMEBASED => OpcodeX86::FrameBased { 86 | stack_offset_in_bytes: (((opcode >> 16) & 0xff) as u16) * 4, 87 | saved_regs: [ 88 | RegisterNameX86::parse(((opcode >> 12) & 0b111) as u8), 89 | RegisterNameX86::parse(((opcode >> 9) & 0b111) as u8), 90 | RegisterNameX86::parse(((opcode >> 6) & 0b111) as u8), 91 | RegisterNameX86::parse(((opcode >> 3) & 0b111) as u8), 92 | RegisterNameX86::parse((opcode & 0b111) as u8), 93 | ], 94 | }, 95 | OPCODE_KIND_X86_FRAMELESS_IMMEDIATE => { 96 | let stack_size_in_bytes = (((opcode >> 16) & 0xff) as u16) * 4; 97 | let register_count = (opcode >> 10) & 0b111; 98 | let register_permutation = opcode & 0b11_1111_1111; 99 | let saved_registers = 100 | match decode_permutation_6(register_count, register_permutation) { 101 | Ok(regs) => regs, 102 | Err(_) => return OpcodeX86::InvalidFrameless, 103 | }; 104 | OpcodeX86::FramelessImmediate { 105 | stack_size_in_bytes, 106 | saved_regs: [ 107 | RegisterNameX86::parse(saved_registers[0]), 108 | RegisterNameX86::parse(saved_registers[1]), 109 | RegisterNameX86::parse(saved_registers[2]), 110 | RegisterNameX86::parse(saved_registers[3]), 111 | RegisterNameX86::parse(saved_registers[4]), 112 | RegisterNameX86::parse(saved_registers[5]), 113 | ], 114 | } 115 | } 116 | OPCODE_KIND_X86_FRAMELESS_INDIRECT => { 117 | let immediate_offset_from_function_start = (opcode >> 16) as u8; 118 | let stack_adjust_in_bytes = ((opcode >> 13) & 0b111) as u8 * 4; 119 | let register_count = (opcode >> 10) & 0b111; 120 | let register_permutation = opcode & 0b11_1111_1111; 121 | let saved_registers = 122 | match decode_permutation_6(register_count, register_permutation) { 123 | Ok(regs) => regs, 124 | 
Err(_) => return OpcodeX86::InvalidFrameless, 125 | }; 126 | OpcodeX86::FramelessIndirect { 127 | immediate_offset_from_function_start, 128 | stack_adjust_in_bytes, 129 | saved_regs: [ 130 | RegisterNameX86::parse(saved_registers[0]), 131 | RegisterNameX86::parse(saved_registers[1]), 132 | RegisterNameX86::parse(saved_registers[2]), 133 | RegisterNameX86::parse(saved_registers[3]), 134 | RegisterNameX86::parse(saved_registers[4]), 135 | RegisterNameX86::parse(saved_registers[5]), 136 | ], 137 | } 138 | } 139 | OPCODE_KIND_X86_DWARF => OpcodeX86::Dwarf { 140 | eh_frame_fde: (opcode & 0xffffff), 141 | }, 142 | kind => OpcodeX86::UnrecognizedKind(kind), 143 | } 144 | } 145 | } 146 | 147 | impl Display for OpcodeX86 { 148 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 149 | match self { 150 | OpcodeX86::Null => { 151 | write!(f, "(uncovered)")?; 152 | } 153 | OpcodeX86::FrameBased { 154 | stack_offset_in_bytes, 155 | saved_regs, 156 | } => { 157 | // ebp was set to esp before the saved registers were pushed. 158 | // The first pushed register is at ebp - 4 (== CFA - 12), the last at ebp - stack_offset_in_bytes. 
159 | write!(f, "CFA=reg6+8: reg6=[CFA-8], reg16=[CFA-4]")?; 160 | let max_count = (*stack_offset_in_bytes / 4) as usize; 161 | let mut offset = *stack_offset_in_bytes + 8; // + 2 for rbp, return address 162 | for reg in saved_regs.iter().rev().take(max_count) { 163 | if let Some(reg) = reg { 164 | write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; 165 | } 166 | offset -= 4; 167 | } 168 | } 169 | OpcodeX86::FramelessImmediate { 170 | stack_size_in_bytes, 171 | saved_regs, 172 | } => { 173 | if *stack_size_in_bytes == 0 { 174 | write!(f, "CFA=reg7:",)?; 175 | } else { 176 | write!(f, "CFA=reg7+{}:", *stack_size_in_bytes)?; 177 | } 178 | write!(f, " reg16=[CFA-4]")?; 179 | let mut offset = 2 * 4; 180 | for reg in saved_regs.iter().rev().flatten() { 181 | write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; 182 | offset += 4; 183 | } 184 | } 185 | OpcodeX86::FramelessIndirect { 186 | immediate_offset_from_function_start, 187 | stack_adjust_in_bytes, 188 | saved_regs, 189 | } => { 190 | write!( 191 | f, 192 | "CFA=[function_start+{}]+{}", 193 | immediate_offset_from_function_start, stack_adjust_in_bytes 194 | )?; 195 | write!(f, " reg16=[CFA-4]")?; 196 | let mut offset = 2 * 4; 197 | for reg in saved_regs.iter().rev().flatten() { 198 | write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; 199 | offset += 4; 200 | } 201 | } 202 | OpcodeX86::Dwarf { eh_frame_fde } => { 203 | write!(f, "(check eh_frame FDE 0x{:x})", eh_frame_fde)?; 204 | } 205 | OpcodeX86::InvalidFrameless => { 206 | write!( 207 | f, 208 | "!! frameless immediate or indirect with invalid permutation encoding" 209 | )?; 210 | } 211 | OpcodeX86::UnrecognizedKind(kind) => { 212 | write!(f, "!! 
Unrecognized kind {}", kind)?; 213 | } 214 | } 215 | Ok(()) 216 | } 217 | } 218 | 219 | #[cfg(test)] 220 | mod test { 221 | use super::*; 222 | 223 | #[test] 224 | fn test_frameless_indirect() { 225 | use RegisterNameX86::*; 226 | assert_eq!( 227 | OpcodeX86::parse(0x30df800), 228 | OpcodeX86::FramelessIndirect { 229 | immediate_offset_from_function_start: 13, 230 | stack_adjust_in_bytes: 28, 231 | saved_regs: [ 232 | Some(Ebx), 233 | Some(Ecx), 234 | Some(Edx), 235 | Some(Edi), 236 | Some(Esi), 237 | Some(Ebp) 238 | ] 239 | } 240 | ) 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /src/opcodes/x86_64.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | use super::bitfield::OpcodeBitfield; 4 | use super::permutation::decode_permutation_6; 5 | use crate::consts::*; 6 | 7 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] 8 | pub enum RegisterNameX86_64 { 9 | Rbx, 10 | R12, 11 | R13, 12 | R14, 13 | R15, 14 | Rbp, 15 | } 16 | 17 | impl RegisterNameX86_64 { 18 | pub fn parse(n: u8) -> Option { 19 | match n { 20 | 1 => Some(RegisterNameX86_64::Rbx), 21 | 2 => Some(RegisterNameX86_64::R12), 22 | 3 => Some(RegisterNameX86_64::R13), 23 | 4 => Some(RegisterNameX86_64::R14), 24 | 5 => Some(RegisterNameX86_64::R15), 25 | 6 => Some(RegisterNameX86_64::Rbp), 26 | _ => None, 27 | } 28 | } 29 | 30 | pub fn dwarf_name(&self) -> &'static str { 31 | match self { 32 | RegisterNameX86_64::Rbx => "reg3", 33 | RegisterNameX86_64::R12 => "reg12", 34 | RegisterNameX86_64::R13 => "reg13", 35 | RegisterNameX86_64::R14 => "reg14", 36 | RegisterNameX86_64::R15 => "reg15", 37 | RegisterNameX86_64::Rbp => "reg6", 38 | } 39 | } 40 | } 41 | 42 | #[derive(Clone, Debug, PartialEq, Eq)] 43 | pub enum OpcodeX86_64 { 44 | Null, 45 | FrameBased { 46 | stack_offset_in_bytes: u16, 47 | saved_regs: [Option; 5], 48 | }, 49 | FramelessImmediate { 50 | stack_size_in_bytes: u16, 51 | 
saved_regs: [Option; 6], 52 | }, 53 | FramelessIndirect { 54 | /// Offset from the start of the function into the middle of a `sub` 55 | /// instruction, pointing right at the instruction's "immediate" which 56 | /// is a u32 value with the offset we need. (NOTE: not divided by anything!) 57 | /// Example: 58 | /// - function_start is 0x1c20 59 | /// - immediate_offset_from_function_start is 13 (= 0xd), 60 | /// - there's sub instruction at 0x1c2a: sub rsp, 0xc28. 61 | /// 62 | /// This instruction is encoded as 48 81 EC 28 0C 00 00, with the 28 63 | /// byte at 0x1c2d (= 0x1c20 + 13). The immediate is 28 0C 00 00, 64 | /// interpreted as a little-endian u32: 0xc28. 65 | immediate_offset_from_function_start: u8, 66 | 67 | /// An offset to add to the loaded stack size. 68 | /// This allows the stack size to differ slightly from the `sub`, to 69 | /// compensate for any function prologue that pushes a bunch of 70 | /// pointer-sized registers. This adjust value includes the return 71 | /// address on the stack. For example, if the function begins with six push 72 | /// instructions, followed by a sub instruction, then stack_adjust_in_bytes 73 | /// is 56: 8 bytes for the return address + 6 * 8 for each pushed register. 74 | stack_adjust_in_bytes: u8, 75 | 76 | /// The registers, in the order that they need to be popped in when 77 | /// returning / unwinding from this function. (Reverse order from 78 | /// function prologue!) 79 | /// Can have leading `None`s. 
80 | saved_regs: [Option; 6], 81 | }, 82 | Dwarf { 83 | eh_frame_fde: u32, 84 | }, 85 | InvalidFrameless, 86 | UnrecognizedKind(u8), 87 | } 88 | 89 | impl OpcodeX86_64 { 90 | pub fn parse(opcode: u32) -> Self { 91 | match OpcodeBitfield::new(opcode).kind() { 92 | OPCODE_KIND_NULL => OpcodeX86_64::Null, 93 | OPCODE_KIND_X86_FRAMEBASED => OpcodeX86_64::FrameBased { 94 | stack_offset_in_bytes: (((opcode >> 16) & 0xff) as u16) * 8, 95 | saved_regs: [ 96 | RegisterNameX86_64::parse(((opcode >> 12) & 0b111) as u8), 97 | RegisterNameX86_64::parse(((opcode >> 9) & 0b111) as u8), 98 | RegisterNameX86_64::parse(((opcode >> 6) & 0b111) as u8), 99 | RegisterNameX86_64::parse(((opcode >> 3) & 0b111) as u8), 100 | RegisterNameX86_64::parse((opcode & 0b111) as u8), 101 | ], 102 | }, 103 | OPCODE_KIND_X86_FRAMELESS_IMMEDIATE => { 104 | let stack_size_in_bytes = (((opcode >> 16) & 0xff) as u16) * 8; 105 | let register_count = (opcode >> 10) & 0b111; 106 | let register_permutation = opcode & 0b11_1111_1111; 107 | let saved_registers = 108 | match decode_permutation_6(register_count, register_permutation) { 109 | Ok(regs) => regs, 110 | Err(_) => return OpcodeX86_64::InvalidFrameless, 111 | }; 112 | OpcodeX86_64::FramelessImmediate { 113 | stack_size_in_bytes, 114 | saved_regs: [ 115 | RegisterNameX86_64::parse(saved_registers[0]), 116 | RegisterNameX86_64::parse(saved_registers[1]), 117 | RegisterNameX86_64::parse(saved_registers[2]), 118 | RegisterNameX86_64::parse(saved_registers[3]), 119 | RegisterNameX86_64::parse(saved_registers[4]), 120 | RegisterNameX86_64::parse(saved_registers[5]), 121 | ], 122 | } 123 | } 124 | OPCODE_KIND_X86_FRAMELESS_INDIRECT => { 125 | let immediate_offset_from_function_start = (opcode >> 16) as u8; 126 | let stack_adjust_in_bytes = ((opcode >> 13) & 0b111) as u8 * 8; 127 | let register_count = (opcode >> 10) & 0b111; 128 | let register_permutation = opcode & 0b11_1111_1111; 129 | let saved_registers = 130 | match decode_permutation_6(register_count, 
register_permutation) { 131 | Ok(regs) => regs, 132 | Err(_) => return OpcodeX86_64::InvalidFrameless, 133 | }; 134 | OpcodeX86_64::FramelessIndirect { 135 | immediate_offset_from_function_start, 136 | stack_adjust_in_bytes, 137 | saved_regs: [ 138 | RegisterNameX86_64::parse(saved_registers[0]), 139 | RegisterNameX86_64::parse(saved_registers[1]), 140 | RegisterNameX86_64::parse(saved_registers[2]), 141 | RegisterNameX86_64::parse(saved_registers[3]), 142 | RegisterNameX86_64::parse(saved_registers[4]), 143 | RegisterNameX86_64::parse(saved_registers[5]), 144 | ], 145 | } 146 | } 147 | OPCODE_KIND_X86_DWARF => OpcodeX86_64::Dwarf { 148 | eh_frame_fde: (opcode & 0xffffff), 149 | }, 150 | kind => OpcodeX86_64::UnrecognizedKind(kind), 151 | } 152 | } 153 | } 154 | 155 | impl Display for OpcodeX86_64 { 156 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 157 | match self { 158 | OpcodeX86_64::Null => { 159 | write!(f, "(uncovered)")?; 160 | } 161 | OpcodeX86_64::FrameBased { 162 | stack_offset_in_bytes, 163 | saved_regs, 164 | } => { 165 | // rbp was set to rsp before the saved registers were pushed. 166 | // The first pushed register is at rbp - 8 (== CFA - 24), the last at rbp - stack_offset_in_bytes. 
167 | write!(f, "CFA=reg6+16: reg6=[CFA-16], reg16=[CFA-8]")?; 168 | let max_count = (*stack_offset_in_bytes / 8) as usize; 169 | let mut offset = *stack_offset_in_bytes + 16; // + 2 for rbp, return address 170 | for reg in saved_regs.iter().rev().take(max_count) { 171 | if let Some(reg) = reg { 172 | write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; 173 | } 174 | offset -= 8; 175 | } 176 | } 177 | OpcodeX86_64::FramelessImmediate { 178 | stack_size_in_bytes, 179 | saved_regs, 180 | } => { 181 | if *stack_size_in_bytes == 0 { 182 | write!(f, "CFA=reg7:",)?; 183 | } else { 184 | write!(f, "CFA=reg7+{}:", *stack_size_in_bytes)?; 185 | } 186 | write!(f, " reg16=[CFA-8]")?; 187 | let mut offset = 2 * 8; 188 | for reg in saved_regs.iter().rev().flatten() { 189 | write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; 190 | offset += 8; 191 | } 192 | } 193 | OpcodeX86_64::FramelessIndirect { 194 | immediate_offset_from_function_start, 195 | stack_adjust_in_bytes, 196 | saved_regs, 197 | } => { 198 | write!( 199 | f, 200 | "CFA=[function_start+{}]+{}", 201 | immediate_offset_from_function_start, stack_adjust_in_bytes 202 | )?; 203 | write!(f, " reg16=[CFA-8]")?; 204 | let mut offset = 2 * 8; 205 | for reg in saved_regs.iter().rev().flatten() { 206 | write!(f, ", {}=[CFA-{}]", reg.dwarf_name(), offset)?; 207 | offset += 8; 208 | } 209 | } 210 | OpcodeX86_64::Dwarf { eh_frame_fde } => { 211 | write!(f, "(check eh_frame FDE 0x{:x})", eh_frame_fde)?; 212 | } 213 | OpcodeX86_64::InvalidFrameless => { 214 | write!( 215 | f, 216 | "!! frameless immediate or indirect with invalid permutation encoding" 217 | )?; 218 | } 219 | OpcodeX86_64::UnrecognizedKind(kind) => { 220 | write!(f, "!! 
Unrecognized kind {}", kind)?; 221 | } 222 | } 223 | Ok(()) 224 | } 225 | } 226 | 227 | #[cfg(test)] 228 | mod test { 229 | use super::*; 230 | 231 | #[test] 232 | fn test_frameless_indirect() { 233 | use RegisterNameX86_64::*; 234 | assert_eq!( 235 | OpcodeX86_64::parse(0x30df800), 236 | OpcodeX86_64::FramelessIndirect { 237 | immediate_offset_from_function_start: 13, 238 | stack_adjust_in_bytes: 56, 239 | saved_regs: [ 240 | Some(Rbx), 241 | Some(R12), 242 | Some(R13), 243 | Some(R14), 244 | Some(R15), 245 | Some(Rbp) 246 | ] 247 | } 248 | ) 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /src/raw/compressed_function.rs: -------------------------------------------------------------------------------- 1 | use crate::num_display::HexNum; 2 | use std::fmt::Debug; 3 | 4 | /// Allows accessing the two packed values from a "compressed" function entry. 5 | #[derive(Clone, Copy, PartialEq, Eq)] 6 | pub struct CompressedFunctionEntry(pub u32); 7 | 8 | /// Entries are a u32 that contains two packed values (from high to low): 9 | /// * 8 bits: opcode index 10 | /// * 24 bits: function address 11 | impl CompressedFunctionEntry { 12 | /// Wrap the u32. 13 | pub fn new(value: u32) -> Self { 14 | Self(value) 15 | } 16 | 17 | /// The opcode index. 18 | /// * 0..global_opcodes_len => index into global palette 19 | /// * global_opcodes_len..255 => index into local palette 20 | /// (subtract global_opcodes_len to get the real local index) 21 | pub fn opcode_index(&self) -> u8 { 22 | (self.0 >> 24) as u8 23 | } 24 | 25 | /// The function address, relative to the page's first_address. 
/// The `__unwind_info` header.
///
/// The header sits at the very start of the section. It only stores
/// (offset, length) pairs; the arrays they describe live elsewhere in the
/// section and are located through those offsets.
#[derive(Unaligned, FromBytes, KnownLayout, Immutable, Debug, Clone, Copy)]
#[repr(C)]
pub struct CompactUnwindInfoHeader {
    /// The version. Only version 1 is currently defined.
    pub version: U32,

    /// The array of U32 global opcodes (offset relative to start of root page).
    ///
    /// These may be indexed by "compressed" second-level pages.
    pub global_opcodes_offset: U32,
    /// Number of elements (not bytes) in the global opcodes array.
    pub global_opcodes_len: U32,

    /// The array of U32 global personality codes (offset relative to start of root page).
    ///
    /// Personalities define the style of unwinding that an unwinder should use,
    /// and how to interpret the LSDA entries for a function (see below).
    pub personalities_offset: U32,
    /// Number of elements (not bytes) in the personalities array.
    pub personalities_len: U32,

    /// The array of [`PageEntry`]'s describing the second-level pages
    /// (offset relative to start of root page).
    pub pages_offset: U32,
    /// Number of [`PageEntry`] elements (not bytes) in the pages array.
    pub pages_len: U32,
    // After this point there are several dynamically-sized arrays whose precise
    // order and positioning don't matter, because they are all accessed using
    // offsets like the ones above. The arrays are:

    // global_opcodes: [u32; global_opcodes_len],
    // personalities: [u32; personalities_len],
    // pages: [PageEntry; pages_len],
    // lsdas: [LsdaEntry; unknown_len],
}
/// A "compressed" page.
///
/// Each function entry is a single packed `u32` that refers to its opcode by
/// index rather than storing the opcode inline.
#[derive(Unaligned, FromBytes, KnownLayout, Immutable, Debug, Clone, Copy)]
#[repr(C)]
pub struct CompressedPage {
    /// Always 3 (use to distinguish from RegularPage).
    pub kind: U32,

    /// The array of compressed u32 function entries (offset relative to **start of this page**).
    ///
    /// Entries are a u32 that contains two packed values (from highest to lowest bits):
    ///  * 8 bits: opcode index
    ///    * 0..global_opcodes_len => index into global palette
    ///    * global_opcodes_len..255 => index into local palette (subtract global_opcodes_len)
    ///  * 24 bits: function address
    ///    * address is relative to this page's first_address!
    pub functions_offset: U16,
    /// Number of entries (not bytes) in the functions array.
    pub functions_len: U16,

    /// The array of u32 local opcodes for this page (offset relative to **start of this page**).
    pub local_opcodes_offset: U16,
    /// Number of entries (not bytes) in the local opcodes array.
    pub local_opcodes_len: U16,
}
110 | pub address: U32, 111 | 112 | /// The opcode for this address. 113 | pub opcode: Opcode, 114 | } 115 | -------------------------------------------------------------------------------- /src/raw/impls.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use super::format::{ 4 | CompactUnwindInfoHeader, CompressedPage, Opcode, PageEntry, RegularFunctionEntry, RegularPage, 5 | }; 6 | use super::unaligned::U32; 7 | use crate::error::ReadError; 8 | use crate::num_display::HexNum; 9 | use crate::reader::Reader; 10 | 11 | type Result = std::result::Result; 12 | 13 | impl CompactUnwindInfoHeader { 14 | pub fn parse(data: &[u8]) -> Result<&Self> { 15 | data.read_at::(0) 16 | .ok_or(ReadError::Header) 17 | } 18 | 19 | pub fn global_opcodes_offset(&self) -> u32 { 20 | self.global_opcodes_offset.into() 21 | } 22 | 23 | pub fn global_opcodes_len(&self) -> u32 { 24 | self.global_opcodes_len.into() 25 | } 26 | 27 | pub fn pages_offset(&self) -> u32 { 28 | self.pages_offset.into() 29 | } 30 | 31 | pub fn pages_len(&self) -> u32 { 32 | self.pages_len.into() 33 | } 34 | 35 | /// Return the list of global opcodes. 36 | pub fn global_opcodes<'data>(&self, data: &'data [u8]) -> Result<&'data [Opcode]> { 37 | data.read_slice_at::( 38 | self.global_opcodes_offset().into(), 39 | self.global_opcodes_len() as usize, 40 | ) 41 | .ok_or(ReadError::GlobalOpcodes) 42 | } 43 | 44 | /// Return the list of pages. 
45 | pub fn pages<'data>(&self, data: &'data [u8]) -> Result<&'data [PageEntry]> { 46 | data.read_slice_at::(self.pages_offset().into(), self.pages_len() as usize) 47 | .ok_or(ReadError::Pages) 48 | } 49 | } 50 | 51 | impl RegularPage { 52 | pub fn parse(data: &[u8], page_offset: u64) -> Result<&Self> { 53 | data.read_at::(page_offset) 54 | .ok_or(ReadError::RegularPage) 55 | } 56 | 57 | pub fn functions_offset(&self) -> u16 { 58 | self.functions_offset.into() 59 | } 60 | 61 | pub fn functions_len(&self) -> u16 { 62 | self.functions_len.into() 63 | } 64 | 65 | pub fn functions<'data>( 66 | &self, 67 | data: &'data [u8], 68 | page_offset: u32, 69 | ) -> Result<&'data [RegularFunctionEntry]> { 70 | let relative_functions_offset = self.functions_offset(); 71 | let functions_len: usize = self.functions_len().into(); 72 | let functions_offset = page_offset as u64 + relative_functions_offset as u64; 73 | data.read_slice_at::(functions_offset, functions_len) 74 | .ok_or(ReadError::RegularPageFunctions) 75 | } 76 | } 77 | 78 | impl CompressedPage { 79 | pub fn parse(data: &[u8], page_offset: u64) -> Result<&Self> { 80 | data.read_at::(page_offset) 81 | .ok_or(ReadError::CompressedPage) 82 | } 83 | 84 | pub fn functions_offset(&self) -> u16 { 85 | self.functions_offset.into() 86 | } 87 | 88 | pub fn functions_len(&self) -> u16 { 89 | self.functions_len.into() 90 | } 91 | 92 | pub fn local_opcodes_offset(&self) -> u16 { 93 | self.local_opcodes_offset.into() 94 | } 95 | 96 | pub fn local_opcodes_len(&self) -> u16 { 97 | self.local_opcodes_len.into() 98 | } 99 | 100 | pub fn functions<'data>(&self, data: &'data [u8], page_offset: u32) -> Result<&'data [U32]> { 101 | let relative_functions_offset = self.functions_offset(); 102 | let functions_len: usize = self.functions_len().into(); 103 | let functions_offset = page_offset as u64 + relative_functions_offset as u64; 104 | data.read_slice_at::(functions_offset, functions_len) 105 | .ok_or(ReadError::CompressedPageFunctions) 106 
| } 107 | 108 | /// Return the list of local opcodes. 109 | pub fn local_opcodes<'data>( 110 | &self, 111 | data: &'data [u8], 112 | page_offset: u32, 113 | ) -> Result<&'data [Opcode]> { 114 | let relative_local_opcodes_offset = self.local_opcodes_offset(); 115 | let local_opcodes_len: usize = self.local_opcodes_len().into(); 116 | let local_opcodes_offset = page_offset as u64 + relative_local_opcodes_offset as u64; 117 | data.read_slice_at::(local_opcodes_offset, local_opcodes_len) 118 | .ok_or(ReadError::LocalOpcodes) 119 | } 120 | } 121 | 122 | impl Opcode { 123 | pub fn opcode(&self) -> u32 { 124 | self.0.into() 125 | } 126 | } 127 | 128 | impl RegularFunctionEntry { 129 | pub fn address(&self) -> u32 { 130 | self.address.into() 131 | } 132 | 133 | pub fn opcode(&self) -> u32 { 134 | self.opcode.opcode() 135 | } 136 | } 137 | 138 | impl PageEntry { 139 | pub fn page_offset(&self) -> u32 { 140 | self.page_offset.into() 141 | } 142 | 143 | pub fn first_address(&self) -> u32 { 144 | self.first_address.into() 145 | } 146 | 147 | pub fn lsda_index_offset(&self) -> u32 { 148 | self.lsda_index_offset.into() 149 | } 150 | 151 | pub fn page_kind(&self, data: &[u8]) -> Result { 152 | let kind = *data 153 | .read_at::(self.page_offset().into()) 154 | .ok_or(ReadError::PageKind)?; 155 | Ok(kind.into()) 156 | } 157 | } 158 | 159 | impl Debug for PageEntry { 160 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 161 | f.debug_struct("PageEntry") 162 | .field("first_address", &HexNum(self.first_address())) 163 | .field("page_offset", &HexNum(self.page_offset())) 164 | .field("lsda_index_offset", &HexNum(self.lsda_index_offset())) 165 | .finish() 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/raw/mod.rs: -------------------------------------------------------------------------------- 1 | mod compressed_function; 2 | pub mod consts; 3 | mod format; 4 | mod impls; 5 | mod unaligned; 6 | 7 | pub use 
compressed_function::*; 8 | pub use format::*; 9 | pub use unaligned::*; 10 | -------------------------------------------------------------------------------- /src/raw/unaligned.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use zerocopy_derive::*; 4 | 5 | /// An unaligned little-endian `u32` value. 6 | #[derive( 7 | Unaligned, 8 | FromBytes, 9 | KnownLayout, 10 | Immutable, 11 | Default, 12 | Copy, 13 | Clone, 14 | PartialEq, 15 | Eq, 16 | PartialOrd, 17 | Ord, 18 | Hash, 19 | )] 20 | #[repr(transparent)] 21 | pub struct U32([u8; 4]); 22 | 23 | impl From for U32 { 24 | fn from(n: u32) -> Self { 25 | U32(n.to_le_bytes()) 26 | } 27 | } 28 | 29 | impl From for u32 { 30 | fn from(n: U32) -> Self { 31 | u32::from_le_bytes(n.0) 32 | } 33 | } 34 | 35 | impl Debug for U32 { 36 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 37 | u32::fmt(&(*self).into(), f) 38 | } 39 | } 40 | 41 | /// An unaligned little-endian `u16` value. 
42 | #[derive( 43 | Unaligned, 44 | FromBytes, 45 | KnownLayout, 46 | Immutable, 47 | Default, 48 | Copy, 49 | Clone, 50 | PartialEq, 51 | Eq, 52 | PartialOrd, 53 | Ord, 54 | Hash, 55 | )] 56 | #[repr(transparent)] 57 | pub struct U16([u8; 2]); 58 | 59 | impl From for U16 { 60 | fn from(n: u16) -> Self { 61 | U16(n.to_le_bytes()) 62 | } 63 | } 64 | 65 | impl From for u16 { 66 | fn from(n: U16) -> Self { 67 | u16::from_le_bytes(n.0) 68 | } 69 | } 70 | 71 | impl Debug for U16 { 72 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 73 | u16::fmt(&(*self).into(), f) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/reader.rs: -------------------------------------------------------------------------------- 1 | use zerocopy::{FromBytes, Immutable, KnownLayout, Ref}; 2 | 3 | pub trait Reader { 4 | fn read_at(&self, offset: u64) -> Option<&T> 5 | where 6 | T: FromBytes + KnownLayout + Immutable; 7 | fn read_slice_at(&self, offset: u64, len: usize) -> Option<&[T]> 8 | where 9 | T: FromBytes + KnownLayout + Immutable; 10 | } 11 | 12 | impl Reader for [u8] { 13 | fn read_at(&self, offset: u64) -> Option<&T> 14 | where 15 | T: FromBytes + KnownLayout + Immutable, 16 | { 17 | let offset: usize = offset.try_into().ok()?; 18 | let end: usize = offset.checked_add(core::mem::size_of::())?; 19 | let lv = Ref::<&[u8], T>::from_bytes(self.get(offset..end)?).ok()?; 20 | Some(Ref::into_ref(lv)) 21 | } 22 | 23 | fn read_slice_at(&self, offset: u64, len: usize) -> Option<&[T]> 24 | where 25 | T: FromBytes + KnownLayout + Immutable, 26 | { 27 | let offset: usize = offset.try_into().ok()?; 28 | let end: usize = offset.checked_add(core::mem::size_of::().checked_mul(len)?)?; 29 | let lv = Ref::<&[u8], [T]>::from_bytes(self.get(offset..end)?).ok()?; 30 | Some(Ref::into_ref(lv)) 31 | } 32 | } 33 | --------------------------------------------------------------------------------