├── .gitignore ├── .rustfmt.toml ├── Cargo.toml ├── LICENSE ├── README.mkdn └── src ├── bin ├── just-parse.rs └── tysh.rs ├── dwarf_parser.rs ├── lib.rs ├── load.rs ├── model.rs ├── unify.rs └── value.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 80 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "debugdb" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | gimli = "0.26.1" 10 | fallible-iterator = "0.2.0" 11 | object = "0.26.0" 12 | indexmap = "1.7" 13 | scroll = "0.10" 14 | rustyline = "11.0" 15 | ansi_term = "0.12.1" 16 | anyhow = { version = "1.0.68", features = ["backtrace"] } 17 | rangemap = "1.3.0" 18 | clap = { version = "4.3.5", features = ["derive"] } 19 | thiserror = "1.0.40" 20 | regex = "1.8.4" 21 | parse_int = "0.6.0" 22 | 23 | [profile.release] 24 | debug = true 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. 
"Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. 
"Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. 
Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. 
Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. 
If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. 
Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. 
If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. 
Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. 
Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 
361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 374 | -------------------------------------------------------------------------------- /README.mkdn: -------------------------------------------------------------------------------- 1 | # debugdb: a program analysis crate 2 | 3 | This is an attempt at a general debug information loading and analysis crate. It 4 | can: 5 | 6 | - Read the DWARF information for a subset of programs (primarily focused on Rust 7 | but with some C support). 8 | - Answer questions about it. 9 | - Provide basic reflection of data structures within a program memory image, 10 | e.g. load a struct into a map-like type given the ID of a type in the program. 11 | 12 | For a somewhat hacked-up but detailed example of what you can do with this 13 | crate, see `src/bin/tysh.rs`, which provides an interactive shell for answering 14 | questions about types in a program. Wondering about the concrete memory layout 15 | of a Rust enum in your program? `tysh` will literally draw you a picture of it. 16 | 17 | # Focus 18 | 19 | This is mostly focused on embedded Rust programs, specifically those built with 20 | release settings and LTO (because doing so dramatically simplifies the debug 21 | info). It has limited support for 22 | 23 | - Unix programs, 24 | - C programs, and 25 | - Type unification across compile units for programs not built with LTO. 
26 | 27 | # Hack alert 28 | 29 | Significant portions of this crate were written after consuming too much 30 | caffeine and immersing myself in the DWARF spec, which is a document that tends 31 | to warp your mind. This code could use a good rewritin' and refactorin', but 32 | unfortunately it works pretty well, which has reduced my motivation to do so. 33 | Wanna help? 34 | 35 | # This crate attempts to be strict 36 | 37 | This crate may crash when you feed it a program. Believe it or not, this is 38 | deliberate! In my experience a lot of DWARF loaders will silently skip over 39 | information they don't understand, which can cause them to miss important 40 | portions of the DWARF spec or misinterpret certain programs. 41 | 42 | This crate attempts to make a one-sided error in the other direction: 43 | 44 | - If it can load a program, its interpretation is probably correct. (If not, 45 | report a bug please!) 46 | - If it hits a DWARF feature, or use of a DWARF feature, that it doesn't either 47 | explicitly support or explicitly ignore, it will reject the program. 48 | 49 | This can be kind of annoying, particularly since compilers don't bother to 50 | version their debug info, so small changes can require updates to this crate. 51 | However, it also means this crate gets a bunch of things _right_ that I've seen 52 | others get wrong. 53 | 54 | That's not to suggest the crate is perfect. This crate almost certainly contains 55 | bugs, because the DWARF spec is huge and ambiguous, and compilers are 56 | inconsistent about whether they actually follow it. But it means the bugs fall 57 | into two categories: 58 | 59 | - If a program loads, and the interpretation is wrong, there's a serious and 60 | genuine bug in the crate's logic. 61 | 62 | - If a program does not load, extending this crate to cover it is more of a 63 | feature request than a bug. Which is not to suggest it's not important -- just 64 | that the sort of work required will be different. 
65 | -------------------------------------------------------------------------------- /src/bin/just-parse.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use clap::Parser; 3 | 4 | #[derive(Debug, Parser)] 5 | struct Sketch { 6 | filename: std::path::PathBuf, 7 | } 8 | 9 | fn main() -> Result<()> { 10 | let args = Sketch::parse(); 11 | 12 | let buffer = std::fs::read(args.filename)?; 13 | let object = object::File::parse(&*buffer)?; 14 | 15 | debugdb::parse_file(&object)?; 16 | 17 | Ok(()) 18 | } 19 | -------------------------------------------------------------------------------- /src/bin/tysh.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Display, io::BufRead}; 2 | 3 | use anyhow::Result; 4 | use clap::Parser; 5 | use debugdb::value::ValueWithDb; 6 | use object::{Object, ObjectSegment}; 7 | use rangemap::{RangeMap, RangeInclusiveMap}; 8 | 9 | use debugdb::{Type, Encoding, TypeId, Struct, Member, DebugDb, Enum, VariantShape, value::Value}; 10 | use debugdb::load::{Load, ImgMachine}; 11 | use regex::Regex; 12 | 13 | #[derive(Debug, Parser)] 14 | struct TySh { 15 | filename: std::path::PathBuf, 16 | } 17 | 18 | fn main() -> Result<()> { 19 | let args = TySh::parse(); 20 | 21 | let buffer = std::fs::read(args.filename)?; 22 | let object = object::File::parse(&*buffer)?; 23 | let mut segments = RangeInclusiveMap::new(); 24 | for seg in object.segments() { 25 | if seg.size() == 0 { 26 | continue; 27 | } 28 | segments.insert( 29 | seg.address()..=seg.address() + (seg.size() - 1), 30 | seg.data()?.to_vec(), 31 | ); 32 | } 33 | let everything = debugdb::parse_file(&object)?; 34 | 35 | println!("Loaded; {} types found in program.", everything.type_count()); 36 | println!("To quit: ^D or exit"); 37 | 38 | let mut rl = rustyline::Editor::<(), _>::new()?; 39 | let prompt = ansi_term::Colour::Green.paint(">> ").to_string(); 40 | let mut ctx = Ctx { segments 
}; 41 | 'lineloop: 42 | loop { 43 | match rl.readline(&prompt) { 44 | Ok(line) => { 45 | let line = line.trim(); 46 | let (cmd, rest) = line.split_once(char::is_whitespace) 47 | .unwrap_or((line, "")); 48 | if line.is_empty() { 49 | continue 'lineloop; 50 | } 51 | 52 | rl.add_history_entry(line)?; 53 | 54 | match cmd { 55 | "exit" => break, 56 | "help" => { 57 | println!("commands:"); 58 | let name_len = COMMANDS.iter() 59 | .map(|(name, _, _)| name.len()) 60 | .max() 61 | .unwrap_or(12); 62 | for (name, _, desc) in COMMANDS { 63 | println!("{:name_len$} {}", name, desc); 64 | } 65 | } 66 | _ => { 67 | for (name, imp, _) in COMMANDS { 68 | if *name == cmd { 69 | imp(&everything, &mut ctx, rest); 70 | continue 'lineloop; 71 | } 72 | } 73 | println!("unknown command: {}", cmd); 74 | println!("for help, try: help"); 75 | } 76 | } 77 | } 78 | Err(rustyline::error::ReadlineError::Interrupted) => { 79 | println!("^C"); 80 | continue; 81 | } 82 | Err(e) => { 83 | println!("{:?}", e); 84 | break; 85 | } 86 | } 87 | } 88 | 89 | Ok(()) 90 | } 91 | 92 | struct Goff(gimli::UnitSectionOffset); 93 | 94 | impl std::fmt::Display for Goff { 95 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 96 | match self.0 { 97 | gimli::UnitSectionOffset::DebugInfoOffset(gimli::DebugInfoOffset(x)) => { 98 | write!(f, "<.debug_info+0x{:08x}>", x) 99 | } 100 | gimli::UnitSectionOffset::DebugTypesOffset(gimli::DebugTypesOffset(x)) => { 101 | write!(f, "<.debug_types+0x{:08x}>", x) 102 | } 103 | } 104 | } 105 | } 106 | 107 | struct NamedGoff<'a>(&'a debugdb::DebugDb, TypeId); 108 | 109 | impl std::fmt::Display for NamedGoff<'_> { 110 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 111 | let bold = ansi_term::Style::new().bold(); 112 | let dim = ansi_term::Style::new().dimmed(); 113 | 114 | let n = if let Some(name) = self.0.type_name(self.1) { 115 | name 116 | } else { 117 | "".into() 118 | }; 119 | 120 | write!(f, "{}", bold.paint(n))?; 121 | match self.1.0 { 
122 | gimli::UnitSectionOffset::DebugInfoOffset(gimli::DebugInfoOffset(x)) => { 123 | write!(f, " {}<.debug_info+0x{:08x}>{}", dim.prefix(), x, dim.suffix()) 124 | } 125 | gimli::UnitSectionOffset::DebugTypesOffset(gimli::DebugTypesOffset(x)) => { 126 | write!(f, " {}<.debug_types+0x{:08x}>{}", dim.prefix(), x, dim.suffix()) 127 | } 128 | } 129 | } 130 | } 131 | 132 | struct Ctx { 133 | segments: RangeInclusiveMap>, 134 | } 135 | 136 | type Command = fn(&debugdb::DebugDb, &mut Ctx, &str); 137 | 138 | static COMMANDS: &[(&str, Command, &str)] = &[ 139 | ("list", cmd_list, "print names of ALL types, or types containing a string"), 140 | ("info", cmd_info, "print a summary of a type"), 141 | ("load", cmd_load, "loads additional segment data"), 142 | ("def", cmd_def, "print a type as a pseudo-Rust definition"), 143 | ("sizeof", cmd_sizeof, "print size of type in bytes"), 144 | ("alignof", cmd_alignof, "print alignment of type in bytes"), 145 | ("addr", cmd_addr, "look up information about an address"), 146 | ("addr2line", cmd_addr2line, "look up line number information"), 147 | ("addr2stack", cmd_addr2stack, "display inlined stack frames"), 148 | ("vars", cmd_vars, "list static variables"), 149 | ("var", cmd_var, "get info on a static variable"), 150 | ("unwind", cmd_unwind, "get unwind info for an address"), 151 | ("decode", cmd_decode, "interpret RAM/ROM as a type"), 152 | ("decode-async", cmd_decode_async, "interpret RAM/ROM as a suspended future"), 153 | ("decode-blob", cmd_decode_blob, "attempt to interpret bytes as a type"), 154 | ("decode-async-blob", cmd_decode_async_blob, "attempt to interpret bytes as a suspended future"), 155 | ]; 156 | 157 | fn cmd_list( 158 | db: &debugdb::DebugDb, 159 | _ctx: &mut Ctx, 160 | args: &str, 161 | ) { 162 | // We're gonna make a copy to sort it, because alphabetical order seems 163 | // polite. 
164 | let mut types_copy = db.canonical_types() 165 | .filter(|(goff, _ty)| { 166 | if !args.is_empty() { 167 | if let Some(name) = db.type_name(*goff) { 168 | return name.contains(args); 169 | } else { 170 | return false; 171 | } 172 | } 173 | true 174 | }) 175 | .collect::>(); 176 | 177 | types_copy.sort_by_key(|(goff, _ty)| db.type_name(*goff)); 178 | 179 | for (goff, ty) in types_copy { 180 | let kind = match ty { 181 | Type::Base(_) => "base", 182 | Type::Struct(_) => "struct", 183 | Type::Enum(_) => "enum", 184 | Type::CEnum(_) => "c-enum", 185 | Type::Array(_) => "array", 186 | Type::Pointer(_) => "ptr", 187 | Type::Union(_) => "union", 188 | Type::Subroutine(_) => "subr", 189 | Type::Unresolved(_) => "missing", 190 | }; 191 | 192 | let aliases = db.aliases_of_type(goff); 193 | if let Some(aliases) = aliases { 194 | println!("{:6} {} ({} aliases)", kind, NamedGoff(db, goff), aliases.len()); 195 | } else { 196 | println!("{:6} {}", kind, NamedGoff(db, goff)); 197 | } 198 | } 199 | } 200 | 201 | fn parse_type_name(s: &str) -> Option> { 202 | if s.starts_with("<.debug_") && s.ends_with('>') { 203 | // Try parsing as a debug section reference. 
204 | let rest = &s[8..]; 205 | return if rest.starts_with("info+0x") { 206 | let num = &rest[7..rest.len() - 1]; 207 | if let Ok(n) = usize::from_str_radix(num, 16) { 208 | Some(ParsedTypeName::Goff(TypeId(gimli::DebugInfoOffset(n).into()))) 209 | } else { 210 | println!("can't parse {} as hex", num); 211 | None 212 | } 213 | } else if rest.starts_with("types+0x") { 214 | let num = &rest[8..rest.len() - 1]; 215 | if let Ok(n) = usize::from_str_radix(num, 16) { 216 | Some(ParsedTypeName::Goff(TypeId(gimli::DebugTypesOffset(n).into()))) 217 | } else { 218 | println!("can't parse {} as hex", num); 219 | None 220 | } 221 | } else { 222 | println!("bad offset reference: {}", s); 223 | None 224 | }; 225 | } 226 | 227 | Some(ParsedTypeName::Name(s)) 228 | } 229 | 230 | enum ParsedTypeName<'a> { 231 | Name(&'a str), 232 | Goff(TypeId), 233 | } 234 | 235 | fn simple_query_cmd( 236 | db: &debugdb::DebugDb, 237 | args: &str, 238 | q: fn(&debugdb::DebugDb, &debugdb::Type), 239 | ) { 240 | let type_name = args.trim(); 241 | let types: Vec<_> = match parse_type_name(type_name) { 242 | None => return, 243 | Some(ParsedTypeName::Name(n)) => { 244 | db.types_by_name(n).collect() 245 | } 246 | Some(ParsedTypeName::Goff(o)) => { 247 | db.type_by_id(o).into_iter() 248 | .map(|t| (o, t)) 249 | .collect() 250 | } 251 | }; 252 | if type_name.starts_with("<.debug_") && type_name.ends_with('>') { 253 | // Try parsing as a debug section reference. 
254 | let rest = &type_name[8..]; 255 | if rest.starts_with("info+0x") { 256 | // TODO what was I doing here 257 | } else if rest.starts_with("types+0x") { 258 | // TODO no seriously 259 | } 260 | } 261 | 262 | let many = match types.len() { 263 | 0 => { 264 | println!("{}", ansi_term::Colour::Red.paint("No types found.")); 265 | return; 266 | } 267 | 1 => false, 268 | n => { 269 | println!("{}{} types found with that name:", 270 | ansi_term::Color::Yellow.paint("note: "), 271 | n, 272 | ); 273 | true 274 | } 275 | }; 276 | 277 | for (goff, t) in types { 278 | if many { println!() } 279 | print!("{}: ", NamedGoff(db, goff)); 280 | q(db, t); 281 | } 282 | } 283 | 284 | fn cmd_info(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) { 285 | simple_query_cmd(db, args, |db, t| { 286 | match t { 287 | Type::Base(s) => { 288 | println!("base type"); 289 | println!("- encoding: {:?}", s.encoding); 290 | println!("- byte size: {}", s.byte_size); 291 | } 292 | Type::Pointer(s) => { 293 | println!("pointer type"); 294 | println!("- points to: {}", NamedGoff(db, s.type_id)); 295 | } 296 | Type::Array(s) => { 297 | println!("array type"); 298 | println!("- element type: {}", NamedGoff(db, s.element_type_id)); 299 | println!("- lower bound: {}", s.lower_bound); 300 | if let Some(n) = s.count { 301 | println!("- count: {}", n); 302 | } else { 303 | println!("- size not given"); 304 | } 305 | } 306 | Type::Struct(s) => { 307 | if s.tuple_like { 308 | println!("struct type (tuple-like)"); 309 | } else { 310 | println!("struct type"); 311 | } 312 | if s.decl_coord.is_useful() { 313 | print!("- declared at: {}", s.decl_coord.file.as_deref().unwrap_or("???")); 314 | if let Some(n) = s.decl_coord.line { 315 | print!(":{n}"); 316 | } else { 317 | print!(":???"); 318 | } 319 | // Be more tolerant of missing column number. 
320 | if let Some(n) = s.decl_coord.column { 321 | print!(":{n}"); 322 | } 323 | println!(); 324 | } 325 | if let Some(z) = s.byte_size { 326 | println!("- byte size: {z}"); 327 | } 328 | if let Some(a) = s.alignment { 329 | println!("- alignment: {}", a); 330 | } else { 331 | println!("- not aligned"); 332 | } 333 | if !s.template_type_parameters.is_empty() { 334 | println!("- template type parameters:"); 335 | for ttp in &s.template_type_parameters { 336 | println!(" - {} = {}", ttp.name, NamedGoff(db, ttp.type_id)); 337 | } 338 | } 339 | if !s.members.is_empty() { 340 | println!("- members:"); 341 | for (i, mem) in s.members.iter().enumerate() { 342 | if let Some(name) = &mem.name { 343 | println!(" {i}. {name}: {}", NamedGoff(db, mem.type_id)); 344 | } else { 345 | println!(" - : {}", NamedGoff(db, mem.type_id)); 346 | } 347 | println!(" - offset: {} bytes", mem.location); 348 | if let Some(s) = db.type_by_id(mem.type_id).unwrap().byte_size(db) { 349 | println!(" - size: {} bytes", s); 350 | } 351 | if let Some(a) = mem.alignment { 352 | println!(" - aligned: {} bytes", a); 353 | } 354 | if mem.artificial { 355 | println!(" - artificial"); 356 | } 357 | } 358 | } else { 359 | println!("- no members"); 360 | } 361 | 362 | struct_picture(db, s, db.pointer_size()); 363 | } 364 | Type::Enum(s) => { 365 | println!("enum type"); 366 | if let Some(z) = s.byte_size { 367 | println!("- byte size: {z}"); 368 | } 369 | if let Some(a) = s.alignment { 370 | println!("- alignment: {}", a); 371 | } else { 372 | println!("- not aligned"); 373 | } 374 | if !s.template_type_parameters.is_empty() { 375 | println!("- type parameters:"); 376 | for ttp in &s.template_type_parameters { 377 | println!(" - {} = {}", ttp.name, NamedGoff(db, ttp.type_id)); 378 | } 379 | } 380 | 381 | match &s.shape { 382 | debugdb::VariantShape::Zero => { 383 | println!("- empty (uninhabited) enum"); 384 | } 385 | debugdb::VariantShape::One(v) => { 386 | println!("- single variant enum w/o 
discriminator"); 387 | println!(" - content type: {}", NamedGoff(db, v.member.type_id)); 388 | println!(" - offset: {} bytes", v.member.location); 389 | if let Some(a) = v.member.alignment { 390 | println!(" - aligned: {} bytes", a); 391 | } 392 | if !v.member.artificial { 393 | println!(" - not artificial, oddly"); 394 | } 395 | } 396 | debugdb::VariantShape::Many { member, variants, .. }=> { 397 | if let Some(dname) = db.type_name(member.type_id) { 398 | println!("- {} variants discriminated by {} at offset {}", variants.len(), dname, member.location); 399 | } else { 400 | println!("- {} variants discriminated by an anonymous type at offset {}", variants.len(), member.location); 401 | } 402 | if !member.artificial { 403 | println!(" - not artificial, oddly"); 404 | } 405 | 406 | // Print explicit values first 407 | for (val, var) in variants { 408 | if let Some(val) = val { 409 | println!("- when discriminator == {}", val); 410 | println!(" - contains type: {}", NamedGoff(db, var.member.type_id)); 411 | println!(" - at offset: {} bytes", var.member.location); 412 | if let Some(a) = var.member.alignment { 413 | println!(" - aligned: {} bytes", a); 414 | } 415 | } 416 | } 417 | // Now, default. 
418 | for (val, var) in variants { 419 | if val.is_none() { 420 | println!("- any other discriminator value"); 421 | println!(" - contains type: {}", NamedGoff(db, var.member.type_id)); 422 | println!(" - at offset: {} bytes", var.member.location); 423 | if let Some(a) = var.member.alignment { 424 | println!(" - aligned: {} bytes", a); 425 | } 426 | } 427 | } 428 | } 429 | } 430 | enum_picture(db, s, db.pointer_size()); 431 | } 432 | Type::CEnum(s) => { 433 | println!("C-like enum type"); 434 | println!("- representation: {}", NamedGoff(db, s.repr_type_id)); 435 | println!("- byte size: {}", s.byte_size); 436 | if let Some(a) = s.alignment { 437 | println!("- alignment: {a}"); 438 | } 439 | println!("- {} values defined", s.enumerators.len()); 440 | for e in s.enumerators.values() { 441 | println!(" - {} = 0x{:x}", e.name, e.const_value); 442 | 443 | } 444 | } 445 | Type::Union(s) => { 446 | println!("union type"); 447 | println!("- byte size: {}", s.byte_size); 448 | println!("- alignment: {}", s.alignment); 449 | if !s.template_type_parameters.is_empty() { 450 | println!("- template type parameters:"); 451 | for ttp in &s.template_type_parameters { 452 | println!(" - {} = {}", ttp.name, NamedGoff(db, ttp.type_id)); 453 | } 454 | } 455 | if !s.members.is_empty() { 456 | println!("- members:"); 457 | for mem in &s.members { 458 | if let Some(name) = &mem.name { 459 | println!(" - {}: {}", name, NamedGoff(db, mem.type_id)); 460 | } else { 461 | println!(" - : {}", NamedGoff(db, mem.type_id)); 462 | } 463 | println!(" - offset: {} bytes", mem.location); 464 | if let Some(a) = mem.alignment { 465 | println!(" - aligned: {} bytes", a); 466 | } 467 | if mem.artificial { 468 | println!(" - artificial"); 469 | } 470 | } 471 | } else { 472 | println!("- no members"); 473 | } 474 | } 475 | Type::Subroutine(s) => { 476 | println!("subroutine type"); 477 | if let Some(rt) = s.return_type_id { 478 | println!("- return type: {}", NamedGoff(db, rt)); 479 | } 480 | if 
!s.formal_parameters.is_empty() { 481 | println!("- formal parameters:"); 482 | for &fp in &s.formal_parameters { 483 | println!(" - {}", NamedGoff(db, fp)); 484 | } 485 | } 486 | } 487 | Type::Unresolved(_) => { 488 | println!("type not found in debug info!"); 489 | } 490 | } 491 | }) 492 | } 493 | 494 | fn cmd_sizeof(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) { 495 | simple_query_cmd(db, args, |db, t| { 496 | if let Some(sz) = t.byte_size(db) { 497 | println!("{} bytes", sz); 498 | } else { 499 | println!("unsized"); 500 | } 501 | }) 502 | } 503 | 504 | fn cmd_alignof(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) { 505 | simple_query_cmd(db, args, |db, t| { 506 | if let Some(sz) = t.alignment(db) { 507 | println!("align to {} bytes", sz); 508 | } else { 509 | println!("no alignment information"); 510 | } 511 | }) 512 | } 513 | 514 | fn cmd_def(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) { 515 | simple_query_cmd(db, args, |db, t| { 516 | println!(); 517 | match t { 518 | Type::Base(s) => { 519 | print!("type _ = "); 520 | match (s.encoding, s.byte_size) { 521 | (_, 0) => print!("()"), 522 | (Encoding::Unsigned, 1) => print!("u8"), 523 | (Encoding::Unsigned, 2) => print!("u16"), 524 | (Encoding::Unsigned, 4) => print!("u32"), 525 | (Encoding::Unsigned, 8) => print!("u64"), 526 | (Encoding::Unsigned, 16) => print!("u128"), 527 | (Encoding::Signed, 1) => print!("i8"), 528 | (Encoding::Signed, 2) => print!("i16"), 529 | (Encoding::Signed, 4) => print!("i32"), 530 | (Encoding::Signed, 8) => print!("i64"), 531 | (Encoding::Signed, 16) => print!("i128"), 532 | (Encoding::Float, 4) => print!("f32"), 533 | (Encoding::Float, 8) => print!("f64"), 534 | (Encoding::Boolean, 1) => print!("bool"), 535 | (Encoding::UnsignedChar, 1) => print!("c_uchar"), 536 | (Encoding::SignedChar, 1) => print!("c_schar"), 537 | (Encoding::UtfChar, 4) => print!("char"), 538 | 539 | (e, s) => print!("Unhandled{:?}{}", e, s), 540 | } 541 | println!(";"); 542 | } 543 | 
Type::Pointer(_s) => { 544 | print!("type _ = {};", t.name(db)); 545 | } 546 | Type::Array(s) => { 547 | let name = db.type_name(s.element_type_id).unwrap(); 548 | if let Some(n) = s.count { 549 | println!("[{}; {}]", name, n); 550 | } else { 551 | println!("[{}]", name); 552 | } 553 | } 554 | Type::Struct(s) => { 555 | print!("struct {}", s.name); 556 | 557 | if !s.template_type_parameters.is_empty() { 558 | print!("<"); 559 | for ttp in &s.template_type_parameters { 560 | print!("{},", ttp.name); 561 | } 562 | print!(">"); 563 | } 564 | 565 | if s.members.is_empty() { 566 | println!(";"); 567 | } else if s.tuple_like { 568 | println!("("); 569 | for mem in &s.members { 570 | println!(" {},", db.type_name(mem.type_id).unwrap()); 571 | } 572 | println!(");"); 573 | } else { 574 | println!(" {{"); 575 | for mem in &s.members { 576 | if let Some(name) = &mem.name { 577 | println!(" {}: {},", name, db.type_name(mem.type_id).unwrap()); 578 | } else { 579 | println!(" ANON: {},", db.type_name(mem.type_id).unwrap()); 580 | } 581 | } 582 | println!("}}"); 583 | } 584 | } 585 | Type::Enum(s) => { 586 | print!("enum {}", s.name); 587 | if !s.template_type_parameters.is_empty() { 588 | print!("<"); 589 | for ttp in &s.template_type_parameters { 590 | print!("{}", ttp.name); 591 | } 592 | print!(">"); 593 | } 594 | println!(" {{"); 595 | 596 | match &s.shape { 597 | debugdb::VariantShape::Zero => (), 598 | debugdb::VariantShape::One(var) => { 599 | if let Some(name) = &var.member.name { 600 | print!(" {}", name); 601 | } else { 602 | print!(" ANON"); 603 | } 604 | 605 | let mty = db.type_by_id(var.member.type_id) 606 | .unwrap(); 607 | if let Type::Struct(s) = mty { 608 | if !s.members.is_empty() { 609 | if s.tuple_like { 610 | println!("("); 611 | for mem in &s.members { 612 | let mtn = db.type_name(mem.type_id).unwrap(); 613 | println!(" {},", mtn); 614 | } 615 | print!(" )"); 616 | } else { 617 | println!(" {{"); 618 | for mem in &s.members { 619 | let mtn = 
db.type_name(mem.type_id).unwrap(); 620 | println!(" {}: {},", mem.name.as_ref().unwrap(), mtn); 621 | } 622 | print!(" }}"); 623 | } 624 | } 625 | } else { 626 | print!("(unexpected weirdness)"); 627 | } 628 | 629 | println!(","); 630 | } 631 | debugdb::VariantShape::Many { variants, .. }=> { 632 | for var in variants.values() { 633 | if let Some(name) = &var.member.name { 634 | print!(" {}", name); 635 | } else { 636 | print!(" ANON"); 637 | } 638 | 639 | let mty = db.type_by_id(var.member.type_id) 640 | .unwrap(); 641 | if let Type::Struct(s) = mty { 642 | if !s.members.is_empty() { 643 | if s.tuple_like { 644 | println!("("); 645 | for mem in &s.members { 646 | let mtn = db.type_name(mem.type_id).unwrap(); 647 | println!(" {},", mtn); 648 | } 649 | print!(" )"); 650 | } else { 651 | println!(" {{"); 652 | for mem in &s.members { 653 | let mtn = db.type_name(mem.type_id).unwrap(); 654 | println!(" {}: {},", mem.name.as_ref().unwrap(), mtn); 655 | } 656 | print!(" }}"); 657 | } 658 | } 659 | } else { 660 | print!("(unexpected weirdness)"); 661 | } 662 | 663 | println!(","); 664 | } 665 | } 666 | } 667 | println!("}}"); 668 | 669 | } 670 | Type::CEnum(s) => { 671 | println!("enum {} {{", s.name); 672 | for (val, e) in &s.enumerators { 673 | println!(" {} = 0x{:x},", e.name, val); 674 | } 675 | println!("}}"); 676 | } 677 | Type::Union(s) => { 678 | print!("union {}", s.name); 679 | 680 | if !s.template_type_parameters.is_empty() { 681 | print!("<"); 682 | for ttp in &s.template_type_parameters { 683 | print!("{},", ttp.name); 684 | } 685 | print!(">"); 686 | } 687 | 688 | println!(" {{"); 689 | for mem in &s.members { 690 | if let Some(name) = &mem.name { 691 | println!(" {}: {},", name, db.type_name(mem.type_id).unwrap()); 692 | } else { 693 | println!(" ANON: {},", db.type_name(mem.type_id).unwrap()); 694 | } 695 | } 696 | println!("}}"); 697 | } 698 | Type::Subroutine(s) => { 699 | println!("fn("); 700 | for &p in &s.formal_parameters { 701 | println!(" {},", 
db.type_name(p).unwrap()); 702 | } 703 | if let Some(rt) = s.return_type_id { 704 | println!(") -> {} {{", db.type_name(rt).unwrap()); 705 | } else { 706 | println!(") {{"); 707 | } 708 | println!(" // code goes here"); 709 | println!(" // (this is a subroutine type, _not_ a fn ptr)"); 710 | println!(" unimplemented!();"); 711 | println!("}}"); 712 | } 713 | Type::Unresolved(_) => { 714 | println!("(type not found in debug info!)"); 715 | } 716 | } 717 | }) 718 | } 719 | 720 | fn cmd_addr2line(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) { 721 | let addr = if let Some(rest) = args.strip_prefix("0x") { 722 | if let Ok(a) = u64::from_str_radix(rest, 16) { 723 | a 724 | } else { 725 | println!("can't parse {} as an address", args); 726 | return; 727 | } 728 | } else if let Ok(a) = args.parse::() { 729 | a 730 | } else { 731 | println!("can't parse {} as an address", args); 732 | return; 733 | }; 734 | 735 | if let Some(row) = db.lookup_line_row(addr) { 736 | print!("{}:", row.file); 737 | if let Some(line) = row.line { 738 | print!("{}:", line); 739 | } else { 740 | print!("?:"); 741 | } 742 | if let Some(col) = row.column { 743 | print!("{}", col); 744 | } else { 745 | print!("?"); 746 | } 747 | println!(); 748 | } else { 749 | println!("no line number information available for address"); 750 | } 751 | } 752 | 753 | fn cmd_addr2stack(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) { 754 | let addr = if let Some(rest) = args.strip_prefix("0x") { 755 | if let Ok(a) = u64::from_str_radix(rest, 16) { 756 | a 757 | } else { 758 | println!("can't parse {} as an address", args); 759 | return; 760 | } 761 | } else if let Ok(a) = args.parse::() { 762 | a 763 | } else { 764 | println!("can't parse {} as an address", args); 765 | return; 766 | }; 767 | 768 | let bold = ansi_term::Style::new().bold(); 769 | let dim = ansi_term::Style::new().dimmed(); 770 | 771 | match db.static_stack_for_pc(addr) { 772 | Ok(Some(trc)) => { 773 | println!("Static stack trace fragment for 
address 0x{:x}", addr); 774 | println!("(innermost / most recent first)"); 775 | for (i, record) in trc.iter().rev().enumerate() { 776 | let subp = db.subprogram_by_id(record.subprogram).unwrap(); 777 | 778 | print!("{:4} ", i); 779 | if let Some(n) = &subp.name { 780 | println!("{}", bold.paint(n)); 781 | } else { 782 | println!("{}", bold.paint("")); 783 | } 784 | print!("{}", dim.prefix()); 785 | print!(" {}:", record.file); 786 | if let Some(line) = record.line { 787 | print!("{}:", line); 788 | } else { 789 | print!("?:"); 790 | } 791 | if let Some(col) = record.column { 792 | print!("{}", col); 793 | } else { 794 | print!("?"); 795 | } 796 | print!("{}", dim.suffix()); 797 | println!(); 798 | } 799 | } 800 | Ok(None) => { 801 | println!("no stack information available for address {addr:#x?}"); 802 | } 803 | Err(e) => { 804 | println!("failed: {e}"); 805 | } 806 | } 807 | } 808 | 809 | fn cmd_vars(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) { 810 | for (_id, v) in db.static_variables() { 811 | if !args.is_empty() && !v.name.contains(args) { 812 | continue; 813 | } 814 | 815 | println!("0x{:0width$x} {}: {}", v.location, v.name, NamedGoff(db, v.type_id), 816 | width = db.pointer_size() * 2); 817 | } 818 | } 819 | 820 | fn cmd_var(db: &debugdb::DebugDb, ctx: &mut Ctx, args: &str) { 821 | let results = db.static_variables_by_name(args).collect::>(); 822 | 823 | match results.len() { 824 | 0 => println!("no variables found by that name"), 825 | 1 => (), 826 | n => println!("note: {} variables found by that name", n), 827 | } 828 | 829 | for (_id, v) in results { 830 | println!("{} @ {}", v.name, Goff(v.offset)); 831 | println!("- type: {}", NamedGoff(db, v.type_id)); 832 | println!("- address: 0x{:x}", v.location); 833 | let Some(ty) = db.type_by_id(v.type_id) else { continue }; 834 | 835 | match Value::from_state(&ctx.segments, v.location, db, ty) { 836 | Ok(v) => { 837 | println!("- current contents: {}", 838 | ValueWithDb(v, db)); 839 | } 840 | Err(e) => 
{ 841 | println!("- unable to display: {e}"); 842 | } 843 | } 844 | } 845 | } 846 | 847 | fn cmd_addr(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) { 848 | let addr = if let Some(rest) = args.strip_prefix("0x") { 849 | if let Ok(a) = u64::from_str_radix(rest, 16) { 850 | a 851 | } else { 852 | println!("can't parse {} as an address", args); 853 | return; 854 | } 855 | } else if let Ok(a) = args.parse::() { 856 | a 857 | } else { 858 | println!("can't parse {} as an address", args); 859 | return; 860 | }; 861 | 862 | let es = db.entities_by_address(addr).collect::>(); 863 | 864 | match es.len() { 865 | 0 => println!("Nothing known about address 0x{:x}.", addr), 866 | 1 => (), 867 | n => println!("note: {} overlapping entities claim address 0x{:x}", n, addr), 868 | } 869 | 870 | let bold = ansi_term::Style::new().bold(); 871 | let dim = ansi_term::Style::new().dimmed(); 872 | 873 | for e in es { 874 | let offset = addr - e.range.start; 875 | print!("Offset +0x{:x} into ", offset); 876 | match e.entity { 877 | debugdb::EntityId::Var(vid) => { 878 | let v = db.static_variable_by_id(vid).unwrap(); 879 | println!("static {}", bold.paint(&v.name)); 880 | println!("- range 0x{:x}..0x{:x}", 881 | e.range.start, e.range.end); 882 | println!("- type {}", NamedGoff(db, v.type_id)); 883 | 884 | // Try to determine path within type. 
885 | offset_to_path(db, v.type_id, offset); 886 | } 887 | debugdb::EntityId::Prog(pid) => { 888 | let p = db.subprogram_by_id(pid).unwrap(); 889 | if let Some(n) = &p.name { 890 | println!("subprogram {}", bold.paint(n)); 891 | } else { 892 | println!("subprogram {}", bold.paint("ANON")); 893 | } 894 | println!("- range 0x{:x}..0x{:x}", 895 | e.range.start, e.range.end); 896 | match db.static_stack_for_pc(addr) { 897 | Ok(Some(trc)) => { 898 | println!("- stack fragment with inlines:"); 899 | for (i, record) in trc.iter().rev().enumerate() { 900 | let subp = db.subprogram_by_id(record.subprogram).unwrap(); 901 | 902 | print!(" {:4} ", i); 903 | if let Some(n) = &subp.name { 904 | println!("{}", bold.paint(n)); 905 | } else { 906 | println!("{}", bold.paint("")); 907 | } 908 | print!("{}", dim.prefix()); 909 | print!(" {}:", record.file); 910 | if let Some(line) = record.line { 911 | print!("{}:", line); 912 | } else { 913 | print!("?:"); 914 | } 915 | if let Some(col) = record.column { 916 | print!("{}", col); 917 | } else { 918 | print!("?"); 919 | } 920 | print!("{}", dim.suffix()); 921 | println!(); 922 | } 923 | } 924 | Ok(None) => { 925 | println!("- no stack fragment is available"); 926 | } 927 | Err(e) => { 928 | println!("- could not get stack fragment: {}", e); 929 | } 930 | } 931 | } 932 | } 933 | } 934 | } 935 | 936 | fn offset_to_path( 937 | db: &debugdb::DebugDb, 938 | tid: TypeId, 939 | offset: u64, 940 | ) { 941 | let t = db.type_by_id(tid).unwrap(); 942 | match t { 943 | Type::Array(a) => { 944 | let et = db.type_by_id(a.element_type_id).unwrap(); 945 | if let Some(esz) = et.byte_size(db) { 946 | if esz > 0 { 947 | let index = offset / esz; 948 | let new_offset = offset % esz; 949 | println!(" - index [{}] +0x{:x}", index, new_offset); 950 | offset_to_path(db, a.element_type_id, new_offset); 951 | } 952 | } 953 | } 954 | Type::Struct(s) => { 955 | // This is where an offsetof-to-member index would be convenient 956 | 957 | for m in &s.members { 958 
| if offset < m.location { 959 | continue; 960 | } 961 | let new_offset = offset - m.location; 962 | let mt = db.type_by_id(m.type_id).unwrap(); 963 | if let Some(msz) = mt.byte_size(db) { 964 | if msz > 0 { 965 | if let Some(n) = &m.name { 966 | println!(" - .{} +0x{:x} (in {})", n, new_offset, s.name); 967 | } else { 968 | return; 969 | } 970 | offset_to_path(db, m.type_id, new_offset); 971 | break; 972 | } 973 | } 974 | } 975 | } 976 | _ => (), 977 | } 978 | } 979 | 980 | fn cmd_unwind(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) { 981 | let addr = if let Some(rest) = args.strip_prefix("0x") { 982 | if let Ok(a) = u64::from_str_radix(rest, 16) { 983 | a 984 | } else { 985 | println!("can't parse {} as an address", args); 986 | return; 987 | } 988 | } else if let Ok(a) = args.parse::() { 989 | a 990 | } else { 991 | println!("can't parse {} as an address", args); 992 | return; 993 | }; 994 | 995 | use gimli::UnwindSection; 996 | let mut ctx = gimli::UnwindContext::new(); 997 | let bases = gimli::BaseAddresses::default(); 998 | match db.debug_frame.unwind_info_for_address(&bases, &mut ctx, addr, gimli::DebugFrame::cie_from_offset) { 999 | Ok(ui) => { 1000 | println!("saved args: {} bytes", ui.saved_args_size()); 1001 | print!("cfa: "); 1002 | match ui.cfa() { 1003 | gimli::CfaRule::RegisterAndOffset { register, offset } => { 1004 | println!("reg #{}, offset {}", register.0, offset); 1005 | } 1006 | other => panic!("unsupported CFA rule type: {:?}", other), 1007 | } 1008 | for (n, rule) in ui.registers() { 1009 | print!(" caller reg #{} ", n.0); 1010 | match rule { 1011 | gimli::RegisterRule::Offset(n) => { 1012 | if *n < 0 { 1013 | println!("at CFA-{}", -n); 1014 | } else { 1015 | println!("at CFA+{}", n); 1016 | } 1017 | } 1018 | gimli::RegisterRule::ValOffset(n) => { 1019 | if *n < 0 { 1020 | println!("= CFA-{}", -n); 1021 | } else { 1022 | println!("= CFA+{}", n); 1023 | } 1024 | } 1025 | gimli::RegisterRule::SameValue => { 1026 | println!("preserved"); 
1027 | } 1028 | gimli::RegisterRule::Register(n) => { 1029 | println!("in reg# {}", n.0); 1030 | } 1031 | _ => println!("{:?}", rule), 1032 | } 1033 | } 1034 | } 1035 | Err(e) => { 1036 | println!("failed: {}", e); 1037 | } 1038 | } 1039 | } 1040 | 1041 | fn struct_picture(db: &DebugDb, s: &Struct, width: usize) { 1042 | struct_picture_inner( 1043 | db, 1044 | s.byte_size, 1045 | s.members.iter().enumerate().map(|(i, m)| (i, m, true)), 1046 | width, 1047 | ) 1048 | } 1049 | 1050 | fn struct_picture_inner<'a, N: Eq + Clone + Display>( 1051 | db: &DebugDb, 1052 | byte_size: Option, 1053 | members: impl IntoIterator, 1054 | width: usize, 1055 | ) { 1056 | let Some(size) = byte_size else { 1057 | println!("type has no size"); 1058 | return; 1059 | }; 1060 | 1061 | if size == 0 { 1062 | println!("(type is 0 bytes long)"); 1063 | return; 1064 | } 1065 | 1066 | let mut member_spans: RangeMap = RangeMap::new(); 1067 | let mut member_labels = vec![]; 1068 | for (i, m, in_legend) in members { 1069 | if in_legend { 1070 | member_labels.push({ 1071 | let label = if db.type_by_id(m.type_id).unwrap().byte_size(db) == Some(0) { 1072 | "(ZST)".to_string() 1073 | } else { 1074 | i.to_string() 1075 | }; 1076 | 1077 | let name = if let Some(name) = &m.name { 1078 | name.as_str() 1079 | } else { 1080 | "_" 1081 | }; 1082 | if label == name { 1083 | format!("{name}: {}", NamedGoff(db, m.type_id)) 1084 | } else { 1085 | format!("{label} = {name}: {}", NamedGoff(db, m.type_id)) 1086 | } 1087 | }); 1088 | } 1089 | let offset = m.location; 1090 | let Some(size) = db.type_by_id(m.type_id).unwrap().byte_size(db) else { 1091 | continue; 1092 | }; 1093 | if size != 0 { 1094 | member_spans.insert(offset..offset + size, i); 1095 | } 1096 | } 1097 | 1098 | byte_picture(size, width, |off| { 1099 | member_spans.get(&off).map(|x| x.to_string()) 1100 | }); 1101 | if !member_labels.is_empty() { 1102 | println!(" where:"); 1103 | for label in member_labels { 1104 | println!(" {label}"); 1105 | } 1106 | 
/// Prints an ASCII-art box diagram of `size` bytes, `width` bytes per row.
///
/// `owner` maps a byte offset to the label of whatever occupies that byte, or
/// `None` if nothing does. It is also called for offsets past `size` when the
/// last row is only partly filled. Unowned bytes inside the object are drawn
/// as padding. A label is printed once at the start of each run of bytes with
/// the same owner, and the horizontal rule between two rows is left open
/// where the bytes above and below share an owner.
///
/// A `width` of zero cannot be drawn; a short note is printed instead.
fn byte_picture(
    size: u64,
    width: usize,
    owner: impl Fn(u64) -> Option<String>,
) {
    if width == 0 {
        // Every row computation below divides by the width.
        println!("(cannot draw a picture with zero-width rows)");
        return;
    }
    let width = width as u64;

    // Column header: byte offsets within a row.
    print!(" ");
    for byte in 0..u64::min(size, width) {
        print!(" {byte:^6}");
    }
    println!();

    let wordcount = (size + (width - 1)) / width;
    // Owner of the most recently labelled run (outer `None` = nothing drawn
    // yet).
    let mut current: Option<Option<String>> = None;
    // Owner of each column in the previous row (outer `None` = no previous
    // row).
    let mut above: Vec<Option<Option<String>>> = vec![None; width as usize];
    for word in 0..wordcount {
        // Rule above this row.
        print!(" +");
        for byte in 0..width {
            let n = owner(word * width + byte);
            if above[byte as usize] == Some(n) {
                print!(" +");
            } else {
                print!("------+");
            }
        }
        println!();

        // The row itself, prefixed with its starting offset.
        print!("{:04x} |", word * width);
        for byte in 0..width {
            let off = word * width + byte;
            let n = owner(off);
            if Some(&n) != current.as_ref() {
                if byte != 0 {
                    print!("|");
                }
                if let Some(i) = &n {
                    print!("{:^6}", i);
                } else if off < size {
                    print!(" pad ");
                } else {
                    print!(" ");
                }
                current = Some(n.clone());
            } else {
                if byte != 0 {
                    print!(" ");
                }
                print!(" ");
            }

            if byte == width - 1 {
                // Only close the box if the last column is inside the object.
                if off < size {
                    println!("|");
                } else {
                    println!();
                }
            }

            above[byte as usize] = Some(n);
        }
    }

    // Bottom rule, only as wide as the final (possibly partial) row.
    print!(" +");
    let final_bar = if size % width == 0 { width } else { size % width };
    for _ in 0..final_bar {
        print!("------+");
    }
    println!();
}
parse_type_name(typestr.trim()) { 1264 | None => return, 1265 | Some(ParsedTypeName::Name(n)) => { 1266 | db.types_by_name(n).collect() 1267 | } 1268 | Some(ParsedTypeName::Goff(o)) => { 1269 | db.type_by_id(o).into_iter() 1270 | .map(|t| (o, t)) 1271 | .collect() 1272 | } 1273 | }; 1274 | 1275 | let many = match types.len() { 1276 | 0 => { 1277 | println!("{}", ansi_term::Colour::Red.paint("No types found.")); 1278 | return; 1279 | } 1280 | 1 => false, 1281 | n => { 1282 | println!("{}{} types found with that name:", 1283 | ansi_term::Color::Yellow.paint("note: "), 1284 | n, 1285 | ); 1286 | true 1287 | } 1288 | }; 1289 | 1290 | for (goff, t) in types { 1291 | if many { println!() } 1292 | println!("{}: ", NamedGoff(db, goff)); 1293 | match Value::from_state(&ctx.segments, addr, db, t) { 1294 | Ok(v) => { 1295 | println!("{}", ValueWithDb(v, db)); 1296 | } 1297 | Err(e) => { 1298 | println!("could not parse as this type: {e}"); 1299 | } 1300 | } 1301 | } 1302 | } 1303 | 1304 | fn cmd_decode_async(db: &debugdb::DebugDb, ctx: &mut Ctx, args: &str) { 1305 | let (addrstr, typestr) = if let Some(space) = args.find(' ') { 1306 | args.split_at(space) 1307 | } else { 1308 | println!("usage: decode-async [addr] [typename blah blah]"); 1309 | return; 1310 | }; 1311 | let addr = match parse_int::parse::(addrstr) { 1312 | Ok(x) => x, 1313 | Err(e) => { 1314 | println!("bad address: {e}"); 1315 | return; 1316 | } 1317 | }; 1318 | let types: Vec<_> = match parse_type_name(typestr.trim()) { 1319 | None => return, 1320 | Some(ParsedTypeName::Name(n)) => { 1321 | db.types_by_name(n).collect() 1322 | } 1323 | Some(ParsedTypeName::Goff(o)) => { 1324 | db.type_by_id(o).into_iter() 1325 | .map(|t| (o, t)) 1326 | .collect() 1327 | } 1328 | }; 1329 | 1330 | let many = match types.len() { 1331 | 0 => { 1332 | println!("{}", ansi_term::Colour::Red.paint("No types found.")); 1333 | return; 1334 | } 1335 | 1 => false, 1336 | n => { 1337 | println!("{}{} types found with that name:", 1338 | 
ansi_term::Color::Yellow.paint("note: "), 1339 | n, 1340 | ); 1341 | true 1342 | } 1343 | }; 1344 | 1345 | for (goff, t) in types { 1346 | if many { println!() } 1347 | println!("{}: ", NamedGoff(db, goff)); 1348 | let mut v = &match Value::from_state(&ctx.segments, addr, db, t) { 1349 | Ok(v) => v, 1350 | Err(e) => { 1351 | println!("could not parse as this type: {e}"); 1352 | return; 1353 | } 1354 | }; 1355 | let parts = Regex::new(r#"^(.*)::\{async_fn_env#0\}(<.*)?$"#).unwrap(); 1356 | let suspend_state = Regex::new(r#"::Suspend([0-9]+)$"#).unwrap(); 1357 | let mut first = true; 1358 | let bold = ansi_term::Style::new().bold(); 1359 | loop { 1360 | if !first { 1361 | print!("waiting on: "); 1362 | } 1363 | first = false; 1364 | let Value::Enum(e) = v else { 1365 | println!("{}hand-rolled future{}", bold.prefix(), bold.suffix()); 1366 | println!(" type: {}", v.type_name()); 1367 | break; 1368 | }; 1369 | let Some(parts) = parts.captures(&e.name) else { 1370 | println!("(name is weird for an async fn env)"); 1371 | break; 1372 | }; 1373 | let name = &parts[1]; 1374 | let parms = parts.get(2).map(|m| m.as_str()).unwrap_or(""); 1375 | println!("async fn {}{name}{parms}{}", bold.prefix(), bold.suffix()); 1376 | let state = &e.disc; 1377 | let state_name = &e.value.name; 1378 | 1379 | if state_name.ends_with("Unresumed") { 1380 | println!(" future has not yet been polled"); 1381 | break; 1382 | } else if state_name.ends_with("Returned") { 1383 | println!(" future has already resolved"); 1384 | break; 1385 | } else if state_name.ends_with("Panicked") { 1386 | println!(" future panicked on previous poll"); 1387 | break; 1388 | } else if let Some(sc) = suspend_state.captures(state_name) { 1389 | if let Ok(n) = sc[1].parse::() { 1390 | println!(" suspended at await point {n}"); 1391 | } else { 1392 | println!(" unrecognized state {state}: {state_name}"); 1393 | } 1394 | } else { 1395 | println!(" unrecognized state {state}: {state_name}"); 1396 | } 1397 | 1398 | let mut 
awaitees = e.value.members_named("__awaitee"); 1399 | let Some(awaitee) = awaitees.next() else { 1400 | println!(" (stopped unexpectedly)"); 1401 | break; 1402 | }; 1403 | if awaitees.next().is_some() { 1404 | println!(" (multiple __awaitee fields)"); 1405 | break; 1406 | } 1407 | v = awaitee; 1408 | } 1409 | } 1410 | } 1411 | 1412 | fn cmd_decode_blob(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) { 1413 | let type_name = args.trim(); 1414 | let types: Vec<_> = match parse_type_name(type_name) { 1415 | None => return, 1416 | Some(ParsedTypeName::Name(n)) => { 1417 | db.types_by_name(n).collect() 1418 | } 1419 | Some(ParsedTypeName::Goff(o)) => { 1420 | db.type_by_id(o).into_iter() 1421 | .map(|t| (o, t)) 1422 | .collect() 1423 | } 1424 | }; 1425 | 1426 | let many = match types.len() { 1427 | 0 => { 1428 | println!("{}", ansi_term::Colour::Red.paint("No types found.")); 1429 | return; 1430 | } 1431 | 1 => false, 1432 | n => { 1433 | println!("{}{} types found with that name:", 1434 | ansi_term::Color::Yellow.paint("note: "), 1435 | n, 1436 | ); 1437 | true 1438 | } 1439 | }; 1440 | 1441 | println!("Paste hex-encoded memory blob. 
Whitespace OK."); 1442 | println!("Address prefix ending in colon will be removed."); 1443 | println!("Enter a blank line to end."); 1444 | 1445 | let stdin = std::io::stdin().lock(); 1446 | let mut img = vec![]; 1447 | for line in stdin.lines() { 1448 | let line = match line { 1449 | Err(e) => { 1450 | println!("input error: {e}"); 1451 | return; 1452 | } 1453 | Ok(v) => v, 1454 | }; 1455 | let mut line = line.trim(); 1456 | if line.is_empty() { 1457 | break; 1458 | } 1459 | if let Some(colon) = line.find(':') { 1460 | line = &line.split_at(colon).1[1..]; 1461 | } 1462 | 1463 | let mut hexits = vec![]; 1464 | for b in line.bytes() { 1465 | match b { 1466 | b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => { 1467 | hexits.push(b); 1468 | } 1469 | b' ' | b'\t' | b'\r' | b'\n' => (), 1470 | _ => { 1471 | println!("unexpected byte in input: {b:#x?}"); 1472 | return; 1473 | } 1474 | } 1475 | } 1476 | 1477 | let bytes = hexits.chunks_exact(2) 1478 | .map(|chunk| u8::from_str_radix(std::str::from_utf8(chunk).unwrap(), 16)) 1479 | .collect::, _>>(); 1480 | match bytes { 1481 | Err(e) => { 1482 | println!("couldn't parse that: {e}"); 1483 | return; 1484 | } 1485 | Ok(b) => img.extend(b), 1486 | } 1487 | } 1488 | 1489 | for (goff, t) in types { 1490 | if many { println!() } 1491 | println!("{}: ", NamedGoff(db, goff)); 1492 | let Some(size) = t.byte_size(db) else { 1493 | println!(" (type is unsized, cannot decode)"); 1494 | continue; 1495 | }; 1496 | let Ok(size) = usize::try_from(size) else { 1497 | println!(" (type too big for this platform)"); 1498 | continue; 1499 | }; 1500 | let mut this_img = img.clone(); 1501 | if size > this_img.len() { 1502 | println!("(padding entered data to {size} bytes)"); 1503 | this_img.resize(size, 0); 1504 | } 1505 | let machine = ImgMachine::new(this_img); 1506 | match Value::from_state(&machine, 0, db, t) { 1507 | Ok(v) => { 1508 | println!("{}", ValueWithDb(v, db)); 1509 | } 1510 | Err(e) => { 1511 | println!("could not parse as this type: 
{e}"); 1512 | } 1513 | } 1514 | } 1515 | } 1516 | 1517 | fn cmd_decode_async_blob(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) { 1518 | let type_name = args.trim(); 1519 | let types: Vec<_> = match parse_type_name(type_name) { 1520 | None => return, 1521 | Some(ParsedTypeName::Name(n)) => { 1522 | db.types_by_name(n).collect() 1523 | } 1524 | Some(ParsedTypeName::Goff(o)) => { 1525 | db.type_by_id(o).into_iter() 1526 | .map(|t| (o, t)) 1527 | .collect() 1528 | } 1529 | }; 1530 | 1531 | let many = match types.len() { 1532 | 0 => { 1533 | println!("{}", ansi_term::Colour::Red.paint("No types found.")); 1534 | return; 1535 | } 1536 | 1 => false, 1537 | n => { 1538 | println!("{}{} types found with that name:", 1539 | ansi_term::Color::Yellow.paint("note: "), 1540 | n, 1541 | ); 1542 | true 1543 | } 1544 | }; 1545 | 1546 | println!("Paste hex-encoded memory blob. Whitespace OK."); 1547 | println!("Address prefix ending in colon will be removed."); 1548 | println!("Enter a blank line to end."); 1549 | 1550 | let stdin = std::io::stdin().lock(); 1551 | let mut img = vec![]; 1552 | for line in stdin.lines() { 1553 | let line = match line { 1554 | Err(e) => { 1555 | println!("input error: {e}"); 1556 | return; 1557 | } 1558 | Ok(v) => v, 1559 | }; 1560 | let mut line = line.trim(); 1561 | if line.is_empty() { 1562 | break; 1563 | } 1564 | if let Some(colon) = line.find(':') { 1565 | line = &line.split_at(colon).1[1..]; 1566 | } 1567 | 1568 | let mut hexits = vec![]; 1569 | for b in line.bytes() { 1570 | match b { 1571 | b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => { 1572 | hexits.push(b); 1573 | } 1574 | b' ' | b'\t' | b'\r' | b'\n' => (), 1575 | _ => { 1576 | println!("unexpected byte in input: {b:#x?}"); 1577 | return; 1578 | } 1579 | } 1580 | } 1581 | 1582 | let bytes = hexits.chunks_exact(2) 1583 | .map(|chunk| u8::from_str_radix(std::str::from_utf8(chunk).unwrap(), 16)) 1584 | .collect::, _>>(); 1585 | match bytes { 1586 | Err(e) => { 1587 | println!("couldn't 
parse that: {e}"); 1588 | return; 1589 | } 1590 | Ok(b) => img.extend(b), 1591 | } 1592 | } 1593 | 1594 | for (goff, t) in types { 1595 | if many { println!() } 1596 | println!("{}: ", NamedGoff(db, goff)); 1597 | let Some(size) = t.byte_size(db) else { 1598 | println!(" (type is unsized, cannot decode)"); 1599 | continue; 1600 | }; 1601 | let Ok(size) = usize::try_from(size) else { 1602 | println!(" (type too big for this platform)"); 1603 | continue; 1604 | }; 1605 | let mut this_img = img.clone(); 1606 | if size > this_img.len() { 1607 | println!("(padding entered data to {size} bytes)"); 1608 | this_img.resize(size, 0); 1609 | } 1610 | let machine = ImgMachine::new(this_img); 1611 | let mut v = &match Value::from_state(&machine, 0, db, t) { 1612 | Ok(v) => v, 1613 | Err(e) => { 1614 | println!("could not parse as this type: {e}"); 1615 | return; 1616 | } 1617 | }; 1618 | let parts = Regex::new(r#"^(.*)::\{async_fn_env#0\}(<.*)?$"#).unwrap(); 1619 | let suspend_state = Regex::new(r#"::Suspend([0-9]+)$"#).unwrap(); 1620 | let mut first = true; 1621 | loop { 1622 | if !first { 1623 | print!("waiting on: "); 1624 | } 1625 | first = false; 1626 | let Value::Enum(e) = v else { 1627 | println!("hand-rolled future"); 1628 | println!(" type: {}", v.type_name()); 1629 | break; 1630 | }; 1631 | let Some(parts) = parts.captures(&e.name) else { 1632 | println!("(name is weird for an async fn env)"); 1633 | break; 1634 | }; 1635 | let name = &parts[1]; 1636 | let parms = parts.get(2).map(|m| m.as_str()).unwrap_or(""); 1637 | println!("async fn {name}{parms}"); 1638 | let state = &e.disc; 1639 | let state_name = &e.value.name; 1640 | 1641 | if state_name.ends_with("Unresumed") { 1642 | println!(" future has not yet been polled"); 1643 | break; 1644 | } else if state_name.ends_with("Returned") { 1645 | println!(" future has already resolved"); 1646 | break; 1647 | } else if state_name.ends_with("Panicked") { 1648 | println!(" future panicked on previous poll"); 1649 | break; 
1650 | } else if let Some(sc) = suspend_state.captures(state_name) { 1651 | if let Ok(n) = sc[1].parse::() { 1652 | println!(" suspended at await point {n}"); 1653 | } else { 1654 | println!(" unrecognized state {state}: {state_name}"); 1655 | } 1656 | } else { 1657 | println!(" unrecognized state {state}: {state_name}"); 1658 | } 1659 | 1660 | let mut awaitees = e.value.members_named("__awaitee"); 1661 | let Some(awaitee) = awaitees.next() else { 1662 | println!(" (stopped unexpectedly)"); 1663 | break; 1664 | }; 1665 | if awaitees.next().is_some() { 1666 | println!(" (multiple __awaitee fields)"); 1667 | break; 1668 | } 1669 | v = awaitee; 1670 | } 1671 | } 1672 | } 1673 | 1674 | 1675 | fn cmd_load( 1676 | _db: &debugdb::DebugDb, 1677 | ctx: &mut Ctx, 1678 | args: &str, 1679 | ) { 1680 | let args = args.trim(); 1681 | let words = args.split_whitespace().collect::>(); 1682 | if words.len() != 2 { 1683 | println!("usage: load [filename] [address]"); 1684 | return; 1685 | } 1686 | let filename = words[0]; 1687 | let address = match parse_int::parse::(words[1]) { 1688 | Ok(a) => a, 1689 | Err(e) => { 1690 | println!("bad address: {e}"); 1691 | return; 1692 | } 1693 | }; 1694 | 1695 | let image = match std::fs::read(filename) { 1696 | Ok(bytes) => bytes, 1697 | Err(e) => { 1698 | println!("unable to read file: {e}"); 1699 | return; 1700 | } 1701 | }; 1702 | 1703 | let end = address + u64::try_from(image.len()).unwrap(); 1704 | 1705 | ctx.segments.insert(address..=end, image); 1706 | } 1707 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Collects debug information from a program into a queryable, cross-referenced 2 | //! form. 
// Internal type abbreviations

/// An index from some key `K` to the set of IDs `I` filed under that key.
type BTreeIndex<I, K> = BTreeMap<K, BTreeSet<I>>;
/// Reader over reference-counted section data with run-time endianness.
type RtArcReader = gimli::EndianReader<gimli::RunTimeEndian, Arc<[u8]>>;

/// A database of information extracted from the debug info of a program.
///
/// This is primarily focused on correctly representing Rust programs, but it
/// can represent a large subset of C types as a side effect -- currently only
/// unnamed types present a problem. This could be fixed.
#[derive(Clone, Debug)]
pub struct DebugDb {
    /// Endianness of the target system.
    endian: gimli::RunTimeEndian,
    /// Pointer width of the target system. Currently only 32 and 64 are
    /// supported here.
    is_64: bool,

    /// All types in the program, indexed by location in the debug section(s).
    ///
    /// This is the authoritative set of types, other type-related fields index
    /// into this.
    ///
    /// Invariant: within each entry, the key is the same as the type's `offset`
    /// field.
    types: BTreeMap<TypeId, Type>,

    /// Type canonicalization relationships. If a type ID is present as a key in
    /// this map, then it is _not_ the canonical instance of its type, and
    /// should be replaced by the corresponding value in the map for analysis
    /// purposes.
    type_canon: BTreeMap<TypeId, TypeId>,

    /// Reverse type canonicalization relationship. Each key in this map is the
    /// ID of a canonical instance of a family of types, and the value lists
    /// those types.
    type_rcanon: BTreeMap<TypeId, BTreeSet<TypeId>>,

    /// Index: type name to location(s) that can be looked up in `types`.
    ///
    /// Invariant: all string keys correspond to names of types in `types`.
    ///
    /// Invariant: all UnitSectionOffset values have corresponding entries in
    /// `types`.
    type_name_index: BTreeIndex<TypeId, String>,

    /// Index: array element type and size to location(s) in `types`. Since
    /// arrays do not have names in DWARF, they can't be looked up in the
    /// `type_name_index`.
    // NOTE(review): the element-count integer type was lost in extraction;
    // `u64` is assumed here -- confirm against `model::Array::count`.
    array_index: BTreeIndex<TypeId, (TypeId, Option<u64>)>,

    /// Index: subroutine argument and return types to location(s) in `types`.
    /// Since subroutine types do not have names in DWARF, they can't be looked
    /// up in the `type_name_index`.
    ///
    /// The specific structure here is a nested map: argument types -> return
    /// type -> type goffs. This allows the first lookup to happen with a slice,
    /// thanks to the `Borrow` trait, which would not be possible if the key
    /// were instead a `(Vec, Option)`.
    ///
    /// Note that this is subroutine _types,_ not subprograms.
    subroutine_index: BTreeMap<Vec<TypeId>, BTreeIndex<TypeId, Option<TypeId>>>,

    /// All subprograms, indexed by location in the debug section(s).
    subprograms: BTreeMap<ProgramId, Subprogram>,

    /// Mapping of text address to line number information.
    ///
    /// Each key is the starting PC of the rows stored under it.
    line_table: BTreeMap<u64, Vec<LineNumberRow>>,

    /// All static variables, indexed by ID.
    variables: BTreeMap<VarId, StaticVariable>,

    /// Index: static variables by name.
    variables_by_name: BTreeIndex<VarId, String>,

    /// All entities with fixed addresses, indexed by base address.
    entities_by_address: BTreeMap<u64, Vec<AddressRange>>,

    // TODO
    /// The program's `.debug_frame` section, exposed as loaded.
    pub debug_frame: gimli::DebugFrame<gimli::EndianReader<gimli::RunTimeEndian, Arc<[u8]>>>,

    /// Index: address to the names of the object-file symbols recorded there.
    raw_symbols_by_address: BTreeMap<u64, BTreeSet<String>>,
    /// Index: object-file symbol name to the address(es) recorded for it.
    raw_symbols_by_name: BTreeMap<String, BTreeSet<u64>>,
}
109 | pub fn endian(&self) -> gimli::RunTimeEndian { 110 | self.endian 111 | } 112 | 113 | /// Gets the size of a pointer in the program, in bytes. 114 | pub fn pointer_size(&self) -> usize { 115 | if self.is_64 { 116 | 8 117 | } else { 118 | 4 119 | } 120 | } 121 | 122 | /// Returns the number of types in the debug info. 123 | pub fn type_count(&self) -> usize { 124 | self.types.len() 125 | } 126 | 127 | /// Produces an iterator over all types defined in the debug info, together 128 | /// with their IDs. 129 | pub fn types( 130 | &self, 131 | ) -> impl Iterator + '_ { 132 | self.types.iter().map(|(&id, ty)| (id, ty)) 133 | } 134 | 135 | /// Produces an iterator over all canonical types defined in the debug info, 136 | /// together with their IDs. 137 | pub fn canonical_types( 138 | &self, 139 | ) -> impl Iterator + '_ { 140 | self.types() 141 | .filter(move |(tid, _t)| !self.type_canon.contains_key(tid)) 142 | } 143 | 144 | pub fn aliases_of_type(&self, id: TypeId) -> Option<&BTreeSet> { 145 | self.type_rcanon.get(&id) 146 | } 147 | 148 | /// Looks up the type with the given ID. 149 | /// 150 | /// If you got `id` from this instance, our consistency invariant ensures 151 | /// that the result will be `Some`. If `id` is from another instance, or 152 | /// made up, you may get `None`. 153 | pub fn type_by_id( 154 | &self, 155 | id: TypeId, 156 | ) -> Option<&Type> { 157 | self.types.get(&id) 158 | } 159 | 160 | /// Shorthand for looking up the name of a type. 161 | /// 162 | /// Note that not all types have names, so this may return `None` even if 163 | /// the type exists. 164 | pub fn type_name( 165 | &self, 166 | id: TypeId, 167 | ) -> Option> { 168 | Some(self.type_by_id(id)?.name(self)) 169 | } 170 | 171 | /// Consults the type-name index and returns an iterator over types with a 172 | /// given name. 173 | /// 174 | /// Names are matched in their entirety, e.g. the name `"Option"` does not 175 | /// match a type `"core::option::Option"`. 
176 | /// 177 | /// Not all types are in the type name index. In particular, array types and 178 | /// subroutine types. 179 | pub fn types_by_name( 180 | &self, 181 | name: &str, 182 | ) -> impl Iterator + '_ { 183 | self.consult_index(&self.type_name_index, name) 184 | } 185 | 186 | /// Consults the array index and returns an iterator over array types with a 187 | /// particular shape. 188 | pub fn array_types( 189 | &self, 190 | element: TypeId, 191 | count: Option, 192 | ) -> impl Iterator + '_ { 193 | self.consult_index(&self.array_index, &(element, count)) 194 | } 195 | 196 | /// Consults the subroutine index and returns an iterator over subroutine 197 | /// types with a particular shape. 198 | /// 199 | /// The return type is optional because, in both C and Rust, DWARF will omit 200 | /// the return type for subroutines returning `void` / `()`. As a result, 201 | /// looking up subroutines returning the `()` type will not produce results. 202 | pub fn subroutine_types( 203 | &self, 204 | argument_tys: &[TypeId], 205 | return_ty: Option, 206 | ) -> impl Iterator + '_ { 207 | self.subroutine_index 208 | .get(argument_tys) 209 | .into_iter() 210 | .flat_map(move |index| 211 | self.consult_index(index, &return_ty) 212 | ) 213 | } 214 | 215 | /// Returns an iterator over all subprograms defined in this program. 216 | pub fn subprograms( 217 | &self, 218 | ) -> impl Iterator + '_ { 219 | self.subprograms.iter().map(|(&goff, ty)| (goff, ty)) 220 | } 221 | 222 | /// Looks up a subprogram given its `ProgramId`. 223 | pub fn subprogram_by_id( 224 | &self, 225 | pid: ProgramId, 226 | ) -> Option<&Subprogram> { 227 | self.subprograms.get(&pid) 228 | } 229 | 230 | /// Returns an iterator over _all_ rows in the computed line number table. 231 | /// 232 | /// You probably don't want to do this. 
233 | pub fn line_table_rows( 234 | &self, 235 | ) -> impl Iterator + '_ { 236 | self.line_table.iter().map(|(&a, row)| (a, &**row)) 237 | } 238 | 239 | /// Looks up the line number table entry associated with `pc`. 240 | pub fn lookup_line_row( 241 | &self, 242 | pc: u64, 243 | ) -> Option<&LineNumberRow> { 244 | self.line_table.range(..=pc) 245 | .rev() 246 | .flat_map(|(_, rows)| rows) 247 | .take_while(move |row| row.pc_range.end > pc) 248 | .find(move |row| row.pc_range.contains(&pc)) 249 | } 250 | 251 | /// Computes the static stack slice implied by a PC value. 252 | /// 253 | /// For simple cases of subroutines without inlined code, the stack slice 254 | /// contains a single entry describing the subroutine and the line number 255 | /// within it corresponding to the PC. 256 | /// 257 | /// For more complex cases involving inlines, possibly multiple layers of 258 | /// inlines, the stack slice will be deeper. In this case, the last element 259 | /// of the returned vec is the _innermost_ inline, and the first element is 260 | /// the enclosing (non-inlined) subprogram. 261 | pub fn static_stack_for_pc( 262 | &self, 263 | pc: u64, 264 | ) -> Result>, ParseError> { 265 | // Find subprogram containing PC. 266 | let Some((pid, subp)) = self.subprograms() 267 | .find(|(_, subp)| subp.pc_range 268 | .as_ref() 269 | .map(|r| r.contains(&pc)) 270 | .unwrap_or(false)) 271 | else { return Ok(None); }; 272 | 273 | let mut frag = vec![]; 274 | 275 | // Follow inlined subroutine tree to the tip, recording call info at 276 | // each step. 277 | let mut enclosing_prog = pid; 278 | let mut inlines = Some(&subp.inlines); 279 | 'inline_loop: 280 | while let Some(inl) = inlines.take() { 281 | for inlsub in inl { 282 | for pcr in &inlsub.pc_ranges { 283 | if pcr.begin <= pc && pc < pcr.end { 284 | // We're in this one. 
285 | if let Some(file) = &inlsub.call_coord.file { 286 | frag.push(PcInfo { 287 | subprogram: enclosing_prog, 288 | file: file.clone(), 289 | line: inlsub.call_coord.line, 290 | column: inlsub.call_coord.column, 291 | }); 292 | 293 | enclosing_prog = ProgramId( 294 | inlsub.abstract_origin 295 | .expect("inlined sub w/o abstract_origin") 296 | ); 297 | inlines = Some(&inlsub.inlines); 298 | continue 'inline_loop; 299 | } 300 | } 301 | } 302 | } 303 | } 304 | 305 | // Finally, find the innermost record from the line number info. 306 | if let Some(row) = self.lookup_line_row(pc) { 307 | frag.push(PcInfo { 308 | subprogram: enclosing_prog, 309 | file: row.file.clone(), 310 | line: row.line, 311 | column: row.column, 312 | }); 313 | } 314 | 315 | Ok(Some(frag)) 316 | } 317 | 318 | pub fn unique_raw_symbol_by_name( 319 | &self, 320 | name: &str, 321 | ) -> Option { 322 | let addresses = self.raw_symbols_by_name.get(name)?; 323 | let mut i = addresses.iter().cloned(); 324 | let result = i.next()?; 325 | if i.next().is_some() { 326 | None 327 | } else { 328 | Some(result) 329 | } 330 | } 331 | 332 | pub fn raw_symbols_for_address( 333 | &self, 334 | address: u64, 335 | ) -> impl Iterator { 336 | self.raw_symbols_by_address.get(&address) 337 | .into_iter() 338 | .flat_map(|set| set.iter().map(String::as_str)) 339 | } 340 | 341 | /// Returns an iterator over all static variables defined in this program. 
342 | pub fn static_variables( 343 | &self, 344 | ) -> impl Iterator + '_ { 345 | self.variables.iter().map(|(&goff, ty)| (goff, ty)) 346 | } 347 | 348 | pub fn static_variable_by_id( 349 | &self, 350 | id: VarId, 351 | ) -> Option<&StaticVariable> { 352 | self.variables.get(&id) 353 | } 354 | 355 | pub fn static_variables_by_name( 356 | &self, 357 | name: &str, 358 | ) -> impl Iterator + '_ { 359 | self.consult_index_generic(&self.variables_by_name, name, &self.variables) 360 | } 361 | 362 | pub fn unique_static_variable_by_name( 363 | &self, 364 | name: &str, 365 | ) -> Option<(VarId, &StaticVariable)> { 366 | let mut vs = self.static_variables_by_name(name); 367 | let result = vs.next()?; 368 | if vs.next().is_some() { 369 | None 370 | } else { 371 | Some(result) 372 | } 373 | } 374 | 375 | pub fn entities_by_address( 376 | &self, 377 | address: u64, 378 | ) -> impl Iterator + '_ { 379 | self.entities_by_address.range(..=address) 380 | .rev() 381 | .flat_map(|(_, rec)| rec) 382 | .filter(move |rec| rec.range.contains(&address)) 383 | } 384 | 385 | /// Looks up `key` in `index`, and then transforms the result by (1) copying 386 | /// the goffs and (2) attaching the associated `Type` to each item. 387 | fn consult_index<'d, K, Q>( 388 | &'d self, 389 | index: &'d BTreeIndex, 390 | key: &Q, 391 | ) -> impl Iterator + 'd 392 | where K: std::borrow::Borrow + Ord, 393 | Q: Ord + ?Sized + 'd, 394 | { 395 | self.consult_index_generic(index, key, &self.types) 396 | } 397 | 398 | /// Looks up `key` in `index`, and then transforms the result by (1) copying 399 | /// the goffs and (2) attaching the associated `Type` to each item. 
/// Builder that accumulates the type information from a program and produces a
/// `DebugDb` database.
///
/// This is primarily intended as a write-only sink for type information. After
/// everything is stuffed in, `build()` will validate the information, generate
/// indices, and produce a `DebugDb` database.
#[derive(Clone, Debug)]
pub struct DebugDbBuilder {
    /// Namespace path currently being parsed; see `path_component`.
    path: Vec<String>,
    /// Endianness of the target system.
    endian: gimli::RunTimeEndian,
    /// Whether the target uses 64-bit pointers.
    is_64: bool,
    /// Types recorded so far, keyed by ID.
    types: BTreeMap<TypeId, Type>,
    /// Declarations recorded so far: fully-qualified name to the IDs declared
    /// under that name. Resolved against `types` in `build`.
    decls: BTreeMap<String, BTreeSet<TypeId>>,
    /// Passed through to `DebugDb::debug_frame` unchanged.
    debug_frame: gimli::DebugFrame<gimli::EndianReader<gimli::RunTimeEndian, Arc<[u8]>>>,

    /// Subprograms recorded so far, keyed by ID.
    subprograms: BTreeMap<ProgramId, Subprogram>,
    /// Line number rows recorded so far, keyed by the address they were
    /// recorded under.
    line_table: BTreeMap<u64, Vec<LineNumberRow>>,
    /// Static variables recorded so far, keyed by ID.
    variables: BTreeMap<VarId, StaticVariable>,

    /// Raw object-file symbols as `(name, address)` pairs, in recording order.
    raw_symbols: Vec<(String, u64)>,
}

impl DebugDbBuilder {
    /// Creates a new `DebugDbBuilder` for information from a program with the
    /// given endianness and pointer width.
    pub fn new(
        endian: gimli::RunTimeEndian,
        is_64: bool,
        debug_frame: gimli::DebugFrame<gimli::EndianReader<gimli::RunTimeEndian, Arc<[u8]>>>,
    ) -> Self {
        Self {
            endian,
            path: vec![],
            is_64,
            debug_frame,
            types: BTreeMap::new(),
            decls: BTreeMap::new(),
            subprograms: BTreeMap::new(),
            line_table: BTreeMap::new(),
            variables: BTreeMap::new(),
            raw_symbols: vec![],
        }
    }

    /// Consumes the builder and produces the finished `DebugDb`.
    ///
    /// In order, this: indexes types by name; tries to unify types that share
    /// a name; resolves declarations against the narrowed name index; adds an
    /// `Unresolved` placeholder for every referenced type ID that has no
    /// entry; builds the remaining indices; and checks inlined subroutines.
    ///
    /// Ambiguous and unresolved declarations are reported on stderr and do not
    /// fail the build.
    ///
    /// # Errors
    ///
    /// Returns `ParseError::UnboundSubroutine` if any inlined subroutine
    /// (at any nesting depth) lacks an abstract origin.
    pub fn build(self) -> Result<DebugDb, ParseError> {
        let mut types = self.types;

        // Build type name index.
        let mut type_name_index = index_by_key(&types, |_, t| match t {
            Type::Struct(s) => Some(s.name.clone()),
            Type::Enum(s) => Some(s.name.clone()),
            Type::Base(s) => Some(s.name.clone()),
            Type::CEnum(s) => Some(s.name.clone()),
            Type::Union(s) => Some(s.name.clone()),
            // Pointer names are optional, so unnamed pointers are not indexed.
            Type::Pointer(s) => s.name.clone(),
            _ => None,
        });

        // Attempt to unify similarly named types, narrowing the type name index
        // as we go.
        let mut u = crate::unify::State::new(&types);
        for homonyms in type_name_index.values_mut() {
            let mut workset = homonyms.clone();
            // Each name gets a private unification state, merged into `u`
            // once the group has been processed.
            let mut group_u = crate::unify::State::new(&types);
            // Try every unordered pair exactly once: pop one, compare it with
            // everything still in the set.
            while let Some(t) = workset.pop_first() {
                for o in &workset {
                    t.try_unify(o, &mut group_u);
                }
            }
            // Reduce the set of homonyms for this name to only those types that
            // were not found to have equivalent partners.
            homonyms.retain(|t| !group_u.is_subbed(*t));
            u.merge(group_u);
        }

        // Attempt to resolve decls.
        let mut ambiguous_decl_count = 0;
        for (name, decl_ids) in &self.decls {
            if let Some(tids) = type_name_index.get(name) {
                if tids.len() != 1 {
                    // The name is still ambiguous after unification.
                    eprintln!("WARN: decl ambiguous; {name} could be:");
                    for tid in tids {
                        eprintln!("- {tid:x?}");
                    }
                    ambiguous_decl_count += 1;
                }
                // Assume it's the first one.
                // NOTE(review): this unwrap relies on the homonym set being
                // non-empty after the `retain` above -- confirm.
                let tid = *tids.iter().next().unwrap();
                for &alias in decl_ids {
                    u.equate(alias, tid);
                }
            } else {
                eprintln!("WARN: unresolved declaration {name}:");
                for id in decl_ids {
                    eprintln!(" - {id:x?}");
                }
            }
        }
        if ambiguous_decl_count > 0 {
            eprintln!("WARN: {ambiguous_decl_count} ambiguous declarations found");
        }

        // Placeholders for referenced-but-missing types, added to `types`
        // after validation finishes.
        let mut unresolved_types = BTreeMap::new();

        // Canonicalizes `id` and, if no such type exists, queues an
        // `Unresolved` placeholder for it. Currently never fails.
        let mut check = |mut id| -> Result<(), Infallible> {
            id = u.canonicalize(id);
            if types.contains_key(&id) {
                Ok(())
            } else {
                unresolved_types.insert(id, Type::Unresolved(Unresolved {
                    offset: id.0,
                }));
                Ok(()) // TODO
            }
        };

        // Validate that the world is complete and internally consistent.
        for t in types.values() {
            match t {
                // These reference no other types.
                Type::Base(_) => (),
                Type::CEnum(_) => (),
                Type::Unresolved(_) => (),

                Type::Struct(s) => {
                    for ttp in &s.template_type_parameters {
                        check(ttp.type_id)?;
                    }
                    for m in &s.members {
                        check(m.type_id)?;
                    }
                }
                Type::Union(s) => {
                    for ttp in &s.template_type_parameters {
                        check(ttp.type_id)?;
                    }
                    for m in &s.members {
                        check(m.type_id)?;
                    }
                }
                Type::Enum(s) => {
                    for ttp in &s.template_type_parameters {
                        check(ttp.type_id)?;
                    }
                    match &s.shape {
                        VariantShape::Zero => (),
                        VariantShape::One(variant) => {
                            check(variant.member.type_id)?;
                        }
                        VariantShape::Many {
                            member, variants, ..
                        } => {
                            check(member.type_id)?;
                            for v in variants.values() {
                                check(v.member.type_id)?;
                            }
                        }
                    }
                }
                Type::Array(s) => {
                    check(s.element_type_id)?;
                    // The index type is synthetic, but, might as well.
                    check(s.index_type_id)?;
                }
                Type::Pointer(s) => {
                    check(s.type_id)?;
                }
                Type::Subroutine(s) => {
                    if let Some(t) = s.return_type_id {
                        check(t)?;
                    }
                    for &t in &s.formal_parameters {
                        check(t)?;
                    }
                }
            }
        }

        let type_canon = u.finish();
        types.extend(unresolved_types);

        // Build array index.
        let array_index = index_by_key(&types, |_, t| match t {
            Type::Array(a) => Some((a.element_type_id, a.count)),
            _ => None,
        });
        // Build subroutine index. This is more complex in shape than the other
        // indices.
        let subroutine_index = {
            let mut ind = BTreeMap::<_, BTreeIndex<_, _>>::new();
            for (k, v) in &types {
                if let Type::Subroutine(s) = v {
                    ind.entry(s.formal_parameters.clone())
                        .or_default()
                        .entry(s.return_type_id)
                        .or_default()
                        .insert(*k);
                }
            }
            ind
        };

        let variables_by_name = index_by_key(&self.variables, |_, v| Some(v.name.clone()));

        // Build address map.
        let mut entities_by_address: BTreeMap<_, Vec<_>> = BTreeMap::new();
        for (&vid, v) in &self.variables {
            let Some(t) = types.get(&v.type_id) else {
                eprintln!("WARN: type of variable {} not found: {:x?}",
                    v.name, v.type_id);
                continue;
            };
            let sz = t.byte_size_early(
                if self.is_64 { 8 } else { 4 },
                |t| types.get(&t),
            );
            // Variables whose size cannot be determined are left out of the
            // address map.
            if let Some(sz) = sz {
                entities_by_address.entry(v.location)
                    .or_default()
                    .push(AddressRange {
                        range: v.location..v.location + sz,
                        entity: EntityId::Var(vid),
                    });
            }
        }
        for (&pid, p) in &self.subprograms {
            if let Some(pc_range) = p.pc_range.clone() {
                entities_by_address.entry(pc_range.start)
                    .or_default()
                    .push(AddressRange {
                        range: pc_range,
                        entity: EntityId::Prog(pid),
                    });
            }
        }

        // Recursively requires every inlined subroutine to have an abstract
        // origin.
        fn check_inl(inl: &InlinedSubroutine) -> Result<(), ParseError> {
            if inl.abstract_origin.is_none() {
                return Err(ParseError::UnboundSubroutine(inl.offset));
            }
            for inner in &inl.inlines {
                check_inl(inner)?;
            }
            Ok(())
        }

        // Check that inlined subroutines match our expectations.
        for subprogram in self.subprograms.values() {
            for inl in &subprogram.inlines {
                check_inl(inl)?;
            }
        }

        let type_rcanon = invert(&type_canon);

        // `raw_symbols` holds (name, address) pairs. `index_by_key` stores the
        // first element of each pair it is given under the projected datum,
        // so the pairs are swapped here to get name -> addresses...
        let raw_symbols_by_name = index_by_key(
            self.raw_symbols.iter().map(|(k, v)| (v, k)),
            |_, name| Some(name.to_string()),
        );

        // ...and passed through as-is to get address -> names.
        let raw_symbols_by_address = index_by_key(
            self.raw_symbols.iter().map(|(k, v)| (k, v)),
            |_, addr| Some(*addr),
        );


        Ok(DebugDb {
            endian: self.endian,
            types,
            type_canon,
            type_rcanon,
            is_64: self.is_64,
            subprograms: self.subprograms,
            line_table: self.line_table,
            variables: self.variables,
            debug_frame: self.debug_frame,
            type_name_index,
            array_index,
            subroutine_index,
            variables_by_name,
            entities_by_address,
            raw_symbols_by_name,
            raw_symbols_by_address,
        })
    }

    /// Records a raw object-file symbol `name` located at `addr`.
    pub fn record_raw_symbol(&mut self, addr: u64, name: String) {
        self.raw_symbols.push((name, addr));
    }

    /// Adds a type to the database.
    ///
    /// It's unusual to call this from outside the library, but it might be
    /// useful if you have additional type information from some outside source.
    ///
    /// The type is stored under the ID formed from its own offset, replacing
    /// any type previously recorded under that ID.
    pub fn record_type(&mut self, t: impl Into<Type>) {
        let t = t.into();
        self.types.insert(TypeId(t.offset()), t);
    }

    /// Adds a subprogram, keyed by the ID formed from its offset.
    pub fn record_subprogram(&mut self, t: Subprogram) {
        self.subprograms.insert(ProgramId(t.offset), t);
    }

    /// Adds a static variable, keyed by the ID formed from its offset.
    pub fn record_variable(&mut self, t: StaticVariable) {
        self.variables.insert(VarId(t.offset), t);
    }

    /// Appends a line number row to those already recorded under `addr`.
    pub fn record_line_table_row(&mut self, addr: u64, r: LineNumberRow) {
        self.line_table.entry(addr)
            .or_default()
            .push(r)
    }

    /// Records that `id` is a declaration of the type called `name` within the
    /// current namespace path.
    pub fn record_decl(&mut self, name: impl std::fmt::Display, id: TypeId) {
        self.decls.entry(self.format_path(name))
            .or_default()
            .insert(id);
    }

    /// Qualifies `name` with the current namespace path, using `::` as the
    /// separator. With an empty path, `name` is returned as-is.
    fn format_path(&self, name: impl std::fmt::Display) -> String {
        if self.path.is_empty() {
            name.to_string()
        } else {
            format!("{}::{}", self.path.join("::"), name)
        }
    }

    /// Pushes a path component onto the namespace path stack and runs `body`,
    /// popping the stack when it completes.
    fn path_component<T>(
        &mut self,
        c: impl Into<String>,
        body: impl FnOnce(&mut Self) -> T,
    ) -> T {
        self.path.push(c.into());
        let result = body(self);
        self.path.pop();
        result
    }
}
/// Builds a reverse index over a key-value table.
///
/// `table` yields `(key, value)` reference pairs in any order.
///
/// `project` maps each pair to the datum it should be filed under, or `None`
/// to leave that pair out of the index.
///
/// The result maps every datum produced by `project` to the set of `table`
/// keys that produced it.
fn index_by_key<'t, K: 't, V: 't, T>(
    table: impl IntoIterator<Item = (&'t K, &'t V)>,
    mut project: impl FnMut(&K, &V) -> Option<T>,
) -> BTreeMap<T, BTreeSet<K>>
where
    T: Ord,
    K: Ord + Clone,
{
    let mut index = BTreeMap::<T, BTreeSet<K>>::new();

    // Keep only the pairs `project` accepts, remembering which key each
    // datum came from.
    let filed = table
        .into_iter()
        .filter_map(|(key, value)| project(key, value).map(|datum| (datum, key)));

    for (datum, key) in filed {
        index.entry(datum).or_default().insert(key.clone());
    }

    index
}
/// Everything that can go wrong while turning an object file into a `DebugDb`.
#[derive(Clone, Debug, Error)]
pub enum FileError {
    /// The DWARF was readable but this crate's parser rejected its content.
    #[error("DWARF data structures could not be understood")]
    Parse(#[from] ParseError),
    /// The `object` crate reported an error.
    #[error("Object file format parsing error")]
    Obj(#[from] object::Error),
    /// The `gimli` crate reported an error.
    #[error("DWARF failed to parse")]
    Dwarf(#[from] gimli::Error),
}

/// Parses type information from an `object::File`.
///
/// Besides types, this collects the line number table of every unit, every
/// DWARF entry handled by `dwarf_parser::parse_entry`, the `.debug_frame`
/// section, and every object-file symbol whose name can be read.
pub fn parse_file(
    object: &object::File,
) -> Result<DebugDb, FileError> {
    let endian = if object.is_little_endian() {
        gimli::RunTimeEndian::Little
    } else {
        gimli::RunTimeEndian::Big
    };

    // Loads one DWARF section by name. A section that is absent from the file
    // is treated as empty rather than as an error.
    // NOTE(review): this closure's declared return type was lost in
    // extraction; `Result<RtArcReader, object::Error>` is assumed.
    let load_section =
        |id: gimli::SectionId| -> Result<RtArcReader, object::Error> {
            let cow = object.section_by_name(id.name())
                .map(|sect| sect.uncompressed_data())
                .transpose()?
                .unwrap_or_else(Default::default);
            Ok(gimli::EndianReader::new(Arc::from(cow), endian))
        };

    let dwarf = gimli::Dwarf::load(&load_section)?;

    use gimli::Section;
    let debug_frame = gimli::DebugFrame::load(load_section)?;

    let mut builder = DebugDbBuilder::new(endian, object.is_64(), debug_frame);

    let mut iter = dwarf.units();
    while let Some(header) = iter.next()? {
        let unit = dwarf.unit(header)?;

        if let Some(lp) = &unit.line_program {
            let lp = lp.clone();
            let mut rows = lp.rows();

            // A row's end address is only known once the next row has been
            // seen, so each row is held back here until then.
            let mut last_row: Option<LineNumberRow> = None;
            while let Some((header, row)) = rows.next_row()? {
                // "directory/file" when the directory is known, the bare file
                // name otherwise, and "???" when the row names no file.
                let file = if let Some(file) = row.file(header) {
                    if let Some(directory) = file.directory(header) {
                        format!(
                            "{}/{}",
                            String::from_utf8_lossy(dwarf.attr_string(&unit, directory)?.bytes()),
                            String::from_utf8_lossy(
                                dwarf
                                    .attr_string(&unit, file.path_name())?
                                    .bytes())
                        )
                    } else {
                        String::from_utf8_lossy(
                            dwarf
                                .attr_string(&unit, file.path_name())?
                                .bytes())
                            .into_owned()
                    }
                } else {
                    "???".into()
                };
                // Close the held-back row at this row's address and record it.
                if let Some(mut pending) = last_row.take() {
                    pending.pc_range.end = row.address();
                    builder.record_line_table_row(pending.pc_range.start, pending);
                }

                // An end-of-sequence row only terminates the previous row; it
                // is not recorded itself.
                if !row.end_sequence() {
                    last_row = Some(LineNumberRow {
                        // End is a placeholder, patched when the next row
                        // arrives.
                        pc_range: row.address()..0,
                        file,
                        line: row.line(),
                        column: match row.column() {
                            gimli::ColumnType::Column(c) => Some(c),
                            gimli::ColumnType::LeftEdge => None,
                        },
                    });
                }
            }
            if last_row.is_some() {
                eprintln!("WARN: line number program not terminated by end sequence");
            }
        }
        let mut entries = unit.entries();
        while let Some(()) = entries.next_entry()? {
            if entries.current().is_none() {
                break;
            }
            dwarf_parser::parse_entry(&dwarf, &unit, &mut entries, &mut builder)?;
        }
    }

    // Symbols whose names cannot be read are skipped.
    for sym in object.symbols() {
        let Ok(name) = sym.name() else { continue; };
        let addr = sym.address();
        builder.record_raw_symbol(addr, name.to_string());
    }

    Ok(builder.build()?)
}

/// An address range together with the entity that occupies it.
#[derive(Clone, Debug)]
pub struct AddressRange {
    /// Half-open range of addresses covered by the entity.
    pub range: std::ops::Range<u64>,
    /// What lives at `range`.
    pub entity: EntityId,
}

/// Identifies an entity that has a fixed address.
#[derive(Copy, Clone, Debug)]
pub enum EntityId {
    /// A static variable.
    Var(VarId),
    /// A subprogram.
    Prog(ProgramId),
}
/// Inverts a map: the result maps each distinct value of `map` to the set of
/// keys that carried that value.
///
/// Keys and values are cloned into the result; `map` is not modified.
fn invert<K, V>(map: &BTreeMap<K, V>) -> BTreeMap<V, BTreeSet<K>>
where
    K: Eq + Ord + Clone,
    V: Eq + Ord + Clone,
{
    let mut inverted = BTreeMap::<V, BTreeSet<K>>::new();
    map.iter().for_each(|(key, value)| {
        inverted
            .entry(value.clone())
            .or_default()
            .insert(key.clone());
    });
    inverted
}
**Note 31 | /// that this value may be smaller than you requested, or zero.** This 32 | /// indicates that fewer than `dest.len()` contiguous bytes _exist_ starting 33 | /// at `address`. This may be due to: address space holes, incomplete dumps, 34 | /// reading an ELF file without a RAM image, etc. 35 | /// 36 | /// These holes are a valid part of the machine state, and so this is not 37 | /// failure. 38 | /// 39 | /// Failure happens if we can't _access_ the machine state to find this out, 40 | /// or to get the data -- for instance, if a USB transaction to a JTAG probe 41 | /// fails, or if we get a filesystem error reading an ELF file. In that 42 | /// case, we'll return `Err`. 43 | fn read_memory(&self, address: u64, dest: &mut [u8]) -> Result; 44 | } 45 | 46 | #[derive(Clone)] 47 | pub struct ImgMachine { 48 | img: Vec, 49 | } 50 | 51 | impl ImgMachine { 52 | pub fn new(img: impl Into>) -> Self { 53 | Self { 54 | img: img.into(), 55 | } 56 | } 57 | } 58 | 59 | impl Machine for ImgMachine { 60 | type Error = Infallible; 61 | 62 | fn read_memory(&self, address: u64, dest: &mut [u8]) -> Result { 63 | let Ok(address) = usize::try_from(address) else { return Ok(0) }; 64 | let end = address.checked_add(dest.len()) 65 | .unwrap_or(usize::MAX); 66 | let end = usize::min(end, self.img.len()); 67 | let Some(chunk) = end.checked_sub(address) else { return Ok(0) }; 68 | 69 | dest[..chunk].copy_from_slice(&self.img[address..end]); 70 | Ok(chunk) 71 | } 72 | } 73 | 74 | impl Machine for RangeInclusiveMap> { 75 | type Error = Infallible; 76 | 77 | fn read_memory(&self, address: u64, dest: &mut [u8]) -> Result { 78 | let Some((range, segment)) = self.get_key_value(&address) else { return Ok(0) }; 79 | let offset = address - range.start(); 80 | 81 | let Ok(offset) = usize::try_from(offset) else { return Ok(0) }; 82 | let end = offset.checked_add(dest.len()) 83 | .unwrap_or(usize::MAX); 84 | let end = usize::min(end, segment.len()); 85 | let Some(chunk) = 
end.checked_sub(offset) else { return Ok(0) }; 86 | 87 | dest[..chunk].copy_from_slice(&segment[offset..end]); 88 | Ok(chunk) 89 | } 90 | } 91 | 92 | #[derive(Clone, Debug, Error)] 93 | pub enum LoadError { 94 | #[error("tuple type missing member {0}")] 95 | MissingTupleMember(usize), 96 | #[error("struct was not tuple-like")] 97 | NotATuple, 98 | #[error("not a struct")] 99 | NotAStruct, 100 | #[error("expected encoding {expected:?}, type had encoding {got:?}")] 101 | WrongEncoding { expected: Encoding, got: Encoding }, 102 | #[error("expected type with size {expected}, but type had size {got}")] 103 | WrongSize { expected: u64, got: u64 }, 104 | #[error("base type required")] 105 | NotABase, 106 | #[error("enum type required")] 107 | NotAnEnum, 108 | #[error("C-like enum type required")] 109 | NotACEnum, 110 | #[error("pointer type required")] 111 | NotAPointer, 112 | #[error("array type is not finite and can't be loaded")] 113 | InfiniteArray, 114 | #[error("arrays with non-zero lower bounds ({0}) are not supported")] 115 | NonZeroLowerBound(u64), 116 | #[error("array has element type without defined size")] 117 | UnsizedElement, 118 | #[error("array too big: {count} x {elt_size}-byte elements")] 119 | ArrayTooBig { 120 | count: u64, 121 | elt_size: u64, 122 | }, 123 | #[error("type too big for this platform: {0} bytes")] 124 | TypeTooBig(u64), 125 | #[error("array type required")] 126 | NotAnArray, 127 | #[error("expected enum with {expected} variants, found {got}")] 128 | WrongVariantCount { expected: usize, got: usize }, 129 | #[error("unexpected variant: {0}")] 130 | UnexpectedVariant(String), 131 | #[error("expected struct/tuple with {expected} members, found {got}")] 132 | WrongMemberCount { expected: usize, got: usize }, 133 | #[error("can't load an uninhabited (empty) enum")] 134 | Uninhabited, 135 | #[error("discriminator value {0} not valid for type")] 136 | BadDiscriminator(u64), 137 | #[error("unsupported type (TODO)")] 138 | UnsupportedType, 139 | 
#[error("expected member `{0}` not found")] 140 | MissingMember(String), 141 | #[error("a type named {expected} was required, but found: {got}")] 142 | WrongTypeName { expected: String, got: String}, 143 | #[error("some of the bytes required to load this type are not present in the machine")] 144 | DataUnavailable, 145 | 146 | #[error("an error occurred accessing the underlying machine state")] 147 | Machine(#[from] E), 148 | } 149 | 150 | /* 151 | 152 | impl Load for (A, B) { 153 | fn from_state( 154 | machine: &M, 155 | addr: u64, 156 | world: &DebugDb, 157 | ty: &Type, 158 | ) -> Result> { 159 | if let Type::Struct(s) = ty { 160 | if s.tuple_like { 161 | let m0 = s.unique_member("__0") 162 | .ok_or(LoadError::MissingTupleMember(0))?; 163 | let m0ty = world.type_by_id(m0.type_id).unwrap(); 164 | let m1 = s.unique_member("__1") 165 | .ok_or(LoadError::MissingTupleMember(0))?; 166 | let m1ty = world.type_by_id(m1.type_id).unwrap(); 167 | Ok(( 168 | A::from_buffer( 169 | buffer, 170 | addr + usize::try_from(m0.location).unwrap(), 171 | world, 172 | m0ty, 173 | )?, 174 | B::from_buffer( 175 | buffer, 176 | addr + usize::try_from(m1.location).unwrap(), 177 | world, 178 | m1ty, 179 | )?, 180 | )) 181 | } else { 182 | Err(LoadError::NotATuple) 183 | } 184 | } else { 185 | Err(LoadError::NotAStruct) 186 | } 187 | } 188 | } 189 | */ 190 | 191 | fn generic_base_load( 192 | encoding: Encoding, 193 | ty: &Type, 194 | machine: &M, 195 | addr: u64, 196 | extract: impl FnOnce([u8; N]) -> B, 197 | ) -> Result> { 198 | if let Type::Base(b) = ty { 199 | if b.encoding != encoding { 200 | return Err(LoadError::WrongEncoding { 201 | expected: encoding, 202 | got: b.encoding, 203 | }); 204 | } 205 | if b.byte_size != N as u64 { 206 | return Err(LoadError::WrongSize { 207 | expected: N as u64, 208 | got: b.byte_size, 209 | }); 210 | } 211 | let mut ary = [0; N]; 212 | let n = machine.read_memory(addr, &mut ary)?; 213 | if n != N { 214 | return Err(LoadError::DataUnavailable); 215 | } 
216 | Ok(extract(ary)) 217 | } else { 218 | Err(LoadError::NotABase) 219 | } 220 | } 221 | 222 | impl Load for u8 { 223 | fn from_state( 224 | machine: &M, 225 | addr: u64, 226 | _world: &DebugDb, 227 | ty: &Type, 228 | ) -> Result> { 229 | generic_base_load( 230 | Encoding::Unsigned, 231 | ty, 232 | machine, 233 | addr, 234 | |[b]| b, 235 | ) 236 | } 237 | } 238 | 239 | impl Load for i8 { 240 | fn from_state( 241 | machine: &M, 242 | addr: u64, 243 | _world: &DebugDb, 244 | ty: &Type, 245 | ) -> Result> { 246 | generic_base_load( 247 | Encoding::Signed, 248 | ty, 249 | machine, 250 | addr, 251 | |[b]| b as i8, 252 | ) 253 | } 254 | } 255 | 256 | macro_rules! base_impl { 257 | ($t:ty, $sz:expr, $enc:ident, $read:ident) => { 258 | impl Load for $t { 259 | fn from_state( 260 | machine: &M, 261 | addr: u64, 262 | world: &DebugDb, 263 | ty: &Type, 264 | ) -> Result> { 265 | generic_base_load::<_, $t, $sz>( 266 | Encoding::$enc, 267 | ty, 268 | machine, 269 | addr, 270 | |a| world.endian().$read(&a), 271 | ) 272 | } 273 | } 274 | }; 275 | } 276 | 277 | base_impl!(u16, 2, Unsigned, read_u16); 278 | base_impl!(u32, 4, Unsigned, read_u32); 279 | base_impl!(u64, 8, Unsigned, read_u64); 280 | 281 | base_impl!(i16, 2, Signed, read_i16); 282 | base_impl!(i32, 4, Signed, read_i32); 283 | base_impl!(i64, 8, Signed, read_i64); 284 | 285 | impl Load for core::sync::atomic::AtomicU32 { 286 | fn from_state( 287 | machine: &M, 288 | addr: u64, 289 | world: &DebugDb, 290 | ty: &Type, 291 | ) -> Result> { 292 | let Type::Struct(ty) = ty else { 293 | return Err(LoadError::NotAStruct); 294 | }; 295 | if ty.name != "core::sync::atomic::AtomicU32" { 296 | return Err(LoadError::WrongTypeName { 297 | expected: "core::sync::atomic::AtomicU32".to_string(), 298 | got: ty.name.clone(), 299 | }); 300 | } 301 | let Some(m_v) = ty.unique_member("v") else { 302 | return Err(LoadError::MissingMember("v".to_string())); 303 | }; 304 | let unsafecell = world.type_by_id(m_v.type_id).unwrap(); 305 | let 
Type::Struct(unsafecell) = unsafecell else { 306 | return Err(LoadError::NotAStruct); 307 | }; 308 | if unsafecell.name != "core::cell::UnsafeCell" { 309 | return Err(LoadError::WrongTypeName { 310 | expected: "core::cell::UnsafeCell".to_string(), 311 | got: unsafecell.name.clone(), 312 | }); 313 | } 314 | let Some(m_value) = unsafecell.unique_member("value") else { 315 | return Err(LoadError::MissingMember("value".to_string())); 316 | }; 317 | 318 | let value_ty = world.type_by_id(m_value.type_id).unwrap(); 319 | 320 | let x = u32::from_state(machine, addr, world, value_ty)?; 321 | Ok(core::sync::atomic::AtomicU32::new(x)) 322 | } 323 | } 324 | 325 | impl Load for Vec { 326 | fn from_state( 327 | machine: &M, 328 | addr: u64, 329 | world: &DebugDb, 330 | ty: &Type, 331 | ) -> Result> { 332 | if let Type::Array(s) = ty { 333 | let count = s.count.ok_or(LoadError::InfiniteArray)?; 334 | if s.lower_bound != 0 { 335 | return Err(LoadError::NonZeroLowerBound(s.lower_bound)); 336 | } 337 | let elty = world.type_by_id(s.element_type_id).unwrap(); 338 | 339 | let elt_size = elty 340 | .byte_size(world) 341 | .ok_or(LoadError::UnsizedElement)?; 342 | let elt_size = elt_size.max(elty.alignment(world).unwrap_or(0)); 343 | 344 | let mut elts = Vec::with_capacity(usize::try_from(count).unwrap()); 345 | for i in 0..count { 346 | elts.push(T::from_state(machine, addr + i * elt_size, world, elty)?); 347 | } 348 | Ok(elts) 349 | } else { 350 | Err(LoadError::NotAnArray) 351 | } 352 | } 353 | } 354 | 355 | /* 356 | 357 | /// A `Load` impl for Option-shaped types. 358 | /// 359 | /// This will work for any enum with two variants, where one is named None and 360 | /// has no payload, and the other is named Some and has one field. 
361 | impl Load for Option { 362 | fn from_state( 363 | machine: &M, 364 | addr: u64, 365 | world: &DebugDb, 366 | ty: &Type, 367 | ) -> Result> { 368 | let Type::Enum(s) = ty else { 369 | return Err(LoadError::NotAnEnum); 370 | }; 371 | // Option-like enums have two variants. 372 | if let VariantShape::Many { variants, .. } = &s.shape { 373 | if variants.len() != 2 { 374 | return Err(LoadError::WrongVariantCount { 375 | expected: 2, 376 | got: variants.len(), 377 | }); 378 | } 379 | // Those variants are named None and Some. 380 | for v in variants.values() { 381 | if let Some(n) = &v.member.name { 382 | if n == "None" || n == "Some" { 383 | continue; 384 | } 385 | return Err(LoadError::UnexpectedVariant(n.clone())); 386 | } 387 | } 388 | } 389 | // Ok, that's the extent of the type validation I'm comfortable 390 | // doing here for performance reasons. 391 | 392 | let v = choose_variant(buffer, addr, world, s)?; 393 | let is_some = v.member.name.as_ref().unwrap() == "Some"; 394 | let vty = world.type_by_id(v.member.type_id).unwrap(); 395 | // Option-like enums have tuple variants. 396 | let Type::Struct(s) = vty else { 397 | // TODO: this error is probably not descriptive enough. 398 | return Err(LoadError::NotAStruct); 399 | }; 400 | if !s.tuple_like { 401 | // TODO: this error is probably not descriptive enough. 
402 | return Err(LoadError::NotATuple); 403 | } 404 | if is_some { 405 | if s.members.len() != 1 { 406 | return Err(LoadError::WrongMemberCount { 407 | expected: 1, 408 | got: s.members.len(), 409 | }); 410 | } 411 | 412 | let m = &s.members[0]; 413 | let mty = 414 | world.type_by_id(m.type_id).unwrap(); 415 | let ma = addr + usize::try_from(m.location).unwrap(); 416 | Ok(Some(T::from_buffer(buffer, ma, world, mty)?)) 417 | } else { 418 | if !s.members.is_empty() { 419 | return Err(LoadError::WrongMemberCount { 420 | expected: 0, 421 | got: s.members.len(), 422 | }); 423 | } 424 | Ok(None) 425 | } 426 | } 427 | } 428 | */ 429 | 430 | pub(crate) fn choose_variant<'e, M: Machine>( 431 | machine: &M, 432 | addr: u64, 433 | world: &'e DebugDb, 434 | e: &'e Enum, 435 | ) -> Result<&'e Variant, LoadError> { 436 | match &e.shape { 437 | VariantShape::Zero => { 438 | Err(LoadError::Uninhabited) 439 | } 440 | VariantShape::One(v) => Ok(v), 441 | VariantShape::Many { 442 | member, variants, .. 443 | } => { 444 | let dtype_id = member.type_id; 445 | let dty = world.type_by_id(dtype_id).unwrap(); 446 | let da = addr + member.location; 447 | let dsize = usize::try_from(dty.byte_size(world).unwrap()).unwrap(); 448 | let d = load_unsigned(world.endian(), machine, da, dsize)? 
449 | .ok_or(LoadError::DataUnavailable)?; 450 | let v = variants 451 | .get(&Some(d)) 452 | .or_else(|| variants.get(&None)) 453 | .ok_or(LoadError::BadDiscriminator(d))?; 454 | Ok(v) 455 | } 456 | } 457 | } 458 | 459 | pub(crate) fn load_unsigned( 460 | endian: gimli::RunTimeEndian, 461 | machine: &M, 462 | addr: u64, 463 | size: usize, 464 | ) -> Result, M::Error> { 465 | let mut buffer = [0; 8]; 466 | let buffer = &mut buffer[..size]; 467 | let n = machine.read_memory(addr, buffer)?; 468 | Ok(if n < size { 469 | None 470 | } else { 471 | Some(match size { 472 | 1 => u64::from(buffer[0]), 473 | 2 => u64::from(endian.read_u16(buffer)), 474 | 4 => u64::from(endian.read_u32(buffer)), 475 | 8 => endian.read_u64(buffer), 476 | _ => unimplemented!(), 477 | }) 478 | }) 479 | } 480 | /* 481 | #[cfg(test)] 482 | mod test { 483 | use super::*; 484 | use crate::{TypeId, DebugDbBuilder}; 485 | 486 | #[derive(Debug, Default)] 487 | struct OffsetMaker { 488 | next_offset: usize, 489 | } 490 | 491 | impl OffsetMaker { 492 | fn next(&mut self) -> gimli::UnitSectionOffset { 493 | let n = self.next_offset; 494 | self.next_offset += 1; 495 | gimli::DebugInfoOffset(n).into() 496 | } 497 | } 498 | 499 | fn make_option_u16( 500 | builder: &mut DebugDbBuilder, 501 | om: &mut OffsetMaker, 502 | ) -> TypeId { 503 | let u16_goff = om.next(); 504 | builder.record_type(crate::Base { 505 | name: "u16".to_string(), 506 | encoding: Encoding::Unsigned, 507 | byte_size: 2, 508 | offset: u16_goff.into(), 509 | }); 510 | 511 | let none_goff = om.next(); 512 | builder.record_type(crate::Struct { 513 | name: "core::option::Option::None".to_string(), 514 | byte_size: 4, 515 | alignment: Some(2), 516 | offset: none_goff, 517 | tuple_like: true, 518 | template_type_parameters: vec![], 519 | members: indexmap::indexmap! 
{}, 520 | }); 521 | 522 | let some_goff = om.next(); 523 | builder.record_type(crate::Struct { 524 | name: "core::option::Option::Some".to_string(), 525 | byte_size: 4, 526 | alignment: Some(2), 527 | offset: some_goff, 528 | tuple_like: true, 529 | template_type_parameters: vec![], 530 | members: indexmap::indexmap! { 531 | "__0".to_string() => crate::Member { 532 | name: Some("__0".to_string()), 533 | artificial: false, 534 | alignment: Some(2), 535 | location: 2, 536 | offset: om.next(), 537 | type_id: u16_goff.into(), 538 | }, 539 | }, 540 | }); 541 | 542 | let option_goff = om.next(); 543 | builder.record_type(crate::Enum { 544 | name: "core::option::Option".to_string(), 545 | byte_size: 4, 546 | alignment: Some(2), 547 | template_type_parameters: vec![], 548 | shape: VariantShape::Many { 549 | discr: om.next(), 550 | member: crate::Member { 551 | name: None, 552 | artificial: true, 553 | type_id: u16_goff.into(), 554 | alignment: Some(2), 555 | location: 0, 556 | offset: om.next(), 557 | }, 558 | variants: indexmap::indexmap! 
{ 559 | Some(0) => crate::Variant { 560 | offset: om.next(), 561 | member: crate::Member { 562 | name: Some("None".to_string()), 563 | artificial: false, 564 | alignment: Some(2), 565 | location: 0, 566 | type_id: none_goff.into(), 567 | offset: om.next(), 568 | }, 569 | }, 570 | Some(1) => crate::Variant { 571 | offset: om.next(), 572 | member: crate::Member { 573 | name: Some("Some".to_string()), 574 | artificial: false, 575 | alignment: Some(2), 576 | location: 0, 577 | type_id: some_goff.into(), 578 | offset: om.next(), 579 | }, 580 | }, 581 | }, 582 | }, 583 | offset: option_goff.into(), 584 | }); 585 | 586 | option_goff.into() 587 | } 588 | 589 | #[test] 590 | fn load_option_u16() { 591 | let mut om = OffsetMaker::default(); 592 | let mut builder = 593 | DebugDbBuilder::new(gimli::RunTimeEndian::Little, false); 594 | 595 | let option_goff = make_option_u16(&mut builder, &mut om); 596 | 597 | let world = builder.build().unwrap(); 598 | let oty = world.type_by_id(option_goff).unwrap(); 599 | 600 | let img = [0, 0, 0xAB, 0xCD]; 601 | assert_eq!( 602 | Option::::from_buffer(&img, 0, &world, oty).unwrap(), 603 | None 604 | ); 605 | let img = [1, 0, 0xAB, 0xCD]; 606 | assert_eq!( 607 | Option::::from_buffer(&img, 0, &world, oty).unwrap(), 608 | Some(0xCDAB) 609 | ); 610 | } 611 | 612 | #[test] 613 | fn load_u8_array() { 614 | let mut om = OffsetMaker::default(); 615 | let mut builder = 616 | DebugDbBuilder::new(gimli::RunTimeEndian::Little, false); 617 | 618 | let u8_goff = om.next(); 619 | builder.record_type(crate::Base { 620 | name: "u8".to_string(), 621 | encoding: Encoding::Unsigned, 622 | byte_size: 1, 623 | offset: u8_goff, 624 | }); 625 | 626 | let index_type_goff = om.next(); 627 | builder.record_type(crate::Base { 628 | name: "__ARRAY_INDEX_TYPE__".to_string(), 629 | encoding: Encoding::Unsigned, 630 | byte_size: 8, 631 | offset: index_type_goff, 632 | }); 633 | 634 | let ary_goff = om.next(); 635 | builder.record_type(crate::Array { 636 | 
element_type_id: TypeId(u8_goff), 637 | index_type_id: TypeId(index_type_goff), 638 | lower_bound: 0, 639 | count: Some(5), 640 | offset: ary_goff, 641 | }); 642 | 643 | let world = builder.build().unwrap(); 644 | let aty = world.type_by_id(TypeId(ary_goff)).unwrap(); 645 | 646 | let img = [0, 1, 2, 3, 4]; 647 | let ary: Vec = Load::from_buffer(&img, 0, &world, aty).unwrap(); 648 | assert_eq!(ary, [0, 1, 2, 3, 4]); 649 | } 650 | } 651 | */ 652 | -------------------------------------------------------------------------------- /src/model.rs: -------------------------------------------------------------------------------- 1 | //! Data model types. 2 | //! 3 | //! This is our abstract description of types and routines in a program. 4 | 5 | use std::borrow::Cow; 6 | use std::hash::Hash; 7 | use std::num::NonZeroU64; 8 | use crate::DebugDb; 9 | use indexmap::IndexMap; 10 | 11 | /// Identifies a specific type within a program, using its offset within the 12 | /// debug section(s). 13 | /// 14 | /// Sometimes types appear more than once in debug info. In that case, each type 15 | /// will have a distinct `TypeId`. 16 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)] 17 | pub struct TypeId(pub gimli::UnitSectionOffset); 18 | 19 | impl From for TypeId { 20 | fn from(x: gimli::UnitSectionOffset) -> Self { 21 | Self(x) 22 | } 23 | } 24 | 25 | /// Identifies a subprogram within a program -- a function or subroutine. 26 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)] 27 | pub struct ProgramId(pub gimli::UnitSectionOffset); 28 | 29 | impl From for ProgramId { 30 | fn from(x: gimli::UnitSectionOffset) -> Self { 31 | Self(x) 32 | } 33 | } 34 | 35 | /// Identifies a static variable. 
36 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)] 37 | pub struct VarId(pub gimli::UnitSectionOffset); 38 | 39 | impl From for VarId { 40 | fn from(x: gimli::UnitSectionOffset) -> Self { 41 | Self(x) 42 | } 43 | } 44 | 45 | /// Information about a type from a program. 46 | /// 47 | /// There are many kinds of types; this enum distinguishes between them. 48 | #[derive(Debug, Clone)] 49 | pub enum Type { 50 | Struct(Struct), 51 | Enum(Enum), 52 | Base(Base), 53 | CEnum(CEnum), 54 | Array(Array), 55 | Pointer(Pointer), 56 | Union(Union), 57 | Subroutine(Subroutine), 58 | Unresolved(Unresolved), 59 | } 60 | 61 | impl Type { 62 | /// Returns the location of the type's definition within the debug info 63 | /// section(s). 64 | pub fn offset(&self) -> gimli::UnitSectionOffset { 65 | // TODO so this field should clearly get factored out.... 66 | match self { 67 | Self::Struct(s) => s.offset, 68 | Self::Enum(s) => s.offset, 69 | Self::Base(s) => s.offset, 70 | Self::CEnum(s) => s.offset, 71 | Self::Array(s) => s.offset, 72 | Self::Pointer(s) => s.offset, 73 | Self::Union(s) => s.offset, 74 | Self::Subroutine(s) => s.offset, 75 | Self::Unresolved(s) => s.offset, 76 | } 77 | } 78 | 79 | /// Determines the alignment of the type, in bytes. 80 | /// 81 | /// Not all types have alignment. 82 | pub fn alignment(&self, world: &DebugDb) -> Option { 83 | match self { 84 | Self::Struct(s) => s.alignment, 85 | Self::Enum(s) => s.alignment, 86 | Self::Base(s) => s.alignment, 87 | Self::CEnum(s) => s.alignment, 88 | Self::Union(s) => Some(s.alignment), 89 | Self::Array(a) => { 90 | let eltty = world.type_by_id(a.element_type_id)?; 91 | eltty.alignment(world) 92 | } 93 | Self::Pointer(_) => Some(world.pointer_size() as u64), 94 | 95 | _ => None, 96 | } 97 | } 98 | 99 | /// Determines the inherent size of the type, in bytes. The inherent size is 100 | /// the size that can be computed without referring to the debug information 101 | /// of other types. 
102 | /// 103 | /// Not all types have sizes; even fewer have inherent sizes. This is an 104 | /// implementation detail of the full `byte_size` algorithm. 105 | pub fn inherent_byte_size(&self) -> Option { 106 | match self { 107 | Self::Struct(s) => s.byte_size, 108 | Self::Enum(s) => s.byte_size, 109 | Self::Base(s) => Some(s.byte_size), 110 | Self::CEnum(s) => Some(s.byte_size), 111 | Self::Union(s) => Some(s.byte_size), 112 | 113 | _ => None, 114 | } 115 | } 116 | 117 | pub(crate) fn byte_size_early<'a>( 118 | &'a self, 119 | pointer_size: usize, 120 | lookup_type: impl Fn(TypeId) -> Option<&'a Type>, 121 | ) -> Option { 122 | let mut factor = 1; 123 | let mut t = self; 124 | loop { 125 | match t.inherent_byte_size() { 126 | Some(x) => break Some(factor * x), 127 | None => match t { 128 | Self::Array(a) => { 129 | factor *= a.count?; 130 | t = lookup_type(a.element_type_id)?; 131 | } 132 | Self::Pointer(_) => break Some(factor * pointer_size as u64), 133 | Self::Subroutine(_) => break None, 134 | 135 | _ => panic!("inconsistency btw byte_size_early and inherent_byte_size"), 136 | }, 137 | } 138 | } 139 | } 140 | 141 | /// Determines the size of the type, in bytes. 142 | /// 143 | /// Not all types have sizes. 144 | pub fn byte_size(&self, world: &DebugDb) -> Option { 145 | self.byte_size_early( 146 | world.pointer_size(), 147 | |t| world.type_by_id(t), 148 | ) 149 | } 150 | 151 | /// Determines the name of the type. 
/// Returns a display name for this type.
///
/// Structs, enums, base types, C-like enums and unions borrow their stored
/// name. Pointers use their assigned name if present, otherwise `*_ ` followed
/// by the pointee's name. Arrays are rendered as `[elt; count]`. `???` stands
/// in for an unknown count or a type that cannot be found in `world`.
pub fn name(&self, world: &DebugDb) -> Cow<'_, str> {
    match self {
        Self::Struct(s) => (&s.name).into(),
        Self::Enum(s) => (&s.name).into(),
        Self::Base(s) => (&s.name).into(),
        Self::CEnum(s) => (&s.name).into(),
        Self::Union(s) => (&s.name).into(),
        Self::Pointer(s) => {
            if let Some(assigned_name) = &s.name {
                assigned_name.into()
            } else {
                // Unnamed pointer: synthesize a name from the pointee.
                let pointee_name = world
                    .type_by_id(s.type_id)
                    .map(|t| t.name(world))
                    .unwrap_or("???".into());
                format!("*_ {pointee_name}").into()
            }
        }
        Self::Array(a) => {
            let eltname = world
                .type_by_id(a.element_type_id)
                .map(|t| t.name(world))
                .unwrap_or("???".into());

            if let Some(n) = a.count {
                format!("[{}; {}]", eltname, n).into()
            } else {
                format!("[{}; ???]", eltname).into()
            }
        }
        Self::Subroutine(_) => "subroutine".into(), // TODO
        // NOTE(review): an empty name for unresolved types looks like it may
        // have lost a placeholder in extraction -- confirm against the repo.
        Self::Unresolved(_) => "".into(),
    }
}
Examples in Rust include `u32` and `bool`. 238 | /// 239 | /// Note that, in Rust in particular, there are several "base types" that you 240 | /// might not think of as such. Both `()` and `!` are represented as zero-sized 241 | /// base types. 242 | #[derive(Clone, Debug)] 243 | pub struct Base { 244 | /// Name of the type. 245 | pub name: String, 246 | /// How to interpret the type's bits. 247 | pub encoding: Encoding, 248 | /// Number of bytes in a value of the type. 249 | pub byte_size: u64, 250 | /// Explicit alignment, if given. 251 | pub alignment: Option, 252 | /// Location in debug info. 253 | pub offset: gimli::UnitSectionOffset, 254 | } 255 | 256 | /// A "struct type" describes a record containing members, each of which has its 257 | /// own type. 258 | /// 259 | /// Rust defines both normal structs (with named members) and _tuple structs_ 260 | /// (with numbered members). This type is used for both. A tuple struct will 261 | /// have the `tuple_like` flag set, and its members will be in numeric order. 262 | /// (They can also be accessed by names of the form `__0`, `__1`, etc.) 263 | #[derive(Debug, Clone)] 264 | pub struct Struct { 265 | /// Name of the struct type. 266 | pub name: String, 267 | /// Size of a value of this struct in bytes. 268 | pub byte_size: Option, 269 | /// Alignment required for values of this struct. 270 | pub alignment: Option, 271 | /// If this struct is generic, a list of template parameters. Non-generic 272 | /// structs have an empty list. 273 | pub template_type_parameters: Vec, 274 | /// When `true`, this struct appears to originate from a Rust "tuple struct" 275 | /// with numbered fields. When `false`, this is a normal struct. 276 | pub tuple_like: bool, 277 | /// Member fields of the struct. 278 | /// 279 | /// These are in an `IndexMap` so that order is preserved. The members are 280 | /// recorded in the order they appear in the debug info, which in practice 281 | /// is also the order they're declared in the source. 
They are _not_ in 282 | /// order of position in the struct in memory. 283 | pub members: Vec, 284 | /// Location in debug info. 285 | pub offset: gimli::UnitSectionOffset, 286 | /// Location of the declaration of this subprogram in the source. 287 | pub decl_coord: DeclCoord, 288 | } 289 | 290 | impl Struct { 291 | pub fn unique_member(&self, name: &str) -> Option<&Member> { 292 | let mut matches = self.members.iter() 293 | .filter(|m| m.name.as_deref() == Some(name)); 294 | let first = matches.next()?; 295 | if matches.next().is_some() { 296 | // There is no _unique_ member by this name. 297 | None 298 | } else { 299 | Some(first) 300 | } 301 | } 302 | } 303 | 304 | /// An "enum type," in the Rust sense of the term, is a tagged union (or 305 | /// discriminated union). It can contain multiple different types of values, but 306 | /// only one at a time, and the options are distinguished through a 307 | /// "discriminator" member -- except if there is only one variant, in which case 308 | /// the compiler usually eliminates that member. See `VariantShape` for details. 309 | /// 310 | /// This library distinguishes between Rust-style enums (this type) and C-style 311 | /// enums (the `CEnum`) type. Rust programs will generate C-style enums when 312 | /// none of the enum variants have a payload or fields. 313 | #[derive(Debug, Clone)] 314 | pub struct Enum { 315 | /// Name of the enum type. 316 | pub name: String, 317 | /// Size of a value of the enum type, in bytes. 318 | pub byte_size: Option, 319 | /// Alignment required for values of this enum. 320 | pub alignment: Option, 321 | /// If this struct is generic, a list of template parameters. Non-generic 322 | /// structs have an empty list. 323 | pub template_type_parameters: Vec, 324 | /// Description of the variants in this enum. 325 | pub shape: VariantShape, 326 | /// Location in debug info. 
327 | pub offset: gimli::UnitSectionOffset, 328 | } 329 | 330 | /// A "C-style enum" type -- a type with several value variants, each of which 331 | /// can be represented by an integer. 332 | #[derive(Debug, Clone)] 333 | pub struct CEnum { 334 | /// Name of the enum type. 335 | pub name: String, 336 | /// Representation type. 337 | pub repr_type_id: TypeId, 338 | /// Flag indicating that this enum is a distinct type, rather than 339 | /// evaluating as values of some base type. This is set for all enums in 340 | /// Rust, some enums in C++, and no enums in C. 341 | pub enum_class: bool, 342 | /// Size of a value of the enum type, in bytes. 343 | pub byte_size: u64, 344 | /// Alignment required for values of this enum. 345 | pub alignment: Option, 346 | /// Variants ("enumerators") of this type. 347 | pub enumerators: IndexMap, 348 | /// Location in debug info. 349 | pub offset: gimli::UnitSectionOffset, 350 | } 351 | 352 | /// An array type. 353 | /// 354 | /// An array consists of an element type and a count. Not all array types in 355 | /// DWARF have counts, but in Rust, they do. 356 | /// 357 | /// Array types can also technically have a `lower_bound` that is not 0, but in 358 | /// practice to observe this you need to link with a Modula or Fortran binary. 359 | #[derive(Debug, Clone)] 360 | pub struct Array { 361 | /// Type of elements of the array. 362 | pub element_type_id: TypeId, 363 | /// Type of the array index. This is synthetic and rarely useful; all Rust 364 | /// arrays point to the same index type. 365 | pub index_type_id: TypeId, 366 | /// First index in the array. Always 0 in Rust and C. 367 | pub lower_bound: u64, 368 | /// Number of elements in the array, if specified. 369 | pub count: Option, 370 | /// Location in debug info. 371 | pub offset: gimli::UnitSectionOffset, 372 | } 373 | 374 | /// A pointer type. 375 | /// 376 | /// There are many flavors of pointers -- `const`, not-`const`, Rust references, 377 | /// C raw pointers, etc. 
This models them all. The differences between them are 378 | /// not present in DWARF -- though they can be inferred from the `name`. 379 | /// 380 | /// Pointer size is implicit and fixed for the whole program; it can be queried 381 | /// from the `DebugDb` instance. 382 | #[derive(Debug, Clone)] 383 | pub struct Pointer { 384 | /// Type of data this points _to_. 385 | pub type_id: TypeId, 386 | /// Name of the pointer type. Compilers don't name all pointer types. 387 | pub name: Option, 388 | /// Location in debug info. 389 | pub offset: gimli::UnitSectionOffset, 390 | } 391 | 392 | /// A C-style non-tagged union. 393 | /// 394 | /// A union has multiple members, like a struct, except that those members are 395 | /// overlaid in memory, and only one is valid at a time. Unlike an `Enum`, there 396 | /// is no information in union to tell you _which_ variant is valid. 397 | #[derive(Debug, Clone)] 398 | pub struct Union { 399 | /// Name of this union type. 400 | pub name: String, 401 | /// Size of a value of this union type, in bytes. 402 | pub byte_size: u64, 403 | /// Alignment required for a value of this union type, in bytes. 404 | pub alignment: u64, 405 | /// If this union is generic, this contains an array of template type 406 | /// parameters. If it is not generic, this is empty. 407 | pub template_type_parameters: Vec, 408 | /// Members of the union in declaration order. 409 | pub members: Vec, 410 | /// Location in debug info. 411 | pub offset: gimli::UnitSectionOffset, 412 | } 413 | 414 | /// A subroutine type. Note that this is different from a `Subprogram` -- this 415 | /// is used as the pointed-to type for function pointers. 416 | #[derive(Clone, Debug)] 417 | pub struct Subroutine { 418 | /// Type of value returned, if any. In both C and Rust, functions that 419 | /// return nothing (`void` and `()`, respectively) have no return type, 420 | /// rather than `Some(typeid_of_void)`. 
421 | pub return_type_id: Option, 422 | /// Types of parameters to a routine of this type. 423 | pub formal_parameters: Vec, 424 | /// Location in debug info. 425 | pub offset: gimli::UnitSectionOffset, 426 | } 427 | 428 | /// A type that was not found in the debug info. 429 | /// 430 | /// Usually this is because it's not actually used in the program, and only 431 | /// indirectly referenced. 432 | #[derive(Debug, Clone)] 433 | pub struct Unresolved { 434 | /// Location in debug info. 435 | pub offset: gimli::UnitSectionOffset, 436 | } 437 | 438 | /// Possible encodings for a `Base` type. 439 | #[derive(Copy, Clone, Debug, Eq, PartialEq)] 440 | pub enum Encoding { 441 | /// Unsigned integer. 442 | Unsigned, 443 | /// Signed integer. 444 | Signed, 445 | /// Unsigned char. This is used for Rust `char` (with `byte_size == 4`) as 446 | /// well as for C `unsigned char` (`byte_size == 1`) and sometimes for C 447 | /// `char` depending on the platform ABI because reasons. 448 | UnsignedChar, 449 | /// Signed char. This is used for C `signed char` (`byte_size == 1`) and 450 | /// sometimes for C `char` depending on the platform ABI because reasons. 451 | SignedChar, 452 | /// Boolean -- 0 is false, non-zero is true. 453 | /// 454 | /// In Rust, true is always 1, but DWARF doesn't seem to mandate that, and 455 | /// so here we are. 456 | Boolean, 457 | /// IEEE754 floating point number. 458 | Float, 459 | /// IEEE754 complex floating point number, i.e. probably a pair of floats. 460 | /// Support for this encoding is currently somewhat limited as none of our 461 | /// programs use complex floats. 462 | /// 463 | /// Note that this encoding is specific to the `__Complex` C language 464 | /// extension, and is _not used_ for Rust complex numbers. 465 | ComplexFloat, 466 | /// UTF character encoding (presumably DWARF `DW_ATE_UTF`) -- TODO confirm. 467 | UtfChar, 468 | } 469 | 470 | /// Information on a type parameter binding for an instance of a generic type.
471 | /// 472 | /// This is called "Template Type Parameter" because that's what DWARF calls it, 473 | /// because DWARF is rather C-specific. 474 | #[derive(Debug, Clone)] 475 | pub struct TemplateTypeParameter { 476 | /// Name of parameter. 477 | pub name: String, 478 | /// Type the parameter is bound to. 479 | pub type_id: TypeId, 480 | } 481 | 482 | /// A component of a struct or union. 483 | #[derive(Debug, Clone, Eq, PartialEq)] 484 | pub struct Member { 485 | /// Name of the member. Not all members have names, though in Rust they all 486 | /// do. 487 | pub name: Option, 488 | /// If `true`, this member is compiler-generated and will not make very much 489 | /// sense to the user. 490 | pub artificial: bool, 491 | /// Type of data stored in this member. 492 | pub type_id: TypeId, 493 | /// Alignment specified for this member. If missing, check the alignment for 494 | /// `type_id`. 495 | pub alignment: Option, 496 | /// Offset of this member within the enclosing type. 497 | pub location: u64, 498 | /// Location in debug info. 499 | pub offset: gimli::UnitSectionOffset, 500 | pub decl_coord: DeclCoord, 501 | } 502 | 503 | /// Description of the potential variant shapes for a Rust-style enum (tagged 504 | /// union). 505 | #[derive(Debug, Clone)] 506 | pub enum VariantShape { 507 | /// The enum has no variants. No discriminator member has been generated. 508 | /// These enums are typically zero-sized. 509 | Zero, 510 | /// The enum contains only one variant, and so the compiler has not 511 | /// generated a discriminator member, because it would go unused. The 512 | /// `Variant` is embedded directly. 513 | One(Variant), 514 | /// The enum contains a discriminator. This generally implies that there are 515 | /// two or more variants, though nothing in the spec requires this. 516 | Many { 517 | /// Location of the definition of the discriminator in debug info. 518 | discr: gimli::UnitSectionOffset, 519 | /// Member describing the discriminator. 
Note that this member will 520 | /// typically be nameless. 521 | member: Member, 522 | /// Variants that may be selected depending on the value of the 523 | /// discriminator. The key `None` is used for a "default" `Variant` that 524 | /// is chosen if none of the explicit values match; this is used to 525 | /// implement various enum layout optimizations in Rust. 526 | variants: IndexMap, Variant>, 527 | }, 528 | } 529 | 530 | /// A variant of a Rust-style enum. 531 | #[derive(Debug, Clone)] 532 | pub struct Variant { 533 | /// Member containing the variant's data. An enum in Rust that is not 534 | /// C-style always has data in every variant, but if the variant has no 535 | /// fields from the user's perspective, the embedded data will be an empty 536 | /// struct. 537 | pub member: Member, 538 | /// Location in debug info. 539 | pub offset: gimli::UnitSectionOffset, 540 | pub decl_coord: DeclCoord, 541 | } 542 | 543 | /// One of the options in a C-style enum type. 544 | #[derive(Debug, Clone)] 545 | pub struct Enumerator { 546 | /// Name of this variant. 547 | pub name: String, 548 | /// Numeric value associated with this variant. 549 | pub const_value: u64, 550 | /// Location in debug info. 551 | pub offset: gimli::UnitSectionOffset, 552 | } 553 | 554 | /// A function or subroutine in a program. 555 | /// 556 | /// Note that this is different from `Subroutine`, which defines the _type_ of a 557 | /// function; this defines the _identity_ of a function. 558 | #[derive(Clone, Debug)] 559 | pub struct Subprogram { 560 | /// Name of the subprogram. Not all subprograms have names. TODO: why not? 561 | pub name: Option, 562 | /// Range of PC values that are contained within the code generated for this 563 | /// subprogram, when code has been generated at the top level (i.e. the 564 | /// subprogram is not inlined). 565 | /// 566 | /// Subprograms that are completely inlined will often have nonsense 567 | /// `pc_range` values starting at address 0.
568 | pub pc_range: Option>, 569 | /// Location of the declaration of this subprogram in the source. 570 | pub decl_coord: DeclCoord, 571 | /// If this subprogram is an instance of a generic subprogram, this provides 572 | /// the bindings for the type parameters. If this subprogram is not generic, 573 | /// this is empty. 574 | pub template_type_parameters: Vec, 575 | /// Type returned by subprogram, or `None` for `()`/`void`. 576 | pub return_type_id: Option, 577 | /// Information about parameters needed by this subprogram. 578 | pub formal_parameters: Vec, 579 | /// Subprograms that have been inlined into this one. 580 | pub inlines: Vec, 581 | /// If this subprogram represents a specialization of another, this provides 582 | /// a link to the prototype. The prototype may have information that this 583 | /// record does not, such as a valid name. 584 | pub abstract_origin: Option, 585 | /// Actual symbol name used to refer to this subprogram, if it is different 586 | /// from `name` -- which it tends to be in languages with hierarchical 587 | /// namespaces. 588 | pub linkage_name: Option, 589 | /// If `true`, this subprogram is expected not to return, meaning that any 590 | /// code after a call to this subprogram is theoretically unreachable. 591 | /// 592 | /// In Rust, `noreturn` functions tend to have `!` as their return type. 593 | pub noreturn: bool, 594 | /// Location in debug info. 595 | pub offset: gimli::UnitSectionOffset, 596 | } 597 | 598 | /// Parameter to a subprogram. 599 | /// 600 | /// This is more detailed than the `formal_parameters` used for function type 601 | /// definitions. 602 | /// 603 | /// Note that it's common for subprogram parameters to be abstract. In that 604 | /// case, most useful content will be missing from `SubParameter`, and you'll 605 | /// need to go consult the `abstract_origin`. 606 | #[derive(Clone, Debug)] 607 | pub struct SubParameter { 608 | /// Name of parameter, if available. 
609 | pub name: Option, 610 | /// Location of declaration of this parameter in the source. 611 | pub decl_coord: DeclCoord, 612 | /// Type of the parameter, if available. 613 | pub type_id: Option, 614 | /// Reference to a different `SubParameter` that this specializes. 615 | pub abstract_origin: Option, 616 | /// Fixed value for this parameter. This can happen in cases where a 617 | /// specialized `Subprogram` fixes one or more parameter values to 618 | /// constants. 619 | /// 620 | /// TODO: type probably needs to be more general. 621 | pub const_value: Option, 622 | /// Location in debug info. 623 | pub offset: gimli::UnitSectionOffset, 624 | } 625 | 626 | /// File "coordinates" -- path, line number, column number. 627 | /// 628 | /// Note that, in accordance with tradition, both lines and columns are numbered 629 | /// starting at one. 630 | #[derive(Clone, Debug, Default, PartialEq, Eq)] 631 | pub struct DeclCoord { 632 | /// Path to source file, if available. 633 | pub file: Option, 634 | /// Line number, if available. 635 | pub line: Option, 636 | /// Column number, if available. 637 | pub column: Option, 638 | } 639 | 640 | impl DeclCoord { 641 | pub fn is_useful(&self) -> bool { 642 | self.file.is_some() || self.line.is_some() || self.column.is_some() 643 | } 644 | } 645 | 646 | /// Information about a subroutine that has been inlined into a subprogram. 647 | #[derive(Clone, Debug)] 648 | pub struct InlinedSubroutine { 649 | /// Location of the subprogram abstract root that defines this. 650 | pub abstract_origin: Option, 651 | /// Ranges of PC values that are included in this inlined subroutine. 652 | pub pc_ranges: Vec, 653 | /// Location of the callsite that was inlined. 654 | pub call_coord: DeclCoord, 655 | /// Further inlined subroutines within this one. 656 | pub inlines: Vec, 657 | /// Definition of the formal parameters to this inlined subroutine. 658 | pub formal_parameters: Vec, 659 | /// Location in debug info. 
660 | pub offset: gimli::UnitSectionOffset, 661 | } 662 | 663 | /// A row of the computed line number table. 664 | #[derive(Clone, Debug)] 665 | pub struct LineNumberRow { 666 | /// Range of PC values that should use this entry. 667 | pub pc_range: std::ops::Range, 668 | /// Filename. 669 | pub file: String, 670 | /// Line number, if available. 671 | pub line: Option, 672 | /// Column number, if available. 673 | pub column: Option, 674 | } 675 | 676 | /// Information about a static stack frame associated with a PC value. 677 | /// 678 | /// TODO: the name of this type should become more meaningful as we learn how it 679 | /// is used. 680 | pub struct PcInfo { 681 | /// Subprogram being run. 682 | pub subprogram: ProgramId, 683 | /// File containing code being run. 684 | pub file: String, 685 | /// Line number of code being run, if available. 686 | pub line: Option, 687 | /// Column number of code being run, if available. 688 | pub column: Option, 689 | } 690 | 691 | /// A static variable with a fixed address. 692 | #[derive(Clone, Debug)] 693 | pub struct StaticVariable { 694 | /// Name of variable. 695 | pub name: String, 696 | /// Type contained in variable. 697 | pub type_id: TypeId, 698 | /// Location of variable declaration. 699 | pub decl: DeclCoord, 700 | /// Address in memory. 701 | pub location: u64, 702 | /// Location in debug info. 703 | pub offset: gimli::UnitSectionOffset, 704 | } 705 | 706 | pub trait Equiv { 707 | /// Tests if `self` and `other` are structurally equivalent, such that they 708 | /// could be unified into a single definition despite appearing in separate 709 | /// compilation units. 710 | /// 711 | /// Returns `None` if there is no way to make the definitions match, or 712 | /// `Some(tids)` if the definitions match if all the types in `tids` are 713 | /// also equivalent to each other. 
714 | fn equiv(&self, other: &Self) -> Option>; 715 | } 716 | 717 | impl Equiv for TypeId { 718 | fn equiv(&self, other: &Self) -> Option> { 719 | Some(vec![(*self, *other)]) 720 | } 721 | } 722 | 723 | impl Equiv for Member { 724 | fn equiv(&self, other: &Self) -> Option> { 725 | let self_easy = (&self.name, self.artificial, self.alignment, self.location); 726 | let other_easy = (&other.name, other.artificial, other.alignment, other.location); 727 | if self_easy != other_easy { 728 | return None; 729 | } 730 | 731 | Some(vec![(self.type_id, other.type_id)]) 732 | } 733 | } 734 | 735 | impl Equiv for Variant { 736 | fn equiv(&self, other: &Self) -> Option> { 737 | self.member.equiv(&other.member) 738 | } 739 | } 740 | 741 | impl Equiv for VariantShape { 742 | fn equiv(&self, other: &Self) -> Option> { 743 | match (self, other) { 744 | (Self::Zero, Self::Zero) => Some(vec![]), 745 | (Self::One(a), Self::One(b)) => a.equiv(b), 746 | (Self::Many { member: ma, variants: va, .. }, Self::Many { member: mb, variants: vb, .. 
}) => { 747 | let mut conditions = vec![]; 748 | conditions.extend(ma.equiv(mb)?); 749 | conditions.extend(va.equiv(vb)?); 750 | Some(conditions) 751 | } 752 | _ => None, 753 | } 754 | } 755 | } 756 | 757 | impl Equiv for TemplateTypeParameter { 758 | fn equiv(&self, other: &Self) -> Option> { 759 | if self.name != other.name { 760 | return None; 761 | } 762 | 763 | Some(vec![(self.type_id, other.type_id)]) 764 | } 765 | } 766 | 767 | impl Equiv for Vec 768 | where T: Equiv, 769 | { 770 | fn equiv(&self, other: &Self) -> Option> { 771 | if self.len() != other.len() { 772 | return None; 773 | } 774 | 775 | let mut conditions = vec![]; 776 | for (a, b) in self.iter().zip(other) { 777 | conditions.extend(a.equiv(b)?); 778 | } 779 | Some(conditions) 780 | } 781 | } 782 | 783 | impl Equiv for Option 784 | where T: Equiv, 785 | { 786 | fn equiv(&self, other: &Self) -> Option> { 787 | match (self, other) { 788 | (Some(a), Some(b)) => a.equiv(b), 789 | (a, b) => (a.is_none() && b.is_none()).then(Vec::new), // both absent: equivalent with no further conditions; present vs. absent never matches 790 | } 791 | } 792 | } 793 | 794 | impl Equiv for IndexMap 795 | where T: Equiv, 796 | K: Eq + Hash, 797 | { 798 | fn equiv(&self, other: &Self) -> Option> { 799 | if self.len() != other.len() { 800 | return None; 801 | } 802 | 803 | let mut conditions = vec![]; 804 | for (ak, a) in self { 805 | conditions.extend(a.equiv(other.get(ak)?)?); 806 | } 807 | Some(conditions) 808 | } 809 | } 810 | 811 | impl Equiv for Struct { 812 | fn equiv(&self, other: &Self) -> Option> { 813 | let self_easy = (&self.name, self.byte_size, self.alignment, self.tuple_like); 814 | let other_easy = (&other.name, other.byte_size, other.alignment, other.tuple_like); 815 | if self_easy != other_easy { 816 | return None; 817 | } 818 | 819 | let mut conditions = vec![]; 820 | conditions.extend(self.template_type_parameters.equiv(&other.template_type_parameters)?); 821 | conditions.extend(self.members.equiv(&other.members)?); 822 | 823 | Some(conditions) 824 | } 825 | } 826 | 827 | impl Equiv for Union { 828 | fn equiv(&self, other: &Self) ->
Option> { 829 | let self_easy = (&self.name, self.byte_size, self.alignment); 830 | let other_easy = (&other.name, other.byte_size, other.alignment); 831 | if self_easy != other_easy { 832 | return None; 833 | } 834 | 835 | let mut conditions = vec![]; 836 | conditions.extend(self.template_type_parameters.equiv(&other.template_type_parameters)?); 837 | conditions.extend(self.members.equiv(&other.members)?); 838 | 839 | Some(conditions) 840 | } 841 | } 842 | 843 | impl Equiv for Enum { 844 | fn equiv(&self, other: &Self) -> Option> { 845 | let self_easy = (&self.name, self.byte_size, self.alignment); 846 | let other_easy = (&other.name, other.byte_size, other.alignment); 847 | if self_easy != other_easy { 848 | return None; 849 | } 850 | 851 | let mut conditions = vec![]; 852 | conditions.extend(self.template_type_parameters.equiv(&other.template_type_parameters)?); 853 | conditions.extend(self.shape.equiv(&other.shape)?); 854 | 855 | Some(conditions) 856 | } 857 | } 858 | 859 | impl Equiv for Pointer { 860 | fn equiv(&self, other: &Self) -> Option> { 861 | if self.name != other.name { 862 | // TODO: should this allow for one unnamed type? 
863 | return None; 864 | } 865 | 866 | Some(vec![(self.type_id, other.type_id)]) 867 | } 868 | } 869 | 870 | impl Equiv for Base { 871 | fn equiv(&self, other: &Self) -> Option> { 872 | let self_easy = (&self.name, self.encoding, self.byte_size, self.alignment); 873 | let other_easy = (&other.name, other.encoding, other.byte_size, other.alignment); 874 | if self_easy != other_easy { 875 | return None; 876 | } 877 | 878 | Some(vec![]) 879 | } 880 | } 881 | 882 | impl Equiv for Array { 883 | fn equiv(&self, other: &Self) -> Option> { 884 | if self.lower_bound != other.lower_bound || self.count != other.count { 885 | return None; 886 | } 887 | 888 | Some(vec![ 889 | (self.element_type_id, other.element_type_id), 890 | (self.index_type_id, other.index_type_id), 891 | ]) 892 | } 893 | } 894 | 895 | impl Equiv for Enumerator { 896 | fn equiv(&self, other: &Self) -> Option> { 897 | if self.name != other.name || self.const_value != other.const_value { 898 | return None; 899 | } 900 | Some(vec![]) 901 | } 902 | } 903 | 904 | impl Equiv for CEnum { 905 | fn equiv(&self, other: &Self) -> Option> { 906 | let self_easy = (&self.name, self.enum_class, self.byte_size, self.alignment); 907 | let other_easy = (&other.name, other.enum_class, other.byte_size, other.alignment); 908 | if self_easy != other_easy { 909 | return None; 910 | } 911 | 912 | self.enumerators.equiv(&other.enumerators) 913 | } 914 | } 915 | 916 | impl Equiv for Subroutine { 917 | fn equiv(&self, other: &Self) -> Option> { 918 | let mut conditions = vec![]; 919 | conditions.extend(self.return_type_id.equiv(&other.return_type_id)?); 920 | conditions.extend(self.formal_parameters.equiv(&other.formal_parameters)?); 921 | Some(conditions) 922 | } 923 | } 924 | 925 | impl Equiv for Type { 926 | fn equiv(&self, other: &Self) -> Option> { 927 | match (self, other) { 928 | (Self::Struct(a), Self::Struct(b)) => a.equiv(b), 929 | (Self::Enum(a), Self::Enum(b)) => a.equiv(b), 930 | (Self::Pointer(a), Self::Pointer(b)) => 
a.equiv(b), 931 | (Self::Base(a), Self::Base(b)) => a.equiv(b), 932 | (Self::Array(a), Self::Array(b)) => a.equiv(b), 933 | (Self::CEnum(a), Self::CEnum(b)) => a.equiv(b), 934 | (Self::Union(a), Self::Union(b)) => a.equiv(b), 935 | (Self::Subroutine(a), Self::Subroutine(b)) => a.equiv(b), 936 | _ => None, 937 | } 938 | } 939 | } 940 | -------------------------------------------------------------------------------- /src/unify.rs: -------------------------------------------------------------------------------- 1 | use crate::TypeId; 2 | use crate::model::*; 3 | use indexmap::IndexMap; 4 | use core::hash::Hash; 5 | use std::collections::BTreeMap; 6 | 7 | #[derive(Clone)] 8 | pub struct State<'a> { 9 | /// Substitution map. An entry `(key, value)` in this map means that the 10 | /// type identified by `key` has been found to be equivalent to earlier type 11 | /// `value`, for canonicalization purposes. 12 | subs: BTreeMap, 13 | 14 | types: &'a BTreeMap, 15 | } 16 | 17 | impl<'a> State<'a> { 18 | pub fn new(types: &'a BTreeMap) -> Self { 19 | Self { 20 | subs: BTreeMap::new(), 21 | types, 22 | } 23 | } 24 | 25 | pub fn merge(&mut self, other: Self) { 26 | for (k, v) in other.subs { 27 | self.equate(k, v); 28 | } 29 | } 30 | 31 | /// Iteratively applies substitutions to `t` until a type with no 32 | /// substitutions is found. 33 | pub fn canonicalize(&self, t: TypeId) -> TypeId { 34 | let mut result = t; 35 | while let Some(next) = self.subs.get(&result) { 36 | result = *next; 37 | } 38 | result 39 | } 40 | 41 | pub fn is_subbed(&self, t: TypeId) -> bool { 42 | self.subs.contains_key(&t) 43 | } 44 | 45 | pub fn find_type(&self, t: TypeId) -> &'a Type { 46 | &self.types[&self.canonicalize(t)] 47 | } 48 | 49 | pub fn finish(self) -> BTreeMap { 50 | let mut result = BTreeMap::new(); 51 | for &t in self.types.keys() { 52 | let c = self.canonicalize(t); 53 | // Prune. 
54 | if c != t { 55 | result.insert(t, c); 56 | } 57 | } 58 | result 59 | } 60 | 61 | /// Unifies `a` and `b` such that they will look up to the same typeid in 62 | /// the future. The "canonical" type is the lower number of the two. 63 | /// 64 | /// This does no checking of similarity of `a` and `b`. 65 | pub fn equate(&mut self, a: TypeId, b: TypeId) { 66 | let ca = self.canonicalize(a); 67 | let cb = self.canonicalize(b); 68 | match ca.cmp(&cb) { 69 | std::cmp::Ordering::Less => { 70 | self.subs.insert(cb, ca); 71 | } 72 | std::cmp::Ordering::Equal => (), 73 | std::cmp::Ordering::Greater => { 74 | self.subs.insert(ca, cb); 75 | } 76 | } 77 | } 78 | /// Runs `body` on a clone of this state; the clone replaces `self` only if `body` returns `true`. 79 | fn checkpoint(&mut self, body: impl FnOnce(&mut Self) -> bool) -> bool { 80 | let mut cp = self.clone(); 81 | if body(&mut cp) { 82 | *self = cp; 83 | true 84 | } else { 85 | false 86 | } 87 | } 88 | } 89 | /// Structural unification of two definitions; substitutions found along the way accumulate in `state`. 90 | pub trait Unify { 91 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool; 92 | } 93 | 94 | impl Unify for Vec 95 | where T: Unify, 96 | { 97 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 98 | if self.len() != other.len() { 99 | return false; 100 | } 101 | 102 | state.checkpoint(|state| { 103 | self.iter().zip(other).all(|(a, b)| a.try_unify(b, state)) 104 | }) 105 | } 106 | } 107 | 108 | impl Unify for Option 109 | where T: Unify, 110 | { 111 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 112 | match (self, other) { 113 | (Some(a), Some(b)) => a.try_unify(b, state), 114 | (a, b) => a.is_none() && b.is_none(), // both absent unify trivially; present vs. absent never does 115 | } 116 | } 117 | } 118 | 119 | impl Unify for IndexMap 120 | where T: Unify, 121 | K: Eq + Hash, 122 | { 123 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 124 | if self.len() != other.len() { 125 | return false; 126 | } 127 | 128 | state.checkpoint(|state| { 129 | // A key missing from `other` means the maps differ; indexing with `other[ak]` would panic instead. 130 | self.iter().all(|(ak, a)| other.get(ak).map_or(false, |b| a.try_unify(b, state))) 131 | }) 132 | } 133 | } 134 | 135 | impl Unify for TypeId { 136 | fn try_unify(&self, other:
&Self, state: &mut State<'_>) -> bool { 137 | let cself = state.canonicalize(*self); 138 | let cother = state.canonicalize(*other); 139 | 140 | if cself == cother { 141 | return true; 142 | } 143 | // Look up both definitions before equating: afterwards both ids canonicalize 144 | // to the same entry, so the type would only ever be compared with itself. 145 | let (tself, tother) = (state.find_type(cself), state.find_type(cother)); 146 | state.checkpoint(|state| { 147 | state.equate(cself, cother); // provisional substitution 148 | tself.try_unify(tother, state) 149 | }) 150 | } 151 | } 152 | 153 | impl Unify for Member { 154 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 155 | let self_easy = (&self.name, self.artificial, self.alignment, self.location); 156 | let other_easy = (&other.name, other.artificial, other.alignment, other.location); 157 | if self_easy != other_easy { 158 | return false; 159 | } 160 | 161 | self.type_id.try_unify(&other.type_id, state) 162 | } 163 | } 164 | 165 | impl Unify for Variant { 166 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 167 | self.member.try_unify(&other.member, state) 168 | } 169 | } 170 | 171 | impl Unify for VariantShape { 172 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 173 | match (self, other) { 174 | (Self::Zero, Self::Zero) => true, 175 | (Self::One(a), Self::One(b)) => a.try_unify(b, state), 176 | ( 177 | Self::Many { member: ma, variants: va, .. }, 178 | Self::Many { member: mb, variants: vb, ..
}, 179 | ) => { 180 | state.checkpoint(|state| { 181 | ma.try_unify(mb, state) 182 | && va.try_unify(vb, state) 183 | }) 184 | } 185 | _ => false, 186 | } 187 | } 188 | } 189 | 190 | impl Unify for TemplateTypeParameter { 191 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 192 | if self.name != other.name { 193 | return false; 194 | } 195 | 196 | self.type_id.try_unify(&other.type_id, state) 197 | } 198 | } 199 | 200 | impl Unify for Struct { 201 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 202 | let self_easy = (&self.name, self.byte_size, self.alignment, self.tuple_like); 203 | let other_easy = (&other.name, other.byte_size, other.alignment, other.tuple_like); 204 | if self_easy != other_easy { 205 | return false; 206 | } 207 | 208 | state.checkpoint(|state| { 209 | self.template_type_parameters.try_unify( 210 | &other.template_type_parameters, 211 | state, 212 | ) && self.members.try_unify(&other.members, state) 213 | }) 214 | } 215 | } 216 | 217 | impl Unify for Union { 218 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 219 | let self_easy = (&self.name, self.byte_size, self.alignment); 220 | let other_easy = (&other.name, other.byte_size, other.alignment); 221 | if self_easy != other_easy { 222 | return false; 223 | } 224 | 225 | state.checkpoint(|state| { 226 | self.template_type_parameters.try_unify( 227 | &other.template_type_parameters, 228 | state, 229 | ) && self.members.try_unify(&other.members, state) 230 | }) 231 | } 232 | } 233 | 234 | impl Unify for Enum { 235 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 236 | let self_easy = (&self.name, self.byte_size, self.alignment); 237 | let other_easy = (&other.name, other.byte_size, other.alignment); 238 | if self_easy != other_easy { 239 | return false; 240 | } 241 | 242 | state.checkpoint(|state| { 243 | self.template_type_parameters.try_unify( 244 | &other.template_type_parameters, 245 | state, 246 | ) && 
self.shape.try_unify(&other.shape, state) 247 | }) 248 | } 249 | } 250 | 251 | impl Unify for Pointer { 252 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 253 | if self.name != other.name { 254 | // TODO: should this allow for one unnamed type? 255 | return false; 256 | } 257 | 258 | self.type_id.try_unify(&other.type_id, state) 259 | } 260 | } 261 | 262 | impl Unify for Base { 263 | fn try_unify(&self, other: &Self, _state: &mut State<'_>) -> bool { 264 | let self_easy = (&self.name, self.encoding, self.byte_size, self.alignment); 265 | let other_easy = (&other.name, other.encoding, other.byte_size, other.alignment); 266 | self_easy == other_easy 267 | } 268 | } 269 | 270 | impl Unify for Array { 271 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 272 | if self.lower_bound != other.lower_bound || self.count != other.count { 273 | return false; 274 | } 275 | 276 | state.checkpoint(|state| { 277 | self.element_type_id.try_unify(&other.element_type_id, state) 278 | && self.index_type_id.try_unify(&other.index_type_id, state) 279 | }) 280 | } 281 | } 282 | 283 | impl Unify for Enumerator { 284 | fn try_unify(&self, other: &Self, _state: &mut State<'_>) -> bool { 285 | self.name == other.name && self.const_value == other.const_value 286 | } 287 | } 288 | 289 | impl Unify for CEnum { 290 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 291 | let self_easy = (&self.name, self.enum_class, self.byte_size, self.alignment); 292 | let other_easy = (&other.name, other.enum_class, other.byte_size, other.alignment); 293 | if self_easy != other_easy { 294 | return false; 295 | } 296 | 297 | self.enumerators.try_unify(&other.enumerators, state) 298 | } 299 | } 300 | 301 | impl Unify for Subroutine { 302 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 303 | state.checkpoint(|state| { 304 | self.return_type_id.try_unify(&other.return_type_id, state) 305 | && 
self.formal_parameters.try_unify(&other.formal_parameters, state) 306 | }) 307 | } 308 | } 309 | 310 | impl Unify for Type { 311 | fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool { 312 | match (self, other) { 313 | (Self::Struct(a), Self::Struct(b)) => a.try_unify(b, state), 314 | (Self::Enum(a), Self::Enum(b)) => a.try_unify(b, state), 315 | (Self::Pointer(a), Self::Pointer(b)) => a.try_unify(b, state), 316 | (Self::Base(a), Self::Base(b)) => a.try_unify(b, state), 317 | (Self::Array(a), Self::Array(b)) => a.try_unify(b, state), 318 | (Self::CEnum(a), Self::CEnum(b)) => a.try_unify(b, state), 319 | (Self::Union(a), Self::Union(b)) => a.try_unify(b, state), 320 | (Self::Subroutine(a), Self::Subroutine(b)) => a.try_unify(b, state), 321 | _ => false, 322 | } 323 | } 324 | } 325 | -------------------------------------------------------------------------------- /src/value.rs: -------------------------------------------------------------------------------- 1 | //! Abstract, dynamic, JSON-like representation of Rust values. 2 | //! 3 | //! This can be read from a program image using `Load` even if the program doing 4 | //! the reading doesn't know the type shape in advance. 
5 | 6 | use regex::Regex; 7 | 8 | use crate::load::{choose_variant, load_unsigned, Load, LoadError, Machine}; 9 | use crate::{Encoding, Type, DebugDb, TypeId, EntityId}; 10 | use std::borrow::Cow; 11 | use std::convert::TryFrom; 12 | use std::fmt::Display; 13 | use std::collections::{BTreeSet, BTreeMap}; 14 | 15 | #[derive(Clone, Debug)] 16 | pub enum Value { 17 | Array(Vec), 18 | Base(Base), 19 | Struct(Struct), 20 | CEnum(CEnum), 21 | Enum(Enum), 22 | Pointer(Pointer), 23 | } 24 | 25 | impl Value { 26 | pub fn u64_value(&self) -> Option { 27 | if let Self::Base(b) = self { 28 | match b { 29 | Base::U8(x) => return Some(u64::from(*x)), 30 | Base::U32(x) => return Some(u64::from(*x)), 31 | Base::U64(x) => return Some(*x), 32 | _ => (), 33 | } 34 | } 35 | None 36 | } 37 | 38 | pub fn pointer_value(&self) -> Option { 39 | let Self::Pointer(p) = self else { return None; }; 40 | Some(p.value) 41 | } 42 | 43 | pub fn newtype(&self, name: &str) -> Option<&Value> { 44 | let Self::Struct(s) = self else { return None }; 45 | if s.name != name { return None; } 46 | if s.members.len() != 1 { return None }; 47 | s.any_member_named("__0") 48 | } 49 | 50 | pub fn type_name(&self) -> Cow<'_, str> { 51 | match self { 52 | Self::Array(es) => { 53 | let elt_type = es.first() 54 | .map(|v| v.type_name()) 55 | .unwrap_or("???".into()); 56 | format!("[{}; {}]", elt_type, es.len()).into() 57 | } 58 | Self::Base(b) => match b { 59 | Base::U8(_) => "u8".into(), 60 | Base::U32(_) => "u32".into(), 61 | Base::U64(_) => "u64".into(), 62 | Base::Bool(_) => "bool".into(), 63 | Base::Unit => "()".into(), 64 | }, 65 | Self::Struct(s) => (&s.name).into(), 66 | Self::CEnum(s) => (&s.name).into(), 67 | Self::Enum(s) => (&s.name).into(), 68 | Self::Pointer(s) => (&s.name).into(), 69 | } 70 | } 71 | 72 | pub fn collect_names(&self, set: &mut BTreeSet) { 73 | match self { 74 | Self::Array(v) => for elt in v { 75 | elt.collect_names(set); 76 | }, 77 | Self::Base(_) => (), 78 | Self::Struct(s) => { 79 | 
set.insert(s.name.clone()); 80 | for (_, value) in &s.members { 81 | value.collect_names(set); 82 | } 83 | } 84 | Self::CEnum(e) => { 85 | set.insert(e.name.clone()); 86 | } 87 | Self::Enum(e) => { 88 | set.insert(e.name.clone()); 89 | // We are deliberately skipping the name of the variant struct. 90 | for (_, value) in &e.value.members { 91 | value.collect_names(set); 92 | } 93 | } 94 | Self::Pointer(p) => { 95 | set.insert(p.name.clone()); 96 | } 97 | } 98 | } 99 | 100 | fn text(&self, world: &DebugDb, indent: usize, use_table: &UseTable, f: &mut core::fmt::Formatter) -> core::fmt::Result { 101 | match self { 102 | Self::Base(b) => match b { 103 | Base::U8(x) => write!(f, "{x}_u8"), 104 | Base::U32(x) => write!(f, "{x}_u32"), 105 | Base::U64(x) => write!(f, "{x}_u64"), 106 | Base::Bool(0) => write!(f, "false"), 107 | Base::Bool(1) => write!(f, "true"), 108 | Base::Bool(x) => write!(f, "{x}_bool"), 109 | Base::Unit => write!(f, "()"), 110 | }, 111 | Self::Pointer(p) => { 112 | let nearest = world.entities_by_address(p.value) 113 | .filter_map(|ar| if let EntityId::Var(v) = ar.entity { 114 | Some((v, ar.range.clone())) 115 | } else { 116 | None 117 | }) 118 | .min_by_key(|(_, range)| range.start.abs_diff(p.value)); 119 | if let Some((vid, _range)) = nearest { 120 | let var = world.static_variable_by_id(vid).unwrap(); 121 | let name = &var.name; 122 | let prefix = if p.is_probably_mut() { 123 | "&mut " 124 | } else { 125 | "&" 126 | }; 127 | write!(f, "{prefix}{name} /* {:#x} */ as {}", p.value, p.name) 128 | } else { 129 | write!(f, "{:#x} as {}", p.value, p.name) 130 | } 131 | }, 132 | Self::CEnum(e) => write!(f, "{}::{}", use_table.rewrite(&e.name), e.disc), 133 | Self::Array(v) => { 134 | // TODO: special-case bases for more compact printering 135 | writeln!(f, "[")?; 136 | for elt in v { 137 | write!(f, "{:indent$} ", "")?; 138 | elt.text(world, indent + 4, use_table, f)?; 139 | writeln!(f, ",")?; 140 | } 141 | write!(f, "{:indent$}]", "") 142 | } 143 | 
Self::Struct(s) => { 144 | if !display_dyn(world, s, f)? { 145 | write!(f, "{}", use_table.rewrite(&s.name))?; 146 | fmt_struct_body(s, world, indent, use_table, f)?; 147 | } 148 | Ok(()) 149 | } 150 | Self::Enum(e) => { 151 | write!(f, "{}::{}", use_table.rewrite(&e.name), e.disc)?; 152 | fmt_struct_body(&e.value, world, indent, use_table, f) 153 | } 154 | } 155 | } 156 | } 157 | 158 | fn display_dyn( 159 | world: &DebugDb, 160 | s: &Struct, 161 | f: &mut core::fmt::Formatter, 162 | ) -> Result { 163 | let dynptr = Regex::new(r#"^[&*](mut )?dyn (.*)$"#).unwrap(); 164 | if s.members.len() != 2 { return Ok(false); } 165 | 166 | let Some(c) = dynptr.captures(&s.name) else { return Ok(false); }; 167 | let _trait_name = &c[2]; 168 | let ismut = &c[1]; 169 | let Some((_, value)) = s.members.iter() 170 | .find(|(name, _)| name.as_ref().map(String::as_str) == Some("vtable")) 171 | else { return Ok(false); }; 172 | 173 | let Some((_, dest)) = s.members.iter() 174 | .find(|(name, _)| name.as_ref().map(String::as_str) == Some("pointer")) 175 | else { return Ok(false); }; 176 | 177 | let Some(addr) = value.pointer_value() else { return Ok(false); }; 178 | let Some(dest_addr) = dest.pointer_value() else { return Ok(false); }; 179 | 180 | for e in world.entities_by_address(addr) { 181 | if addr != e.range.start { 182 | continue; 183 | } 184 | let EntityId::Var(v) = e.entity else { return Ok(false); }; 185 | let Some(v) = world.static_variable_by_id(v) else { return Ok(false); }; 186 | 187 | let vtable = Regex::new(r#"^<(.*) as (.*)>::\{vtable\}$"#).unwrap(); 188 | let Some(vc) = vtable.captures(&v.name) else { return Ok(false); }; 189 | let concrete = &vc[1]; 190 | let trait_name = &vc[2]; 191 | 192 | write!(f, "{dest_addr:#x} as &{ismut}{concrete} as &{ismut}dyn {trait_name}")?; 193 | return Ok(true); 194 | } 195 | 196 | Ok(false) 197 | } 198 | 199 | fn fmt_struct_body(s: &Struct, world: &DebugDb, indent: usize, use_table: &UseTable, f: &mut core::fmt::Formatter) -> 
core::fmt::Result { 200 | if s.members.is_empty() { 201 | Ok(()) 202 | } else if s.is_tuple_like() { 203 | if s.members.len() == 1 { 204 | write!(f, "(")?; 205 | for (_, value) in &s.members { 206 | value.text(world, indent, use_table, f)?; 207 | } 208 | write!(f, ")") 209 | } else { 210 | writeln!(f, "(")?; 211 | for (_, value) in &s.members { 212 | write!(f, "{:indent$} ", "")?; 213 | value.text(world, indent + 4, use_table, f)?; 214 | writeln!(f, ",")?; 215 | } 216 | write!(f, "{:indent$})", "") 217 | } 218 | } else { 219 | writeln!(f, " {{")?; 220 | for (name, value) in &s.members { 221 | if let Some(name) = name { 222 | write!(f, "{:indent$} {name}: ", "")?; 223 | } else { 224 | write!(f, "{:indent$} _: ", "")?; 225 | } 226 | value.text(world, indent + 4, use_table, f)?; 227 | writeln!(f, ",")?; 228 | } 229 | write!(f, "{:indent$}}}", "") 230 | } 231 | } 232 | 233 | pub struct ValueWithDb<'a>(pub Value, pub &'a DebugDb); 234 | 235 | impl Display for ValueWithDb<'_> { 236 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 237 | let mut names = BTreeSet::new(); 238 | self.0.collect_names(&mut names); 239 | let use_table = UseTable::new(names); 240 | for (long, stub) in &use_table.0 { 241 | if long == stub { 242 | writeln!(f, "use {long};")?; 243 | } else { 244 | writeln!(f, "use {long} as {stub};")?; 245 | } 246 | } 247 | self.0.text(self.1, 0, &use_table, f) 248 | } 249 | } 250 | 251 | struct UseTable(BTreeMap); 252 | 253 | impl UseTable { 254 | fn new(names: BTreeSet) -> Self { 255 | let simple = Regex::new(r#"^([a-zA-Z_0-9{}#]+::)*([A-Za-z0-9_]+)$"#).unwrap(); 256 | let mut rewrites = BTreeMap::new(); 257 | let mut taken = BTreeSet::new(); 258 | for name in names { 259 | if let Some(c) = simple.captures(&name) { 260 | let stub = &c[2]; 261 | if !taken.contains(stub) { 262 | taken.insert(stub.to_string()); 263 | rewrites.insert(name.clone(), stub.to_string()); 264 | } 265 | } 266 | } 267 | Self(rewrites) 268 | } 269 | 270 | fn rewrite<'a>(&'a 
self, name: &'a str) -> &str { 271 | self.0.get(name).map(String::as_str).unwrap_or(name) 272 | } 273 | } 274 | 275 | impl Load for Value { 276 | fn from_state( 277 | machine: &M, 278 | addr: u64, 279 | world: &DebugDb, 280 | ty: &Type, 281 | ) -> Result> { 282 | match ty { 283 | Type::Base(_) => { 284 | Ok(Self::Base(Base::from_state(machine, addr, world, ty)?)) 285 | } 286 | Type::Array(_) => { 287 | Ok(Self::Array(Vec::from_state(machine, addr, world, ty)?)) 288 | } 289 | Type::Struct(_) => { 290 | Ok(Self::Struct(Struct::from_state(machine, addr, world, ty)?)) 291 | } 292 | Type::CEnum(_) => { 293 | Ok(Self::CEnum(CEnum::from_state(machine, addr, world, ty)?)) 294 | } 295 | Type::Enum(_) => { 296 | Ok(Self::Enum(Enum::from_state(machine, addr, world, ty)?)) 297 | } 298 | Type::Pointer(_) => Ok(Self::Pointer(Pointer::from_state( 299 | machine, addr, world, ty, 300 | )?)), 301 | _ => unimplemented!(), 302 | } 303 | } 304 | } 305 | 306 | #[derive(Copy, Clone, Debug)] 307 | pub enum Base { 308 | Unit, 309 | U8(u8), 310 | U32(u32), 311 | U64(u64), 312 | Bool(u8), 313 | } 314 | 315 | impl Base { 316 | pub fn as_u64(self) -> Option { 317 | match self { 318 | Self::U8(x) => Some(u64::from(x)), 319 | Self::U32(x) => Some(u64::from(x)), 320 | Self::U64(x) => Some(x), 321 | _ => None, 322 | } 323 | } 324 | } 325 | 326 | impl Load for Base { 327 | fn from_state( 328 | machine: &M, 329 | addr: u64, 330 | world: &DebugDb, 331 | ty: &Type, 332 | ) -> Result> { 333 | let Type::Base(b) = ty else { return Err(LoadError::NotABase); }; 334 | match (b.encoding, b.byte_size) { 335 | (Encoding::Unsigned, 1) => Ok(Base::U8(load_unsigned( 336 | world.endian(), 337 | machine, 338 | addr, 339 | 1, 340 | )?.ok_or(LoadError::DataUnavailable)? as u8)), 341 | (Encoding::Unsigned, 4) => Ok(Base::U32(load_unsigned( 342 | world.endian(), 343 | machine, 344 | addr, 345 | 4, 346 | )?.ok_or(LoadError::DataUnavailable)? 
as u32)), 347 | (Encoding::Unsigned, 8) => Ok(Base::U64(load_unsigned( 348 | world.endian(), 349 | machine, 350 | addr, 351 | 8, 352 | )?.ok_or(LoadError::DataUnavailable)?)), 353 | (Encoding::Boolean, 1) => Ok(Base::Bool(load_unsigned( 354 | world.endian(), 355 | machine, 356 | addr, 357 | 1, 358 | )?.ok_or(LoadError::DataUnavailable)? as u8)), 359 | (Encoding::Unsigned, 0) => Ok(Base::Unit), 360 | _ => { 361 | println!("{:?} {}", b.encoding, b.byte_size); 362 | Err(LoadError::UnsupportedType) 363 | }, 364 | } 365 | } 366 | } 367 | 368 | #[derive(Clone, Debug)] 369 | pub struct Struct { 370 | pub name: String, 371 | pub members: Vec<(Option, Value)>, 372 | } 373 | 374 | impl Struct { 375 | // TODO: better to have a Value::Tuple and distinguish at creation 376 | pub fn is_tuple_like(&self) -> bool { 377 | for (name, _) in &self.members { 378 | let Some(name) = name else { return false; }; 379 | if !name.starts_with("__") { return false; } 380 | if name[2..].parse::().is_err() { 381 | return false; 382 | } 383 | } 384 | true 385 | } 386 | 387 | pub fn members_named<'s, 'n>(&'s self, name: &'n str) -> impl Iterator + 'n 388 | where 's: 'n { 389 | self.members.iter() 390 | .filter(|(n, _)| n.as_deref() == Some(name)) 391 | .map(|(_, value)| value) 392 | } 393 | 394 | pub fn unique_member_named<'s>(&'s self, name: &str) -> Option<&'s Value> { 395 | let mut m = self.members_named(name); 396 | let r = m.next()?; 397 | if m.next().is_some() { 398 | None 399 | } else { 400 | Some(r) 401 | } 402 | } 403 | 404 | pub fn any_member_named(&self, name: &str) -> Option<&Value> { 405 | self.members.iter() 406 | .find(|(n, _)| n.as_deref() == Some(name)) 407 | .map(|(_, v)| v) 408 | } 409 | } 410 | 411 | impl Load for Struct { 412 | fn from_state( 413 | machine: &M, 414 | addr: u64, 415 | world: &DebugDb, 416 | ty: &Type, 417 | ) -> Result> { 418 | let Type::Struct(s) = ty else { return Err(LoadError::NotAStruct); }; 419 | let mut members = vec![]; 420 | 421 | for m in &s.members { 
422 | let t = world.type_by_id(m.type_id).unwrap(); 423 | let ma = addr + m.location; 424 | let v = Value::from_state(machine, ma, world, t)?; 425 | members.push((m.name.clone(), v)); 426 | } 427 | 428 | Ok(Self { 429 | name: s.name.clone(), 430 | members, 431 | }) 432 | } 433 | } 434 | 435 | #[derive(Clone, Debug)] 436 | pub struct Enum { 437 | pub name: String, 438 | pub disc: String, 439 | pub value: Struct, 440 | } 441 | 442 | impl Load for Enum { 443 | fn from_state( 444 | machine: &M, 445 | addr: u64, 446 | world: &DebugDb, 447 | ty: &Type, 448 | ) -> Result> { 449 | let Type::Enum(s) = ty else { return Err(LoadError::NotAnEnum); }; 450 | let v = choose_variant(machine, addr, world, s)?; 451 | 452 | let vtype_id = v.member.type_id; 453 | let vty = world.type_by_id(vtype_id).unwrap(); 454 | let va = addr + v.member.location; 455 | let value = Struct::from_state(machine, va, world, vty)?; 456 | 457 | Ok(Self { 458 | name: s.name.clone(), 459 | disc: v.member.name.as_ref().unwrap().clone(), 460 | value, 461 | }) 462 | } 463 | } 464 | 465 | #[derive(Clone, Debug)] 466 | pub struct CEnum { 467 | name: String, 468 | disc: String, 469 | } 470 | 471 | impl Load for CEnum { 472 | fn from_state( 473 | machine: &M, 474 | addr: u64, 475 | world: &DebugDb, 476 | ty: &Type, 477 | ) -> Result> { 478 | let Type::CEnum(s) = ty else { return Err(LoadError::NotACEnum) }; 479 | 480 | let disc_value = load_unsigned( 481 | world.endian(), 482 | machine, 483 | addr, 484 | usize::try_from(s.byte_size).unwrap(), 485 | )?.ok_or(LoadError::DataUnavailable)?; 486 | 487 | let e = s 488 | .enumerators 489 | .get(&disc_value) 490 | .ok_or(LoadError::BadDiscriminator(disc_value))?; 491 | 492 | Ok(Self { 493 | name: s.name.clone(), 494 | disc: e.name.clone(), 495 | }) 496 | } 497 | } 498 | 499 | #[derive(Clone, Debug)] 500 | pub struct Pointer { 501 | pub name: String, 502 | pub dest_type_id: TypeId, 503 | pub value: u64, 504 | } 505 | 506 | impl Pointer { 507 | fn is_probably_mut(&self) -> 
bool { 508 | self.name.starts_with("&mut") || self.name.starts_with("*mut") || self.name.starts_with("*_") 509 | } 510 | } 511 | 512 | impl Load for Pointer { 513 | fn from_state( 514 | machine: &M, 515 | addr: u64, 516 | world: &DebugDb, 517 | ty: &Type, 518 | ) -> Result> { 519 | // TODO support pointer sizes 520 | 521 | let Type::Pointer(s) = ty else { return Err(LoadError::NotAPointer); }; 522 | 523 | let value = load_unsigned(world.endian(), machine, addr, world.pointer_size())? 524 | .ok_or(LoadError::DataUnavailable)?; 525 | 526 | Ok(Self { 527 | name: Cow::into_owned(ty.name(world)), 528 | dest_type_id: s.type_id, 529 | value, 530 | }) 531 | } 532 | } 533 | --------------------------------------------------------------------------------