├── .gitignore
├── .rustfmt.toml
├── Cargo.toml
├── LICENSE
├── README.mkdn
└── src
    ├── bin
        ├── just-parse.rs
        └── tysh.rs
    ├── dwarf_parser.rs
    ├── lib.rs
    ├── load.rs
    ├── model.rs
    ├── unify.rs
    └── value.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | Cargo.lock
3 | 


--------------------------------------------------------------------------------
/.rustfmt.toml:
--------------------------------------------------------------------------------
1 | max_width = 80
2 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "debugdb"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 7 | 
 8 | [dependencies]
 9 | gimli = "0.26.1"
10 | fallible-iterator = "0.2.0"
11 | object = "0.26.0"
12 | indexmap = "1.7"
13 | scroll = "0.10"
14 | rustyline = "11.0"
15 | ansi_term = "0.12.1"
16 | anyhow = { version = "1.0.68", features = ["backtrace"] }
17 | rangemap = "1.3.0"
18 | clap = { version = "4.3.5", features = ["derive"] }
19 | thiserror = "1.0.40"
20 | regex = "1.8.4"
21 | parse_int = "0.6.0"
22 | 
23 | [profile.release]
24 | debug = true
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Mozilla Public License Version 2.0
  2 | ==================================
  3 | 
  4 | 1. Definitions
  5 | --------------
  6 | 
  7 | 1.1. "Contributor"
  8 |     means each individual or legal entity that creates, contributes to
  9 |     the creation of, or owns Covered Software.
 10 | 
 11 | 1.2. "Contributor Version"
 12 |     means the combination of the Contributions of others (if any) used
 13 |     by a Contributor and that particular Contributor's Contribution.
 14 | 
 15 | 1.3. "Contribution"
 16 |     means Covered Software of a particular Contributor.
 17 | 
 18 | 1.4. "Covered Software"
 19 |     means Source Code Form to which the initial Contributor has attached
 20 |     the notice in Exhibit A, the Executable Form of such Source Code
 21 |     Form, and Modifications of such Source Code Form, in each case
 22 |     including portions thereof.
 23 | 
 24 | 1.5. "Incompatible With Secondary Licenses"
 25 |     means
 26 | 
 27 |     (a) that the initial Contributor has attached the notice described
 28 |         in Exhibit B to the Covered Software; or
 29 | 
 30 |     (b) that the Covered Software was made available under the terms of
 31 |         version 1.1 or earlier of the License, but not also under the
 32 |         terms of a Secondary License.
 33 | 
 34 | 1.6. "Executable Form"
 35 |     means any form of the work other than Source Code Form.
 36 | 
 37 | 1.7. "Larger Work"
 38 |     means a work that combines Covered Software with other material, in
 39 |     a separate file or files, that is not Covered Software.
 40 | 
 41 | 1.8. "License"
 42 |     means this document.
 43 | 
 44 | 1.9. "Licensable"
 45 |     means having the right to grant, to the maximum extent possible,
 46 |     whether at the time of the initial grant or subsequently, any and
 47 |     all of the rights conveyed by this License.
 48 | 
 49 | 1.10. "Modifications"
 50 |     means any of the following:
 51 | 
 52 |     (a) any file in Source Code Form that results from an addition to,
 53 |         deletion from, or modification of the contents of Covered
 54 |         Software; or
 55 | 
 56 |     (b) any new file in Source Code Form that contains any Covered
 57 |         Software.
 58 | 
 59 | 1.11. "Patent Claims" of a Contributor
 60 |     means any patent claim(s), including without limitation, method,
 61 |     process, and apparatus claims, in any patent Licensable by such
 62 |     Contributor that would be infringed, but for the grant of the
 63 |     License, by the making, using, selling, offering for sale, having
 64 |     made, import, or transfer of either its Contributions or its
 65 |     Contributor Version.
 66 | 
 67 | 1.12. "Secondary License"
 68 |     means either the GNU General Public License, Version 2.0, the GNU
 69 |     Lesser General Public License, Version 2.1, the GNU Affero General
 70 |     Public License, Version 3.0, or any later versions of those
 71 |     licenses.
 72 | 
 73 | 1.13. "Source Code Form"
 74 |     means the form of the work preferred for making modifications.
 75 | 
 76 | 1.14. "You" (or "Your")
 77 |     means an individual or a legal entity exercising rights under this
 78 |     License. For legal entities, "You" includes any entity that
 79 |     controls, is controlled by, or is under common control with You. For
 80 |     purposes of this definition, "control" means (a) the power, direct
 81 |     or indirect, to cause the direction or management of such entity,
 82 |     whether by contract or otherwise, or (b) ownership of more than
 83 |     fifty percent (50%) of the outstanding shares or beneficial
 84 |     ownership of such entity.
 85 | 
 86 | 2. License Grants and Conditions
 87 | --------------------------------
 88 | 
 89 | 2.1. Grants
 90 | 
 91 | Each Contributor hereby grants You a world-wide, royalty-free,
 92 | non-exclusive license:
 93 | 
 94 | (a) under intellectual property rights (other than patent or trademark)
 95 |     Licensable by such Contributor to use, reproduce, make available,
 96 |     modify, display, perform, distribute, and otherwise exploit its
 97 |     Contributions, either on an unmodified basis, with Modifications, or
 98 |     as part of a Larger Work; and
 99 | 
100 | (b) under Patent Claims of such Contributor to make, use, sell, offer
101 |     for sale, have made, import, and otherwise transfer either its
102 |     Contributions or its Contributor Version.
103 | 
104 | 2.2. Effective Date
105 | 
106 | The licenses granted in Section 2.1 with respect to any Contribution
107 | become effective for each Contribution on the date the Contributor first
108 | distributes such Contribution.
109 | 
110 | 2.3. Limitations on Grant Scope
111 | 
112 | The licenses granted in this Section 2 are the only rights granted under
113 | this License. No additional rights or licenses will be implied from the
114 | distribution or licensing of Covered Software under this License.
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
116 | Contributor:
117 | 
118 | (a) for any code that a Contributor has removed from Covered Software;
119 |     or
120 | 
121 | (b) for infringements caused by: (i) Your and any other third party's
122 |     modifications of Covered Software, or (ii) the combination of its
123 |     Contributions with other software (except as part of its Contributor
124 |     Version); or
125 | 
126 | (c) under Patent Claims infringed by Covered Software in the absence of
127 |     its Contributions.
128 | 
129 | This License does not grant any rights in the trademarks, service marks,
130 | or logos of any Contributor (except as may be necessary to comply with
131 | the notice requirements in Section 3.4).
132 | 
133 | 2.4. Subsequent Licenses
134 | 
135 | No Contributor makes additional grants as a result of Your choice to
136 | distribute the Covered Software under a subsequent version of this
137 | License (see Section 10.2) or under the terms of a Secondary License (if
138 | permitted under the terms of Section 3.3).
139 | 
140 | 2.5. Representation
141 | 
142 | Each Contributor represents that the Contributor believes its
143 | Contributions are its original creation(s) or it has sufficient rights
144 | to grant the rights to its Contributions conveyed by this License.
145 | 
146 | 2.6. Fair Use
147 | 
148 | This License is not intended to limit any rights You have under
149 | applicable copyright doctrines of fair use, fair dealing, or other
150 | equivalents.
151 | 
152 | 2.7. Conditions
153 | 
154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155 | in Section 2.1.
156 | 
157 | 3. Responsibilities
158 | -------------------
159 | 
160 | 3.1. Distribution of Source Form
161 | 
162 | All distribution of Covered Software in Source Code Form, including any
163 | Modifications that You create or to which You contribute, must be under
164 | the terms of this License. You must inform recipients that the Source
165 | Code Form of the Covered Software is governed by the terms of this
166 | License, and how they can obtain a copy of this License. You may not
167 | attempt to alter or restrict the recipients' rights in the Source Code
168 | Form.
169 | 
170 | 3.2. Distribution of Executable Form
171 | 
172 | If You distribute Covered Software in Executable Form then:
173 | 
174 | (a) such Covered Software must also be made available in Source Code
175 |     Form, as described in Section 3.1, and You must inform recipients of
176 |     the Executable Form how they can obtain a copy of such Source Code
177 |     Form by reasonable means in a timely manner, at a charge no more
178 |     than the cost of distribution to the recipient; and
179 | 
180 | (b) You may distribute such Executable Form under the terms of this
181 |     License, or sublicense it under different terms, provided that the
182 |     license for the Executable Form does not attempt to limit or alter
183 |     the recipients' rights in the Source Code Form under this License.
184 | 
185 | 3.3. Distribution of a Larger Work
186 | 
187 | You may create and distribute a Larger Work under terms of Your choice,
188 | provided that You also comply with the requirements of this License for
189 | the Covered Software. If the Larger Work is a combination of Covered
190 | Software with a work governed by one or more Secondary Licenses, and the
191 | Covered Software is not Incompatible With Secondary Licenses, this
192 | License permits You to additionally distribute such Covered Software
193 | under the terms of such Secondary License(s), so that the recipient of
194 | the Larger Work may, at their option, further distribute the Covered
195 | Software under the terms of either this License or such Secondary
196 | License(s).
197 | 
198 | 3.4. Notices
199 | 
200 | You may not remove or alter the substance of any license notices
201 | (including copyright notices, patent notices, disclaimers of warranty,
202 | or limitations of liability) contained within the Source Code Form of
203 | the Covered Software, except that You may alter any license notices to
204 | the extent required to remedy known factual inaccuracies.
205 | 
206 | 3.5. Application of Additional Terms
207 | 
208 | You may choose to offer, and to charge a fee for, warranty, support,
209 | indemnity or liability obligations to one or more recipients of Covered
210 | Software. However, You may do so only on Your own behalf, and not on
211 | behalf of any Contributor. You must make it absolutely clear that any
212 | such warranty, support, indemnity, or liability obligation is offered by
213 | You alone, and You hereby agree to indemnify every Contributor for any
214 | liability incurred by such Contributor as a result of warranty, support,
215 | indemnity or liability terms You offer. You may include additional
216 | disclaimers of warranty and limitations of liability specific to any
217 | jurisdiction.
218 | 
219 | 4. Inability to Comply Due to Statute or Regulation
220 | ---------------------------------------------------
221 | 
222 | If it is impossible for You to comply with any of the terms of this
223 | License with respect to some or all of the Covered Software due to
224 | statute, judicial order, or regulation then You must: (a) comply with
225 | the terms of this License to the maximum extent possible; and (b)
226 | describe the limitations and the code they affect. Such description must
227 | be placed in a text file included with all distributions of the Covered
228 | Software under this License. Except to the extent prohibited by statute
229 | or regulation, such description must be sufficiently detailed for a
230 | recipient of ordinary skill to be able to understand it.
231 | 
232 | 5. Termination
233 | --------------
234 | 
235 | 5.1. The rights granted under this License will terminate automatically
236 | if You fail to comply with any of its terms. However, if You become
237 | compliant, then the rights granted under this License from a particular
238 | Contributor are reinstated (a) provisionally, unless and until such
239 | Contributor explicitly and finally terminates Your grants, and (b) on an
240 | ongoing basis, if such Contributor fails to notify You of the
241 | non-compliance by some reasonable means prior to 60 days after You have
242 | come back into compliance. Moreover, Your grants from a particular
243 | Contributor are reinstated on an ongoing basis if such Contributor
244 | notifies You of the non-compliance by some reasonable means, this is the
245 | first time You have received notice of non-compliance with this License
246 | from such Contributor, and You become compliant prior to 30 days after
247 | Your receipt of the notice.
248 | 
249 | 5.2. If You initiate litigation against any entity by asserting a patent
250 | infringement claim (excluding declaratory judgment actions,
251 | counter-claims, and cross-claims) alleging that a Contributor Version
252 | directly or indirectly infringes any patent, then the rights granted to
253 | You by any and all Contributors for the Covered Software under Section
254 | 2.1 of this License shall terminate.
255 | 
256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257 | end user license agreements (excluding distributors and resellers) which
258 | have been validly granted by You or Your distributors under this License
259 | prior to termination shall survive termination.
260 | 
261 | ************************************************************************
262 | *                                                                      *
263 | *  6. Disclaimer of Warranty                                           *
264 | *  -------------------------                                           *
265 | *                                                                      *
266 | *  Covered Software is provided under this License on an "as is"       *
267 | *  basis, without warranty of any kind, either expressed, implied, or  *
268 | *  statutory, including, without limitation, warranties that the       *
269 | *  Covered Software is free of defects, merchantable, fit for a        *
270 | *  particular purpose or non-infringing. The entire risk as to the     *
271 | *  quality and performance of the Covered Software is with You.        *
272 | *  Should any Covered Software prove defective in any respect, You     *
273 | *  (not any Contributor) assume the cost of any necessary servicing,   *
274 | *  repair, or correction. This disclaimer of warranty constitutes an   *
275 | *  essential part of this License. No use of any Covered Software is   *
276 | *  authorized under this License except under this disclaimer.         *
277 | *                                                                      *
278 | ************************************************************************
279 | 
280 | ************************************************************************
281 | *                                                                      *
282 | *  7. Limitation of Liability                                          *
283 | *  --------------------------                                          *
284 | *                                                                      *
285 | *  Under no circumstances and under no legal theory, whether tort      *
286 | *  (including negligence), contract, or otherwise, shall any           *
287 | *  Contributor, or anyone who distributes Covered Software as          *
288 | *  permitted above, be liable to You for any direct, indirect,         *
289 | *  special, incidental, or consequential damages of any character      *
290 | *  including, without limitation, damages for lost profits, loss of    *
291 | *  goodwill, work stoppage, computer failure or malfunction, or any    *
292 | *  and all other commercial damages or losses, even if such party      *
293 | *  shall have been informed of the possibility of such damages. This   *
294 | *  limitation of liability shall not apply to liability for death or   *
295 | *  personal injury resulting from such party's negligence to the       *
296 | *  extent applicable law prohibits such limitation. Some               *
297 | *  jurisdictions do not allow the exclusion or limitation of           *
298 | *  incidental or consequential damages, so this exclusion and          *
299 | *  limitation may not apply to You.                                    *
300 | *                                                                      *
301 | ************************************************************************
302 | 
303 | 8. Litigation
304 | -------------
305 | 
306 | Any litigation relating to this License may be brought only in the
307 | courts of a jurisdiction where the defendant maintains its principal
308 | place of business and such litigation shall be governed by laws of that
309 | jurisdiction, without reference to its conflict-of-law provisions.
310 | Nothing in this Section shall prevent a party's ability to bring
311 | cross-claims or counter-claims.
312 | 
313 | 9. Miscellaneous
314 | ----------------
315 | 
316 | This License represents the complete agreement concerning the subject
317 | matter hereof. If any provision of this License is held to be
318 | unenforceable, such provision shall be reformed only to the extent
319 | necessary to make it enforceable. Any law or regulation which provides
320 | that the language of a contract shall be construed against the drafter
321 | shall not be used to construe this License against a Contributor.
322 | 
323 | 10. Versions of the License
324 | ---------------------------
325 | 
326 | 10.1. New Versions
327 | 
328 | Mozilla Foundation is the license steward. Except as provided in Section
329 | 10.3, no one other than the license steward has the right to modify or
330 | publish new versions of this License. Each version will be given a
331 | distinguishing version number.
332 | 
333 | 10.2. Effect of New Versions
334 | 
335 | You may distribute the Covered Software under the terms of the version
336 | of the License under which You originally received the Covered Software,
337 | or under the terms of any subsequent version published by the license
338 | steward.
339 | 
340 | 10.3. Modified Versions
341 | 
342 | If you create software not governed by this License, and you want to
343 | create a new license for such software, you may create and use a
344 | modified version of this License if you rename the license and remove
345 | any references to the name of the license steward (except to note that
346 | such modified license differs from this License).
347 | 
348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
349 | Licenses
350 | 
351 | If You choose to distribute Source Code Form that is Incompatible With
352 | Secondary Licenses under the terms of this version of the License, the
353 | notice described in Exhibit B of this License must be attached.
354 | 
355 | Exhibit A - Source Code Form License Notice
356 | -------------------------------------------
357 | 
358 |   This Source Code Form is subject to the terms of the Mozilla Public
359 |   License, v. 2.0. If a copy of the MPL was not distributed with this
360 |   file, You can obtain one at http://mozilla.org/MPL/2.0/.
361 | 
362 | If it is not possible or desirable to put the notice in a particular
363 | file, then You may include the notice in a location (such as a LICENSE
364 | file in a relevant directory) where a recipient would be likely to look
365 | for such a notice.
366 | 
367 | You may add additional accurate notices of copyright ownership.
368 | 
369 | Exhibit B - "Incompatible With Secondary Licenses" Notice
370 | ---------------------------------------------------------
371 | 
372 |   This Source Code Form is "Incompatible With Secondary Licenses", as
373 |   defined by the Mozilla Public License, v. 2.0.
374 | 


--------------------------------------------------------------------------------
/README.mkdn:
--------------------------------------------------------------------------------
 1 | # debugdb: a program analysis crate
 2 | 
 3 | This is an attempt at a general debug information loading and analysis crate. It
 4 | can:
 5 | 
 6 | - Read the DWARF information for a subset of programs (primarily focused on Rust
 7 |   but with some C support).
 8 | - Answer questions about it.
 9 | - Provide basic reflection of data structures within a program memory image,
10 |   e.g. load a struct into a map-like type given the ID of a type in the program.
11 | 
12 | For a somewhat hacked-up but detailed example of what you can do with this
13 | crate, see `src/bin/tysh.rs`, which provides an interactive shell for answering
14 | questions about types in a program. Wondering about the concrete memory layout
15 | of a Rust enum in your program? `tysh` will literally draw you a picture of it.
16 | 
17 | # Focus
18 | 
19 | This is mostly focused on embedded Rust programs, specifically those built with
20 | release settings and LTO (because doing so dramatically simplifies the debug
21 | info). It has limited support for:
22 | 
23 | - Unix programs,
24 | - C programs, and
25 | - Type unification across compile units for programs not built with LTO.
26 | 
27 | # Hack alert
28 | 
29 | Significant portions of this crate were written after consuming too much
30 | caffeine and immersing myself in the DWARF spec, which is a document that tends
31 | to warp your mind. This code could use a good rewritin' and refactorin', but
32 | unfortunately it works pretty well, which has reduced my motivation to do so.
33 | Wanna help?
34 | 
35 | # This crate attempts to be strict
36 | 
37 | This crate may crash when you feed it a program. Believe it or not, this is
38 | deliberate! In my experience a lot of DWARF loaders will silently skip over
39 | information they don't understand, which can cause them to miss important
40 | portions of the DWARF spec or misinterpret certain programs.
41 | 
42 | This crate attempts to make a one-sided error in the other direction:
43 | 
44 | - If it can load a program, its interpretation is probably correct. (If not,
45 |   report a bug please!)
46 | - If it hits a DWARF feature, or use of a DWARF feature, that it doesn't either
47 |   explicitly support or explicitly ignore, it will reject the program.
48 | 
49 | This can be kind of annoying, particularly since compilers don't bother to
50 | version their debug info, so small changes can require updates to this crate.
51 | However, it also means this crate gets a bunch of things _right_ that I've seen
52 | others get wrong.
53 | 
54 | That's not to suggest the crate is perfect. This crate almost certainly contains
55 | bugs, because the DWARF spec is huge and ambiguous, and compilers are
56 | inconsistent about whether they actually follow it. But it means the bugs fall
57 | into two categories:
58 | 
59 | - If a program loads, and the interpretation is wrong, there's a serious and
60 |   genuine bug in the crate's logic.
61 | 
62 | - If a program does not load, extending this crate to cover it is more of a
63 |   feature request than a bug. Which is not to suggest it's not important -- just
64 |   that the sort of work required will be different.
65 | 


--------------------------------------------------------------------------------
/src/bin/just-parse.rs:
--------------------------------------------------------------------------------
 1 | use anyhow::Result;
 2 | use clap::Parser;
 3 | 
 4 | #[derive(Debug, Parser)]
 5 | struct Sketch { // command-line arguments, parsed by clap via `Sketch::parse()` in `main`
 6 |     filename: std::path::PathBuf, // path of the file `main` reads and parses as an object file
 7 | }
 8 | 
 9 | /// Reads the file named on the command line, parses it as an object file,
10 | /// and runs `debugdb::parse_file` over it. The parse result is discarded;
11 | /// only a failure is surfaced, through the returned error.
12 | fn main() -> Result<()> {
13 |     let Sketch { filename } = Sketch::parse();
14 |     let raw = std::fs::read(filename)?;
15 |     let parsed = object::File::parse(raw.as_slice())?;
16 |     debugdb::parse_file(&parsed)?;
17 |     Ok(())
18 | }
19 | 


--------------------------------------------------------------------------------
/src/bin/tysh.rs:
--------------------------------------------------------------------------------
   1 | use std::{fmt::Display, io::BufRead};
   2 | 
   3 | use anyhow::Result;
   4 | use clap::Parser;
   5 | use debugdb::value::ValueWithDb;
   6 | use object::{Object, ObjectSegment};
   7 | use rangemap::{RangeMap, RangeInclusiveMap};
   8 | 
   9 | use debugdb::{Type, Encoding, TypeId, Struct, Member, DebugDb, Enum, VariantShape, value::Value};
  10 | use debugdb::load::{Load, ImgMachine};
  11 | use regex::Regex;
  12 | 
  13 | #[derive(Debug, Parser)]
  14 | struct TySh { // command-line arguments, parsed by clap via `TySh::parse()` in `main`
  15 |     filename: std::path::PathBuf, // path of the program file `main` loads and inspects
  16 | }
  17 | 
  18 | /// Interactive type shell: loads the program named on the command line,
  19 | /// then runs commands typed at the prompt until `exit` or a read error.
  20 | fn main() -> Result<()> {
  21 |     let args = TySh::parse();
  22 | 
  23 |     let buffer = std::fs::read(args.filename)?;
  24 |     let object = object::File::parse(&*buffer)?;
  25 | 
  26 |     // Copy every non-empty segment, keyed by the inclusive address range it
  27 |     // covers. Empty ones are skipped; `size() - 1` would underflow for them.
  28 |     let mut segments = RangeInclusiveMap::new();
  29 |     for seg in object.segments().filter(|seg| seg.size() != 0) {
  30 |         let first = seg.address();
  31 |         segments.insert(first..=first + (seg.size() - 1), seg.data()?.to_vec());
  32 |     }
  33 |     let everything = debugdb::parse_file(&object)?;
  34 | 
  35 |     println!("Loaded; {} types found in program.", everything.type_count());
  36 |     println!("To quit: ^D or exit");
  37 | 
  38 |     let mut rl = rustyline::Editor::<(), _>::new()?;
  39 |     let prompt = ansi_term::Colour::Green.paint(">> ").to_string();
  40 |     let mut ctx = Ctx { segments };
  41 |     loop {
  42 |         let line = match rl.readline(&prompt) {
  43 |             Ok(line) => line,
  44 |             // ^C abandons the current line but keeps the shell running.
  45 |             Err(rustyline::error::ReadlineError::Interrupted) => {
  46 |                 println!("^C");
  47 |                 continue;
  48 |             }
  49 |             // Any other read error is printed and ends the session.
  50 |             Err(e) => {
  51 |                 println!("{:?}", e);
  52 |                 break;
  53 |             }
  54 |         };
  55 |         let line = line.trim();
  56 |         if line.is_empty() {
  57 |             continue;
  58 |         }
  59 |         rl.add_history_entry(line)?;
  60 | 
  61 |         // The first word selects the command. Extra whitespace before the
  62 |         // arguments is dropped, so `list   Foo` passes `Foo` rather than
  63 |         // `  Foo` to the command.
  64 |         let (cmd, rest) = match line.split_once(char::is_whitespace) {
  65 |             Some((cmd, rest)) => (cmd, rest.trim_start()),
  66 |             None => (line, ""),
  67 |         };
  68 |         match cmd {
  69 |             "exit" => break,
  70 |             "help" => {
  71 |                 println!("commands:");
  72 |                 let name_len = COMMANDS.iter()
  73 |                     .map(|(name, _, _)| name.len())
  74 |                     .max()
  75 |                     .unwrap_or(12);
  76 |                 for (name, _, desc) in COMMANDS {
  77 |                     println!("{:name_len$} {}", name, desc);
  78 |                 }
  79 |             }
  80 |             _ => match COMMANDS.iter().find(|(name, _, _)| *name == cmd) {
  81 |                 Some((_, imp, _)) => imp(&everything, &mut ctx, rest),
  82 |                 None => {
  83 |                     println!("unknown command: {}", cmd);
  84 |                     println!("for help, try: help");
  85 |                 }
  86 |             },
  87 |         }
  88 |     }
  89 |     Ok(())
  90 | }
  91 | 
  92 | /// Prints a DWARF section offset as `<.debug_info+0x…>` or
  93 | /// `<.debug_types+0x…>`, with the offset as zero-padded 8-digit hex.
  94 | struct Goff(gimli::UnitSectionOffset);
  95 | 
  96 | impl std::fmt::Display for Goff {
  97 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
  98 |         use gimli::UnitSectionOffset::{DebugInfoOffset, DebugTypesOffset};
  99 |         // Each variant wraps a same-named gimli offset newtype; unwrap both.
 100 |         match self.0 {
 101 |             DebugInfoOffset(gimli::DebugInfoOffset(off)) => write!(f, "<.debug_info+0x{:08x}>", off),
 102 |             DebugTypesOffset(gimli::DebugTypesOffset(off)) => write!(f, "<.debug_types+0x{:08x}>", off),
 103 |         }
 104 |     }
 105 | }
 106 | 
 107 | /// Pairs a type ID with its database so the type can be printed as its
 108 | /// name in bold, followed by its dimmed debug-section offset.
 109 | struct NamedGoff<'a>(&'a debugdb::DebugDb, TypeId);
 110 | 
 111 | impl std::fmt::Display for NamedGoff<'_> {
 112 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 113 |         let NamedGoff(db, id) = self;
 114 |         let label = db
 115 |             .type_name(*id)
 116 |             .unwrap_or_else(|| "<anonymous type>".into());
 117 |         write!(f, "{}", ansi_term::Style::new().bold().paint(label))?;
 118 | 
 119 |         // The offset is wrapped in the dimmed style's raw prefix/suffix.
 120 |         let dim = ansi_term::Style::new().dimmed();
 121 |         match id.0 {
 122 |             gimli::UnitSectionOffset::DebugInfoOffset(gimli::DebugInfoOffset(off)) => {
 123 |                 write!(f, " {}<.debug_info+0x{:08x}>{}", dim.prefix(), off, dim.suffix())
 124 |             }
 125 |             gimli::UnitSectionOffset::DebugTypesOffset(gimli::DebugTypesOffset(off)) => {
 126 |                 write!(f, " {}<.debug_types+0x{:08x}>{}", dim.prefix(), off, dim.suffix())
 127 |             }
 128 |         }
 129 |     }
 130 | }
 131 | 
 132 | struct Ctx { // mutable shell state that `main` hands to every command
 133 |     segments: RangeInclusiveMap<u64, Vec<u8>>, // segment bytes keyed by inclusive address range; filled from the object file in `main`
 134 | }
 135 | 
 136 | type Command = fn(&debugdb::DebugDb, &mut Ctx, &str); // command handler: (database, shell state, argument text after the command name)
 137 | 
 138 | static COMMANDS: &[(&str, Command, &str)] = &[ // rows of (name, handler, help text); `main` matches the name exactly and prints the help text for `help`
 139 |     ("list", cmd_list, "print names of ALL types, or types containing a string"),
 140 |     ("info", cmd_info, "print a summary of a type"),
 141 |     ("load", cmd_load, "loads additional segment data"),
 142 |     ("def", cmd_def, "print a type as a pseudo-Rust definition"),
 143 |     ("sizeof", cmd_sizeof, "print size of type in bytes"),
 144 |     ("alignof", cmd_alignof, "print alignment of type in bytes"),
 145 |     ("addr", cmd_addr, "look up information about an address"),
 146 |     ("addr2line", cmd_addr2line, "look up line number information"),
 147 |     ("addr2stack", cmd_addr2stack, "display inlined stack frames"),
 148 |     ("vars", cmd_vars, "list static variables"),
 149 |     ("var", cmd_var, "get info on a static variable"),
 150 |     ("unwind", cmd_unwind, "get unwind info for an address"),
 151 |     ("decode", cmd_decode, "interpret RAM/ROM as a type"),
 152 |     ("decode-async", cmd_decode_async, "interpret RAM/ROM as a suspended future"),
 153 |     ("decode-blob", cmd_decode_blob, "attempt to interpret bytes as a type"),
 154 |     ("decode-async-blob", cmd_decode_async_blob, "attempt to interpret bytes as a suspended future"),
 155 | ];
 156 | 
 157 | /// `list` command: prints every canonical type, sorted by name, keeping
 158 | /// only types whose name contains `args` when `args` is non-empty.
 159 | fn cmd_list(
 160 |     db: &debugdb::DebugDb,
 161 |     _ctx: &mut Ctx,
 162 |     args: &str,
 163 | ) {
 164 |     // An empty filter keeps everything, unnamed types included; a
 165 |     // non-empty filter can only match types that have a name.
 166 |     let mut listing: Vec<_> = db
 167 |         .canonical_types()
 168 |         .filter(|(goff, _ty)| {
 169 |             args.is_empty()
 170 |                 || db.type_name(*goff).map_or(false, |name| name.contains(args))
 171 |         })
 172 |         .collect();
 173 | 
 174 |     // Sort the collected copy by name, because alphabetical order is polite.
 175 |     listing.sort_by_key(|(goff, _ty)| db.type_name(*goff));
 176 | 
 177 |     for (goff, ty) in listing {
 178 |         // Short tag naming the kind of type, printed in a 6-column field.
 179 |         let kind = match ty {
 180 |             Type::Base(_) => "base",
 181 |             Type::Struct(_) => "struct",
 182 |             Type::Enum(_) => "enum",
 183 |             Type::CEnum(_) => "c-enum",
 184 |             Type::Array(_) => "array",
 185 |             Type::Pointer(_) => "ptr",
 186 |             Type::Union(_) => "union",
 187 |             Type::Subroutine(_) => "subr",
 188 |             Type::Unresolved(_) => "missing",
 189 |         };
 190 |         let shown = NamedGoff(db, goff);
 191 |         // Mention the alias count only when the database reports aliases.
 192 |         match db.aliases_of_type(goff) {
 193 |             Some(aliases) => {
 194 |                 println!("{:6} {} ({} aliases)", kind, shown, aliases.len())
 195 |             }
 196 |             None => println!("{:6} {}", kind, shown),
 197 |         }
 198 |     }
 199 | }
 200 | 
 201 | fn parse_type_name(s: &str) -> Option<ParsedTypeName<'_>> {
 202 |     if s.starts_with("<.debug_") && s.ends_with('>') {
 203 |         // Try parsing as a debug section reference.
 204 |         let rest = &s[8..];
 205 |         return if rest.starts_with("info+0x") {
 206 |             let num = &rest[7..rest.len() - 1];
 207 |             if let Ok(n) = usize::from_str_radix(num, 16) {
 208 |                 Some(ParsedTypeName::Goff(TypeId(gimli::DebugInfoOffset(n).into())))
 209 |             } else {
 210 |                 println!("can't parse {} as hex", num);
 211 |                 None
 212 |             }
 213 |         } else if rest.starts_with("types+0x") {
 214 |             let num = &rest[8..rest.len() - 1];
 215 |             if let Ok(n) = usize::from_str_radix(num, 16) {
 216 |                 Some(ParsedTypeName::Goff(TypeId(gimli::DebugTypesOffset(n).into())))
 217 |             } else {
 218 |                 println!("can't parse {} as hex", num);
 219 |                 None
 220 |             }
 221 |         } else {
 222 |             println!("bad offset reference: {}", s);
 223 |             None
 224 |         };
 225 |     }
 226 | 
 227 |     Some(ParsedTypeName::Name(s))
 228 | }
 229 | 
/// Result of parsing a user-supplied type reference (see `parse_type_name`).
enum ParsedTypeName<'a> {
    /// A plain type name, borrowed from the input string.
    Name(&'a str),
    /// An explicit reference to a type by its debug-section offset.
    Goff(TypeId),
}
 234 | 
 235 | fn simple_query_cmd(
 236 |     db: &debugdb::DebugDb,
 237 |     args: &str,
 238 |     q: fn(&debugdb::DebugDb, &debugdb::Type),
 239 | ) {
 240 |     let type_name = args.trim();
 241 |     let types: Vec<_> = match parse_type_name(type_name) {
 242 |         None => return,
 243 |         Some(ParsedTypeName::Name(n)) => {
 244 |             db.types_by_name(n).collect()
 245 |         }
 246 |         Some(ParsedTypeName::Goff(o)) => {
 247 |             db.type_by_id(o).into_iter()
 248 |                 .map(|t| (o, t))
 249 |                 .collect()
 250 |         }
 251 |     };
 252 |     if type_name.starts_with("<.debug_") && type_name.ends_with('>') {
 253 |         // Try parsing as a debug section reference.
 254 |         let rest = &type_name[8..];
 255 |         if rest.starts_with("info+0x") {
 256 |             // TODO what was I doing  here
 257 |         } else if rest.starts_with("types+0x") {
 258 |             // TODO no seriously
 259 |         }
 260 |     }
 261 | 
 262 |     let many = match types.len() {
 263 |         0 => {
 264 |             println!("{}", ansi_term::Colour::Red.paint("No types found."));
 265 |             return;
 266 |         }
 267 |         1 => false,
 268 |         n => {
 269 |             println!("{}{} types found with that name:",
 270 |                 ansi_term::Color::Yellow.paint("note: "),
 271 |                 n,
 272 |             );
 273 |             true
 274 |         }
 275 |     };
 276 | 
 277 |     for (goff, t) in types {
 278 |         if many { println!() }
 279 |         print!("{}: ", NamedGoff(db, goff));
 280 |         q(db, t);
 281 |     }
 282 | }
 283 | 
 284 | fn cmd_info(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) {
 285 |     simple_query_cmd(db, args, |db, t| {
 286 |         match t {
 287 |             Type::Base(s) => {
 288 |                 println!("base type");
 289 |                 println!("- encoding: {:?}", s.encoding);
 290 |                 println!("- byte size: {}", s.byte_size);
 291 |             }
 292 |             Type::Pointer(s) => {
 293 |                 println!("pointer type");
 294 |                 println!("- points to: {}", NamedGoff(db, s.type_id));
 295 |             }
 296 |             Type::Array(s) => {
 297 |                 println!("array type");
 298 |                 println!("- element type: {}", NamedGoff(db, s.element_type_id));
 299 |                 println!("- lower bound: {}", s.lower_bound);
 300 |                 if let Some(n) = s.count {
 301 |                     println!("- count: {}", n);
 302 |                 } else {
 303 |                     println!("- size not given");
 304 |                 }
 305 |             }
 306 |             Type::Struct(s) => {
 307 |                 if s.tuple_like {
 308 |                     println!("struct type (tuple-like)");
 309 |                 } else {
 310 |                     println!("struct type");
 311 |                 }
 312 |                 if s.decl_coord.is_useful() {
 313 |                     print!("- declared at: {}", s.decl_coord.file.as_deref().unwrap_or("???"));
 314 |                     if let Some(n) = s.decl_coord.line {
 315 |                         print!(":{n}");
 316 |                     } else {
 317 |                         print!(":???");
 318 |                     }
 319 |                     // Be more tolerant of missing column number.
 320 |                     if let Some(n) = s.decl_coord.column {
 321 |                         print!(":{n}");
 322 |                     }
 323 |                     println!();
 324 |                 }
 325 |                 if let Some(z) = s.byte_size {
 326 |                     println!("- byte size: {z}");
 327 |                 }
 328 |                 if let Some(a) = s.alignment {
 329 |                     println!("- alignment: {}", a);
 330 |                 } else {
 331 |                     println!("- not aligned");
 332 |                 }
 333 |                 if !s.template_type_parameters.is_empty() {
 334 |                     println!("- template type parameters:");
 335 |                     for ttp in &s.template_type_parameters {
 336 |                         println!("  - {} = {}", ttp.name, NamedGoff(db, ttp.type_id));
 337 |                     }
 338 |                 }
 339 |                 if !s.members.is_empty() {
 340 |                     println!("- members:");
 341 |                     for (i, mem) in s.members.iter().enumerate() {
 342 |                         if let Some(name) = &mem.name {
 343 |                             println!("  {i}. {name}: {}", NamedGoff(db, mem.type_id));
 344 |                         } else {
 345 |                             println!("  - <unnamed>: {}", NamedGoff(db, mem.type_id));
 346 |                         }
 347 |                         println!("    - offset: {} bytes", mem.location);
 348 |                         if let Some(s) = db.type_by_id(mem.type_id).unwrap().byte_size(db) {
 349 |                             println!("    - size: {} bytes", s);
 350 |                         }
 351 |                         if let Some(a) = mem.alignment {
 352 |                             println!("    - aligned: {} bytes", a);
 353 |                         }
 354 |                         if mem.artificial {
 355 |                             println!("    - artificial");
 356 |                         }
 357 |                     }
 358 |                 } else {
 359 |                     println!("- no members");
 360 |                 }
 361 | 
 362 |                 struct_picture(db, s, db.pointer_size());
 363 |             }
 364 |             Type::Enum(s) => {
 365 |                 println!("enum type");
 366 |                 if let Some(z) = s.byte_size {
 367 |                     println!("- byte size: {z}");
 368 |                 }
 369 |                 if let Some(a) = s.alignment {
 370 |                     println!("- alignment: {}", a);
 371 |                 } else {
 372 |                     println!("- not aligned");
 373 |                 }
 374 |                 if !s.template_type_parameters.is_empty() {
 375 |                     println!("- type parameters:");
 376 |                     for ttp in &s.template_type_parameters {
 377 |                         println!("  - {} = {}", ttp.name, NamedGoff(db, ttp.type_id));
 378 |                     }
 379 |                 }
 380 | 
 381 |                 match &s.shape {
 382 |                     debugdb::VariantShape::Zero => {
 383 |                         println!("- empty (uninhabited) enum");
 384 |                     }
 385 |                     debugdb::VariantShape::One(v) => {
 386 |                         println!("- single variant enum w/o discriminator");
 387 |                         println!("  - content type: {}", NamedGoff(db, v.member.type_id));
 388 |                         println!("  - offset: {} bytes", v.member.location);
 389 |                         if let Some(a) = v.member.alignment {
 390 |                             println!("  - aligned: {} bytes", a);
 391 |                         }
 392 |                         if !v.member.artificial {
 393 |                             println!("  - not artificial, oddly");
 394 |                         }
 395 |                     }
 396 |                     debugdb::VariantShape::Many { member, variants, .. }=> {
 397 |                         if let Some(dname) = db.type_name(member.type_id) {
 398 |                             println!("- {} variants discriminated by {} at offset {}", variants.len(), dname, member.location);
 399 |                         } else {
 400 |                             println!("- {} variants discriminated by an anonymous type at offset {}", variants.len(), member.location);
 401 |                         }
 402 |                         if !member.artificial {
 403 |                             println!("  - not artificial, oddly");
 404 |                         }
 405 |                         
 406 |                         // Print explicit values first
 407 |                         for (val, var) in variants {
 408 |                             if let Some(val) = val {
 409 |                                 println!("- when discriminator == {}", val);
 410 |                                 println!("  - contains type: {}", NamedGoff(db, var.member.type_id));
 411 |                                 println!("  - at offset: {} bytes", var.member.location);
 412 |                                 if let Some(a) = var.member.alignment {
 413 |                                     println!("  - aligned: {} bytes", a);
 414 |                                 }
 415 |                             }
 416 |                         }
 417 |                         // Now, default.
 418 |                         for (val, var) in variants {
 419 |                             if val.is_none() {
 420 |                                 println!("- any other discriminator value");
 421 |                                 println!("  - contains type: {}", NamedGoff(db, var.member.type_id));
 422 |                                 println!("  - at offset: {} bytes", var.member.location);
 423 |                                 if let Some(a) = var.member.alignment {
 424 |                                     println!("  - aligned: {} bytes", a);
 425 |                                 }
 426 |                             }
 427 |                         }
 428 |                     }
 429 |                 }
 430 |                 enum_picture(db, s, db.pointer_size());
 431 |             }
 432 |             Type::CEnum(s) => {
 433 |                 println!("C-like enum type");
 434 |                 println!("- representation: {}", NamedGoff(db, s.repr_type_id));
 435 |                 println!("- byte size: {}", s.byte_size);
 436 |                 if let Some(a) = s.alignment {
 437 |                     println!("- alignment: {a}");
 438 |                 }
 439 |                 println!("- {} values defined", s.enumerators.len());
 440 |                 for e in s.enumerators.values() {
 441 |                     println!("  - {} = 0x{:x}", e.name, e.const_value);
 442 | 
 443 |                 }
 444 |             }
 445 |             Type::Union(s) => {
 446 |                 println!("union type");
 447 |                 println!("- byte size: {}", s.byte_size);
 448 |                 println!("- alignment: {}", s.alignment);
 449 |                 if !s.template_type_parameters.is_empty() {
 450 |                     println!("- template type parameters:");
 451 |                     for ttp in &s.template_type_parameters {
 452 |                         println!("  - {} = {}", ttp.name, NamedGoff(db, ttp.type_id));
 453 |                     }
 454 |                 }
 455 |                 if !s.members.is_empty() {
 456 |                     println!("- members:");
 457 |                     for mem in &s.members {
 458 |                         if let Some(name) = &mem.name {
 459 |                             println!("  - {}: {}", name, NamedGoff(db, mem.type_id));
 460 |                         } else {
 461 |                             println!("  - <unnamed>: {}", NamedGoff(db, mem.type_id));
 462 |                         }
 463 |                         println!("    - offset: {} bytes", mem.location);
 464 |                         if let Some(a) = mem.alignment {
 465 |                             println!("    - aligned: {} bytes", a);
 466 |                         }
 467 |                         if mem.artificial {
 468 |                             println!("    - artificial");
 469 |                         }
 470 |                     }
 471 |                 } else {
 472 |                     println!("- no members");
 473 |                 }
 474 |             }
 475 |             Type::Subroutine(s) => {
 476 |                 println!("subroutine type");
 477 |                 if let Some(rt) = s.return_type_id {
 478 |                     println!("- return type: {}", NamedGoff(db, rt));
 479 |                 }
 480 |                 if !s.formal_parameters.is_empty() {
 481 |                     println!("- formal parameters:");
 482 |                     for &fp in &s.formal_parameters {
 483 |                         println!("  - {}", NamedGoff(db, fp));
 484 |                     }
 485 |                 }
 486 |             }
 487 |             Type::Unresolved(_) => {
 488 |                 println!("type not found in debug info!");
 489 |             }
 490 |         }
 491 |     })
 492 | }
 493 | 
 494 | fn cmd_sizeof(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) {
 495 |     simple_query_cmd(db, args, |db, t| {
 496 |         if let Some(sz) = t.byte_size(db) {
 497 |             println!("{} bytes", sz);
 498 |         } else {
 499 |             println!("unsized");
 500 |         }
 501 |     })
 502 | }
 503 | 
 504 | fn cmd_alignof(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) {
 505 |     simple_query_cmd(db, args, |db, t| {
 506 |         if let Some(sz) = t.alignment(db) {
 507 |             println!("align to {} bytes", sz);
 508 |         } else {
 509 |             println!("no alignment information");
 510 |         }
 511 |     })
 512 | }
 513 | 
 514 | fn cmd_def(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) {
 515 |     simple_query_cmd(db, args, |db, t| {
 516 |         println!();
 517 |         match t {
 518 |             Type::Base(s) => {
 519 |                 print!("type _ = ");
 520 |                 match (s.encoding, s.byte_size) {
 521 |                     (_, 0) => print!("()"),
 522 |                     (Encoding::Unsigned, 1) => print!("u8"),
 523 |                     (Encoding::Unsigned, 2) => print!("u16"),
 524 |                     (Encoding::Unsigned, 4) => print!("u32"),
 525 |                     (Encoding::Unsigned, 8) => print!("u64"),
 526 |                     (Encoding::Unsigned, 16) => print!("u128"),
 527 |                     (Encoding::Signed, 1) => print!("i8"),
 528 |                     (Encoding::Signed, 2) => print!("i16"),
 529 |                     (Encoding::Signed, 4) => print!("i32"),
 530 |                     (Encoding::Signed, 8) => print!("i64"),
 531 |                     (Encoding::Signed, 16) => print!("i128"),
 532 |                     (Encoding::Float, 4) => print!("f32"),
 533 |                     (Encoding::Float, 8) => print!("f64"),
 534 |                     (Encoding::Boolean, 1) => print!("bool"),
 535 |                     (Encoding::UnsignedChar, 1) => print!("c_uchar"),
 536 |                     (Encoding::SignedChar, 1) => print!("c_schar"),
 537 |                     (Encoding::UtfChar, 4) => print!("char"),
 538 | 
 539 |                     (e, s) => print!("Unhandled{:?}{}", e, s),
 540 |                 }
 541 |                 println!(";");
 542 |             }
 543 |             Type::Pointer(_s) => {
 544 |                 print!("type _ = {};", t.name(db));
 545 |             }
 546 |             Type::Array(s) => {
 547 |                 let name = db.type_name(s.element_type_id).unwrap();
 548 |                 if let Some(n) = s.count {
 549 |                     println!("[{}; {}]", name, n);
 550 |                 } else {
 551 |                     println!("[{}]", name);
 552 |                 }
 553 |             }
 554 |             Type::Struct(s) => {
 555 |                 print!("struct {}", s.name);
 556 | 
 557 |                 if !s.template_type_parameters.is_empty() {
 558 |                     print!("<");
 559 |                     for ttp in &s.template_type_parameters {
 560 |                         print!("{},", ttp.name);
 561 |                     }
 562 |                     print!(">");
 563 |                 }
 564 |                 
 565 |                 if s.members.is_empty() {
 566 |                     println!(";");
 567 |                 } else if s.tuple_like {
 568 |                     println!("(");
 569 |                     for mem in &s.members {
 570 |                         println!("    {},", db.type_name(mem.type_id).unwrap());
 571 |                     }
 572 |                     println!(");");
 573 |                 } else {
 574 |                     println!(" {{");
 575 |                     for mem in &s.members {
 576 |                         if let Some(name) = &mem.name {
 577 |                             println!("    {}: {},", name, db.type_name(mem.type_id).unwrap());
 578 |                         } else {
 579 |                             println!("    ANON: {},", db.type_name(mem.type_id).unwrap());
 580 |                         }
 581 |                     }
 582 |                     println!("}}");
 583 |                 }
 584 |             }
 585 |             Type::Enum(s) => {
 586 |                 print!("enum {}", s.name);
 587 |                 if !s.template_type_parameters.is_empty() {
 588 |                     print!("<");
 589 |                     for ttp in &s.template_type_parameters {
 590 |                         print!("{}", ttp.name);
 591 |                     }
 592 |                     print!(">");
 593 |                 }
 594 |                 println!(" {{");
 595 | 
 596 |                 match &s.shape {
 597 |                     debugdb::VariantShape::Zero => (),
 598 |                     debugdb::VariantShape::One(var) => {
 599 |                         if let Some(name) = &var.member.name {
 600 |                             print!("    {}", name);
 601 |                         } else {
 602 |                             print!("    ANON");
 603 |                         }
 604 | 
 605 |                         let mty = db.type_by_id(var.member.type_id)
 606 |                             .unwrap();
 607 |                         if let Type::Struct(s) = mty {
 608 |                             if !s.members.is_empty() {
 609 |                                 if s.tuple_like {
 610 |                                     println!("(");
 611 |                                     for mem in &s.members {
 612 |                                         let mtn = db.type_name(mem.type_id).unwrap();
 613 |                                         println!("        {},", mtn);
 614 |                                     }
 615 |                                     print!("    )");
 616 |                                 } else {
 617 |                                     println!(" {{");
 618 |                                     for mem in &s.members {
 619 |                                         let mtn = db.type_name(mem.type_id).unwrap();
 620 |                                         println!("        {}: {},", mem.name.as_ref().unwrap(), mtn);
 621 |                                     }
 622 |                                     print!("    }}");
 623 |                                 }
 624 |                             }
 625 |                         } else {
 626 |                             print!("(unexpected weirdness)");
 627 |                         }
 628 | 
 629 |                         println!(",");
 630 |                     }
 631 |                     debugdb::VariantShape::Many { variants, .. }=> {
 632 |                         for var in variants.values() {
 633 |                             if let Some(name) = &var.member.name {
 634 |                                 print!("    {}", name);
 635 |                             } else {
 636 |                                 print!("    ANON");
 637 |                             }
 638 | 
 639 |                             let mty = db.type_by_id(var.member.type_id)
 640 |                                 .unwrap();
 641 |                             if let Type::Struct(s) = mty {
 642 |                                 if !s.members.is_empty() {
 643 |                                     if s.tuple_like {
 644 |                                         println!("(");
 645 |                                         for mem in &s.members {
 646 |                                             let mtn = db.type_name(mem.type_id).unwrap();
 647 |                                             println!("        {},", mtn);
 648 |                                         }
 649 |                                         print!("    )");
 650 |                                     } else {
 651 |                                         println!(" {{");
 652 |                                         for mem in &s.members {
 653 |                                             let mtn = db.type_name(mem.type_id).unwrap();
 654 |                                             println!("        {}: {},", mem.name.as_ref().unwrap(), mtn);
 655 |                                         }
 656 |                                         print!("    }}");
 657 |                                     }
 658 |                                 }
 659 |                             } else {
 660 |                                 print!("(unexpected weirdness)");
 661 |                             }
 662 | 
 663 |                             println!(",");
 664 |                         }
 665 |                     }
 666 |                 }
 667 |                 println!("}}");
 668 | 
 669 |             }
 670 |             Type::CEnum(s) => {
 671 |                 println!("enum {} {{", s.name);
 672 |                 for (val, e) in &s.enumerators {
 673 |                     println!("    {} = 0x{:x},", e.name, val);
 674 |                 }
 675 |                 println!("}}");
 676 |             }
 677 |             Type::Union(s) => {
 678 |                 print!("union {}", s.name);
 679 | 
 680 |                 if !s.template_type_parameters.is_empty() {
 681 |                     print!("<");
 682 |                     for ttp in &s.template_type_parameters {
 683 |                         print!("{},", ttp.name);
 684 |                     }
 685 |                     print!(">");
 686 |                 }
 687 | 
 688 |                 println!(" {{");
 689 |                 for mem in &s.members {
 690 |                     if let Some(name) = &mem.name {
 691 |                         println!("    {}: {},", name, db.type_name(mem.type_id).unwrap());
 692 |                     } else {
 693 |                         println!("    ANON: {},", db.type_name(mem.type_id).unwrap());
 694 |                     }
 695 |                 }
 696 |                 println!("}}");
 697 |             }
 698 |             Type::Subroutine(s) => {
 699 |                 println!("fn(");
 700 |                 for &p in &s.formal_parameters {
 701 |                     println!("    {},", db.type_name(p).unwrap());
 702 |                 }
 703 |                 if let Some(rt) = s.return_type_id {
 704 |                     println!(") -> {} {{", db.type_name(rt).unwrap());
 705 |                 } else {
 706 |                     println!(") {{");
 707 |                 }
 708 |                 println!("    // code goes here");
 709 |                 println!("    // (this is a subroutine type, _not_ a fn ptr)");
 710 |                 println!("    unimplemented!();");
 711 |                 println!("}}");
 712 |             }
 713 |             Type::Unresolved(_) => {
 714 |                 println!("(type not found in debug info!)");
 715 |             }
 716 |         }
 717 |     })
 718 | }
 719 | 
 720 | fn cmd_addr2line(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) {
 721 |     let addr = if let Some(rest) = args.strip_prefix("0x") {
 722 |         if let Ok(a) = u64::from_str_radix(rest, 16) {
 723 |             a
 724 |         } else {
 725 |             println!("can't parse {} as an address", args);
 726 |             return;
 727 |         }
 728 |     } else if let Ok(a) = args.parse::<u64>() {
 729 |         a
 730 |     } else {
 731 |         println!("can't parse {} as an address", args);
 732 |         return;
 733 |     };
 734 | 
 735 |     if let Some(row) = db.lookup_line_row(addr) {
 736 |         print!("{}:", row.file);
 737 |         if let Some(line) = row.line {
 738 |             print!("{}:", line);
 739 |         } else {
 740 |             print!("?:");
 741 |         }
 742 |         if let Some(col) = row.column {
 743 |             print!("{}", col);
 744 |         } else {
 745 |             print!("?");
 746 |         }
 747 |         println!();
 748 |     } else {
 749 |         println!("no line number information available for address");
 750 |     }
 751 | }
 752 | 
 753 | fn cmd_addr2stack(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) {
 754 |     let addr = if let Some(rest) = args.strip_prefix("0x") {
 755 |         if let Ok(a) = u64::from_str_radix(rest, 16) {
 756 |             a
 757 |         } else {
 758 |             println!("can't parse {} as an address", args);
 759 |             return;
 760 |         }
 761 |     } else if let Ok(a) = args.parse::<u64>() {
 762 |         a
 763 |     } else {
 764 |         println!("can't parse {} as an address", args);
 765 |         return;
 766 |     };
 767 | 
 768 |     let bold = ansi_term::Style::new().bold();
 769 |     let dim = ansi_term::Style::new().dimmed();
 770 | 
 771 |     match db.static_stack_for_pc(addr) {
 772 |         Ok(Some(trc)) => {
 773 |             println!("Static stack trace fragment for address 0x{:x}", addr);
 774 |             println!("(innermost / most recent first)");
 775 |             for (i, record) in trc.iter().rev().enumerate() {
 776 |                 let subp = db.subprogram_by_id(record.subprogram).unwrap();
 777 | 
 778 |                 print!("{:4}   ", i);
 779 |                 if let Some(n) = &subp.name {
 780 |                     println!("{}", bold.paint(n));
 781 |                 } else {
 782 |                     println!("{}", bold.paint("<unknown-subprogram>"));
 783 |                 }
 784 |                 print!("{}", dim.prefix());
 785 |                 print!("    {}:", record.file);
 786 |                 if let Some(line) = record.line {
 787 |                     print!("{}:", line);
 788 |                 } else {
 789 |                     print!("?:");
 790 |                 }
 791 |                 if let Some(col) = record.column {
 792 |                     print!("{}", col);
 793 |                 } else {
 794 |                     print!("?");
 795 |                 }
 796 |                 print!("{}", dim.suffix());
 797 |                 println!();
 798 |             }
 799 |         }
 800 |         Ok(None) => {
 801 |             println!("no stack information available for address {addr:#x?}");
 802 |         }
 803 |         Err(e) => {
 804 |             println!("failed: {e}");
 805 |         }
 806 |     }
 807 | }
 808 | 
 809 | fn cmd_vars(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) {
 810 |     for (_id, v) in db.static_variables() {
 811 |         if !args.is_empty() && !v.name.contains(args) {
 812 |             continue;
 813 |         }
 814 | 
 815 |         println!("0x{:0width$x} {}: {}", v.location, v.name, NamedGoff(db, v.type_id),
 816 |             width = db.pointer_size() * 2);
 817 |     }
 818 | }
 819 | 
 820 | fn cmd_var(db: &debugdb::DebugDb, ctx: &mut Ctx, args: &str) {
 821 |     let results = db.static_variables_by_name(args).collect::<Vec<_>>();
 822 | 
 823 |     match results.len() {
 824 |         0 => println!("no variables found by that name"),
 825 |         1 => (),
 826 |         n => println!("note: {} variables found by that name", n),
 827 |     }
 828 | 
 829 |     for (_id, v) in results {
 830 |         println!("{} @ {}", v.name, Goff(v.offset));
 831 |         println!("- type: {}", NamedGoff(db, v.type_id));
 832 |         println!("- address: 0x{:x}", v.location);
 833 |         let Some(ty) = db.type_by_id(v.type_id) else { continue };
 834 | 
 835 |         match Value::from_state(&ctx.segments, v.location, db, ty) {
 836 |             Ok(v) => {
 837 |                 println!("- current contents: {}",
 838 |                     ValueWithDb(v, db));
 839 |             }
 840 |             Err(e) => {
 841 |                 println!("- unable to display: {e}");
 842 |             }
 843 |         }
 844 |     }
 845 | }
 846 | 
 847 | fn cmd_addr(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) {
 848 |     let addr = if let Some(rest) = args.strip_prefix("0x") {
 849 |         if let Ok(a) = u64::from_str_radix(rest, 16) {
 850 |             a
 851 |         } else {
 852 |             println!("can't parse {} as an address", args);
 853 |             return;
 854 |         }
 855 |     } else if let Ok(a) = args.parse::<u64>() {
 856 |         a
 857 |     } else {
 858 |         println!("can't parse {} as an address", args);
 859 |         return;
 860 |     };
 861 | 
 862 |     let es = db.entities_by_address(addr).collect::<Vec<_>>();
 863 | 
 864 |     match es.len() {
 865 |         0 => println!("Nothing known about address 0x{:x}.", addr),
 866 |         1 => (),
 867 |         n => println!("note: {} overlapping entities claim address 0x{:x}", n, addr),
 868 |     }
 869 | 
 870 |     let bold = ansi_term::Style::new().bold();
 871 |     let dim = ansi_term::Style::new().dimmed();
 872 | 
 873 |     for e in es {
 874 |         let offset = addr - e.range.start;
 875 |         print!("Offset +0x{:x} into ", offset);
 876 |         match e.entity {
 877 |             debugdb::EntityId::Var(vid) => {
 878 |                 let v = db.static_variable_by_id(vid).unwrap();
 879 |                 println!("static {}", bold.paint(&v.name));
 880 |                 println!("- range 0x{:x}..0x{:x}", 
 881 |                     e.range.start, e.range.end);
 882 |                 println!("- type {}", NamedGoff(db, v.type_id));
 883 | 
 884 |                 // Try to determine path within type.
 885 |                 offset_to_path(db, v.type_id, offset);
 886 |             }
 887 |             debugdb::EntityId::Prog(pid) => {
 888 |                 let p = db.subprogram_by_id(pid).unwrap();
 889 |                 if let Some(n) = &p.name {
 890 |                     println!("subprogram {}", bold.paint(n));
 891 |                 } else {
 892 |                     println!("subprogram {}", bold.paint("ANON"));
 893 |                 }
 894 |                 println!("- range 0x{:x}..0x{:x}", 
 895 |                     e.range.start, e.range.end);
 896 |                 match db.static_stack_for_pc(addr) {
 897 |                     Ok(Some(trc)) => {
 898 |                         println!("- stack fragment with inlines:");
 899 |                         for (i, record) in trc.iter().rev().enumerate() {
 900 |                             let subp = db.subprogram_by_id(record.subprogram).unwrap();
 901 | 
 902 |                             print!("    {:4}   ", i);
 903 |                             if let Some(n) = &subp.name {
 904 |                                 println!("{}", bold.paint(n));
 905 |                             } else {
 906 |                                 println!("{}", bold.paint("<unknown-subprogram>"));
 907 |                             }
 908 |                             print!("{}", dim.prefix());
 909 |                             print!("        {}:", record.file);
 910 |                             if let Some(line) = record.line {
 911 |                                 print!("{}:", line);
 912 |                             } else {
 913 |                                 print!("?:");
 914 |                             }
 915 |                             if let Some(col) = record.column {
 916 |                                 print!("{}", col);
 917 |                             } else {
 918 |                                 print!("?");
 919 |                             }
 920 |                             print!("{}", dim.suffix());
 921 |                             println!();
 922 |                         }
 923 |                     }
 924 |                     Ok(None) => {
 925 |                         println!("- no stack fragment is available");
 926 |                     }
 927 |                     Err(e) => {
 928 |                         println!("- could not get stack fragment: {}", e);
 929 |                     }
 930 |                 }
 931 |             }
 932 |         }
 933 |     }
 934 | }
 935 | 
 936 | fn offset_to_path(
 937 |     db: &debugdb::DebugDb,
 938 |     tid: TypeId,
 939 |     offset: u64,
 940 | ) {
 941 |     let t = db.type_by_id(tid).unwrap();
 942 |     match t {
 943 |         Type::Array(a) => {
 944 |             let et = db.type_by_id(a.element_type_id).unwrap();
 945 |             if let Some(esz) = et.byte_size(db) {
 946 |                 if esz > 0 {
 947 |                     let index = offset / esz;
 948 |                     let new_offset = offset % esz;
 949 |                     println!("  - index [{}] +0x{:x}", index, new_offset);
 950 |                     offset_to_path(db, a.element_type_id, new_offset);
 951 |                 }
 952 |             }
 953 |         }
 954 |         Type::Struct(s) => {
 955 |             // This is where an offsetof-to-member index would be convenient
 956 | 
 957 |             for m in &s.members {
 958 |                 if offset < m.location {
 959 |                     continue;
 960 |                 }
 961 |                 let new_offset = offset - m.location;
 962 |                 let mt = db.type_by_id(m.type_id).unwrap();
 963 |                 if let Some(msz) = mt.byte_size(db) {
 964 |                     if msz > 0 {
 965 |                         if let Some(n) = &m.name {
 966 |                             println!("  - .{} +0x{:x} (in {})", n, new_offset, s.name);
 967 |                         } else {
 968 |                             return;
 969 |                         }
 970 |                         offset_to_path(db, m.type_id, new_offset);
 971 |                         break;
 972 |                     }
 973 |                 }
 974 |             }
 975 |         }
 976 |         _ => (),
 977 |     }
 978 | }
 979 | 
 980 | fn cmd_unwind(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) {
 981 |     let addr = if let Some(rest) = args.strip_prefix("0x") {
 982 |         if let Ok(a) = u64::from_str_radix(rest, 16) {
 983 |             a
 984 |         } else {
 985 |             println!("can't parse {} as an address", args);
 986 |             return;
 987 |         }
 988 |     } else if let Ok(a) = args.parse::<u64>() {
 989 |         a
 990 |     } else {
 991 |         println!("can't parse {} as an address", args);
 992 |         return;
 993 |     };
 994 | 
 995 |     use gimli::UnwindSection;
 996 |     let mut ctx = gimli::UnwindContext::new();
 997 |     let bases = gimli::BaseAddresses::default();
 998 |     match db.debug_frame.unwind_info_for_address(&bases, &mut ctx, addr, gimli::DebugFrame::cie_from_offset) {
 999 |         Ok(ui) => {
1000 |             println!("saved args: {} bytes", ui.saved_args_size());
1001 |             print!("cfa: ");
1002 |             match ui.cfa() {
1003 |                 gimli::CfaRule::RegisterAndOffset { register, offset } => {
1004 |                     println!("reg #{}, offset {}", register.0, offset);
1005 |                 }
1006 |                 other => panic!("unsupported CFA rule type: {:?}", other),
1007 |             }
1008 |             for (n, rule) in ui.registers() {
1009 |                 print!("  caller reg #{} ", n.0);
1010 |                 match rule {
1011 |                     gimli::RegisterRule::Offset(n) => {
1012 |                         if *n < 0 {
1013 |                             println!("at CFA-{}", -n);
1014 |                         } else {
1015 |                             println!("at CFA+{}", n);
1016 |                         }
1017 |                     }
1018 |                     gimli::RegisterRule::ValOffset(n) => {
1019 |                         if *n < 0 {
1020 |                             println!("= CFA-{}", -n);
1021 |                         } else {
1022 |                             println!("= CFA+{}", n);
1023 |                         }
1024 |                     }
1025 |                     gimli::RegisterRule::SameValue => {
1026 |                         println!("preserved");
1027 |                     }
1028 |                     gimli::RegisterRule::Register(n) => {
1029 |                         println!("in reg# {}", n.0);
1030 |                     }
1031 |                     _ => println!("{:?}", rule),
1032 |                 }
1033 |             }
1034 |         }
1035 |         Err(e) => {
1036 |             println!("failed: {}", e);
1037 |         }
1038 |     }
1039 | }
1040 | 
1041 | fn struct_picture(db: &DebugDb, s: &Struct, width: usize) {
1042 |     struct_picture_inner(
1043 |         db,
1044 |         s.byte_size,
1045 |         s.members.iter().enumerate().map(|(i, m)| (i, m, true)),
1046 |         width,
1047 |     )
1048 | }
1049 | 
1050 | fn struct_picture_inner<'a, N: Eq + Clone + Display>(
1051 |     db: &DebugDb,
1052 |     byte_size: Option<u64>,
1053 |     members: impl IntoIterator<Item = (N, &'a Member, bool)>,
1054 |     width: usize,
1055 | ) {
1056 |     let Some(size) = byte_size else {
1057 |         println!("type has no size");
1058 |         return;
1059 |     };
1060 | 
1061 |     if size == 0 {
1062 |         println!("(type is 0 bytes long)");
1063 |         return;
1064 |     }
1065 | 
1066 |     let mut member_spans: RangeMap<u64, N> = RangeMap::new();
1067 |     let mut member_labels = vec![];
1068 |     for (i, m, in_legend) in members {
1069 |         if in_legend {
1070 |             member_labels.push({
1071 |                 let label = if db.type_by_id(m.type_id).unwrap().byte_size(db) == Some(0) {
1072 |                     "(ZST)".to_string()
1073 |                 } else {
1074 |                     i.to_string()
1075 |                 };
1076 | 
1077 |                 let name = if let Some(name) = &m.name {
1078 |                     name.as_str()
1079 |                 } else {
1080 |                     "_"
1081 |                 };
1082 |                 if label == name {
1083 |                     format!("{name}: {}", NamedGoff(db, m.type_id))
1084 |                 } else {
1085 |                     format!("{label} = {name}: {}", NamedGoff(db, m.type_id))
1086 |                 }
1087 |             });
1088 |         }
1089 |         let offset = m.location;
1090 |         let Some(size) = db.type_by_id(m.type_id).unwrap().byte_size(db) else {
1091 |             continue;
1092 |         };
1093 |         if size != 0 {
1094 |             member_spans.insert(offset..offset + size, i);
1095 |         }
1096 |     }
1097 | 
1098 |     byte_picture(size, width, |off| {
1099 |         member_spans.get(&off).map(|x| x.to_string())
1100 |     });
1101 |     if !member_labels.is_empty() {
1102 |         println!("     where:");
1103 |         for label in member_labels {
1104 |             println!("       {label}");
1105 |         }
1106 |     }
1107 | }
1108 | 
1109 | fn enum_picture(db: &DebugDb, s: &Enum, width: usize) {
1110 |     let Some(size) = s.byte_size else {
1111 |         println!("type has no size");
1112 |         return;
1113 |     };
1114 | 
1115 |     if size == 0 {
1116 |         println!("(type is 0 bytes long)");
1117 |         return;
1118 |     }
1119 | 
1120 |     println!();
1121 | 
1122 |     match &s.shape {
1123 |         VariantShape::Zero => {
1124 |             println!("this enum is empty and cannot be diagrammed.");
1125 |         }
1126 |         VariantShape::One(_v) => {
1127 |             println!("this enum has only one variant (TODO)");
1128 |         }
1129 |         VariantShape::Many { member, .. } => {
1130 |             let Some(dlen) = db.type_by_id(member.type_id).unwrap().byte_size(db) else {
1131 |                 println!("discriminator type has no size?");
1132 |                 return;
1133 |             };
1134 |             let drange = member.location .. member.location + dlen;
1135 |             println!("Discriminator position:");
1136 |             byte_picture(size, width, |off| {
1137 |                 if drange.contains(&off) {
1138 |                     Some("DISC".to_string())
1139 |                 } else {
1140 |                     Some("body".to_string())
1141 |                 }
1142 |             });
1143 |             /*
1144 |             for (disc, var) in variants {
1145 |                 let show_disc = if let Some(v) = disc {
1146 |                     print!("DISC == {v:#x} => body: ");
1147 |                     true
1148 |                 } else {
1149 |                     print!("else => body: ");
1150 |                     false
1151 |                 };
1152 |                 println!("{}", NamedGoff(db, var.member.type_id));
1153 |                 let vt = db.type_by_id(var.member.type_id).unwrap();
1154 |                 match vt {
1155 |                     Type::Struct(s) => {
1156 |                         let mut all_members = vec![];
1157 |                         if show_disc {
1158 |                             all_members.push(("DISC", member, false));
1159 |                         }
1160 |                         all_members.extend(
1161 |                             s.members.iter().map(|(n, m)| {
1162 |                                 let mut n = n.as_str();
1163 |                                 if n.len() > 6 {
1164 |                                     n = &n[..6];
1165 |                                 }
1166 | 
1167 |                                 (n, m, true)
1168 |                             })
1169 |                         );
1170 |                         struct_picture_inner(db, s.byte_size, all_members, width);
1171 |                     },
1172 |                     _ => println!("(can't display non-struct)"),
1173 |                 }
1174 |             }
1175 |             */
1176 |         }
1177 |     }
1178 | }
1179 | 
/// Prints an ASCII-art picture of a `size`-byte region, `width` bytes per
/// row, with each run of bytes labelled by its owner.
///
/// * `size` - number of bytes in the region being drawn.
/// * `width` - number of bytes per row. A width of zero cannot be drawn; a
///   message is printed instead.
/// * `owner` - maps a byte offset to the label of whatever occupies that
///   byte, or `None` for padding. It is only ever called with offsets below
///   `size`; cells past the end of the region in a partial last row are
///   always left blank.
///
/// Labels are centred in six columns, so longer labels will distort the
/// picture.
fn byte_picture(
    size: u64,
    width: usize,
    owner: impl Fn(u64) -> Option<String>,
) {
    if width == 0 {
        // Avoids the underflow / division by zero below.
        println!("(cannot draw a picture 0 bytes wide)");
        return;
    }
    let width = width as u64;

    // Bytes past the end of the region have no owner, whatever `owner` would
    // say. Consulting `owner` for them let callers that label every offset
    // draw cells beyond the region and lose the closing borders of the last
    // real cell.
    let owner_at = |off: u64| if off < size { owner(off) } else { None };

    // Column header: byte index within a row.
    print!("      ");
    for byte in 0..u64::min(size, width) {
        print!(" {byte:^6}");
    }
    println!();

    let wordcount = (size + (width - 1)) / width;
    // Owner of the most recently labelled run; a label is printed only when
    // the owner changes. This deliberately carries over between rows.
    let mut current = None;
    // Owner of the cell directly above each column, used to decide whether a
    // horizontal border is needed between vertically adjacent cells.
    let mut above = vec![None; width as usize];
    for word in 0..wordcount {
        print!("     +");
        for byte in 0..width {
            let n = owner_at(word * width + byte);
            if above[byte as usize] == Some(n) {
                print!("      +");
            } else {
                print!("------+");
            }
        }
        println!();

        print!("{:04x} |", word * width);
        for byte in 0..width {
            let off = word * width + byte;
            let n = owner_at(off);
            if Some(&n) != current.as_ref() {
                if byte != 0 {
                    print!("|");
                }
                if let Some(i) = &n {
                    print!("{:^6}", i);
                } else if off < size {
                    print!(" pad  ");
                } else {
                    print!("      ");
                }
                current = Some(n.clone());
            } else {
                if byte != 0 {
                    print!(" ");
                }
                print!("      ");
            }

            if byte == width - 1 {
                if off < size {
                    println!("|");
                } else {
                    println!();
                }
            }

            above[byte as usize] = Some(n);
        }
    }
    // Bottom border, only as wide as the last (possibly partial) row.
    print!("     +");
    let final_bar = if size % width == 0 { width } else { size % width };
    for _ in 0..final_bar {
        print!("------+");
    }
    println!();
}
1248 | 
1249 | fn cmd_decode(db: &debugdb::DebugDb, ctx: &mut Ctx, args: &str) {
1250 |     let (addrstr, typestr) = if let Some(space) = args.find(' ') {
1251 |         args.split_at(space)
1252 |     } else {
1253 |         println!("usage: decode [addr] [typename blah blah]");
1254 |         return;
1255 |     };
1256 |     let addr = match parse_int::parse::<u64>(addrstr) {
1257 |         Ok(x) => x,
1258 |         Err(e) => {
1259 |             println!("bad address: {e}");
1260 |             return;
1261 |         }
1262 |     };
1263 |     let types: Vec<_> = match parse_type_name(typestr.trim()) {
1264 |         None => return,
1265 |         Some(ParsedTypeName::Name(n)) => {
1266 |             db.types_by_name(n).collect()
1267 |         }
1268 |         Some(ParsedTypeName::Goff(o)) => {
1269 |             db.type_by_id(o).into_iter()
1270 |                 .map(|t| (o, t))
1271 |                 .collect()
1272 |         }
1273 |     };
1274 | 
1275 |     let many = match types.len() {
1276 |         0 => {
1277 |             println!("{}", ansi_term::Colour::Red.paint("No types found."));
1278 |             return;
1279 |         }
1280 |         1 => false,
1281 |         n => {
1282 |             println!("{}{} types found with that name:",
1283 |                 ansi_term::Color::Yellow.paint("note: "),
1284 |                 n,
1285 |             );
1286 |             true
1287 |         }
1288 |     };
1289 | 
1290 |     for (goff, t) in types {
1291 |         if many { println!() }
1292 |         println!("{}: ", NamedGoff(db, goff));
1293 |         match Value::from_state(&ctx.segments, addr, db, t) {
1294 |             Ok(v) => {
1295 |                 println!("{}", ValueWithDb(v, db));
1296 |             }
1297 |             Err(e) => {
1298 |                 println!("could not parse as this type: {e}");
1299 |             }
1300 |         }
1301 |     }
1302 | }
1303 | 
1304 | fn cmd_decode_async(db: &debugdb::DebugDb, ctx: &mut Ctx, args: &str) {
1305 |     let (addrstr, typestr) = if let Some(space) = args.find(' ') {
1306 |         args.split_at(space)
1307 |     } else {
1308 |         println!("usage: decode-async [addr] [typename blah blah]");
1309 |         return;
1310 |     };
1311 |     let addr = match parse_int::parse::<u64>(addrstr) {
1312 |         Ok(x) => x,
1313 |         Err(e) => {
1314 |             println!("bad address: {e}");
1315 |             return;
1316 |         }
1317 |     };
1318 |     let types: Vec<_> = match parse_type_name(typestr.trim()) {
1319 |         None => return,
1320 |         Some(ParsedTypeName::Name(n)) => {
1321 |             db.types_by_name(n).collect()
1322 |         }
1323 |         Some(ParsedTypeName::Goff(o)) => {
1324 |             db.type_by_id(o).into_iter()
1325 |                 .map(|t| (o, t))
1326 |                 .collect()
1327 |         }
1328 |     };
1329 | 
1330 |     let many = match types.len() {
1331 |         0 => {
1332 |             println!("{}", ansi_term::Colour::Red.paint("No types found."));
1333 |             return;
1334 |         }
1335 |         1 => false,
1336 |         n => {
1337 |             println!("{}{} types found with that name:",
1338 |                 ansi_term::Color::Yellow.paint("note: "),
1339 |                 n,
1340 |             );
1341 |             true
1342 |         }
1343 |     };
1344 | 
1345 |     for (goff, t) in types {
1346 |         if many { println!() }
1347 |         println!("{}: ", NamedGoff(db, goff));
1348 |         let mut v = &match Value::from_state(&ctx.segments, addr, db, t) {
1349 |             Ok(v) => v,
1350 |             Err(e) => {
1351 |                 println!("could not parse as this type: {e}");
1352 |                 return;
1353 |             }
1354 |         };
1355 |         let parts = Regex::new(r#"^(.*)::\{async_fn_env#0\}(<.*)?$"#).unwrap();
1356 |         let suspend_state = Regex::new(r#"::Suspend([0-9]+)$"#).unwrap();
1357 |         let mut first = true;
1358 |         let bold = ansi_term::Style::new().bold();
1359 |         loop {
1360 |             if !first {
1361 |                 print!("waiting on: ");
1362 |             }
1363 |             first = false;
1364 |             let Value::Enum(e) = v else {
1365 |                 println!("{}hand-rolled future{}", bold.prefix(), bold.suffix());
1366 |                 println!("    type: {}", v.type_name());
1367 |                 break;
1368 |             };
1369 |             let Some(parts) = parts.captures(&e.name) else {
1370 |                 println!("(name is weird for an async fn env)");
1371 |                 break;
1372 |             };
1373 |             let name = &parts[1];
1374 |             let parms = parts.get(2).map(|m| m.as_str()).unwrap_or("");
1375 |             println!("async fn {}{name}{parms}{}", bold.prefix(), bold.suffix());
1376 |             let state = &e.disc;
1377 |             let state_name = &e.value.name;
1378 | 
1379 |             if state_name.ends_with("Unresumed") {
1380 |                 println!("    future has not yet been polled");
1381 |                 break;
1382 |             } else if state_name.ends_with("Returned") {
1383 |                 println!("    future has already resolved");
1384 |                 break;
1385 |             } else if state_name.ends_with("Panicked") {
1386 |                 println!("    future panicked on previous poll");
1387 |                 break;
1388 |             } else if let Some(sc) = suspend_state.captures(state_name) {
1389 |                 if let Ok(n) = sc[1].parse::<usize>() {
1390 |                     println!("    suspended at await point {n}");
1391 |                 } else {
1392 |                     println!("    unrecognized state {state}: {state_name}");
1393 |                 }
1394 |             } else {
1395 |                 println!("    unrecognized state {state}: {state_name}");
1396 |             }
1397 | 
1398 |             let mut awaitees = e.value.members_named("__awaitee");
1399 |             let Some(awaitee) = awaitees.next() else {
1400 |                 println!(" (stopped unexpectedly)");
1401 |                 break;
1402 |             };
1403 |             if awaitees.next().is_some() {
1404 |                 println!(" (multiple __awaitee fields)");
1405 |                 break;
1406 |             }
1407 |             v = awaitee;
1408 |         }
1409 |     }
1410 | }
1411 | 
1412 | fn cmd_decode_blob(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) {
1413 |     let type_name = args.trim();
1414 |     let types: Vec<_> = match parse_type_name(type_name) {
1415 |         None => return,
1416 |         Some(ParsedTypeName::Name(n)) => {
1417 |             db.types_by_name(n).collect()
1418 |         }
1419 |         Some(ParsedTypeName::Goff(o)) => {
1420 |             db.type_by_id(o).into_iter()
1421 |                 .map(|t| (o, t))
1422 |                 .collect()
1423 |         }
1424 |     };
1425 | 
1426 |     let many = match types.len() {
1427 |         0 => {
1428 |             println!("{}", ansi_term::Colour::Red.paint("No types found."));
1429 |             return;
1430 |         }
1431 |         1 => false,
1432 |         n => {
1433 |             println!("{}{} types found with that name:",
1434 |                 ansi_term::Color::Yellow.paint("note: "),
1435 |                 n,
1436 |             );
1437 |             true
1438 |         }
1439 |     };
1440 | 
1441 |     println!("Paste hex-encoded memory blob. Whitespace OK.");
1442 |     println!("Address prefix ending in colon will be removed.");
1443 |     println!("Enter a blank line to end.");
1444 | 
1445 |     let stdin = std::io::stdin().lock();
1446 |     let mut img = vec![];
1447 |     for line in stdin.lines() {
1448 |         let line = match line {
1449 |             Err(e) => {
1450 |                 println!("input error: {e}");
1451 |                 return;
1452 |             }
1453 |             Ok(v) => v,
1454 |         };
1455 |         let mut line = line.trim();
1456 |         if line.is_empty() {
1457 |             break;
1458 |         }
1459 |         if let Some(colon) = line.find(':') {
1460 |             line = &line.split_at(colon).1[1..];
1461 |         }
1462 | 
1463 |         let mut hexits = vec![];
1464 |         for b in line.bytes() {
1465 |             match b {
1466 |                 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
1467 |                     hexits.push(b);
1468 |                 }
1469 |                 b' ' | b'\t' | b'\r' | b'\n' => (),
1470 |                 _ => {
1471 |                     println!("unexpected byte in input: {b:#x?}");
1472 |                     return;
1473 |                 }
1474 |             }
1475 |         }
1476 | 
1477 |         let bytes = hexits.chunks_exact(2)
1478 |             .map(|chunk| u8::from_str_radix(std::str::from_utf8(chunk).unwrap(), 16))
1479 |             .collect::<Result<Vec<_>, _>>();
1480 |         match bytes {
1481 |             Err(e) => {
1482 |                 println!("couldn't parse that: {e}");
1483 |                 return;
1484 |             }
1485 |             Ok(b) => img.extend(b),
1486 |         }
1487 |     }
1488 | 
1489 |     for (goff, t) in types {
1490 |         if many { println!() }
1491 |         println!("{}: ", NamedGoff(db, goff));
1492 |         let Some(size) = t.byte_size(db) else {
1493 |             println!("  (type is unsized, cannot decode)");
1494 |             continue;
1495 |         };
1496 |         let Ok(size) = usize::try_from(size) else {
1497 |             println!("  (type too big for this platform)");
1498 |             continue;
1499 |         };
1500 |         let mut this_img = img.clone();
1501 |         if size > this_img.len() {
1502 |             println!("(padding entered data to {size} bytes)");
1503 |             this_img.resize(size, 0);
1504 |         }
1505 |         let machine = ImgMachine::new(this_img);
1506 |         match Value::from_state(&machine, 0, db, t) {
1507 |             Ok(v) => {
1508 |                 println!("{}", ValueWithDb(v, db));
1509 |             }
1510 |             Err(e) => {
1511 |                 println!("could not parse as this type: {e}");
1512 |             }
1513 |         }
1514 |     }
1515 | }
1516 | 
1517 | fn cmd_decode_async_blob(db: &debugdb::DebugDb, _ctx: &mut Ctx, args: &str) {
1518 |     let type_name = args.trim();
1519 |     let types: Vec<_> = match parse_type_name(type_name) {
1520 |         None => return,
1521 |         Some(ParsedTypeName::Name(n)) => {
1522 |             db.types_by_name(n).collect()
1523 |         }
1524 |         Some(ParsedTypeName::Goff(o)) => {
1525 |             db.type_by_id(o).into_iter()
1526 |                 .map(|t| (o, t))
1527 |                 .collect()
1528 |         }
1529 |     };
1530 | 
1531 |     let many = match types.len() {
1532 |         0 => {
1533 |             println!("{}", ansi_term::Colour::Red.paint("No types found."));
1534 |             return;
1535 |         }
1536 |         1 => false,
1537 |         n => {
1538 |             println!("{}{} types found with that name:",
1539 |                 ansi_term::Color::Yellow.paint("note: "),
1540 |                 n,
1541 |             );
1542 |             true
1543 |         }
1544 |     };
1545 | 
1546 |     println!("Paste hex-encoded memory blob. Whitespace OK.");
1547 |     println!("Address prefix ending in colon will be removed.");
1548 |     println!("Enter a blank line to end.");
1549 | 
1550 |     let stdin = std::io::stdin().lock();
1551 |     let mut img = vec![];
1552 |     for line in stdin.lines() {
1553 |         let line = match line {
1554 |             Err(e) => {
1555 |                 println!("input error: {e}");
1556 |                 return;
1557 |             }
1558 |             Ok(v) => v,
1559 |         };
1560 |         let mut line = line.trim();
1561 |         if line.is_empty() {
1562 |             break;
1563 |         }
1564 |         if let Some(colon) = line.find(':') {
1565 |             line = &line.split_at(colon).1[1..];
1566 |         }
1567 | 
1568 |         let mut hexits = vec![];
1569 |         for b in line.bytes() {
1570 |             match b {
1571 |                 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
1572 |                     hexits.push(b);
1573 |                 }
1574 |                 b' ' | b'\t' | b'\r' | b'\n' => (),
1575 |                 _ => {
1576 |                     println!("unexpected byte in input: {b:#x?}");
1577 |                     return;
1578 |                 }
1579 |             }
1580 |         }
1581 | 
1582 |         let bytes = hexits.chunks_exact(2)
1583 |             .map(|chunk| u8::from_str_radix(std::str::from_utf8(chunk).unwrap(), 16))
1584 |             .collect::<Result<Vec<_>, _>>();
1585 |         match bytes {
1586 |             Err(e) => {
1587 |                 println!("couldn't parse that: {e}");
1588 |                 return;
1589 |             }
1590 |             Ok(b) => img.extend(b),
1591 |         }
1592 |     }
1593 | 
1594 |     for (goff, t) in types {
1595 |         if many { println!() }
1596 |         println!("{}: ", NamedGoff(db, goff));
1597 |         let Some(size) = t.byte_size(db) else {
1598 |             println!("  (type is unsized, cannot decode)");
1599 |             continue;
1600 |         };
1601 |         let Ok(size) = usize::try_from(size) else {
1602 |             println!("  (type too big for this platform)");
1603 |             continue;
1604 |         };
1605 |         let mut this_img = img.clone();
1606 |         if size > this_img.len() {
1607 |             println!("(padding entered data to {size} bytes)");
1608 |             this_img.resize(size, 0);
1609 |         }
1610 |         let machine = ImgMachine::new(this_img);
1611 |         let mut v = &match Value::from_state(&machine, 0, db, t) {
1612 |             Ok(v) => v,
1613 |             Err(e) => {
1614 |                 println!("could not parse as this type: {e}");
1615 |                 return;
1616 |             }
1617 |         };
1618 |         let parts = Regex::new(r#"^(.*)::\{async_fn_env#0\}(<.*)?$"#).unwrap();
1619 |         let suspend_state = Regex::new(r#"::Suspend([0-9]+)$"#).unwrap();
1620 |         let mut first = true;
1621 |         loop {
1622 |             if !first {
1623 |                 print!("waiting on: ");
1624 |             }
1625 |             first = false;
1626 |             let Value::Enum(e) = v else {
1627 |                 println!("hand-rolled future");
1628 |                 println!("    type: {}", v.type_name());
1629 |                 break;
1630 |             };
1631 |             let Some(parts) = parts.captures(&e.name) else {
1632 |                 println!("(name is weird for an async fn env)");
1633 |                 break;
1634 |             };
1635 |             let name = &parts[1];
1636 |             let parms = parts.get(2).map(|m| m.as_str()).unwrap_or("");
1637 |             println!("async fn {name}{parms}");
1638 |             let state = &e.disc;
1639 |             let state_name = &e.value.name;
1640 | 
1641 |             if state_name.ends_with("Unresumed") {
1642 |                 println!("    future has not yet been polled");
1643 |                 break;
1644 |             } else if state_name.ends_with("Returned") {
1645 |                 println!("    future has already resolved");
1646 |                 break;
1647 |             } else if state_name.ends_with("Panicked") {
1648 |                 println!("    future panicked on previous poll");
1649 |                 break;
1650 |             } else if let Some(sc) = suspend_state.captures(state_name) {
1651 |                 if let Ok(n) = sc[1].parse::<usize>() {
1652 |                     println!("    suspended at await point {n}");
1653 |                 } else {
1654 |                     println!("    unrecognized state {state}: {state_name}");
1655 |                 }
1656 |             } else {
1657 |                 println!("    unrecognized state {state}: {state_name}");
1658 |             }
1659 | 
1660 |             let mut awaitees = e.value.members_named("__awaitee");
1661 |             let Some(awaitee) = awaitees.next() else {
1662 |                 println!(" (stopped unexpectedly)");
1663 |                 break;
1664 |             };
1665 |             if awaitees.next().is_some() {
1666 |                 println!(" (multiple __awaitee fields)");
1667 |                 break;
1668 |             }
1669 |             v = awaitee;
1670 |         }
1671 |     }
1672 | }
1673 | 
1674 | 
1675 | fn cmd_load(
1676 |     _db: &debugdb::DebugDb,
1677 |     ctx: &mut Ctx,
1678 |     args: &str,
1679 | ) {
1680 |     let args = args.trim();
1681 |     let words = args.split_whitespace().collect::<Vec<_>>();
1682 |     if words.len() != 2 {
1683 |         println!("usage: load [filename] [address]");
1684 |         return;
1685 |     }
1686 |     let filename = words[0];
1687 |     let address = match parse_int::parse::<u64>(words[1]) {
1688 |         Ok(a) => a,
1689 |         Err(e) => {
1690 |             println!("bad address: {e}");
1691 |             return;
1692 |         }
1693 |     };
1694 | 
1695 |     let image = match std::fs::read(filename) {
1696 |         Ok(bytes) => bytes,
1697 |         Err(e) => {
1698 |             println!("unable to read file: {e}");
1699 |             return;
1700 |         }
1701 |     };
1702 | 
1703 |     let end = address + u64::try_from(image.len()).unwrap();
1704 | 
1705 |     ctx.segments.insert(address..=end, image);
1706 | }
1707 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! Collects debug information from a program into a queryable, cross-referenced
  2 | //! form.
  3 | 
  4 | pub mod load;
  5 | pub mod value;
  6 | pub mod model;
  7 | pub mod unify;
  8 | 
  9 | mod dwarf_parser;
 10 | 
 11 | use crate::unify::Unify;
 12 | use crate::dwarf_parser::ParseError;
 13 | 
 14 | pub use self::model::*;
 15 | 
 16 | use object::{Object, ObjectSection, ObjectSymbol};
 17 | use thiserror::Error;
 18 | use std::borrow::Cow;
 19 | use std::collections::{BTreeMap, BTreeSet};
 20 | use std::convert::Infallible;
 21 | use std::sync::Arc;
 22 | 
 23 | // Internal type abbreviations
 24 | type BTreeIndex<I, K> = BTreeMap<K, BTreeSet<I>>;
 25 | type RtArcReader = gimli::EndianReader<gimli::RunTimeEndian, Arc<[u8]>>;
 26 | 
/// A database of information extracted from the debug info of a program.
///
/// This is primarily focused on correctly representing Rust programs, but it
/// can represent a large subset of C types as a side effect -- currently only
/// unnamed types present a problem. This could be fixed.
///
/// Instances are produced by `DebugDbBuilder::build`, which also computes all
/// of the indices stored here.
#[derive(Clone, Debug)]
pub struct DebugDb {
    /// Endianness of the target system.
    endian: gimli::RunTimeEndian,
    /// Pointer width of the target system. Currently only 32 and 64 are
    /// supported here; `true` means 64-bit.
    is_64: bool,

    /// All types in the program, indexed by location in the debug section(s).
    ///
    /// This is the authoritative set of types, other type-related fields index
    /// into this.
    ///
    /// Invariant: within each entry, the key is the same as the type's `offset`
    /// field.
    types: BTreeMap<TypeId, Type>,

    /// Type canonicalization relationships. If a type ID is present as a key in
    /// this map, then it is _not_ the canonical instance of its type, and
    /// should be replaced by the corresponding value in the map for analysis
    /// purposes.
    type_canon: BTreeMap<TypeId, TypeId>,

    /// Reverse type canonicalization relationship. Each key in this map is the
    /// ID of a canonical instance of a family of types, and the value lists
    /// those types.
    type_rcanon: BTreeMap<TypeId, BTreeSet<TypeId>>,

    /// Index: type name to ID(s) that can be looked up in `types`.
    ///
    /// Invariant: all string keys correspond to names of types in `types`.
    ///
    /// Invariant: all `TypeId` values have corresponding entries in `types`.
    type_name_index: BTreeIndex<TypeId, String>,

    /// Index: array element type and element count to ID(s) in `types`. Since
    /// arrays do not have names in DWARF, they can't be looked up in the
    /// `type_name_index`.
    array_index: BTreeIndex<TypeId, (TypeId, Option<u64>)>,

    /// Index: subroutine argument and return types to ID(s) in `types`.
    /// Since subroutine types do not have names in DWARF, they can't be looked
    /// up in the `type_name_index`.
    ///
    /// The specific structure here is a nested map: argument types -> return
    /// type -> type IDs. This allows the first lookup to happen with a slice,
    /// thanks to the `Borrow` trait, which would not be possible if the key
    /// were instead a `(Vec<TypeId>, Option<TypeId>)`.
    ///
    /// Note that this is subroutine _types,_ not subprograms.
    subroutine_index: BTreeMap<Vec<TypeId>, BTreeIndex<TypeId, Option<TypeId>>>,

    /// All subprograms, indexed by location in the debug section(s).
    subprograms: BTreeMap<ProgramId, Subprogram>,

    /// Mapping of text address to line number information. Several rows may
    /// be recorded under the same address.
    line_table: BTreeMap<u64, Vec<LineNumberRow>>,

    /// All static variables, indexed by ID.
    variables: BTreeMap<VarId, StaticVariable>,

    /// Index: static variables by name.
    variables_by_name: BTreeIndex<VarId, String>,

    /// All entities with fixed addresses, indexed by base address.
    entities_by_address: BTreeMap<u64, Vec<AddressRange>>,

    /// The `gimli::DebugFrame` handed to the builder, stored unmodified.
    ///
    /// TODO: this is exposed as a public field for now; presumably it should
    /// eventually sit behind a proper accessor.
    pub debug_frame: gimli::DebugFrame<gimli::EndianReader<gimli::RunTimeEndian, Arc<[u8]>>>,

    /// Index: raw symbol names recorded at each address.
    raw_symbols_by_address: BTreeMap<u64, BTreeSet<String>>,
    /// Index: addresses recorded for each raw symbol name.
    raw_symbols_by_name: BTreeMap<String, BTreeSet<u64>>,
}
106 | 
107 | impl DebugDb {
108 |     /// Gets the endianness of the program.
109 |     pub fn endian(&self) -> gimli::RunTimeEndian {
110 |         self.endian
111 |     }
112 | 
113 |     /// Gets the size of a pointer in the program, in bytes.
114 |     pub fn pointer_size(&self) -> usize {
115 |         if self.is_64 {
116 |             8
117 |         } else {
118 |             4
119 |         }
120 |     }
121 | 
122 |     /// Returns the number of types in the debug info.
123 |     pub fn type_count(&self) -> usize {
124 |         self.types.len()
125 |     }
126 | 
127 |     /// Produces an iterator over all types defined in the debug info, together
128 |     /// with their IDs.
129 |     pub fn types(
130 |         &self,
131 |     ) -> impl Iterator<Item = (TypeId, &Type)> + '_ {
132 |         self.types.iter().map(|(&id, ty)| (id, ty))
133 |     }
134 | 
135 |     /// Produces an iterator over all canonical types defined in the debug info,
136 |     /// together with their IDs.
137 |     pub fn canonical_types(
138 |         &self,
139 |     ) -> impl Iterator<Item = (TypeId, &Type)> + '_ {
140 |         self.types()
141 |             .filter(move |(tid, _t)| !self.type_canon.contains_key(tid))
142 |     }
143 | 
144 |     pub fn aliases_of_type(&self, id: TypeId) -> Option<&BTreeSet<TypeId>> {
145 |         self.type_rcanon.get(&id)
146 |     }
147 | 
148 |     /// Looks up the type with the given ID.
149 |     ///
150 |     /// If you got `id` from this instance, our consistency invariant ensures
151 |     /// that the result will be `Some`. If `id` is from another instance, or
152 |     /// made up, you may get `None`.
153 |     pub fn type_by_id(
154 |         &self,
155 |         id: TypeId,
156 |     ) -> Option<&Type> {
157 |         self.types.get(&id)
158 |     }
159 | 
160 |     /// Shorthand for looking up the name of a type.
161 |     ///
162 |     /// Note that not all types have names, so this may return `None` even if
163 |     /// the type exists.
164 |     pub fn type_name(
165 |         &self,
166 |         id: TypeId,
167 |     ) -> Option<Cow<'_, str>> {
168 |         Some(self.type_by_id(id)?.name(self))
169 |     }
170 | 
171 |     /// Consults the type-name index and returns an iterator over types with a
172 |     /// given name.
173 |     ///
174 |     /// Names are matched in their entirety, e.g. the name `"Option"` does not
175 |     /// match a type `"core::option::Option<u16>"`.
176 |     ///
177 |     /// Not all types are in the type name index. In particular, array types and
178 |     /// subroutine types.
179 |     pub fn types_by_name(
180 |         &self,
181 |         name: &str,
182 |     ) -> impl Iterator<Item = (TypeId, &Type)> + '_ {
183 |         self.consult_index(&self.type_name_index, name)
184 |     }
185 | 
186 |     /// Consults the array index and returns an iterator over array types with a
187 |     /// particular shape.
188 |     pub fn array_types(
189 |         &self,
190 |         element: TypeId,
191 |         count: Option<u64>,
192 |     ) -> impl Iterator<Item = (TypeId, &Type)> + '_ {
193 |         self.consult_index(&self.array_index, &(element, count))
194 |     }
195 | 
196 |     /// Consults the subroutine index and returns an iterator over subroutine
197 |     /// types with a particular shape.
198 |     ///
199 |     /// The return type is optional because, in both C and Rust, DWARF will omit
200 |     /// the return type for subroutines returning `void` / `()`. As a result,
201 |     /// looking up subroutines returning the `()` type will not produce results.
202 |     pub fn subroutine_types(
203 |         &self,
204 |         argument_tys: &[TypeId],
205 |         return_ty: Option<TypeId>,
206 |     ) -> impl Iterator<Item = (TypeId, &Type)> + '_ {
207 |         self.subroutine_index
208 |             .get(argument_tys)
209 |             .into_iter()
210 |             .flat_map(move |index|
211 |                 self.consult_index(index, &return_ty)
212 |             )
213 |     }
214 | 
215 |     /// Returns an iterator over all subprograms defined in this program.
216 |     pub fn subprograms(
217 |         &self,
218 |     ) -> impl Iterator<Item = (ProgramId, &Subprogram)> + '_ {
219 |         self.subprograms.iter().map(|(&goff, ty)| (goff, ty))
220 |     }
221 | 
222 |     /// Looks up a subprogram given its `ProgramId`.
223 |     pub fn subprogram_by_id(
224 |         &self,
225 |         pid: ProgramId,
226 |     ) -> Option<&Subprogram> {
227 |         self.subprograms.get(&pid)
228 |     }
229 | 
230 |     /// Returns an iterator over _all_ rows in the computed line number table.
231 |     ///
232 |     /// You probably don't want to do this.
233 |     pub fn line_table_rows(
234 |         &self,
235 |     ) -> impl Iterator<Item = (u64, &[LineNumberRow])> + '_ {
236 |         self.line_table.iter().map(|(&a, row)| (a, &**row))
237 |     }
238 | 
239 |     /// Looks up the line number table entry associated with `pc`.
240 |     pub fn lookup_line_row(
241 |         &self,
242 |         pc: u64,
243 |     ) -> Option<&LineNumberRow> {
244 |         self.line_table.range(..=pc)
245 |             .rev()
246 |             .flat_map(|(_, rows)| rows)
247 |             .take_while(move |row| row.pc_range.end > pc)
248 |             .find(move |row| row.pc_range.contains(&pc))
249 |     }
250 | 
251 |     /// Computes the static stack slice implied by a PC value.
252 |     ///
253 |     /// For simple cases of subroutines without inlined code, the stack slice
254 |     /// contains a single entry describing the subroutine and the line number
255 |     /// within it corresponding to the PC.
256 |     ///
257 |     /// For more complex cases involving inlines, possibly multiple layers of
258 |     /// inlines, the stack slice will be deeper. In this case, the last element
259 |     /// of the returned vec is the _innermost_ inline, and the first element is
260 |     /// the enclosing (non-inlined) subprogram.
261 |     pub fn static_stack_for_pc(
262 |         &self,
263 |         pc: u64,
264 |     ) -> Result<Option<Vec<PcInfo>>, ParseError> {
265 |         // Find subprogram containing PC.
266 |         let Some((pid, subp)) = self.subprograms()
267 |             .find(|(_, subp)| subp.pc_range
268 |                 .as_ref()
269 |                 .map(|r| r.contains(&pc))
270 |                 .unwrap_or(false))
271 |             else { return Ok(None); };
272 | 
273 |         let mut frag = vec![];
274 | 
275 |         // Follow inlined subroutine tree to the tip, recording call info at
276 |         // each step.
277 |         let mut enclosing_prog = pid;
278 |         let mut inlines = Some(&subp.inlines);
279 |         'inline_loop:
280 |             while let Some(inl) = inlines.take() {
281 |                 for inlsub in inl {
282 |                     for pcr in &inlsub.pc_ranges {
283 |                         if pcr.begin <= pc && pc < pcr.end {
284 |                             // We're in this one.
285 |                             if let Some(file) = &inlsub.call_coord.file {
286 |                                 frag.push(PcInfo {
287 |                                     subprogram: enclosing_prog,
288 |                                     file: file.clone(),
289 |                                     line: inlsub.call_coord.line,
290 |                                     column: inlsub.call_coord.column,
291 |                                 });
292 | 
293 |                                 enclosing_prog = ProgramId(
294 |                                     inlsub.abstract_origin
295 |                                     .expect("inlined sub w/o abstract_origin")
296 |                                 );
297 |                                 inlines = Some(&inlsub.inlines);
298 |                                 continue 'inline_loop;
299 |                             }
300 |                         }
301 |                     }
302 |                 }
303 |             }
304 | 
305 |         // Finally, find the innermost record from the line number info.
306 |         if let Some(row) = self.lookup_line_row(pc) {
307 |             frag.push(PcInfo {
308 |                 subprogram: enclosing_prog,
309 |                 file: row.file.clone(),
310 |                 line: row.line,
311 |                 column: row.column,
312 |             });
313 |         }
314 | 
315 |         Ok(Some(frag))
316 |     }
317 | 
318 |     pub fn unique_raw_symbol_by_name(
319 |         &self,
320 |         name: &str,
321 |     ) -> Option<u64> {
322 |         let addresses = self.raw_symbols_by_name.get(name)?;
323 |         let mut i = addresses.iter().cloned();
324 |         let result = i.next()?;
325 |         if i.next().is_some() {
326 |             None
327 |         } else {
328 |             Some(result)
329 |         }
330 |     }
331 | 
332 |     pub fn raw_symbols_for_address(
333 |         &self,
334 |         address: u64,
335 |     ) -> impl Iterator<Item = &str> {
336 |         self.raw_symbols_by_address.get(&address)
337 |             .into_iter()
338 |             .flat_map(|set| set.iter().map(String::as_str))
339 |     }
340 | 
341 |     /// Returns an iterator over all static variables defined in this program.
342 |     pub fn static_variables(
343 |         &self,
344 |     ) -> impl Iterator<Item = (VarId, &StaticVariable)> + '_ {
345 |         self.variables.iter().map(|(&goff, ty)| (goff, ty))
346 |     }
347 | 
348 |     pub fn static_variable_by_id(
349 |         &self,
350 |         id: VarId,
351 |     ) -> Option<&StaticVariable> {
352 |         self.variables.get(&id)
353 |     }
354 | 
355 |     pub fn static_variables_by_name(
356 |         &self,
357 |         name: &str,
358 |     ) -> impl Iterator<Item = (VarId, &StaticVariable)> + '_ {
359 |         self.consult_index_generic(&self.variables_by_name, name, &self.variables)
360 |     }
361 | 
362 |     pub fn unique_static_variable_by_name(
363 |         &self,
364 |         name: &str,
365 |     ) -> Option<(VarId, &StaticVariable)> {
366 |         let mut vs = self.static_variables_by_name(name);
367 |         let result = vs.next()?;
368 |         if vs.next().is_some() {
369 |             None
370 |         } else {
371 |             Some(result)
372 |         }
373 |     }
374 | 
375 |     pub fn entities_by_address(
376 |         &self,
377 |         address: u64,
378 |     ) -> impl Iterator<Item = &AddressRange> + '_ {
379 |         self.entities_by_address.range(..=address)
380 |             .rev()
381 |             .flat_map(|(_, rec)| rec)
382 |             .filter(move |rec| rec.range.contains(&address))
383 |     }
384 | 
385 |     /// Looks up `key` in `index`, and then transforms the result by (1) copying
386 |     /// the goffs and (2) attaching the associated `Type` to each item.
387 |     fn consult_index<'d, K, Q>(
388 |         &'d self,
389 |         index: &'d BTreeIndex<TypeId, K>,
390 |         key: &Q,
391 |     ) -> impl Iterator<Item = (TypeId, &'d Type)> + 'd
392 |         where K: std::borrow::Borrow<Q> + Ord,
393 |               Q: Ord + ?Sized + 'd,
394 |     {
395 |         self.consult_index_generic(index, key, &self.types)
396 |     }
397 | 
398 |     /// Looks up `key` in `index`, and then transforms the result by (1) copying
399 |     /// the goffs and (2) attaching the associated `Type` to each item.
400 |     fn consult_index_generic<'d, I, K, Q, E>(
401 |         &'d self,
402 |         index: &'d BTreeIndex<I, K>,
403 |         key: &Q,
404 |         lookup: &'d BTreeMap<I, E>,
405 |     ) -> impl Iterator<Item = (I, &'d E)> + 'd
406 |         where K: std::borrow::Borrow<Q> + Ord,
407 |               Q: Ord + ?Sized,
408 |               I: Copy + Eq + Ord,
409 |               E: 'd,
410 |     {
411 |         index
412 |             .get(key)
413 |             .into_iter()
414 |             .flat_map(move |set| {
415 |                 set.iter().map(move |&goff| (goff, &lookup[&goff]))
416 |             })
417 |     }
418 | }
419 | 
/// Builder that accumulates the type information from a program and produces a
/// `DebugDb` database.
///
/// This is primarily intended as a write-only sink for type information. After
/// everything is stuffed in, `build()` will validate the information, generate
/// indices, and produce a `DebugDb` database.
#[derive(Clone, Debug)]
pub struct DebugDbBuilder {
    /// Stack of enclosing namespace path components, maintained by
    /// `path_component` and used by `format_path` to qualify names.
    path: Vec<String>,
    /// Endianness of the target system.
    endian: gimli::RunTimeEndian,
    /// Whether the target uses 64-bit pointers (otherwise 32-bit).
    is_64: bool,
    /// Types recorded so far, keyed by ID.
    types: BTreeMap<TypeId, Type>,
    /// Type declarations recorded by `record_decl`, keyed by path-qualified
    /// name. `build` tries to bind each of these to a type of the same name.
    decls: BTreeMap<String, BTreeSet<TypeId>>,
    /// Call frame information, passed through to the `DebugDb` unchanged.
    debug_frame: gimli::DebugFrame<gimli::EndianReader<gimli::RunTimeEndian, Arc<[u8]>>>,

    /// Subprograms recorded so far, keyed by ID.
    subprograms: BTreeMap<ProgramId, Subprogram>,
    /// Line number rows recorded so far, grouped by the address they were
    /// recorded under.
    line_table: BTreeMap<u64, Vec<LineNumberRow>>,
    /// Static variables recorded so far, keyed by ID.
    variables: BTreeMap<VarId, StaticVariable>,

    /// Raw symbols as `(name, address)` pairs, in the order recorded.
    raw_symbols: Vec<(String, u64)>,
}
441 | 
impl DebugDbBuilder {
    /// Creates a new `DebugDbBuilder` for information from a program with the
    /// given endianness and pointer width.
    ///
    /// `debug_frame` is stored as-is and handed to the resulting `DebugDb`.
    pub fn new(
        endian: gimli::RunTimeEndian,
        is_64: bool,
        debug_frame: gimli::DebugFrame<gimli::EndianReader<gimli::RunTimeEndian, Arc<[u8]>>>,
    ) -> Self {
        Self {
            endian,
            path: vec![],
            is_64,
            debug_frame,
            types: BTreeMap::new(),
            decls: BTreeMap::new(),
            subprograms: BTreeMap::new(),
            line_table: BTreeMap::new(),
            variables: BTreeMap::new(),
            raw_symbols: vec![],
        }
    }

    /// Consumes the builder and produces the finished `DebugDb`.
    ///
    /// In order, this:
    ///
    /// 1. indexes the recorded types by name,
    /// 2. attempts to unify types that share a name,
    /// 3. binds recorded declarations to a type of the same name,
    /// 4. walks every type reference, adding a `Type::Unresolved` placeholder
    ///    for each referenced ID that has no recorded type,
    /// 5. builds the array, subroutine, variable, address and raw symbol
    ///    indices, and
    /// 6. checks the inlined subroutine trees.
    ///
    /// Ambiguous or unresolved declarations, and variables whose type is
    /// missing, are reported as warnings on stderr and do not fail the build.
    ///
    /// # Errors
    ///
    /// Returns `ParseError::UnboundSubroutine` if any inlined subroutine, at
    /// any nesting depth, has no abstract origin.
    pub fn build(self) -> Result<DebugDb, ParseError> {
        let mut types = self.types;

        // Build type name index.
        let mut type_name_index = index_by_key(&types, |_, t| match t {
            Type::Struct(s) => Some(s.name.clone()),
            Type::Enum(s) => Some(s.name.clone()),
            Type::Base(s) => Some(s.name.clone()),
            Type::CEnum(s) => Some(s.name.clone()),
            Type::Union(s) => Some(s.name.clone()),
            Type::Pointer(s) => s.name.clone(),
            _ => None,
        });

        // Attempt to unify similarly named types, narrowing the type name index
        // as we go.
        let mut u = crate::unify::State::new(&types);
        for homonyms in type_name_index.values_mut() {
            // Try every unordered pair of same-named types exactly once: each
            // type is popped and compared against those still in the workset.
            let mut workset = homonyms.clone();
            let mut group_u = crate::unify::State::new(&types);
            while let Some(t) = workset.pop_first() {
                for o in &workset {
                    t.try_unify(o, &mut group_u);
                }
            }
            // Reduce the set of homonyms for this name to only those types that
            // were not found to have equivalent partners.
            homonyms.retain(|t| !group_u.is_subbed(*t));
            u.merge(group_u);
        }

        // Attempt to resolve decls.
        let mut ambiguous_decl_count = 0;
        for (name, decl_ids) in &self.decls {
            if let Some(tids) = type_name_index.get(name) {
                if tids.len() != 1 {
                    // The name is still ambiguous after unification.
                    eprintln!("WARN: decl ambiguous; {name} could be:");
                    for tid in tids {
                        eprintln!("- {tid:x?}");
                    }
                    ambiguous_decl_count += 1;
                }
                // Assume it's the first one.
                let tid = *tids.iter().next().unwrap();
                for &alias in decl_ids {
                    u.equate(alias, tid);
                }
            } else {
                eprintln!("WARN: unresolved declaration {name}:");
                for id in decl_ids {
                    eprintln!(" - {id:x?}");
                }
            }
        }
        if ambiguous_decl_count > 0 {
            eprintln!("WARN: {ambiguous_decl_count} ambiguous declarations found");
        }

        // Placeholders for referenced type IDs that have no recorded type.
        // Collected separately because `types` is borrowed during validation.
        let mut unresolved_types = BTreeMap::new();

        // Checks one type reference after canonicalization. A dangling
        // reference is currently recorded as a placeholder rather than being
        // reported, which is why the error type is `Infallible`.
        let mut check = |mut id| -> Result<(), Infallible> {
            id = u.canonicalize(id);
            if types.contains_key(&id) {
                Ok(())
            } else {
                unresolved_types.insert(id, Type::Unresolved(Unresolved {
                    offset: id.0,
                }));
                Ok(()) // TODO
            }
        };

        // Validate that the world is complete and internally consistent.
        for t in types.values() {
            match t {
                Type::Base(_) => (),
                Type::CEnum(_) => (),
                Type::Unresolved(_) => (),

                Type::Struct(s) => {
                    for ttp in &s.template_type_parameters {
                        check(ttp.type_id)?;
                    }
                    for m in &s.members {
                        check(m.type_id)?;
                    }
                }
                Type::Union(s) => {
                    for ttp in &s.template_type_parameters {
                        check(ttp.type_id)?;
                    }
                    for m in &s.members {
                        check(m.type_id)?;
                    }
                }
                Type::Enum(s) => {
                    for ttp in &s.template_type_parameters {
                        check(ttp.type_id)?;
                    }
                    match &s.shape {
                        VariantShape::Zero => (),
                        VariantShape::One(variant) => {
                            check(variant.member.type_id)?;
                        }
                        VariantShape::Many {
                            member, variants, ..
                        } => {
                            check(member.type_id)?;
                            for v in variants.values() {
                                check(v.member.type_id)?;
                            }
                        }
                    }
                }
                Type::Array(s) => {
                    check(s.element_type_id)?;
                    // The index type is synthetic, but, might as well.
                    check(s.index_type_id)?;
                }
                Type::Pointer(s) => {
                    check(s.type_id)?;
                }
                Type::Subroutine(s) => {
                    if let Some(t) = s.return_type_id {
                        check(t)?;
                    }
                    for &t in &s.formal_parameters {
                        check(t)?;
                    }
                }
            }
        }

        let type_canon = u.finish();
        // Add the placeholders before the remaining indices are built.
        types.extend(unresolved_types);

        // Build array index.
        let array_index = index_by_key(&types, |_, t| match t {
            Type::Array(a) => Some((a.element_type_id, a.count)),
            _ => None,
        });
        // Build subroutine index. This is more complex in shape than the other
        // indices.
        let subroutine_index = {
            let mut ind = BTreeMap::<_, BTreeIndex<_, _>>::new();
            for (k, v) in &types {
                if let Type::Subroutine(s) = v {
                    ind.entry(s.formal_parameters.clone())
                        .or_default()
                        .entry(s.return_type_id)
                        .or_default()
                        .insert(*k);
                }
            }
            ind
        };

        let variables_by_name = index_by_key(&self.variables, |_, v| Some(v.name.clone()));

        // Build address map.
        let mut entities_by_address: BTreeMap<_, Vec<_>> = BTreeMap::new();
        for (&vid, v) in &self.variables {
            let Some(t) = types.get(&v.type_id) else {
                eprintln!("WARN: type of variable {} not found: {:x?}",
                    v.name, v.type_id);
                continue;
            };
            let sz = t.byte_size_early(
                if self.is_64 { 8 } else { 4 },
                |t| types.get(&t),
            );
            // Variables whose size cannot be determined get no address entry.
            if let Some(sz) = sz {
                entities_by_address.entry(v.location)
                    .or_default()
                    .push(AddressRange {
                        range: v.location..v.location + sz,
                        entity: EntityId::Var(vid),
                    });
            }
        }
        for (&pid, p) in &self.subprograms {
            if let Some(pc_range) = p.pc_range.clone() {
                entities_by_address.entry(pc_range.start)
                    .or_default()
                    .push(AddressRange {
                        range: pc_range,
                        entity: EntityId::Prog(pid),
                    });
            }
        }

        // Recursively requires every inlined subroutine in a tree to have an
        // abstract origin.
        fn check_inl(inl: &InlinedSubroutine) -> Result<(), ParseError> {
            if inl.abstract_origin.is_none() {
                return Err(ParseError::UnboundSubroutine(inl.offset));
            }
            for inner in &inl.inlines {
                check_inl(inner)?;
            }
            Ok(())
        }

        // Check that inlined subroutines match our expectations.
        for subprogram in self.subprograms.values() {
            for inl in &subprogram.inlines {
                check_inl(inl)?;
            }
        }

        let type_rcanon = invert(&type_canon);

        // `raw_symbols` holds (name, address) pairs. Swapping each pair makes
        // the address the indexed key, giving a name -> addresses map.
        let raw_symbols_by_name = index_by_key(
            self.raw_symbols.iter().map(|(k, v)| (v, k)),
            |_, name| Some(name.to_string()),
        );

        // Unswapped, the name is the indexed key, giving address -> names.
        let raw_symbols_by_address = index_by_key(
            self.raw_symbols.iter().map(|(k, v)| (k, v)),
            |_, addr| Some(*addr),
        );


        Ok(DebugDb {
            endian: self.endian,
            types,
            type_canon,
            type_rcanon,
            is_64: self.is_64,
            subprograms: self.subprograms,
            line_table: self.line_table,
            variables: self.variables,
            debug_frame: self.debug_frame,
            type_name_index,
            array_index,
            subroutine_index,
            variables_by_name,
            entities_by_address,
            raw_symbols_by_name,
            raw_symbols_by_address,
        })
    }

    /// Records a raw symbol `name` at address `addr`.
    ///
    /// The same name or address may be recorded more than once; `build`
    /// groups the entries into per-name and per-address sets.
    pub fn record_raw_symbol(&mut self, addr: u64, name: String) {
        self.raw_symbols.push((name, addr));
    }

    /// Adds a type to the database.
    ///
    /// It's unusual to call this from outside the library, but it might be
    /// useful if you have additional type information from some outside source.
    ///
    /// The type is keyed by its own offset, so recording another type with
    /// the same offset replaces the earlier one.
    pub fn record_type(&mut self, t: impl Into<Type>) {
        let t = t.into();
        self.types.insert(TypeId(t.offset()), t);
    }

    /// Adds a subprogram, keyed by its offset. A previously recorded
    /// subprogram with the same offset is replaced.
    pub fn record_subprogram(&mut self, t: Subprogram) {
        self.subprograms.insert(ProgramId(t.offset), t);
    }

    /// Adds a static variable, keyed by its offset. A previously recorded
    /// variable with the same offset is replaced.
    pub fn record_variable(&mut self, t: StaticVariable) {
        self.variables.insert(VarId(t.offset), t);
    }

    /// Appends a line number row under address `addr`. Rows recorded under
    /// the same address are kept in the order they were recorded.
    pub fn record_line_table_row(&mut self, addr: u64, r: LineNumberRow) {
        self.line_table.entry(addr)
            .or_default()
            .push(r)
    }

    /// Records that `id` is a declaration of the type called `name`.
    ///
    /// `name` is qualified with the current namespace path before being
    /// stored.
    pub fn record_decl(&mut self, name: impl std::fmt::Display, id: TypeId) {
        self.decls.entry(self.format_path(name))
            .or_default()
            .insert(id);
    }

    /// Qualifies `name` with the current namespace path, joining components
    /// with `::`. With an empty path, `name` is returned on its own.
    fn format_path(&self, name: impl std::fmt::Display) -> String {
        if self.path.is_empty() {
            name.to_string()
        } else {
            format!("{}::{}", self.path.join("::"), name)
        }
    }

    /// Pushes a path component onto the namespace path stack and runs `body`,
    /// popping the stack when it completes.
    ///
    /// Returns whatever `body` returns.
    fn path_component<T>(
        &mut self,
        c: impl Into<String>,
        body: impl FnOnce(&mut Self) -> T,
    ) -> T {
        self.path.push(c.into());
        let result = body(self);
        self.path.pop();
        result
    }
}
760 | 
/// Builds a reverse index over a key-value table.
///
/// `table` yields `(key, value)` reference pairs in any order.
///
/// `project` is called once per pair, in iteration order, and returns the
/// datum that pair should be filed under, or `None` to leave the pair out of
/// the index.
///
/// The result maps each datum to the set of (cloned) keys whose pairs
/// projected to it.
fn index_by_key<'t, K: 't, V: 't, T>(
    table: impl IntoIterator<Item = (&'t K, &'t V)>,
    mut project: impl FnMut(&K, &V) -> Option<T>,
) -> BTreeMap<T, BTreeSet<K>>
where
    T: Ord,
    K: Ord + Clone,
{
    table
        .into_iter()
        .filter_map(|(key, value)| {
            project(key, value).map(|datum| (datum, key))
        })
        .fold(
            BTreeMap::<T, BTreeSet<K>>::new(),
            |mut index, (datum, key)| {
                index.entry(datum).or_default().insert(key.clone());
                index
            },
        )
}
787 | 
/// Errors that can occur while turning an object file into a `DebugDb`.
///
/// Each variant wraps the underlying error and can be created from it with
/// `From`, so `?` converts automatically.
#[derive(Clone, Debug, Error)]
pub enum FileError {
    /// The DWARF data was read, but this crate could not make sense of it.
    #[error("DWARF data structures could not be understood")]
    Parse(#[from] ParseError),
    /// The `object` crate reported an error.
    #[error("Object file format parsing error")]
    Obj(#[from] object::Error),
    /// `gimli` reported an error.
    #[error("DWARF failed to parse")]
    Dwarf(#[from] gimli::Error),
}
797 | 
798 | /// Parses type information from an `object::File`.
799 | pub fn parse_file(
800 |     object: &object::File,
801 | ) -> Result<DebugDb, FileError> {
802 |     let endian = if object.is_little_endian() {
803 |         gimli::RunTimeEndian::Little
804 |     } else {
805 |         gimli::RunTimeEndian::Big
806 |     };
807 | 
808 |     let load_section =
809 |         |id: gimli::SectionId| -> Result<RtArcReader, FileError> {
810 |             let cow = object.section_by_name(id.name())
811 |                 .map(|sect| sect.uncompressed_data())
812 |                 .transpose()?
813 |                 .unwrap_or_else(Default::default);
814 |             Ok(gimli::EndianReader::new(Arc::from(cow), endian))
815 |         };
816 | 
817 |     let dwarf = gimli::Dwarf::load(&load_section)?;
818 | 
819 |     use gimli::Section;
820 |     let debug_frame = gimli::DebugFrame::load(load_section)?;
821 | 
822 |     let mut builder = DebugDbBuilder::new(endian, object.is_64(), debug_frame);
823 | 
824 |     let mut iter = dwarf.units();
825 |     while let Some(header) = iter.next()? {
826 |         let unit = dwarf.unit(header)?;
827 | 
828 |         if let Some(lp) = &unit.line_program {
829 |             let lp = lp.clone();
830 |             let mut rows = lp.rows();
831 | 
832 |             let mut last_row: Option<LineNumberRow> = None;
833 |             while let Some((header, row)) = rows.next_row()? {
834 |                 let file = if let Some(file) = row.file(header) {
835 |                     if let Some(directory) = file.directory(header) {
836 |                         format!(
837 |                             "{}/{}",
838 |                             String::from_utf8_lossy(dwarf.attr_string(&unit, directory)?.bytes()),
839 |                             String::from_utf8_lossy(
840 |                             dwarf
841 |                             .attr_string(&unit, file.path_name())?
842 |                             .bytes())
843 |                         )
844 |                     } else {
845 |                         String::from_utf8_lossy(
846 |                         dwarf
847 |                             .attr_string(&unit, file.path_name())?
848 |                             .bytes())
849 |                             .into_owned()
850 |                     }
851 |                 } else {
852 |                     "???".into()
853 |                 };
854 |                 if let Some(mut pending) = last_row.take() {
855 |                     pending.pc_range.end = row.address();
856 |                     builder.record_line_table_row(pending.pc_range.start, pending);
857 |                 }
858 | 
859 |                 if !row.end_sequence() {
860 |                     last_row = Some(LineNumberRow {
861 |                         pc_range: row.address()..0,
862 |                         file,
863 |                         line: row.line(),
864 |                         column: match row.column() {
865 |                             gimli::ColumnType::Column(c) => Some(c),
866 |                             gimli::ColumnType::LeftEdge => None,
867 |                         },
868 |                     });
869 |                 }
870 |             }
871 |             if last_row.is_some() {
872 |                 eprintln!("WARN: line number program not terminated by end sequence");
873 |             }
874 |         }
875 |         let mut entries = unit.entries();
876 |         while let Some(()) = entries.next_entry()? {
877 |             if entries.current().is_none() {
878 |                 break;
879 |             }
880 |             dwarf_parser::parse_entry(&dwarf, &unit, &mut entries, &mut builder)?;
881 |         }
882 |     }
883 | 
884 |     for sym in object.symbols() {
885 |         let Ok(name) = sym.name() else { continue; };
886 |         let addr = sym.address();
887 |         builder.record_raw_symbol(addr, name.to_string());
888 |     }
889 | 
890 |     Ok(builder.build()?)
891 | }
892 | 
/// Associates a range of addresses with the entity it belongs to.
#[derive(Clone, Debug)]
pub struct AddressRange {
    /// The addresses covered (`std::ops::Range` is half-open: the end is
    /// excluded).
    pub range: std::ops::Range<u64>,
    /// The variable or subprogram this range is attributed to.
    pub entity: EntityId,
}
898 | 
/// Identifies either a variable or a subprogram.
#[derive(Copy, Clone, Debug)]
pub enum EntityId {
    /// A variable, by its `VarId`.
    Var(VarId),
    /// A subprogram, by its `ProgramId`.
    Prog(ProgramId),
}
904 | 
/// Inverts `map`: the result maps each distinct value of `map` to the set of
/// keys that were associated with it.
fn invert<K, V>(map: &BTreeMap<K, V>) -> BTreeMap<V, BTreeSet<K>>
    where K: Eq + Ord + Clone,
          V: Eq + Ord + Clone,
{
    map.iter().fold(
        BTreeMap::new(),
        |mut inverted: BTreeMap<V, BTreeSet<K>>, (key, value)| {
            inverted
                .entry(value.clone())
                .or_default()
                .insert(key.clone());
            inverted
        },
    )
}
915 | 


--------------------------------------------------------------------------------
/src/load.rs:
--------------------------------------------------------------------------------
  1 | //! Support for extracting values from a program image, processing them using
  2 | //! debug information, and turning them into Rust values in the observing
  3 | //! program.
  4 | 
  5 | use crate::{Encoding, Enum, Type, DebugDb, Variant, VariantShape};
  6 | use gimli::Endianity;
  7 | use rangemap::RangeInclusiveMap;
  8 | use thiserror::Error;
  9 | use std::convert::{TryFrom, Infallible};
 10 | 
/// Types that can be reconstructed from the state of a [`Machine`], guided by
/// debug information.
pub trait Load: Sized {
    /// Loads a value of this type from `machine`, starting at address `addr`.
    ///
    /// `ty` is the debug-info description of the data at `addr`, and `world`
    /// is the database used to look up any other types that `ty` refers to.
    ///
    /// Returns a [`LoadError`] if `ty` doesn't have the shape this Rust type
    /// expects, if the required bytes are not available, or if accessing the
    /// machine fails.
    fn from_state<M: Machine>(
        machine: &M,
        addr: u64,
        world: &DebugDb,
        ty: &Type,
    ) -> Result<Self, LoadError<M::Error>>;
}
 19 | 
/// A source of program memory that values can be loaded from.
pub trait Machine {
    /// Error type that indicates that we had a failure to access machine state.
    type Error;
    /// Reads memory in the program's address space (or on a physically
    /// addressed system, _the_ address space) starting at `address`. Up to
    /// `dest.len()` bytes will be read, and copied into `dest` starting from
    /// the beginning.
    ///
    /// "Success" here means that access did not fail, so the rest of the output
    /// is valid. In this case, `read_memory` will return `Ok(n)`, where `n` is
    /// the number of bytes it was able to read starting at `address`. **Note
    /// that this value may be smaller than you requested, or zero.** This
    /// indicates that fewer than `dest.len()` contiguous bytes _exist_ starting
    /// at `address`. This may be due to: address space holes, incomplete dumps,
    /// reading an ELF file without a RAM image, etc.
    ///
    /// These holes are a valid part of the machine state, and so this is not
    /// failure.
    ///
    /// Failure happens if we can't _access_ the machine state to find this out,
    /// or to get the data -- for instance, if a USB transaction to a JTAG probe
    /// fails, or if we get a filesystem error reading an ELF file. In that
    /// case, we'll return `Err`.
    fn read_memory(&self, address: u64, dest: &mut [u8]) -> Result<usize, Self::Error>;
}
 45 | 
/// A [`Machine`] backed by a single in-memory byte image.
///
/// Its `read_memory` implementation uses the requested address directly as
/// an index into the image.
#[derive(Clone)]
pub struct ImgMachine {
    /// The bytes of the image; index 0 corresponds to address 0.
    img: Vec<u8>,
}
 50 | 
 51 | impl ImgMachine {
 52 |     pub fn new(img: impl Into<Vec<u8>>) -> Self {
 53 |         Self {
 54 |             img: img.into(),
 55 |         }
 56 |     }
 57 | }
 58 | 
 59 | impl Machine for ImgMachine {
 60 |     type Error = Infallible;
 61 | 
 62 |     fn read_memory(&self, address: u64, dest: &mut [u8]) -> Result<usize, Self::Error> {
 63 |         let Ok(address) = usize::try_from(address) else { return Ok(0) };
 64 |         let end = address.checked_add(dest.len())
 65 |             .unwrap_or(usize::MAX);
 66 |         let end = usize::min(end, self.img.len());
 67 |         let Some(chunk) = end.checked_sub(address) else { return Ok(0) };
 68 | 
 69 |         dest[..chunk].copy_from_slice(&self.img[address..end]);
 70 |         Ok(chunk)
 71 |     }
 72 | }
 73 | 
 74 | impl Machine for RangeInclusiveMap<u64, Vec<u8>> {
 75 |     type Error = Infallible;
 76 | 
 77 |     fn read_memory(&self, address: u64, dest: &mut [u8]) -> Result<usize, Self::Error> {
 78 |         let Some((range, segment)) = self.get_key_value(&address) else { return Ok(0) };
 79 |         let offset = address - range.start();
 80 | 
 81 |         let Ok(offset) = usize::try_from(offset) else { return Ok(0) };
 82 |         let end = offset.checked_add(dest.len())
 83 |             .unwrap_or(usize::MAX);
 84 |         let end = usize::min(end, segment.len());
 85 |         let Some(chunk) = end.checked_sub(offset) else { return Ok(0) };
 86 | 
 87 |         dest[..chunk].copy_from_slice(&segment[offset..end]);
 88 |         Ok(chunk)
 89 |     }
 90 | }
 91 | 
/// Errors produced while loading a value through [`Load`].
///
/// Most variants describe a mismatch between the debug-info type that was
/// supplied and the shape the Rust type expects. `E` is the error type of the
/// [`Machine`] being read, carried by the `Machine` variant.
#[derive(Clone, Debug, Error)]
pub enum LoadError<E> {
    #[error("tuple type missing member {0}")]
    MissingTupleMember(usize),
    #[error("struct was not tuple-like")]
    NotATuple,
    #[error("not a struct")]
    NotAStruct,
    #[error("expected encoding {expected:?}, type had encoding {got:?}")]
    WrongEncoding { expected: Encoding, got: Encoding },
    #[error("expected type with size {expected}, but type had size {got}")]
    WrongSize { expected: u64, got: u64 },
    #[error("base type required")]
    NotABase,
    #[error("enum type required")]
    NotAnEnum,
    #[error("C-like enum type required")]
    NotACEnum,
    #[error("pointer type required")]
    NotAPointer,
    #[error("array type is not finite and can't be loaded")]
    InfiniteArray,
    #[error("arrays with non-zero lower bounds ({0}) are not supported")]
    NonZeroLowerBound(u64),
    #[error("array has element type without defined size")]
    UnsizedElement,
    #[error("array too big: {count} x {elt_size}-byte elements")]
    ArrayTooBig {
        count: u64,
        elt_size: u64,
    },
    #[error("type too big for this platform: {0} bytes")]
    TypeTooBig(u64),
    #[error("array type required")]
    NotAnArray,
    #[error("expected enum with {expected} variants, found {got}")]
    WrongVariantCount { expected: usize, got: usize },
    #[error("unexpected variant: {0}")]
    UnexpectedVariant(String),
    #[error("expected struct/tuple with {expected} members, found {got}")]
    WrongMemberCount { expected: usize, got: usize },
    #[error("can't load an uninhabited (empty) enum")]
    Uninhabited,
    #[error("discriminator value {0} not valid for type")]
    BadDiscriminator(u64),
    #[error("unsupported type (TODO)")]
    UnsupportedType,
    #[error("expected member `{0}` not found")]
    MissingMember(String),
    #[error("a type named {expected} was required, but found: {got}")]
    WrongTypeName { expected: String, got: String},
    /// The machine was reachable, but returned fewer bytes than the value
    /// needs.
    #[error("some of the bytes required to load this type are not present in the machine")]
    DataUnavailable,

    /// Accessing the machine itself failed; wraps the machine's own error.
    #[error("an error occurred accessing the underlying machine state")]
    Machine(#[from] E),
}
149 | 
150 | /*
151 | 
152 | impl<A: Load, B: Load> Load for (A, B) {
153 |     fn from_state<M: Machine>(
154 |         machine: &M,
155 |         addr: u64,
156 |         world: &DebugDb,
157 |         ty: &Type,
158 |     ) -> Result<Self, LoadError<M::Error>> {
159 |         if let Type::Struct(s) = ty {
160 |             if s.tuple_like {
161 |                 let m0 = s.unique_member("__0")
162 |                     .ok_or(LoadError::MissingTupleMember(0))?;
163 |                 let m0ty = world.type_by_id(m0.type_id).unwrap();
164 |                 let m1 = s.unique_member("__1")
165 |                     .ok_or(LoadError::MissingTupleMember(0))?;
166 |                 let m1ty = world.type_by_id(m1.type_id).unwrap();
167 |                 Ok((
168 |                     A::from_buffer(
169 |                         buffer,
170 |                         addr + usize::try_from(m0.location).unwrap(),
171 |                         world,
172 |                         m0ty,
173 |                     )?,
174 |                     B::from_buffer(
175 |                         buffer,
176 |                         addr + usize::try_from(m1.location).unwrap(),
177 |                         world,
178 |                         m1ty,
179 |                     )?,
180 |                 ))
181 |             } else {
182 |                 Err(LoadError::NotATuple)
183 |             }
184 |         } else {
185 |             Err(LoadError::NotAStruct)
186 |         }
187 |     }
188 | }
189 | */
190 | 
191 | fn generic_base_load<M: Machine, B, const N: usize>(
192 |     encoding: Encoding,
193 |     ty: &Type,
194 |     machine: &M,
195 |     addr: u64,
196 |     extract: impl FnOnce([u8; N]) -> B,
197 | ) -> Result<B, LoadError<M::Error>> {
198 |     if let Type::Base(b) = ty {
199 |         if b.encoding != encoding {
200 |             return Err(LoadError::WrongEncoding {
201 |                 expected: encoding,
202 |                 got: b.encoding,
203 |             });
204 |         }
205 |         if b.byte_size != N as u64 {
206 |             return Err(LoadError::WrongSize {
207 |                 expected: N as u64,
208 |                 got: b.byte_size,
209 |             });
210 |         }
211 |         let mut ary = [0; N];
212 |         let n = machine.read_memory(addr, &mut ary)?;
213 |         if n != N {
214 |             return Err(LoadError::DataUnavailable);
215 |         }
216 |         Ok(extract(ary))
217 |     } else {
218 |         Err(LoadError::NotABase)
219 |     }
220 | }
221 | 
222 | impl Load for u8 {
223 |     fn from_state<M: Machine>(
224 |         machine: &M,
225 |         addr: u64,
226 |         _world: &DebugDb,
227 |         ty: &Type,
228 |     ) -> Result<u8, LoadError<M::Error>> {
229 |         generic_base_load(
230 |             Encoding::Unsigned,
231 |             ty,
232 |             machine,
233 |             addr,
234 |             |[b]| b,
235 |         )
236 |     }
237 | }
238 | 
239 | impl Load for i8 {
240 |     fn from_state<M: Machine>(
241 |         machine: &M,
242 |         addr: u64,
243 |         _world: &DebugDb,
244 |         ty: &Type,
245 |     ) -> Result<Self, LoadError<M::Error>> {
246 |         generic_base_load(
247 |             Encoding::Signed,
248 |             ty,
249 |             machine,
250 |             addr,
251 |             |[b]| b as i8,
252 |         )
253 |     }
254 | }
255 | 
/// Generates a [`Load`] impl for a multi-byte integer type.
///
/// Arguments, in order: the Rust type, its size in bytes, the `Encoding`
/// variant the debug info must report, and the name of the endian-aware read
/// method used to decode the bytes. The byte order comes from the `DebugDb`
/// passed to `from_state`.
macro_rules! base_impl {
    ($int:ty, $bytes:expr, $encoding:ident, $decode:ident) => {
        impl Load for $int {
            fn from_state<M: Machine>(
                machine: &M,
                addr: u64,
                world: &DebugDb,
                ty: &Type,
            ) -> Result<Self, LoadError<M::Error>> {
                let decode = |raw| world.endian().$decode(&raw);
                generic_base_load::<_, $int, $bytes>(
                    Encoding::$encoding,
                    ty,
                    machine,
                    addr,
                    decode,
                )
            }
        }
    };
}
276 | 
// `Load` impls for the multi-byte unsigned integers. Each line gives the
// type, its size in bytes, the required encoding and the read method.
base_impl!(u16, 2, Unsigned, read_u16);
base_impl!(u32, 4, Unsigned, read_u32);
base_impl!(u64, 8, Unsigned, read_u64);

// The same for the multi-byte signed integers.
base_impl!(i16, 2, Signed, read_i16);
base_impl!(i32, 4, Signed, read_i32);
base_impl!(i64, 8, Signed, read_i64);
284 | 
285 | impl Load for core::sync::atomic::AtomicU32 {
286 |     fn from_state<M: Machine>(
287 |         machine: &M,
288 |         addr: u64,
289 |         world: &DebugDb,
290 |         ty: &Type,
291 |     ) -> Result<Self, LoadError<M::Error>> {
292 |         let Type::Struct(ty) = ty else {
293 |             return Err(LoadError::NotAStruct);
294 |         };
295 |         if ty.name != "core::sync::atomic::AtomicU32" {
296 |             return Err(LoadError::WrongTypeName {
297 |                 expected: "core::sync::atomic::AtomicU32".to_string(),
298 |                 got: ty.name.clone(),
299 |             });
300 |         }
301 |         let Some(m_v) = ty.unique_member("v") else {
302 |             return Err(LoadError::MissingMember("v".to_string()));
303 |         };
304 |         let unsafecell = world.type_by_id(m_v.type_id).unwrap();
305 |         let Type::Struct(unsafecell) = unsafecell else {
306 |             return Err(LoadError::NotAStruct);
307 |         };
308 |         if unsafecell.name != "core::cell::UnsafeCell<u32>" {
309 |             return Err(LoadError::WrongTypeName {
310 |                 expected: "core::cell::UnsafeCell<u32>".to_string(),
311 |                 got: unsafecell.name.clone(),
312 |             });
313 |         }
314 |         let Some(m_value) = unsafecell.unique_member("value") else {
315 |             return Err(LoadError::MissingMember("value".to_string()));
316 |         };
317 | 
318 |         let value_ty = world.type_by_id(m_value.type_id).unwrap();
319 |         
320 |         let x = u32::from_state(machine, addr, world, value_ty)?;
321 |         Ok(core::sync::atomic::AtomicU32::new(x))
322 |     }
323 | }
324 | 
325 | impl<T: Load> Load for Vec<T> {
326 |     fn from_state<M: Machine>(
327 |         machine: &M,
328 |         addr: u64,
329 |         world: &DebugDb,
330 |         ty: &Type,
331 |     ) -> Result<Self, LoadError<M::Error>> {
332 |         if let Type::Array(s) = ty {
333 |             let count = s.count.ok_or(LoadError::InfiniteArray)?;
334 |             if s.lower_bound != 0 {
335 |                 return Err(LoadError::NonZeroLowerBound(s.lower_bound));
336 |             }
337 |             let elty = world.type_by_id(s.element_type_id).unwrap();
338 | 
339 |             let elt_size = elty
340 |                 .byte_size(world)
341 |                 .ok_or(LoadError::UnsizedElement)?;
342 |             let elt_size = elt_size.max(elty.alignment(world).unwrap_or(0));
343 | 
344 |             let mut elts = Vec::with_capacity(usize::try_from(count).unwrap());
345 |             for i in 0..count {
346 |                 elts.push(T::from_state(machine, addr + i * elt_size, world, elty)?);
347 |             }
348 |             Ok(elts)
349 |         } else {
350 |             Err(LoadError::NotAnArray)
351 |         }
352 |     }
353 | }
354 | 
355 | /*
356 | 
357 | /// A `Load` impl for Option-shaped types.
358 | ///
359 | /// This will work for any enum with two variants, where one is named None and
360 | /// has no payload, and the other is named Some and has one field.
361 | impl<T: Load> Load for Option<T> {
362 |     fn from_state<M: Machine>(
363 |         machine: &M,
364 |         addr: u64,
365 |         world: &DebugDb,
366 |         ty: &Type,
367 |     ) -> Result<Self, LoadError<M::Error>> {
368 |         let Type::Enum(s) = ty else {
369 |             return Err(LoadError::NotAnEnum);
370 |         };
371 |         // Option-like enums have two variants.
372 |         if let VariantShape::Many { variants, .. } = &s.shape {
373 |             if variants.len() != 2 {
374 |                 return Err(LoadError::WrongVariantCount {
375 |                     expected: 2,
376 |                     got: variants.len(),
377 |                 });
378 |             }
379 |             // Those variants are named None and Some.
380 |             for v in variants.values() {
381 |                 if let Some(n) = &v.member.name {
382 |                     if n == "None" || n == "Some" {
383 |                         continue;
384 |                     }
385 |                     return Err(LoadError::UnexpectedVariant(n.clone()));
386 |                 }
387 |             }
388 |         }
389 |         // Ok, that's the extent of the type validation I'm comfortable
390 |         // doing here for performance reasons.
391 | 
392 |         let v = choose_variant(buffer, addr, world, s)?;
393 |         let is_some = v.member.name.as_ref().unwrap() == "Some";
394 |         let vty = world.type_by_id(v.member.type_id).unwrap();
395 |         // Option-like enums have tuple variants.
396 |         let Type::Struct(s) = vty else {
397 |             // TODO: this error is probably not descriptive enough.
398 |             return Err(LoadError::NotAStruct);
399 |         };
400 |         if !s.tuple_like {
401 |             // TODO: this error is probably not descriptive enough.
402 |             return Err(LoadError::NotATuple);
403 |         }
404 |         if is_some {
405 |             if s.members.len() != 1 {
406 |                 return Err(LoadError::WrongMemberCount {
407 |                     expected: 1,
408 |                     got: s.members.len(),
409 |                 });
410 |             }
411 | 
412 |             let m = &s.members[0];
413 |             let mty =
414 |                 world.type_by_id(m.type_id).unwrap();
415 |             let ma = addr + usize::try_from(m.location).unwrap();
416 |             Ok(Some(T::from_buffer(buffer, ma, world, mty)?))
417 |         } else {
418 |             if !s.members.is_empty() {
419 |                 return Err(LoadError::WrongMemberCount {
420 |                     expected: 0,
421 |                     got: s.members.len(),
422 |                 });
423 |             }
424 |             Ok(None)
425 |         }
426 |     }
427 | }
428 | */
429 | 
430 | pub(crate) fn choose_variant<'e, M: Machine>(
431 |     machine: &M,
432 |     addr: u64,
433 |     world: &'e DebugDb,
434 |     e: &'e Enum,
435 | ) -> Result<&'e Variant, LoadError<M::Error>> {
436 |     match &e.shape {
437 |         VariantShape::Zero => {
438 |             Err(LoadError::Uninhabited)
439 |         }
440 |         VariantShape::One(v) => Ok(v),
441 |         VariantShape::Many {
442 |             member, variants, ..
443 |         } => {
444 |             let dtype_id = member.type_id;
445 |             let dty = world.type_by_id(dtype_id).unwrap();
446 |             let da = addr + member.location;
447 |             let dsize = usize::try_from(dty.byte_size(world).unwrap()).unwrap();
448 |             let d = load_unsigned(world.endian(), machine, da, dsize)?
449 |                 .ok_or(LoadError::DataUnavailable)?;
450 |             let v = variants
451 |                 .get(&Some(d))
452 |                 .or_else(|| variants.get(&None))
453 |                 .ok_or(LoadError::BadDiscriminator(d))?;
454 |             Ok(v)
455 |         }
456 |     }
457 | }
458 | 
459 | pub(crate) fn load_unsigned<M: Machine>(
460 |     endian: gimli::RunTimeEndian,
461 |     machine: &M,
462 |     addr: u64,
463 |     size: usize,
464 | ) -> Result<Option<u64>, M::Error> {
465 |     let mut buffer = [0; 8];
466 |     let buffer = &mut buffer[..size];
467 |     let n = machine.read_memory(addr, buffer)?;
468 |     Ok(if n < size {
469 |         None
470 |     } else {
471 |         Some(match size {
472 |             1 => u64::from(buffer[0]),
473 |             2 => u64::from(endian.read_u16(buffer)),
474 |             4 => u64::from(endian.read_u32(buffer)),
475 |             8 => endian.read_u64(buffer),
476 |             _ => unimplemented!(),
477 |         })
478 |     })
479 | }
480 | /*
481 | #[cfg(test)]
482 | mod test {
483 |     use super::*;
484 |     use crate::{TypeId, DebugDbBuilder};
485 | 
486 |     #[derive(Debug, Default)]
487 |     struct OffsetMaker {
488 |         next_offset: usize,
489 |     }
490 | 
491 |     impl OffsetMaker {
492 |         fn next(&mut self) -> gimli::UnitSectionOffset {
493 |             let n = self.next_offset;
494 |             self.next_offset += 1;
495 |             gimli::DebugInfoOffset(n).into()
496 |         }
497 |     }
498 | 
499 |     fn make_option_u16(
500 |         builder: &mut DebugDbBuilder,
501 |         om: &mut OffsetMaker,
502 |     ) -> TypeId {
503 |         let u16_goff = om.next();
504 |         builder.record_type(crate::Base {
505 |             name: "u16".to_string(),
506 |             encoding: Encoding::Unsigned,
507 |             byte_size: 2,
508 |             offset: u16_goff.into(),
509 |         });
510 | 
511 |         let none_goff = om.next();
512 |         builder.record_type(crate::Struct {
513 |             name: "core::option::Option<u16>::None".to_string(),
514 |             byte_size: 4,
515 |             alignment: Some(2),
516 |             offset: none_goff,
517 |             tuple_like: true,
518 |             template_type_parameters: vec![],
519 |             members: indexmap::indexmap! {},
520 |         });
521 | 
522 |         let some_goff = om.next();
523 |         builder.record_type(crate::Struct {
524 |             name: "core::option::Option<u16>::Some".to_string(),
525 |             byte_size: 4,
526 |             alignment: Some(2),
527 |             offset: some_goff,
528 |             tuple_like: true,
529 |             template_type_parameters: vec![],
530 |             members: indexmap::indexmap! {
531 |                 "__0".to_string() => crate::Member {
532 |                     name: Some("__0".to_string()),
533 |                     artificial: false,
534 |                     alignment: Some(2),
535 |                     location: 2,
536 |                     offset: om.next(),
537 |                     type_id: u16_goff.into(),
538 |                 },
539 |             },
540 |         });
541 | 
542 |         let option_goff = om.next();
543 |         builder.record_type(crate::Enum {
544 |             name: "core::option::Option<u16>".to_string(),
545 |             byte_size: 4,
546 |             alignment: Some(2),
547 |             template_type_parameters: vec![],
548 |             shape: VariantShape::Many {
549 |                 discr: om.next(),
550 |                 member: crate::Member {
551 |                     name: None,
552 |                     artificial: true,
553 |                     type_id: u16_goff.into(),
554 |                     alignment: Some(2),
555 |                     location: 0,
556 |                     offset: om.next(),
557 |                 },
558 |                 variants: indexmap::indexmap! {
559 |                     Some(0) => crate::Variant {
560 |                         offset: om.next(),
561 |                         member: crate::Member {
562 |                             name: Some("None".to_string()),
563 |                             artificial: false,
564 |                             alignment: Some(2),
565 |                             location: 0,
566 |                             type_id: none_goff.into(),
567 |                             offset: om.next(),
568 |                         },
569 |                     },
570 |                     Some(1) => crate::Variant {
571 |                         offset: om.next(),
572 |                         member: crate::Member {
573 |                             name: Some("Some".to_string()),
574 |                             artificial: false,
575 |                             alignment: Some(2),
576 |                             location: 0,
577 |                             type_id: some_goff.into(),
578 |                             offset: om.next(),
579 |                         },
580 |                     },
581 |                 },
582 |             },
583 |             offset: option_goff.into(),
584 |         });
585 | 
586 |         option_goff.into()
587 |     }
588 | 
589 |     #[test]
590 |     fn load_option_u16() {
591 |         let mut om = OffsetMaker::default();
592 |         let mut builder =
593 |             DebugDbBuilder::new(gimli::RunTimeEndian::Little, false);
594 | 
595 |         let option_goff = make_option_u16(&mut builder, &mut om);
596 | 
597 |         let world = builder.build().unwrap();
598 |         let oty = world.type_by_id(option_goff).unwrap();
599 | 
600 |         let img = [0, 0, 0xAB, 0xCD];
601 |         assert_eq!(
602 |             Option::<u16>::from_buffer(&img, 0, &world, oty).unwrap(),
603 |             None
604 |         );
605 |         let img = [1, 0, 0xAB, 0xCD];
606 |         assert_eq!(
607 |             Option::<u16>::from_buffer(&img, 0, &world, oty).unwrap(),
608 |             Some(0xCDAB)
609 |         );
610 |     }
611 | 
612 |     #[test]
613 |     fn load_u8_array() {
614 |         let mut om = OffsetMaker::default();
615 |         let mut builder =
616 |             DebugDbBuilder::new(gimli::RunTimeEndian::Little, false);
617 | 
618 |         let u8_goff = om.next();
619 |         builder.record_type(crate::Base {
620 |             name: "u8".to_string(),
621 |             encoding: Encoding::Unsigned,
622 |             byte_size: 1,
623 |             offset: u8_goff,
624 |         });
625 | 
626 |         let index_type_goff = om.next();
627 |         builder.record_type(crate::Base {
628 |             name: "__ARRAY_INDEX_TYPE__".to_string(),
629 |             encoding: Encoding::Unsigned,
630 |             byte_size: 8,
631 |             offset: index_type_goff,
632 |         });
633 | 
634 |         let ary_goff = om.next();
635 |         builder.record_type(crate::Array {
636 |             element_type_id: TypeId(u8_goff),
637 |             index_type_id: TypeId(index_type_goff),
638 |             lower_bound: 0,
639 |             count: Some(5),
640 |             offset: ary_goff,
641 |         });
642 | 
643 |         let world = builder.build().unwrap();
644 |         let aty = world.type_by_id(TypeId(ary_goff)).unwrap();
645 | 
646 |         let img = [0, 1, 2, 3, 4];
647 |         let ary: Vec<u8> = Load::from_buffer(&img, 0, &world, aty).unwrap();
648 |         assert_eq!(ary, [0, 1, 2, 3, 4]);
649 |     }
650 | }
651 | */
652 | 


--------------------------------------------------------------------------------
/src/model.rs:
--------------------------------------------------------------------------------
  1 | //! Data model types.
  2 | //!
  3 | //! This is our abstract description of types and routines in a program.
  4 | 
  5 | use std::borrow::Cow;
  6 | use std::hash::Hash;
  7 | use std::num::NonZeroU64;
  8 | use crate::DebugDb;
  9 | use indexmap::IndexMap;
 10 | 
 11 | /// Identifies a specific type within a program, using its offset within the
 12 | /// debug section(s).
 13 | ///
 14 | /// Sometimes types appear more than once in debug info. In that case, each type
 15 | /// will have a distinct `TypeId`.
 16 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
 17 | pub struct TypeId(pub gimli::UnitSectionOffset);
 18 | 
 19 | impl From<gimli::UnitSectionOffset> for TypeId {
 20 |     fn from(x: gimli::UnitSectionOffset) -> Self {
 21 |         Self(x)
 22 |     }
 23 | }
 24 | 
 25 | /// Identifies a subprogram within a program -- a function or subroutine.
 26 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
 27 | pub struct ProgramId(pub gimli::UnitSectionOffset);
 28 | 
 29 | impl From<gimli::UnitSectionOffset> for ProgramId {
 30 |     fn from(x: gimli::UnitSectionOffset) -> Self {
 31 |         Self(x)
 32 |     }
 33 | }
 34 | 
 35 | /// Identifies a static variable.
 36 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
 37 | pub struct VarId(pub gimli::UnitSectionOffset);
 38 | 
 39 | impl From<gimli::UnitSectionOffset> for VarId {
 40 |     fn from(x: gimli::UnitSectionOffset) -> Self {
 41 |         Self(x)
 42 |     }
 43 | }
 44 | 
/// Information about a type from a program.
///
/// There are many kinds of types; this enum distinguishes between them. Each
/// variant wraps a struct of the same name holding the details for that kind.
#[derive(Debug, Clone)]
pub enum Type {
    Struct(Struct),
    Enum(Enum),
    Base(Base),
    CEnum(CEnum),
    Array(Array),
    Pointer(Pointer),
    Union(Union),
    // `Type::alignment` reports no alignment for the two kinds below.
    Subroutine(Subroutine),
    // NOTE(review): presumably a placeholder for a type whose definition
    // could not be resolved -- confirm against the parser.
    Unresolved(Unresolved),
}
 60 | 
 61 | impl Type {
 62 |     /// Returns the location of the type's definition within the debug info
 63 |     /// section(s).
 64 |     pub fn offset(&self) -> gimli::UnitSectionOffset {
 65 |         // TODO so this field should clearly get factored out....
 66 |         match self {
 67 |             Self::Struct(s) => s.offset,
 68 |             Self::Enum(s) => s.offset,
 69 |             Self::Base(s) => s.offset,
 70 |             Self::CEnum(s) => s.offset,
 71 |             Self::Array(s) => s.offset,
 72 |             Self::Pointer(s) => s.offset,
 73 |             Self::Union(s) => s.offset,
 74 |             Self::Subroutine(s) => s.offset,
 75 |             Self::Unresolved(s) => s.offset,
 76 |         }
 77 |     }
 78 | 
 79 |     /// Determines the alignment of the type, in bytes.
 80 |     ///
 81 |     /// Not all types have alignment.
 82 |     pub fn alignment(&self, world: &DebugDb) -> Option<u64> {
 83 |         match self {
 84 |             Self::Struct(s) => s.alignment,
 85 |             Self::Enum(s) => s.alignment,
 86 |             Self::Base(s) => s.alignment,
 87 |             Self::CEnum(s) => s.alignment,
 88 |             Self::Union(s) => Some(s.alignment),
 89 |             Self::Array(a) => {
 90 |                 let eltty = world.type_by_id(a.element_type_id)?;
 91 |                 eltty.alignment(world)
 92 |             }
 93 |             Self::Pointer(_) => Some(world.pointer_size() as u64),
 94 | 
 95 |             _ => None,
 96 |         }
 97 |     }
 98 | 
 99 |     /// Determines the inherent size of the type, in bytes. The inherent size is
100 |     /// the size that can be computed without referring to the debug information
101 |     /// of other types.
102 |     ///
103 |     /// Not all types have sizes; even fewer have inherent sizes. This is an
104 |     /// implementation detail of the full `byte_size` algorithm.
105 |     pub fn inherent_byte_size(&self) -> Option<u64> {
106 |         match self {
107 |             Self::Struct(s) => s.byte_size,
108 |             Self::Enum(s) => s.byte_size,
109 |             Self::Base(s) => Some(s.byte_size),
110 |             Self::CEnum(s) => Some(s.byte_size),
111 |             Self::Union(s) => Some(s.byte_size),
112 | 
113 |             _ => None,
114 |         }
115 |     }
116 | 
117 |     pub(crate) fn byte_size_early<'a>(
118 |         &'a self,
119 |         pointer_size: usize,
120 |         lookup_type: impl Fn(TypeId) -> Option<&'a Type>,
121 |     ) -> Option<u64> {
122 |         let mut factor = 1;
123 |         let mut t = self;
124 |         loop {
125 |             match t.inherent_byte_size() {
126 |                 Some(x) => break Some(factor * x),
127 |                 None => match t {
128 |                     Self::Array(a) => {
129 |                         factor *= a.count?;
130 |                         t = lookup_type(a.element_type_id)?;
131 |                     }
132 |                     Self::Pointer(_) => break Some(factor * pointer_size as u64),
133 |                     Self::Subroutine(_) => break None,
134 | 
135 |                     _ => panic!("inconsistency btw byte_size_early and inherent_byte_size"),
136 |                 },
137 |             }
138 |         }
139 |     }
140 | 
141 |     /// Determines the size of the type, in bytes.
142 |     ///
143 |     /// Not all types have sizes.
144 |     pub fn byte_size(&self, world: &DebugDb) -> Option<u64> {
145 |         self.byte_size_early(
146 |             world.pointer_size(),
147 |             |t| world.type_by_id(t),
148 |         )
149 |     }
150 | 
151 |     /// Determines the name of the type.
152 |     pub fn name(&self, world: &DebugDb) -> Cow<'_, str> {
153 |         match self {
154 |             Self::Struct(s) => (&s.name).into(),
155 |             Self::Enum(s) => (&s.name).into(),
156 |             Self::Base(s) => (&s.name).into(),
157 |             Self::CEnum(s) => (&s.name).into(),
158 |             Self::Union(s) => (&s.name).into(),
159 |             Self::Pointer(s) => {
160 |                 if let Some(assigned_name) = &s.name {
161 |                     assigned_name.into()
162 |                 } else {
163 |                     let pointee_name = world
164 |                         .type_by_id(s.type_id)
165 |                         .map(|t| t.name(world))
166 |                         .unwrap_or("???".into());
167 |                     format!("*_ {pointee_name}").into()
168 |                 }
169 |             }
170 |             Self::Array(a) => {
171 |                 let eltname = world
172 |                     .type_by_id(a.element_type_id)
173 |                     .map(|t| t.name(world))
174 |                     .unwrap_or("???".into());
175 | 
176 |                 if let Some(n) = a.count {
177 |                     format!("[{}; {}]", eltname, n).into()
178 |                 } else {
179 |                     format!("[{}; ???]", eltname).into()
180 |                 }
181 |             }
182 |             Self::Subroutine(_) => "subroutine".into(), // TODO
183 |             Self::Unresolved(_) => "<UNRESOLVED>".into(),
184 |         }
185 |     }
186 | }
187 | 
188 | impl From<Base> for Type {
189 |     fn from(x: Base) -> Self {
190 |         Self::Base(x)
191 |     }
192 | }
193 | 
194 | impl From<Struct> for Type {
195 |     fn from(x: Struct) -> Self {
196 |         Self::Struct(x)
197 |     }
198 | }
199 | 
200 | impl From<Enum> for Type {
201 |     fn from(x: Enum) -> Self {
202 |         Self::Enum(x)
203 |     }
204 | }
205 | 
206 | impl From<Union> for Type {
207 |     fn from(x: Union) -> Self {
208 |         Self::Union(x)
209 |     }
210 | }
211 | 
212 | impl From<Pointer> for Type {
213 |     fn from(x: Pointer) -> Self {
214 |         Self::Pointer(x)
215 |     }
216 | }
217 | 
218 | impl From<Array> for Type {
219 |     fn from(x: Array) -> Self {
220 |         Self::Array(x)
221 |     }
222 | }
223 | 
224 | impl From<CEnum> for Type {
225 |     fn from(x: CEnum) -> Self {
226 |         Self::CEnum(x)
227 |     }
228 | }
229 | 
230 | impl From<Subroutine> for Type {
231 |     fn from(x: Subroutine) -> Self {
232 |         Self::Subroutine(x)
233 |     }
234 | }
235 | 
236 | /// A "base type," also known as a "primitive type," is not constructed by
237 | /// aggregating other types. Examples in Rust include `u32` and `bool`.
238 | ///
239 | /// Note that, in Rust in particular, there are several "base types" that you
240 | /// might not think of as such. Both `()` and `!` are represented as zero-sized
241 | /// base types.
242 | #[derive(Clone, Debug)]
243 | pub struct Base {
244 |     /// Name of the type.
245 |     pub name: String,
246 |     /// How to interpret the type's bits.
247 |     pub encoding: Encoding,
248 |     /// Number of bytes in a value of the type.
249 |     pub byte_size: u64,
250 |     /// Explicit alignment, if given.
251 |     pub alignment: Option<u64>,
252 |     /// Location in debug info.
253 |     pub offset: gimli::UnitSectionOffset,
254 | }
255 | 
256 | /// A "struct type" describes a record containing members, each of which has its
257 | /// own type.
258 | ///
259 | /// Rust defines both normal structs (with named members) and _tuple structs_
260 | /// (with numbered members). This type is used for both. A tuple struct will
261 | /// have the `tuple_like` flag set, and its members will be in numeric order.
262 | /// (They can also be accessed by names of the form `__0`, `__1`, etc.)
263 | #[derive(Debug, Clone)]
264 | pub struct Struct {
265 |     /// Name of the struct type.
266 |     pub name: String,
267 |     /// Size of a value of this struct in bytes.
268 |     pub byte_size: Option<u64>,
269 |     /// Alignment required for values of this struct.
270 |     pub alignment: Option<u64>,
271 |     /// If this struct is generic, a list of template parameters. Non-generic
272 |     /// structs have an empty list.
273 |     pub template_type_parameters: Vec<TemplateTypeParameter>,
274 |     /// When `true`, this struct appears to originate from a Rust "tuple struct"
275 |     /// with numbered fields. When `false`, this is a normal struct.
276 |     pub tuple_like: bool,
277 |     /// Member fields of the struct.
278 |     ///
279 |     /// These are in an `IndexMap` so that order is preserved. The members are
280 |     /// recorded in the order they appear in the debug info, which in practice
281 |     /// is also the order they're declared in the source. They are _not_ in
282 |     /// order of position in the struct in memory.
283 |     pub members: Vec<Member>,
284 |     /// Location in debug info.
285 |     pub offset: gimli::UnitSectionOffset,
286 |     /// Location of the declaration of this subprogram in the source.
287 |     pub decl_coord: DeclCoord,
288 | }
289 | 
290 | impl Struct {
291 |     pub fn unique_member(&self, name: &str) -> Option<&Member> {
292 |         let mut matches = self.members.iter()
293 |             .filter(|m| m.name.as_deref() == Some(name));
294 |         let first = matches.next()?;
295 |         if matches.next().is_some() {
296 |             // There is no _unique_ member by this name.
297 |             None
298 |         } else {
299 |             Some(first)
300 |         }
301 |     }
302 | }
303 | 
304 | /// An "enum type," in the Rust sense of the term, is a tagged union (or
305 | /// discriminated union). It can contain multiple different types of values, but
306 | /// only one at a time, and the options are distinguished through a
307 | /// "discriminator" member -- except if there is only one variant, in which case
308 | /// the compiler usually eliminates that member. See `VariantShape` for details.
309 | ///
310 | /// This library distinguishes between Rust-style enums (this type) and C-style
311 | /// enums (the `CEnum`) type. Rust programs will generate C-style enums when
312 | /// none of the enum variants have a payload or fields.
313 | #[derive(Debug, Clone)]
314 | pub struct Enum {
315 |     /// Name of the enum type.
316 |     pub name: String,
317 |     /// Size of a value of the enum type, in bytes.
318 |     pub byte_size: Option<u64>,
319 |     /// Alignment required for values of this enum.
320 |     pub alignment: Option<u64>,
321 |     /// If this struct is generic, a list of template parameters. Non-generic
322 |     /// structs have an empty list.
323 |     pub template_type_parameters: Vec<TemplateTypeParameter>,
324 |     /// Description of the variants in this enum.
325 |     pub shape: VariantShape,
326 |     /// Location in debug info.
327 |     pub offset: gimli::UnitSectionOffset,
328 | }
329 | 
330 | /// A "C-style enum" type -- a type with several value variants, each of which
331 | /// can be represented by an integer.
332 | #[derive(Debug, Clone)]
333 | pub struct CEnum {
334 |     /// Name of the enum type.
335 |     pub name: String,
336 |     /// Representation type.
337 |     pub repr_type_id: TypeId,
338 |     /// Flag indicating that this enum is a distinct type, rather than
339 |     /// evaluating as values of some base type. This is set for all enums in
340 |     /// Rust, some enums in C++, and no enums in C.
341 |     pub enum_class: bool,
342 |     /// Size of a value of the enum type, in bytes.
343 |     pub byte_size: u64,
344 |     /// Alignment required for values of this enum.
345 |     pub alignment: Option<u64>,
346 |     /// Variants ("enumerators") of this type.
347 |     pub enumerators: IndexMap<u64, Enumerator>,
348 |     /// Location in debug info.
349 |     pub offset: gimli::UnitSectionOffset,
350 | }
351 | 
352 | /// An array type.
353 | ///
354 | /// An array consists of an element type and a count. Not all array types in
355 | /// DWARF have counts, but in Rust, they do.
356 | ///
357 | /// Array types can also technically have a `lower_bound` that is not 0, but in
358 | /// practice to observe this you need to link with a Modula or Fortran binary.
359 | #[derive(Debug, Clone)]
360 | pub struct Array {
361 |     /// Type of elements of the array.
362 |     pub element_type_id: TypeId,
363 |     /// Type of the array index. This is synthetic and rarely useful; all Rust
364 |     /// arrays point to the same index type.
365 |     pub index_type_id: TypeId,
366 |     /// First index in the array. Always 0 in Rust and C.
367 |     pub lower_bound: u64,
368 |     /// Number of elements in the array, if specified.
369 |     pub count: Option<u64>,
370 |     /// Location in debug info.
371 |     pub offset: gimli::UnitSectionOffset,
372 | }
373 | 
374 | /// A pointer type.
375 | ///
376 | /// There are many flavors of pointers -- `const`, not-`const`, Rust references,
377 | /// C raw pointers, etc. This models them all. The differences between them are
378 | /// not present in DWARF -- though they can be inferred from the `name`.
379 | ///
380 | /// Pointer size is implicit and fixed for the whole program; it can be queried
381 | /// from the `DebugDb` instance.
382 | #[derive(Debug, Clone)]
383 | pub struct Pointer {
384 |     /// Type of data this points _to_.
385 |     pub type_id: TypeId,
386 |     /// Name of the pointer type. Compilers don't name all pointer types.
387 |     pub name: Option<String>,
388 |     /// Location in debug info.
389 |     pub offset: gimli::UnitSectionOffset,
390 | }
391 | 
392 | /// A C-style non-tagged union.
393 | ///
394 | /// A union has multiple members, like a struct, except that those members are
395 | /// overlaid in memory, and only one is valid at a time. Unlike an `Enum`, there
396 | /// is no information in union to tell you _which_ variant is valid.
397 | #[derive(Debug, Clone)]
398 | pub struct Union {
399 |     /// Name of this union type.
400 |     pub name: String,
401 |     /// Size of a value of this union type, in bytes.
402 |     pub byte_size: u64,
403 |     /// Alignment required for a value of this union type, in bytes.
404 |     pub alignment: u64,
405 |     /// If this union is generic, this contains an array of template type
406 |     /// parameters. If it is not generic, this is empty.
407 |     pub template_type_parameters: Vec<TemplateTypeParameter>,
408 |     /// Members of the union in declaration order.
409 |     pub members: Vec<Member>,
410 |     /// Location in debug info.
411 |     pub offset: gimli::UnitSectionOffset,
412 | }
413 | 
414 | /// A subroutine type. Note that this is different from a `Subprogram` -- this
415 | /// is used as the pointed-to type for function pointers.
416 | #[derive(Clone, Debug)]
417 | pub struct Subroutine {
418 |     /// Type of value returned, if any. In both C and Rust, functions that
419 |     /// return nothing (`void` and `()`, respectively) have no return type,
420 |     /// rather than `Some(typeid_of_void)`.
421 |     pub return_type_id: Option<TypeId>,
422 |     /// Types of parameters to a routine of this type.
423 |     pub formal_parameters: Vec<TypeId>,
424 |     /// Location in debug info.
425 |     pub offset: gimli::UnitSectionOffset,
426 | }
427 | 
428 | /// A type that was not found in the debug info.
429 | ///
430 | /// Usually this is because it's not actually used in the program, and only
431 | /// indirectly referenced.
432 | #[derive(Debug, Clone)]
433 | pub struct Unresolved {
434 |     /// Location in debug info.
435 |     pub offset: gimli::UnitSectionOffset,
436 | }
437 | 
/// The encodings a `Base` type may use.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// Unsigned integer.
    Unsigned,
    /// Signed integer.
    Signed,
    /// Unsigned char. Used for Rust `char` (with `byte_size == 4`), for C
    /// `unsigned char` (`byte_size == 1`), and sometimes for plain C `char`,
    /// depending on the platform ABI.
    UnsignedChar,
    /// Signed char. Presumably used for C `signed char` (`byte_size == 1`),
    /// and sometimes for plain C `char`, depending on the platform ABI.
    SignedChar,
    /// Boolean: 0 is false, anything else is true.
    ///
    /// Rust always uses 1 for true, but DWARF does not appear to require
    /// that, hence the looser definition.
    Boolean,
    /// IEEE754 floating point number.
    Float,
    /// IEEE754 complex floating point number, i.e. probably a pair of floats.
    /// Support for this encoding is currently limited, because none of our
    /// programs use complex floats.
    ///
    /// This encoding is specific to the `__Complex` C language extension and
    /// is _not used_ for Rust complex numbers.
    ComplexFloat,

    /// UTF-encoded character.
    ///
    /// NOTE(review): presumably corresponds to DWARF's `DW_ATE_UTF`; confirm
    /// against the parser.
    UtfChar,
}
469 | 
470 | /// Information on a type parameter binding for an instance of a generic type.
471 | ///
472 | /// This is called "Template Type Parameter" because that's what DWARF calls it,
473 | /// because DWARF is rather C-specific.
474 | #[derive(Debug, Clone)]
475 | pub struct TemplateTypeParameter {
476 |     /// Name of parameter.
477 |     pub name: String,
478 |     /// Type the parameter is bound to.
479 |     pub type_id: TypeId,
480 | }
481 | 
482 | /// A component of a struct or union.
483 | #[derive(Debug, Clone, Eq, PartialEq)]
484 | pub struct Member {
485 |     /// Name of the member. Not all members have names, though in Rust they all
486 |     /// do.
487 |     pub name: Option<String>,
488 |     /// If `true`, this member is compiler-generated and will not make very much
489 |     /// sense to the user.
490 |     pub artificial: bool,
491 |     /// Type of data stored in this member.
492 |     pub type_id: TypeId,
493 |     /// Alignment specified for this member. If missing, check the alignment for
494 |     /// `type_id`.
495 |     pub alignment: Option<u64>,
496 |     /// Offset of this member within the enclosing type.
497 |     pub location: u64,
498 |     /// Location in debug info.
499 |     pub offset: gimli::UnitSectionOffset,
500 |     pub decl_coord: DeclCoord,
501 | }
502 | 
503 | /// Description of the potential variant shapes for a Rust-style enum (tagged
504 | /// union).
505 | #[derive(Debug, Clone)]
506 | pub enum VariantShape {
507 |     /// The enum has no variants. No discriminator member has been generated.
508 |     /// These enums are typically zero-sized.
509 |     Zero,
510 |     /// The enum contains only one variant, and so the compiler has not
511 |     /// generated a discriminator member, because it would go unused. The
512 |     /// `Variant` is embedded directly.
513 |     One(Variant),
514 |     /// The enum contains a discriminator. This generally implies that there are
515 |     /// two or more variants, though nothing in the spec requires this.
516 |     Many {
517 |         /// Location of the definition of the discriminator in debug info.
518 |         discr: gimli::UnitSectionOffset,
519 |         /// Member describing the discriminator. Note that this member will
520 |         /// typically be nameless.
521 |         member: Member,
522 |         /// Variants that may be selected depending on the value of the
523 |         /// discriminator. The key `None` is used for a "default" `Variant` that
524 |         /// is chosen if none of the explicit values match; this is used to
525 |         /// implement various enum layout optimizations in Rust.
526 |         variants: IndexMap<Option<u64>, Variant>,
527 |     },
528 | }
529 | 
530 | /// A variant of a Rust-style enum.
531 | #[derive(Debug, Clone)]
532 | pub struct Variant {
533 |     /// Member containing the variant's data. An enum in Rust that is not
534 |     /// C-style always has data in every variant, but if the variant has no
535 |     /// fields from the user's perspective, the embedded data will be an empty
536 |     /// struct.
537 |     pub member: Member,
538 |     /// Location in debug info.
539 |     pub offset: gimli::UnitSectionOffset,
540 |     pub decl_coord: DeclCoord,
541 | }
542 | 
543 | /// One of the options in a C-style enum type.
544 | #[derive(Debug, Clone)]
545 | pub struct Enumerator {
546 |     /// Name of this variant.
547 |     pub name: String,
548 |     /// Numeric value associated with this invariant.
549 |     pub const_value: u64,
550 |     /// Location in debug info.
551 |     pub offset: gimli::UnitSectionOffset,
552 | }
553 | 
554 | /// A function or subroutine in a program.
555 | ///
556 | /// Note that this is different from `Subroutine`, which defines the _type_ of a
557 | /// function; this defines the _identity_ of a function.
558 | #[derive(Clone, Debug)]
559 | pub struct Subprogram {
560 |     /// Name of the subprogram. Not all subprograms have names. TODO: why not?
561 |     pub name: Option<String>,
562 |     /// Range of PC values that are contained within the code generated for this
563 |     /// subprogram, when code has been generated at the top level (i.e. the
564 |     /// subprogram is not inlined).
565 |     ///
566 |     /// Subprograms that are completely inlined will often have nonsense
567 |     /// `pc_range` values starting at address 0.
568 |     pub pc_range: Option<std::ops::Range<u64>>,
569 |     /// Location of the declaration of this subprogram in the source.
570 |     pub decl_coord: DeclCoord,
571 |     /// If this subprogram is an instance of a generic subprogram, this provides
572 |     /// the bindings for the type parameters. If this subprogram is not generic,
573 |     /// this is empty.
574 |     pub template_type_parameters: Vec<TemplateTypeParameter>,
575 |     /// Type returned by subprogram, or `None` for `()`/`void`.
576 |     pub return_type_id: Option<TypeId>,
577 |     /// Information about parameters needed by this subprogram.
578 |     pub formal_parameters: Vec<SubParameter>,
579 |     /// Subprograms that have been inlined into this one.
580 |     pub inlines: Vec<InlinedSubroutine>,
581 |     /// If this subprogram represents a specialization of another, this provides
582 |     /// a link to the prototype. The prototype may have information that this
583 |     /// record does not, such as a valid name.
584 |     pub abstract_origin: Option<gimli::UnitSectionOffset>,
585 |     /// Actual symbol name used to refer to this subprogram, if it is different
586 |     /// from `name` -- which it tends to be in languages with hierarchical
587 |     /// namespaces.
588 |     pub linkage_name: Option<String>,
589 |     /// If `true`, this subprogram is expected not to return, meaning that any
590 |     /// code after a call to this subprogram is theoretically unreachable.
591 |     ///
592 |     /// In Rust, `noreturn` functions tend to have `!` as their return type.
593 |     pub noreturn: bool,
594 |     /// Location in debug info.
595 |     pub offset: gimli::UnitSectionOffset,
596 | }
597 | 
598 | /// Parameter to a subprogram.
599 | ///
600 | /// This is more detailed than the `formal_parameters` used for function type
601 | /// definitions.
602 | ///
603 | /// Note that it's common for subprogram parameters to be abstract. In that
604 | /// case, most useful content will be missing from `SubParameter`, and you'll
605 | /// need to go consult the `abstract_origin`.
606 | #[derive(Clone, Debug)]
607 | pub struct SubParameter {
608 |     /// Name of parameter, if available.
609 |     pub name: Option<String>,
610 |     /// Location of declaration of this parameter in the source.
611 |     pub decl_coord: DeclCoord,
612 |     /// Type of the parameter, if available.
613 |     pub type_id: Option<TypeId>,
614 |     /// Reference to a different `SubParameter` that this specializes.
615 |     pub abstract_origin: Option<gimli::UnitSectionOffset>,
616 |     /// Fixed value for this parameter. This can happen in cases where a
617 |     /// specialized `Subprogram` fixes one or more parameter values to
618 |     /// constants.
619 |     ///
620 |     /// TODO: type probably needs to be more general.
621 |     pub const_value: Option<u64>,
622 |     /// Location in debug info.
623 |     pub offset: gimli::UnitSectionOffset,
624 | }
625 | 
/// Source "coordinates": a path, a line number, and a column number.
///
/// Following tradition, lines and columns are both counted from one.
#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct DeclCoord {
    /// Path of the source file, when available.
    pub file: Option<String>,
    /// Line number, when available.
    pub line: Option<NonZeroU64>,
    /// Column number, when available.
    pub column: Option<NonZeroU64>,
}

impl DeclCoord {
    /// Reports whether at least one of the file, line, or column is known.
    pub fn is_useful(&self) -> bool {
        // Only the all-`None` coordinate carries no information.
        !matches!(
            self,
            Self {
                file: None,
                line: None,
                column: None,
            }
        )
    }
}
645 | 
646 | /// Information about a subroutine that has been inlined into a subprogram.
647 | #[derive(Clone, Debug)]
648 | pub struct InlinedSubroutine {
649 |     /// Location of the subprogram abstract root that defines this.
650 |     pub abstract_origin: Option<gimli::UnitSectionOffset>,
651 |     /// Ranges of PC values that are included in this inlined subroutine.
652 |     pub pc_ranges: Vec<gimli::Range>,
653 |     /// Location of the callsite that was inlined.
654 |     pub call_coord: DeclCoord,
655 |     /// Further inlined subroutines within this one.
656 |     pub inlines: Vec<InlinedSubroutine>,
657 |     /// Definition of the formal parameters to this inlined subroutine.
658 |     pub formal_parameters: Vec<SubParameter>,
659 |     /// Location in debug info.
660 |     pub offset: gimli::UnitSectionOffset,
661 | }
662 | 
/// One row of the computed line number table.
#[derive(Debug, Clone)]
pub struct LineNumberRow {
    /// PC values to which this entry applies.
    pub pc_range: std::ops::Range<u64>,
    /// Name of the source file.
    pub file: String,
    /// Line number, when available.
    pub line: Option<NonZeroU64>,
    /// Column number, when available.
    pub column: Option<NonZeroU64>,
}
675 | 
676 | /// Information about a static stack frame associated with a PC value.
677 | ///
678 | /// TODO: the name of this type should become more meaningful as we learn how it
679 | /// is used.
680 | pub struct PcInfo {
681 |     /// Subprogram being run.
682 |     pub subprogram: ProgramId,
683 |     /// File containing code being run.
684 |     pub file: String,
685 |     /// Line number of code being run, if available.
686 |     pub line: Option<NonZeroU64>,
687 |     /// Column number of code being run, if available.
688 |     pub column: Option<NonZeroU64>,
689 | }
690 | 
691 | /// A static variable with a fixed address.
692 | #[derive(Clone, Debug)]
693 | pub struct StaticVariable {
694 |     /// Name of variable.
695 |     pub name: String,
696 |     /// Type contained in variable.
697 |     pub type_id: TypeId,
698 |     /// Location of variable declaration.
699 |     pub decl: DeclCoord,
700 |     /// Address in memory.
701 |     pub location: u64,
702 |     /// Location in debug info.
703 |     pub offset: gimli::UnitSectionOffset,
704 | }
705 | 
706 | pub trait Equiv {
707 |     /// Tests if `self` and `other` are structurally equivalent, such that they
708 |     /// could be unified into a single definition despite appearing in separate
709 |     /// compilation units.
710 |     ///
711 |     /// Returns `None` if there is no way to make the definitions match, or
712 |     /// `Some(tids)` if the definitions match if all the types in `tids` are
713 |     /// also equivalent to each other.
714 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>>;
715 | }
716 | 
717 | impl Equiv for TypeId {
718 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
719 |         Some(vec![(*self, *other)])
720 |     }
721 | }
722 | 
723 | impl Equiv for Member {
724 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
725 |         let self_easy = (&self.name, self.artificial, self.alignment, self.location);
726 |         let other_easy = (&other.name, other.artificial, other.alignment, other.location);
727 |         if self_easy != other_easy {
728 |             return None;
729 |         }
730 | 
731 |         Some(vec![(self.type_id, other.type_id)])
732 |     }
733 | }
734 | 
735 | impl Equiv for Variant {
736 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
737 |         self.member.equiv(&other.member)
738 |     }
739 | }
740 | 
741 | impl Equiv for VariantShape {
742 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
743 |         match (self, other) {
744 |             (Self::Zero, Self::Zero) => Some(vec![]),
745 |             (Self::One(a), Self::One(b)) => a.equiv(b),
746 |             (Self::Many { member: ma, variants: va, .. }, Self::Many { member: mb, variants: vb, .. }) => {
747 |                 let mut conditions = vec![];
748 |                 conditions.extend(ma.equiv(mb)?);
749 |                 conditions.extend(va.equiv(vb)?);
750 |                 Some(conditions)
751 |             }
752 |             _ => None,
753 |         }
754 |     }
755 | }
756 | 
757 | impl Equiv for TemplateTypeParameter {
758 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
759 |         if self.name != other.name {
760 |             return None;
761 |         }
762 | 
763 |         Some(vec![(self.type_id, other.type_id)])
764 |     }
765 | }
766 | 
767 | impl<T> Equiv for Vec<T>
768 |     where T: Equiv,
769 | {
770 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
771 |         if self.len() != other.len() {
772 |             return None;
773 |         }
774 | 
775 |         let mut conditions = vec![];
776 |         for (a, b) in self.iter().zip(other) {
777 |             conditions.extend(a.equiv(b)?);
778 |         }
779 |         Some(conditions)
780 |     }
781 | }
782 | 
783 | impl<T> Equiv for Option<T>
784 |     where T: Equiv,
785 | {
786 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
787 |         match (self, other) {
788 |             (Some(a), Some(b)) => a.equiv(b),
789 |             _ => None
790 |         }
791 |     }
792 | }
793 | 
794 | impl<K, T> Equiv for IndexMap<K, T>
795 |     where T: Equiv,
796 |           K: Eq + Hash,
797 | {
798 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
799 |         if self.len() != other.len() {
800 |             return None;
801 |         }
802 | 
803 |         let mut conditions = vec![];
804 |         for (ak, a) in self {
805 |             conditions.extend(a.equiv(other.get(ak)?)?);
806 |         }
807 |         Some(conditions)
808 |     }
809 | }
810 | 
811 | impl Equiv for Struct {
812 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
813 |         let self_easy = (&self.name, self.byte_size, self.alignment, self.tuple_like);
814 |         let other_easy = (&other.name, other.byte_size, other.alignment, other.tuple_like);
815 |         if self_easy != other_easy {
816 |             return None;
817 |         }
818 | 
819 |         let mut conditions = vec![];
820 |         conditions.extend(self.template_type_parameters.equiv(&other.template_type_parameters)?);
821 |         conditions.extend(self.members.equiv(&other.members)?);
822 | 
823 |         Some(conditions)
824 |     }
825 | }
826 | 
827 | /// Unions must agree on name, size and alignment; their template parameters
828 | /// and then their members contribute the conditions that remain.
829 | impl Equiv for Union {
830 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
831 |         let same_header = self.name == other.name
832 |             && self.byte_size == other.byte_size
833 |             && self.alignment == other.alignment;
834 |         if !same_header {
835 |             return None;
836 |         }
837 |         let mut pending = self.template_type_parameters.equiv(&other.template_type_parameters)?;
838 |         pending.extend(self.members.equiv(&other.members)?);
839 |         Some(pending)
840 |     }
841 | }
842 | 
843 | /// Enums must agree on name, size and alignment; their template parameters
844 | /// and then their variant shape contribute the conditions that remain.
845 | impl Equiv for Enum {
846 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
847 |         let same_header = self.name == other.name
848 |             && self.byte_size == other.byte_size
849 |             && self.alignment == other.alignment;
850 |         if !same_header {
851 |             return None;
852 |         }
853 |         let mut pending = self.template_type_parameters.equiv(&other.template_type_parameters)?;
854 |         pending.extend(self.shape.equiv(&other.shape)?);
855 |         Some(pending)
856 |     }
857 | }
858 | 
859 | /// Pointers with the same name are equivalent provided the types named by
860 | /// their `type_id`s are; that pair is returned as the only condition.
861 | impl Equiv for Pointer {
862 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
863 |         // TODO: should this allow for one unnamed type?
864 |         let same_name = self.name == other.name;
865 | 
866 |         same_name.then(|| vec![(self.type_id, other.type_id)])
867 |     }
868 | }
869 | 
870 | /// Base types are compared on name, encoding, size and alignment only, so
871 | /// a match never produces further conditions.
872 | impl Equiv for Base {
873 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
874 |         let identical = self.name == other.name
875 |             && self.encoding == other.encoding
876 |             && self.byte_size == other.byte_size
877 |             && self.alignment == other.alignment;
878 |         identical.then(Vec::new)
879 |     }
880 | }
881 | 
882 | /// Arrays must agree on lower bound and count; the element and index type
883 | /// pairs are returned as the conditions still to be checked.
884 | impl Equiv for Array {
885 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
886 |         let same_extent =
887 |             self.lower_bound == other.lower_bound && self.count == other.count;
888 | 
889 |         let elements = (self.element_type_id, other.element_type_id);
890 |         let indices = (self.index_type_id, other.index_type_id);
891 |         same_extent.then(|| vec![elements, indices])
892 |     }
893 | }
894 | 
895 | /// Enumerators are equivalent exactly when name and value coincide.
896 | impl Equiv for Enumerator {
897 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
898 |         let same_name = self.name == other.name;
899 |         let same_value = self.const_value == other.const_value;
900 |         (same_name && same_value).then(Vec::new)
901 |     }
902 | }
903 | 
904 | /// C-like enums compare name, enum class, size and alignment first and then
905 | /// defer to the equivalence of their enumerators.
906 | impl Equiv for CEnum {
907 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
908 |         let same_header = self.name == other.name
909 |             && self.enum_class == other.enum_class
910 |             && self.byte_size == other.byte_size
911 |             && self.alignment == other.alignment;
912 |         same_header.then(|| self.enumerators.equiv(&other.enumerators)).flatten()
913 |     }
914 | }
915 | 
916 | /// Subroutine types pair up their return types, then their parameter lists.
917 | impl Equiv for Subroutine {
918 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
919 |         let mut pending = self.return_type_id.equiv(&other.return_type_id)?;
920 |         pending.extend(self.formal_parameters.equiv(&other.formal_parameters)?);
921 |         Some(pending)
922 |     }
923 | }
924 | 
925 | impl Equiv for Type {
926 |     fn equiv(&self, other: &Self) -> Option<Vec<(TypeId, TypeId)>> {
927 |         match (self, other) { // same variant on both sides: compare payloads
928 |             (Self::Base(x), Self::Base(y)) => x.equiv(y),
929 |             (Self::Pointer(x), Self::Pointer(y)) => x.equiv(y),
930 |             (Self::Array(x), Self::Array(y)) => x.equiv(y),
931 |             (Self::Struct(x), Self::Struct(y)) => x.equiv(y),
932 |             (Self::Union(x), Self::Union(y)) => x.equiv(y),
933 |             (Self::Enum(x), Self::Enum(y)) => x.equiv(y),
934 |             (Self::CEnum(x), Self::CEnum(y)) => x.equiv(y),
935 |             (Self::Subroutine(x), Self::Subroutine(y)) => x.equiv(y),
936 |             _ => None, // any other pairing is never equivalent
937 |         }
938 |     }
939 | }
940 | 


--------------------------------------------------------------------------------
/src/unify.rs:
--------------------------------------------------------------------------------
  1 | use crate::TypeId;
  2 | use crate::model::*;
  3 | use indexmap::IndexMap;
  4 | use core::hash::Hash;
  5 | use std::collections::BTreeMap;
  6 | 
  7 | /// Bookkeeping for one unification run over a fixed table of types.
  8 | #[derive(Clone)]
  9 | pub struct State<'a> {
 10 |     /// Substitution map. An entry `(key, value)` records that the type
 11 |     /// `key` has been found equivalent to the earlier type `value`.
 12 |     subs: BTreeMap<TypeId, TypeId>,
 13 | 
 14 |     /// The types being unified, looked up by id; only ever read here.
 15 |     types: &'a BTreeMap<TypeId, Type>,
 16 | }
 17 | 
 18 | impl<'a> State<'a> {
 19 |     /// Creates a state over `types` with no substitutions recorded yet.
 20 |     pub fn new(types: &'a BTreeMap<TypeId, Type>) -> Self {
 21 |         Self {
 22 |             types,
 23 |             subs: BTreeMap::new(),
 24 |         }
 25 |     }
 26 | 
 27 |     /// Replays every substitution recorded in `other` onto `self`.
 28 |     pub fn merge(&mut self, other: Self) {
 29 |         for (from, to) in other.subs {
 30 |             self.equate(from, to);
 31 |         }
 32 |     }
 33 | 
 34 |     /// Follows the chain of substitutions starting at `t` until it reaches
 35 |     /// an id that has no substitution of its own.
 36 |     pub fn canonicalize(&self, t: TypeId) -> TypeId {
 37 |         let mut current = t;
 38 |         while let Some(&next) = self.subs.get(&current) {
 39 |             current = next;
 40 |         }
 41 |         current
 42 |     }
 43 | 
 44 |     /// Whether a substitution has been recorded for `t` itself.
 45 |     pub fn is_subbed(&self, t: TypeId) -> bool {
 46 |         self.subs.contains_key(&t)
 47 |     }
 48 | 
 49 |     /// Looks up the type stored under the canonical id of `t`. Panics if
 50 |     /// the type table has no entry for that id.
 51 |     pub fn find_type(&self, t: TypeId) -> &'a Type {
 52 |         let canonical = self.canonicalize(t);
 53 |         &self.types[&canonical]
 54 |     }
 55 | 
 56 |     /// Consumes the state and maps every known type id to its canonical
 57 |     /// id, leaving out ids that are already canonical.
 58 |     pub fn finish(self) -> BTreeMap<TypeId, TypeId> {
 59 |         self.types
 60 |             .keys()
 61 |             .map(|&t| (t, self.canonicalize(t)))
 62 |             .filter(|(t, canonical)| t != canonical) // prune identity entries
 63 |             .collect()
 64 |     }
 65 | 
 66 |     /// Unifies `a` and `b` such that they will look up to the same typeid in
 67 |     /// the future. The "canonical" type is the lower number of the two.
 68 |     ///
 69 |     /// This does no checking of similarity of `a` and `b`.
 70 |     pub fn equate(&mut self, a: TypeId, b: TypeId) {
 71 |         let (ca, cb) = (self.canonicalize(a), self.canonicalize(b));
 72 |         let (low, high) = (ca.min(cb), ca.max(cb));
 73 |         if low != high {
 74 |             self.subs.insert(high, low); // the higher id defers to the lower
 75 |         }
 76 |     }
 77 | 
 78 |     /// Runs `body` on a copy of the state and keeps the copy only if `body`
 79 |     /// returns `true`; otherwise `self` is left exactly as it was.
 80 |     fn checkpoint(&mut self, body: impl FnOnce(&mut Self) -> bool) -> bool {
 81 |         let mut scratch = self.clone();
 82 |         let succeeded = body(&mut scratch);
 83 |         if succeeded {
 84 |             *self = scratch;
 85 |         }
 86 |         succeeded
 87 |     }
 88 | }
 89 | 
 90 | pub trait Unify { // structural unification of two values of the same kind
 91 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool; // `true` if `self` and `other` unify; implementations may record substitutions in `state`
 92 | }
 93 | 
 94 | /// Sequences unify element by element, in order. A length mismatch fails
 95 | /// immediately, and a failing element discards all substitutions made here.
 96 | impl<T: Unify> Unify for Vec<T> {
 97 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
 98 |         if self.len() != other.len() {
 99 |             return false;
100 |         }
101 |         let mut pairs = self.iter().zip(other);
102 |         state.checkpoint(|scratch| {
103 |             pairs.all(|(ours, theirs)| ours.try_unify(theirs, scratch))
104 |         })
105 |     }
106 | }
107 | 
108 | /// Both absent, or both present and unifiable; a lone `None` never unifies.
109 | impl<T: Unify> Unify for Option<T> {
110 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
111 |         match (self, other) {
112 |             (Some(a), Some(b)) => a.try_unify(b, state),
113 |             (None, None) => true, // nothing to compare, e.g. no return type
114 |             _ => false,
115 |         }
116 |     }
117 | }
118 | 
119 | /// Maps unify when they have the same keys and the values under each key
120 | /// unify. Substitutions are kept only if every entry succeeds.
121 | impl<K: Eq + Hash, T: Unify> Unify for IndexMap<K, T> {
122 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
123 |         if self.len() != other.len() {
124 |             return false;
125 |         }
126 |         state.checkpoint(|state| {
127 |             // A key missing from `other` is a mismatch; `other[key]` would panic.
128 |             self.iter().all(|(key, a)| {
129 |                 other.get(key).map_or(false, |b| a.try_unify(b, state))
130 |             })
131 |         })
132 |     }
133 | }
134 | 
135 | /// Ids unify if they are already canonically equal or their types unify.
136 | impl Unify for TypeId {
137 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
138 |         let cself = state.canonicalize(*self);
139 |         let cother = state.canonicalize(*other);
140 |         if cself == cother {
141 |             return true;
142 |         }
143 |         // Resolve both types *before* equating the ids: afterwards the two ids
144 |         // share a canonical id and `find_type` returns the same type for both.
145 |         let (a, b) = (state.find_type(cself), state.find_type(cother));
146 |         state.checkpoint(|state| {
147 |             state.equate(cself, cother); // provisional; dropped on failure
148 |             a.try_unify(b, state)
149 |         })
150 |     }
151 | }
152 | 
153 | /// Members unify when name, artificial flag, alignment and location agree
154 | /// and their types unify; the type check is skipped on any mismatch.
155 | impl Unify for Member {
156 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
157 |         self.name == other.name
158 |             && self.artificial == other.artificial
159 |             && self.alignment == other.alignment
160 |             && self.location == other.location
161 |             && self.type_id.try_unify(&other.type_id, state)
162 |     }
163 | }
164 | 
165 | impl Unify for Variant { // a variant unifies exactly when its `member` does
166 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
167 |         Unify::try_unify(&self.member, &other.member, state)
168 |     }
169 | }
170 | 
171 | /// Shapes unify only with the same kind of shape. For `Many`, the `member`
172 | /// and then the `variants` must both unify, or nothing is committed.
173 | impl Unify for VariantShape {
174 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
175 |         use VariantShape::{Many, One, Zero};
176 |         match (self, other) {
177 |             (Zero, Zero) => true,
178 |             (One(ours), One(theirs)) => ours.try_unify(theirs, state),
179 |             (
180 |                 Many { member: m1, variants: v1, .. },
181 |                 Many { member: m2, variants: v2, .. },
182 |             ) => state.checkpoint(|scratch| {
183 |                 m1.try_unify(m2, scratch) && v1.try_unify(v2, scratch)
184 |             }),
185 |             _ => false,
186 |         }
187 |     }
188 | }
189 | 
190 | /// Template type parameters unify when they carry the same name and the
191 | /// types bound to them unify.
192 | impl Unify for TemplateTypeParameter {
193 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
194 |         let same_name = self.name == other.name;
195 |         // `&&` short-circuits: types are not unified when the names differ.
196 |         same_name && self.type_id.try_unify(&other.type_id, state)
197 |     }
198 | }
199 | 
200 | /// Structs unify when name, size, alignment and tuple-likeness agree and
201 | /// their template parameters and members unify, committed all-or-nothing.
202 | impl Unify for Struct {
203 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
204 |         let same_header = self.name == other.name
205 |             && self.byte_size == other.byte_size
206 |             && self.alignment == other.alignment
207 |             && self.tuple_like == other.tuple_like;
208 |         same_header
209 |             && state.checkpoint(|scratch| {
210 |                 let params = &self.template_type_parameters;
211 |                 params.try_unify(&other.template_type_parameters, scratch)
212 |                     && self.members.try_unify(&other.members, scratch)
213 |             })
214 |     }
215 | }
216 | 
217 | /// Unions unify when name, size and alignment agree and their template
218 | /// parameters and members unify; nothing is committed unless all succeed.
219 | impl Unify for Union {
220 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
221 |         let same_header = self.name == other.name
222 |             && self.byte_size == other.byte_size
223 |             && self.alignment == other.alignment;
224 | 
225 |         same_header
226 |             && state.checkpoint(|scratch| {
227 |                 let params = &self.template_type_parameters;
228 |                 params.try_unify(&other.template_type_parameters, scratch)
229 |                     && self.members.try_unify(&other.members, scratch)
230 |             })
231 |     }
232 | }
233 | 
234 | /// Enums unify when name, size and alignment agree and their template
235 | /// parameters and variant shape unify; nothing is committed otherwise.
236 | impl Unify for Enum {
237 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
238 |         let same_header = self.name == other.name
239 |             && self.byte_size == other.byte_size
240 |             && self.alignment == other.alignment;
241 | 
242 |         same_header
243 |             && state.checkpoint(|scratch| {
244 |                 let params = &self.template_type_parameters;
245 |                 params.try_unify(&other.template_type_parameters, scratch)
246 |                     && self.shape.try_unify(&other.shape, scratch)
247 |             })
248 |     }
249 | }
250 | 
251 | /// Pointers unify when their names are equal and the types identified by
252 | /// their `type_id`s unify.
253 | impl Unify for Pointer {
254 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
255 |         // TODO: should this allow for one unnamed type?
256 |         let same_name = self.name == other.name;
257 | 
258 |         same_name && self.type_id.try_unify(&other.type_id, state)
259 |     }
260 | }
261 | 
262 | /// Base types unify on name, encoding, size and alignment alone.
263 | impl Unify for Base {
264 |     fn try_unify(&self, other: &Self, _state: &mut State<'_>) -> bool {
265 |         let same_kind = self.name == other.name && self.encoding == other.encoding;
266 |         same_kind && self.byte_size == other.byte_size && self.alignment == other.alignment
267 |     }
268 | }
269 | 
270 | /// Arrays unify when lower bound and count agree and both the element and
271 | /// index types unify; partial progress is discarded on failure.
272 | impl Unify for Array {
273 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
274 |         let same_extent = self.lower_bound == other.lower_bound && self.count == other.count;
275 |         same_extent
276 |             && state.checkpoint(|scratch| {
277 |                 self.element_type_id.try_unify(&other.element_type_id, scratch)
278 |                     && self.index_type_id.try_unify(&other.index_type_id, scratch)
279 |             })
280 |     }
281 | }
282 | 
283 | impl Unify for Enumerator { // plain comparison; `_state` is never touched
284 |     fn try_unify(&self, other: &Self, _state: &mut State<'_>) -> bool {
285 |         (&self.name, &self.const_value) == (&other.name, &other.const_value)
286 |     }
287 | }
288 | 
289 | /// C-like enums unify when name, enum class, size and alignment agree and
290 | /// their enumerators unify.
291 | impl Unify for CEnum {
292 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
293 |         let same_header = self.name == other.name
294 |             && self.enum_class == other.enum_class
295 |             && self.byte_size == other.byte_size
296 |             && self.alignment == other.alignment;
297 |         same_header && self.enumerators.try_unify(&other.enumerators, state)
298 |     }
299 | }
300 | 
301 | impl Unify for Subroutine { // return types first, then parameter lists
302 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
303 |         state.checkpoint(|scratch| {
304 |             let returns_ok = self.return_type_id.try_unify(&other.return_type_id, scratch);
305 |             returns_ok && self.formal_parameters.try_unify(&other.formal_parameters, scratch)
306 |         })
307 |     }
308 | }
309 | 
310 | impl Unify for Type {
311 |     fn try_unify(&self, other: &Self, state: &mut State<'_>) -> bool {
312 |         match (self, other) { // same variant on both sides: unify payloads
313 |             (Self::Base(x), Self::Base(y)) => x.try_unify(y, state),
314 |             (Self::Pointer(x), Self::Pointer(y)) => x.try_unify(y, state),
315 |             (Self::Array(x), Self::Array(y)) => x.try_unify(y, state),
316 |             (Self::Struct(x), Self::Struct(y)) => x.try_unify(y, state),
317 |             (Self::Union(x), Self::Union(y)) => x.try_unify(y, state),
318 |             (Self::Enum(x), Self::Enum(y)) => x.try_unify(y, state),
319 |             (Self::CEnum(x), Self::CEnum(y)) => x.try_unify(y, state),
320 |             (Self::Subroutine(x), Self::Subroutine(y)) => x.try_unify(y, state),
321 |             _ => false, // any other pairing never unifies
322 |         }
323 |     }
324 | }
325 | 


--------------------------------------------------------------------------------
/src/value.rs:
--------------------------------------------------------------------------------
  1 | //! Abstract, dynamic, JSON-like representation of Rust values.
  2 | //!
  3 | //! This can be read from a program image using `Load` even if the program doing
  4 | //! the reading doesn't know the type shape in advance.
  5 | 
  6 | use regex::Regex;
  7 | 
  8 | use crate::load::{choose_variant, load_unsigned, Load, LoadError, Machine};
  9 | use crate::{Encoding, Type, DebugDb, TypeId, EntityId};
 10 | use std::borrow::Cow;
 11 | use std::convert::TryFrom;
 12 | use std::fmt::Display;
 13 | use std::collections::{BTreeSet, BTreeMap};
 14 | 
 15 | /// A dynamically typed value read from a program image, with one variant
 16 | /// per kind of type that can be loaded.
 17 | #[derive(Clone, Debug)]
 18 | pub enum Value {
 19 |     Array(Vec<Value>),
 20 |     Base(Base),
 21 |     Struct(Struct),
 22 |     CEnum(CEnum),
 23 |     Enum(Enum),
 24 |     Pointer(Pointer),
 25 | }
 26 | 
 27 | impl Value {
 28 |     /// The value widened to `u64` when it is an unsigned base value;
 29 |     /// `None` for everything else.
 30 |     pub fn u64_value(&self) -> Option<u64> {
 31 |         match self {
 32 |             Self::Base(b) => b.as_u64(),
 33 |             _ => None,
 34 |         }
 35 |     }
 36 | 
 37 |     /// The address stored in a pointer value, or `None` for non-pointers.
 38 |     pub fn pointer_value(&self) -> Option<u64> {
 39 |         if let Self::Pointer(p) = self { Some(p.value) } else { None }
 40 |     }
 41 | 
 42 |     /// For a struct called `name` with exactly one member, returns the
 43 |     /// member named `__0` if there is one; `None` in every other case.
 44 |     pub fn newtype(&self, name: &str) -> Option<&Value> {
 45 |         let Self::Struct(s) = self else { return None };
 46 |         let qualifies = s.name == name && s.members.len() == 1;
 47 |         qualifies.then(|| s.any_member_named("__0")).flatten()
 48 |     }
 49 | 
 50 |     /// A printable name for the value's type. Arrays render as
 51 |     /// `[elem; len]`, using the first element's name or `???` when empty.
 52 |     pub fn type_name(&self) -> Cow<'_, str> {
 53 |         match self {
 54 |             Self::Array(es) => {
 55 |                 let elt_type = es.first().map_or_else(|| "???".into(), Value::type_name);
 56 |                 format!("[{}; {}]", elt_type, es.len()).into()
 57 |             }
 58 |             Self::Base(Base::U8(_)) => "u8".into(),
 59 |             Self::Base(Base::U32(_)) => "u32".into(),
 60 |             Self::Base(Base::U64(_)) => "u64".into(),
 61 |             Self::Base(Base::Bool(_)) => "bool".into(),
 62 |             Self::Base(Base::Unit) => "()".into(),
 63 |             Self::Struct(s) => Cow::from(&s.name),
 64 |             Self::CEnum(e) => Cow::from(&e.name),
 65 |             Self::Enum(e) => Cow::from(&e.name),
 66 |             Self::Pointer(p) => Cow::from(&p.name),
 67 |         }
 68 |     }
 69 | 
 70 |     /// Adds to `set` the type name of every struct, enum, C-like enum and
 71 |     /// pointer in this value, descending into array elements and members.
 72 |     pub fn collect_names(&self, set: &mut BTreeSet<String>) {
 73 |         match self {
 74 |             Self::Base(_) => (),
 75 |             Self::Array(elts) => {
 76 |                 for elt in elts {
 77 |                     elt.collect_names(set);
 78 |                 }
 79 |             }
 80 |             Self::CEnum(e) => {
 81 |                 set.insert(e.name.clone());
 82 |             }
 83 |             Self::Pointer(p) => {
 84 |                 set.insert(p.name.clone());
 85 |             }
 86 |             Self::Struct(s) => {
 87 |                 set.insert(s.name.clone());
 88 |                 for (_, member) in &s.members {
 89 |                     member.collect_names(set);
 90 |                 }
 91 |             }
 92 |             Self::Enum(e) => {
 93 |                 set.insert(e.name.clone());
 94 |                 // We are deliberately skipping the name of the variant struct.
 95 |                 for (_, member) in &e.value.members {
 96 |                     member.collect_names(set);
 97 |                 }
 98 |             }
 99 |         }
100 |     }
101 | 
102 |     /// Writes the value as Rust-like text. `indent` is the current nesting
103 |     /// depth in columns; type names are abbreviated through `use_table`.
104 |     fn text(&self, world: &DebugDb, indent: usize, use_table: &UseTable, f: &mut core::fmt::Formatter) -> core::fmt::Result {
105 |         match self {
106 |             Self::Base(Base::U8(x)) => write!(f, "{x}_u8"),
107 |             Self::Base(Base::U32(x)) => write!(f, "{x}_u32"),
108 |             Self::Base(Base::U64(x)) => write!(f, "{x}_u64"),
109 |             Self::Base(Base::Bool(0)) => write!(f, "false"),
110 |             Self::Base(Base::Bool(1)) => write!(f, "true"),
111 |             Self::Base(Base::Bool(x)) => write!(f, "{x}_bool"),
112 |             Self::Base(Base::Unit) => write!(f, "()"),
113 |             Self::Pointer(p) => {
114 |                 // Name the static variable whose range starts closest to the
115 |                 // pointer. If that variable has no record, print the raw
116 |                 // address rather than panicking in the middle of formatting.
117 |                 let nearest = world.entities_by_address(p.value)
118 |                     .filter_map(|ar| match ar.entity {
119 |                         EntityId::Var(v) => Some((v, ar.range.start)),
120 |                         _ => None,
121 |                     })
122 |                     .min_by_key(|(_, start)| start.abs_diff(p.value))
123 |                     .and_then(|(vid, _)| world.static_variable_by_id(vid));
124 |                 if let Some(var) = nearest {
125 |                     let prefix = if p.is_probably_mut() { "&mut " } else { "&" };
126 |                     let name = &var.name;
127 |                     write!(f, "{prefix}{name} /* {:#x} */ as {}", p.value, p.name)
128 |                 } else {
129 |                     write!(f, "{:#x} as {}", p.value, p.name)
130 |                 }
131 |             }
132 |             Self::CEnum(e) => write!(f, "{}::{}", use_table.rewrite(&e.name), e.disc),
133 |             Self::Array(v) => {
134 |                 // TODO: special-case bases for more compact printering
135 |                 writeln!(f, "[")?;
136 |                 for elt in v {
137 |                     write!(f, "{:indent$}    ", "")?;
138 |                     elt.text(world, indent + 4, use_table, f)?;
139 |                     writeln!(f, ",")?;
140 |                 }
141 |                 write!(f, "{:indent$}]", "")
142 |             }
143 |             Self::Struct(s) => {
144 |                 if !display_dyn(world, s, f)? {
145 |                     write!(f, "{}", use_table.rewrite(&s.name))?;
146 |                     fmt_struct_body(s, world, indent, use_table, f)?;
147 |                 }
148 |                 Ok(())
149 |             }
150 |             Self::Enum(e) => {
151 |                 write!(f, "{}::{}", use_table.rewrite(&e.name), e.disc)?;
152 |                 fmt_struct_body(&e.value, world, indent, use_table, f)
153 |             }
154 |         }
155 |     }
156 | }
157 | 
158 | /// Tries to print `s` as a trait-object pointer, taking the concrete type
159 | /// from the name of the static variable that its `vtable` member points at.
160 | ///
161 | /// Returns `Ok(true)` if text was written, and `Ok(false)` without writing
162 | /// anything if `s` does not look like a `dyn` pointer with a known vtable.
163 | fn display_dyn(
164 |     world: &DebugDb,
165 |     s: &Struct,
166 |     f: &mut core::fmt::Formatter,
167 | ) -> Result<bool, core::fmt::Error> {
168 |     if s.members.len() != 2 { return Ok(false); }
169 | 
170 |     let dynptr = Regex::new(r#"^[&*](mut )?dyn (.*)$"#).unwrap();
171 |     let Some(c) = dynptr.captures(&s.name) else { return Ok(false); };
172 |     // Group 1 is optional, and indexing a group that took no part in the
173 |     // match panics; `get` yields an empty prefix for non-`mut` pointers.
174 |     let ismut = c.get(1).map_or("", |m| m.as_str());
175 | 
176 |     let Some(vtable_ptr) = s.any_member_named("vtable") else { return Ok(false); };
177 |     let Some(dest) = s.any_member_named("pointer") else { return Ok(false); };
178 |     let Some(addr) = vtable_ptr.pointer_value() else { return Ok(false); };
179 |     let Some(dest_addr) = dest.pointer_value() else { return Ok(false); };
180 | 
181 |     // Compiled once, outside the loop over candidate entities.
182 |     let vtable = Regex::new(r#"^<(.*) as (.*)>::\{vtable\}$"#).unwrap();
183 |     for e in world.entities_by_address(addr) {
184 |         // Only an entity starting exactly at the vtable address qualifies.
185 |         if addr != e.range.start {
186 |             continue;
187 |         }
188 |         let EntityId::Var(v) = e.entity else { return Ok(false); };
189 |         let Some(v) = world.static_variable_by_id(v) else { return Ok(false); };
190 |         let Some(vc) = vtable.captures(&v.name) else { return Ok(false); };
191 |         let (concrete, trait_name) = (&vc[1], &vc[2]);
192 |         write!(f, "{dest_addr:#x} as &{ismut}{concrete} as &{ismut}dyn {trait_name}")?;
193 |         return Ok(true);
194 |     }
195 | 
196 |     Ok(false)
197 | }
198 | 
199 | /// Writes the members of `s` after its name: nothing when there are none,
200 | /// a parenthesised list for tuple-like structs (inline for a single
201 | /// member, one per line otherwise), or a braced `name: value` list.
202 | fn fmt_struct_body(s: &Struct, world: &DebugDb, indent: usize, use_table: &UseTable, f: &mut core::fmt::Formatter) -> core::fmt::Result {
203 |     if s.members.is_empty() {
204 |         return Ok(());
205 |     }
206 |     if !s.is_tuple_like() {
207 |         writeln!(f, " {{")?;
208 |         for (name, value) in &s.members {
209 |             match name {
210 |                 Some(name) => write!(f, "{:indent$}    {name}: ", "")?,
211 |                 None => write!(f, "{:indent$}    _: ", "")?,
212 |             }
213 |             value.text(world, indent + 4, use_table, f)?;
214 |             writeln!(f, ",")?;
215 |         }
216 |         return write!(f, "{:indent$}}}", "");
217 |     }
218 |     if let [(_, only)] = s.members.as_slice() {
219 |         // A one-element tuple stays on the current line.
220 |         write!(f, "(")?;
221 |         only.text(world, indent, use_table, f)?;
222 |         return write!(f, ")");
223 |     }
224 |     writeln!(f, "(")?;
225 |     for (_, value) in &s.members {
226 |         write!(f, "{:indent$}    ", "")?;
227 |         value.text(world, indent + 4, use_table, f)?;
228 |         writeln!(f, ",")?;
229 |     }
230 |     write!(f, "{:indent$})", "")
231 | }
232 | 
233 | /// Pairs a `Value` with the `DebugDb` needed to print it; the `Display`
234 | /// output is a `use` line per abbreviated type name, then the value.
235 | pub struct ValueWithDb<'a>(pub Value, pub &'a DebugDb);
236 | 
237 | impl Display for ValueWithDb<'_> {
238 |     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
239 |         let mut names = BTreeSet::new();
240 |         self.0.collect_names(&mut names);
241 |         let use_table = UseTable::new(names);
242 | 
243 |         use_table.0.iter().try_for_each(|(long, stub)| {
244 |             // A name that is its own stub needs no `as` clause.
245 |             if long == stub { writeln!(f, "use {long};") } else { writeln!(f, "use {long} as {stub};") }
246 |         })?;
247 |         self.0.text(self.1, 0, &use_table, f)
248 |     }
249 | }
250 | 
251 | /// Maps full type names to the shorter stub printed in their place.
252 | struct UseTable(BTreeMap<String, String>);
253 | 
254 | impl UseTable {
255 |     /// Gives each path-like name its final segment as a stub. Names are
256 |     /// visited in sorted order and the first to claim a stub keeps it;
257 |     /// later claimants and names that are not simple paths get no entry.
258 |     fn new(names: BTreeSet<String>) -> Self {
259 |         let simple = Regex::new(r#"^([a-zA-Z_0-9{}#]+::)*([A-Za-z0-9_]+)$"#).unwrap();
260 |         let mut taken = BTreeSet::new();
261 |         let rewrites = names.into_iter().filter_map(|name| {
262 |             let stub = simple.captures(&name)?[2].to_string();
263 |             // `insert` returns `false` if an earlier name took this stub.
264 |             taken.insert(stub.clone()).then(|| (name, stub))
265 |         });
266 |         Self(rewrites.collect())
267 |     }
268 | 
269 |     /// The stub recorded for `name`, or `name` itself if it has none.
270 |     fn rewrite<'a>(&'a self, name: &'a str) -> &str {
271 |         self.0.get(name).map_or(name, String::as_str)
272 |     }
273 | }
274 | 
275 | /// Loads a value of any supported type by dispatching on the kind of `ty`.
276 | impl Load for Value {
277 |     /// Reads the value of type `ty` found at `addr` in `machine`.
278 |     ///
279 |     /// # Errors
280 |     ///
281 |     /// Passes on any error from the kind-specific loader, and returns
282 |     /// `LoadError::UnsupportedType` for kinds of type that have no
283 |     /// corresponding `Value` variant.
284 |     fn from_state<M: Machine>(
285 |         machine: &M,
286 |         addr: u64,
287 |         world: &DebugDb,
288 |         ty: &Type,
289 |     ) -> Result<Self, LoadError<M::Error>> {
290 |         let value = match ty {
291 |             Type::Base(_) => Self::Base(Base::from_state(machine, addr, world, ty)?),
292 |             Type::Array(_) => Self::Array(Vec::from_state(machine, addr, world, ty)?),
293 |             Type::Struct(_) => Self::Struct(Struct::from_state(machine, addr, world, ty)?),
294 |             Type::CEnum(_) => Self::CEnum(CEnum::from_state(machine, addr, world, ty)?),
295 |             Type::Enum(_) => Self::Enum(Enum::from_state(machine, addr, world, ty)?),
296 |             Type::Pointer(_) => Self::Pointer(Pointer::from_state(machine, addr, world, ty)?),
297 |             // The remaining kinds (e.g. unions, subroutines) used to hit
298 |             // `unimplemented!()`; report them as an error instead of panicking.
299 |             _ => return Err(LoadError::UnsupportedType),
300 |         };
301 | 
302 |         Ok(value)
303 |     }
304 | }
305 | 
306 | #[derive(Copy, Clone, Debug)]
307 | pub enum Base { // a primitive (base-typed) value
308 |     Unit,
309 |     U8(u8),
310 |     U32(u32),
311 |     U64(u64),
312 |     Bool(u8), // kept as the raw byte, which need not be 0 or 1
313 | }
314 | 
315 | impl Base {
316 |     pub fn as_u64(self) -> Option<u64> { // unsigned payload, widened
317 |         match self {
318 |             Self::U64(wide) => Some(wide),
319 |             Self::U32(word) => Some(word.into()),
320 |             Self::U8(byte) => Some(byte.into()),
321 |             Self::Unit | Self::Bool(_) => None,
322 |         }
323 |     }
324 | }
325 | 
326 | /// Loads unsigned integers of 1, 4 or 8 bytes, one-byte booleans and the
327 | /// zero-sized unsigned type; any other base type is unsupported.
328 | impl Load for Base {
329 |     /// Reads the base value of type `ty` stored at `addr`.
330 |     ///
331 |     /// # Errors
332 |     ///
333 |     /// * `LoadError::NotABase` if `ty` is not a base type.
334 |     /// * `LoadError::DataUnavailable` if `load_unsigned` yields no value.
335 |     /// * `LoadError::UnsupportedType` for every other encoding/size pair;
336 |     ///   the offending pair is also printed to stdout.
337 |     /// * Any error that `load_unsigned` itself returns.
338 |     fn from_state<M: Machine>(
339 |         machine: &M,
340 |         addr: u64,
341 |         world: &DebugDb,
342 |         ty: &Type,
343 |     ) -> Result<Self, LoadError<M::Error>> {
344 |         let Type::Base(b) = ty else { return Err(LoadError::NotABase); };
345 | 
346 |         // Calls `load_unsigned` at `addr` with the given size argument; a
347 |         // missing value counts as an error.
348 |         let read = |size| -> Result<u64, LoadError<M::Error>> {
349 |             load_unsigned(world.endian(), machine, addr, size)?
350 |                 .ok_or(LoadError::DataUnavailable)
351 |         };
352 | 
353 |         match (b.encoding, b.byte_size) {
354 |             (Encoding::Unsigned, 0) => Ok(Base::Unit),
355 |             // `as` truncates; presumably lossless given the size requested.
356 |             (Encoding::Unsigned, 1) => Ok(Base::U8(read(1)? as u8)),
357 |             (Encoding::Unsigned, 4) => Ok(Base::U32(read(4)? as u32)),
358 |             (Encoding::Unsigned, 8) => Ok(Base::U64(read(8)?)),
359 |             (Encoding::Boolean, 1) => Ok(Base::Bool(read(1)? as u8)),
360 |             _ => {
361 |                 println!("{:?} {}", b.encoding, b.byte_size);
362 |                 Err(LoadError::UnsupportedType)
363 |             },
364 |         }
365 |     }
366 | }
367 | 
368 | /// A struct value: the type's name plus its members, each paired with the
369 | /// member's name if it has one.
370 | #[derive(Clone, Debug)]
371 | pub struct Struct {
372 |     pub name: String,
373 |     pub members: Vec<(Option<String>, Value)>,
374 | }
375 | 
376 | impl Struct {
377 |     /// True when every member is named `__` followed by text that parses
378 |     /// as a `u32`; vacuously true for a struct without members.
379 |     // TODO: better to have a Value::Tuple and distinguish at creation
380 |     pub fn is_tuple_like(&self) -> bool {
381 |         self.members.iter().all(|(name, _)| {
382 |             name.as_deref()
383 |                 .and_then(|n| n.strip_prefix("__"))
384 |                 .map_or(false, |digits| digits.parse::<u32>().is_ok())
385 |         })
386 |     }
387 | 
388 |     /// Iterates over the values of all members called `name`.
389 |     pub fn members_named<'s, 'n>(&'s self, name: &'n str) -> impl Iterator<Item = &'s Value> + 'n
390 |     where 's: 'n {
391 |         self.members.iter().filter_map(move |(n, value)| {
392 |             (n.as_deref() == Some(name)).then_some(value)
393 |         })
394 |     }
395 | 
396 |     /// The value of the member called `name` if exactly one has that name.
397 |     pub fn unique_member_named<'s>(&'s self, name: &str) -> Option<&'s Value> {
398 |         let mut matches = self.members_named(name);
399 |         match (matches.next(), matches.next()) {
400 |             (Some(only), None) => Some(only),
401 |             _ => None,
402 |         }
403 |     }
404 | 
405 |     /// The value of the first member called `name`, if any.
406 |     pub fn any_member_named(&self, name: &str) -> Option<&Value> {
407 |         self.members_named(name).next()
408 |     }
409 | }
410 | 
411 | impl Load for Struct {
412 |     fn from_state<M: Machine>(
413 |         machine: &M,
414 |         addr: u64,
415 |         world: &DebugDb,
416 |         ty: &Type,
417 |     ) -> Result<Self, LoadError<M::Error>> {
418 |         let Type::Struct(s) = ty else { return Err(LoadError::NotAStruct); };
419 |         let mut members = vec![];
420 | 
421 |         for m in &s.members {
422 |             let t = world.type_by_id(m.type_id).unwrap();
423 |             let ma = addr + m.location;
424 |             let v = Value::from_state(machine, ma, world, t)?;
425 |             members.push((m.name.clone(), v));
426 |         }
427 | 
428 |         Ok(Self {
429 |             name: s.name.clone(),
430 |             members,
431 |         })
432 |     }
433 | }
434 | 
435 | #[derive(Clone, Debug)]
436 | pub struct Enum {
437 |     pub name: String,
438 |     pub disc: String,
439 |     pub value: Struct,
440 | }
441 | 
442 | impl Load for Enum {
443 |     fn from_state<M: Machine>(
444 |         machine: &M,
445 |         addr: u64,
446 |         world: &DebugDb,
447 |         ty: &Type,
448 |     ) -> Result<Self, LoadError<M::Error>> {
449 |         let Type::Enum(s) = ty else { return Err(LoadError::NotAnEnum); };
450 |         let v = choose_variant(machine, addr, world, s)?;
451 | 
452 |         let vtype_id = v.member.type_id;
453 |         let vty = world.type_by_id(vtype_id).unwrap();
454 |         let va = addr + v.member.location;
455 |         let value = Struct::from_state(machine, va, world, vty)?;
456 | 
457 |         Ok(Self {
458 |             name: s.name.clone(),
459 |             disc: v.member.name.as_ref().unwrap().clone(),
460 |             value,
461 |         })
462 |     }
463 | }
464 | 
/// An enum value loaded from a machine's state that is described only by
/// the enumerator matching its discriminator (see `Type::CEnum`).
#[derive(Clone, Debug)]
pub struct CEnum {
    /// Name of the enum type.
    name: String,
    /// Name of the enumerator that matched the loaded discriminator.
    disc: String,
}
470 | 
471 | impl Load for CEnum {
472 |     fn from_state<M: Machine>(
473 |         machine: &M,
474 |         addr: u64,
475 |         world: &DebugDb,
476 |         ty: &Type,
477 |     ) -> Result<Self, LoadError<M::Error>> {
478 |         let Type::CEnum(s) = ty else { return Err(LoadError::NotACEnum) };
479 | 
480 |         let disc_value = load_unsigned(
481 |             world.endian(),
482 |             machine,
483 |             addr,
484 |             usize::try_from(s.byte_size).unwrap(),
485 |         )?.ok_or(LoadError::DataUnavailable)?;
486 | 
487 |         let e = s
488 |             .enumerators
489 |             .get(&disc_value)
490 |             .ok_or(LoadError::BadDiscriminator(disc_value))?;
491 | 
492 |         Ok(Self {
493 |             name: s.name.clone(),
494 |             disc: e.name.clone(),
495 |         })
496 |     }
497 | }
498 | 
499 | #[derive(Clone, Debug)]
500 | pub struct Pointer {
501 |     pub name: String,
502 |     pub dest_type_id: TypeId,
503 |     pub value: u64,
504 | }
505 | 
506 | impl Pointer {
507 |     fn is_probably_mut(&self) -> bool {
508 |         self.name.starts_with("&mut") || self.name.starts_with("*mut") || self.name.starts_with("*_")
509 |     }
510 | }
511 | 
512 | impl Load for Pointer {
513 |     fn from_state<M: Machine>(
514 |         machine: &M,
515 |         addr: u64,
516 |         world: &DebugDb,
517 |         ty: &Type,
518 |     ) -> Result<Self, LoadError<M::Error>> {
519 |         // TODO support pointer sizes
520 | 
521 |         let Type::Pointer(s) = ty else { return Err(LoadError::NotAPointer); };
522 | 
523 |         let value = load_unsigned(world.endian(),  machine, addr, world.pointer_size())?
524 |             .ok_or(LoadError::DataUnavailable)?;
525 | 
526 |         Ok(Self {
527 |             name: Cow::into_owned(ty.name(world)),
528 |             dest_type_id: s.type_id,
529 |             value,
530 |         })
531 |     }
532 | }
533 | 


--------------------------------------------------------------------------------