├── .gitignore ├── rustfmt.toml ├── Cargo.toml ├── .travis.yml ├── parser ├── Cargo.toml └── src │ ├── cfi.rs │ ├── source.rs │ ├── location.rs │ ├── unit.rs │ ├── range.rs │ ├── namespace.rs │ ├── lib.rs │ ├── variable.rs │ ├── function.rs │ └── file │ ├── pdb.rs │ └── mod.rs ├── main ├── test │ ├── 2.cc │ ├── 4.cc │ ├── 1.cc │ ├── 6.cc │ ├── 3.cc │ └── 5.cc ├── Cargo.toml └── src │ └── main.rs ├── LICENSE-MIT ├── README.md ├── LICENSE-APACHE └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | #use_small_heuristics = false 2 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "main", 4 | ] 5 | 6 | [profile.release] 7 | debug = true 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - stable 4 | - beta 5 | - nightly 6 | os: 7 | - linux 8 | matrix: 9 | include: 10 | - os: osx 11 | rust: stable 12 | -------------------------------------------------------------------------------- /parser/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ddbug_parser" 3 | version = "0.3.0" 4 | description = "Unified debug information parser" 5 | repository = "https://github.com/gimli-rs/ddbug" 6 | keywords = ["debug", "DWARF"] 7 | categories = ["development-tools::debugging"] 8 | license = "Apache-2.0 OR MIT" 9 | edition = "2018" 10 | 11 | [dependencies] 12 | fnv = "1.0" 13 | gimli = "0.26" 14 | log = "0.4" 15 | memmap = "0.7" 16 | object = "0.28" 17 | 
18 | [features] 19 | default = [] 20 | -------------------------------------------------------------------------------- /main/test/2.cc: -------------------------------------------------------------------------------- 1 | struct S { 2 | long l; 3 | char ch[0]; 4 | }; 5 | 6 | int main() { 7 | S s; 8 | return 0; 9 | } 10 | 11 | /* 12 | struct S 13 | size: 8 14 | members: 15 | 0[8] l: long int 16 | 8[??] ch: [char] # 此时 ArrayType count/size 均为 None. 17 | 18 | base long int 19 | size: 8 20 | encoding: signed 21 | 22 | # sizetype 并不是一个合法的类型名.. 23 | base sizetype 24 | size: 8 25 | encoding: unsigned 26 | 27 | base char 28 | size: 1 29 | encoding: signed char 30 | 31 | base int 32 | size: 4 33 | encoding: signed 34 | */ -------------------------------------------------------------------------------- /main/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "clayout" 3 | version = "0.3.3" 4 | description = "Translate C++/Rust type into C type with the same memory layout." 
5 | repository = "https://github.com/hidva/clayout" 6 | readme = "../README.md" 7 | keywords = ["debug", "DWARF"] 8 | categories = ["development-tools::debugging"] 9 | license = "Apache-2.0 OR MIT" 10 | edition = "2021" 11 | 12 | [dependencies] 13 | anyhow = "1" 14 | clap = { version = "4", features = ["derive"] } 15 | env_logger = "0.9" 16 | log = "0.4" 17 | parser = { package = "ddbug_parser", version = "0.3.0", path = "../parser" } 18 | -------------------------------------------------------------------------------- /main/test/4.cc: -------------------------------------------------------------------------------- 1 | struct S 2 | { 3 | // will usually occupy 2 bytes: 4 | // 3 bits: value of b1 5 | // 5 bits: unused 6 | // 2 bits: value of b2 7 | // 6 bits: unused 8 | unsigned long b1 : 3; 9 | unsigned char :0; // start a new byte 10 | unsigned short b2 : 2; 11 | }; 12 | 13 | int main() 14 | { 15 | S s; 16 | return sizeof(S); 17 | } 18 | /* 19 | struct S 20 | size: 8 21 | members: 22 | 0[0.3] b1: long unsigned int 23 | 0.3[0.5] 24 | 1[0.2] b2: short unsigned int 25 | 1.2[6.6] 26 | 27 | base long unsigned int 28 | size: 8 29 | encoding: unsigned 30 | 31 | base short unsigned int 32 | size: 2 33 | encoding: unsigned 34 | 35 | base int 36 | size: 4 37 | encoding: signed 38 | */ -------------------------------------------------------------------------------- /main/test/1.cc: -------------------------------------------------------------------------------- 1 | struct zhanyi_struct { 2 | union { 3 | long zhanyi_union_field_long; 4 | char zhanyi_union_field_char; 5 | }; 6 | long zhanyi_s_field_long; 7 | char zhanyi_s_field_ch; 8 | }; 9 | 10 | int main () { 11 | zhanyi_struct s; 12 | return 0; 13 | } 14 | 15 | /* 16 | struct zhanyi_struct 17 | size: 24 18 | members: 19 | 0[8] : union zhanyi_struct:: 20 | 0[8] zhanyi_union_field_long: long int 21 | 0[1] zhanyi_union_field_char: char 22 | 8[8] zhanyi_s_field_long: long int 23 | 16[1] zhanyi_s_field_ch: char 24 | 17[7] 25 | 
26 | base long int 27 | size: 8 28 | encoding: signed 29 | 30 | base char 31 | size: 1 32 | encoding: signed char 33 | 34 | base int 35 | size: 4 36 | encoding: signed 37 | */ -------------------------------------------------------------------------------- /main/test/6.cc: -------------------------------------------------------------------------------- 1 | struct S1 { 2 | long l; 3 | char ch; 4 | }; 5 | union u { 6 | long u_l; 7 | char u_c; 8 | long u_b: 2; 9 | S1 s1; 10 | }; 11 | 12 | struct S { 13 | union u s_u; 14 | char s_c; 15 | }; 16 | 17 | 18 | int main() { 19 | S s; 20 | return 0; 21 | } 22 | 23 | /* 24 | 25 | struct S1 26 | size: 16 27 | members: 28 | 0[8] l: long int 29 | 8[1] ch: char 30 | 9[7] 31 | 32 | base long int 33 | size: 8 34 | encoding: signed 35 | 36 | base char 37 | size: 1 38 | encoding: signed char 39 | 40 | union u 41 | size: 16 42 | members: 43 | 0[8] u_l: long int 44 | 0[1] u_c: char 45 | 0[0.2] u_b: long int 46 | 0[16] s1: struct S1 47 | 48 | struct S 49 | size: 24 50 | members: 51 | 0[16] s_u: union u 52 | 16[1] s_c: char 53 | 17[7] 54 | 55 | base int 56 | size: 4 57 | encoding: signed 58 | */ -------------------------------------------------------------------------------- /main/test/3.cc: -------------------------------------------------------------------------------- 1 | struct ZhanyiStruct { 2 | long zy_bits_2bit: 2; 3 | }; 4 | 5 | struct ZhanyiStruct2: public ZhanyiStruct { 6 | char ch; 7 | }; 8 | 9 | /* 10 | union ZhanyiUnion1 { 11 | char zy_union_ch; 12 | ZhanyiStruct2 zy_union_zs2; 13 | }; 14 | 15 | // 还好还好, union 不能是父类. 
16 | class ZhanyiClass1: public ZhanyiUnion1 { 17 | char zy_class_ch; 18 | long zy_class_l; 19 | }; 20 | */ 21 | 22 | int main() { 23 | ZhanyiStruct2 obj; 24 | return 0; 25 | } 26 | 27 | /* 28 | struct ZhanyiStruct 29 | size: 8 30 | members: 31 | 0[0.2] zy_bits_2bit: long int 32 | 0.2[7.6] 33 | 34 | base long int 35 | size: 8 36 | encoding: signed 37 | 38 | struct ZhanyiStruct2 39 | size: 16 40 | inherits: struct ZhanyiStruct 41 | members: 42 | 0[8] : struct ZhanyiStruct 43 | 8[1] ch: char 44 | 9[7] 45 | 46 | base char 47 | size: 1 48 | encoding: signed char 49 | 50 | base int 51 | size: 4 52 | encoding: signed 53 | */ -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-2021 The ddbug Developers 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 
26 | -------------------------------------------------------------------------------- /main/test/5.cc: -------------------------------------------------------------------------------- 1 | struct S {}; 2 | struct S1 { 3 | long l; 4 | char ch; 5 | S s; 6 | }; 7 | struct S2: public S1 { 8 | char ch; 9 | }; 10 | int main() { 11 | S2 s; 12 | return 0; 13 | } 14 | 15 | /* 16 | struct S 17 | size: 1 18 | 19 | struct S1 20 | size: 16 21 | members: 22 | 0[8] l: long int 23 | 8[1] ch: char 24 | 9[1] s: struct S 25 | 10[6] 26 | 27 | base long int 28 | size: 8 29 | encoding: signed 30 | 31 | base char 32 | size: 1 33 | encoding: signed char 34 | 35 | struct S2 36 | size: 24 37 | inherits: struct S1 38 | members: 39 | 0[16] : struct S1 40 | 16[1] ch: char # 这里居然没有重用 S1 空间. 41 | 17[7] 42 | 43 | base int 44 | size: 4 45 | encoding: signed 46 | */ 47 | // another case 48 | namespace XXX { 49 | struct S {}; 50 | struct S1 { 51 | long l:32; 52 | char ch; 53 | S s; 54 | }; 55 | struct S2: public S1 { 56 | char ch; 57 | }; 58 | int main() { 59 | S2 s; 60 | return 0; 61 | } 62 | } 63 | 64 | /* 65 | struct S 66 | size: 1 67 | 68 | struct S1 69 | size: 8 70 | members: 71 | 0[4] l: long int # 这里显示不了是 bitfield 72 | 4[1] ch: char 73 | 5[1] s: struct S 74 | 6[2] 75 | 76 | base long int 77 | size: 8 78 | encoding: signed 79 | 80 | base char 81 | size: 1 82 | encoding: signed char 83 | 84 | struct S2 85 | size: 16 86 | inherits: struct S1 87 | members: 88 | 0[8] : struct S1 89 | 8[1] ch: char 90 | 9[7] 91 | 92 | base int 93 | size: 4 94 | encoding: signed 95 | */ -------------------------------------------------------------------------------- /parser/src/cfi.rs: -------------------------------------------------------------------------------- 1 | use crate::location::Register; 2 | use crate::Address; 3 | 4 | /// A CFI directive and the function offset it applies to. 5 | /// 6 | /// Address::none() is used for directives that apply to the whole function. 
7 | pub type Cfi = (Address, CfiDirective); 8 | 9 | /// A CFI directive. 10 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 11 | pub enum CfiDirective { 12 | /// .cfi_startproc 13 | StartProc, 14 | 15 | /// .cfi_endproc 16 | EndProc, 17 | 18 | /// .cfi_personality
19 | Personality(Address), 20 | 21 | /// .cfi_lsda
22 | // TODO: encoding? 23 | Lsda(Address), 24 | 25 | /// .cfi_signal_frame 26 | SignalFrame, 27 | 28 | /// .cfi_return_column register 29 | ReturnColumn(Register), 30 | 31 | /// .cfi_def_cfa register, offset 32 | DefCfa(Register, i64), 33 | 34 | /// .cfi_def_cfa_register register 35 | DefCfaRegister(Register), 36 | 37 | /// .cfi_def_cfa_offset offset 38 | DefCfaOffset(i64), 39 | 40 | /// .cfi_offset register, offset 41 | Offset(Register, i64), 42 | 43 | /// .cfi_val_offset register, offset 44 | ValOffset(Register, i64), 45 | 46 | /// .cfi_register register1, register2 47 | Register(Register, Register), 48 | 49 | /// .cfi_restore register 50 | Restore(Register), 51 | 52 | /// .cfi_undefined register 53 | Undefined(Register), 54 | 55 | /// .cfi_same_value register 56 | SameValue(Register), 57 | 58 | /// .cfi_remember_state 59 | RememberState, 60 | 61 | /// .cfi_restore_state 62 | RestoreState, 63 | 64 | /// An unsupported instruction. 65 | Other, 66 | } 67 | -------------------------------------------------------------------------------- /parser/src/source.rs: -------------------------------------------------------------------------------- 1 | use crate::unit::Unit; 2 | 3 | /// A source location. 4 | #[derive(Debug, Default, Clone)] 5 | pub struct Source<'input> { 6 | pub(crate) directory: Option<&'input str>, 7 | pub(crate) file: Option<&'input str>, 8 | pub(crate) line: u32, 9 | pub(crate) column: u32, 10 | } 11 | 12 | impl<'input> Source<'input> { 13 | /// The directory. 14 | /// 15 | /// This may be absolute, or relative to the working directory of the unit. 16 | #[inline] 17 | pub fn directory(&self) -> Option<&str> { 18 | self.directory 19 | } 20 | 21 | /// The file name. 22 | #[inline] 23 | pub fn file(&self) -> Option<&str> { 24 | self.file 25 | } 26 | 27 | /// Return true if there is no file name. 28 | #[inline] 29 | pub fn is_none(&self) -> bool { 30 | self.file.is_none() 31 | } 32 | 33 | /// Return true if there is a file name. 34 | #[inline] 35 | pub fn is_some(&self) -> bool { 36 | self.file.is_some() 37 | } 38 | 39 | /// The complete file path. 
40 | pub fn path(&self, unit: &Unit) -> Option { 41 | fn is_absolute(directory: &str) -> bool { 42 | directory.get(0..1) == Some("/") || directory.get(1..2) == Some(":") 43 | } 44 | 45 | self.file().map(|file| { 46 | let mut path = String::new(); 47 | if let Some(directory) = self.directory() { 48 | if let (false, Some(unit_dir)) = (is_absolute(directory), unit.dir()) { 49 | path.push_str(unit_dir); 50 | if !unit_dir.ends_with('/') { 51 | path.push('/'); 52 | } 53 | } 54 | path.push_str(directory); 55 | if !directory.ends_with('/') { 56 | path.push('/'); 57 | } 58 | } 59 | path.push_str(file); 60 | path 61 | }) 62 | } 63 | 64 | /// The source line number. 65 | /// 66 | /// 0 means unknown line number. 67 | #[inline] 68 | pub fn line(&self) -> u32 { 69 | self.line 70 | } 71 | 72 | /// The source column number. 73 | /// 74 | /// 0 means unknown column number. 75 | #[inline] 76 | pub fn column(&self) -> u32 { 77 | self.column 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /parser/src/location.rs: -------------------------------------------------------------------------------- 1 | use crate::file::FileHash; 2 | use crate::{Address, Range, Size}; 3 | 4 | /// A register number. 5 | #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 6 | pub struct Register(pub u16); 7 | 8 | impl Register { 9 | /// The name of the register, if known. 10 | pub fn name(self, hash: &FileHash) -> Option<&'static str> { 11 | hash.file.get_register_name(self) 12 | } 13 | } 14 | 15 | /// A location within the stack frame. 16 | #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 17 | pub struct FrameLocation { 18 | /// The offset from the frame pointer. 19 | pub offset: i64, 20 | /// The size of the value in bits. 21 | pub bit_size: Size, 22 | } 23 | 24 | /// A piece of a value. 
25 | // TODO: include the address ranges for which this piece is valid 26 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 27 | pub(crate) struct Piece { 28 | /// The offset of the piece within the containing object. 29 | pub bit_offset: u64, 30 | /// The size of the piece. If none, then the piece is the complete value. 31 | pub bit_size: Size, 32 | /// The location of the piece. 33 | pub location: Location, 34 | /// The offset of the piece within the location. 35 | pub location_offset: u64, 36 | /// If `true`, then the piece does not have a location. 37 | /// Instead, `location` is the value of the piece. 38 | pub is_value: bool, 39 | } 40 | 41 | /// A value location. 42 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 43 | pub(crate) enum Location { 44 | /// The value has been optimized away. 45 | Empty, 46 | /// A literal address or value. 47 | Literal { 48 | /// The literal address or value. 49 | value: u64, 50 | }, 51 | /// The value is stored in a register. 52 | Register { 53 | /// The register number. 54 | register: Register, 55 | }, 56 | /// The value is stored in memory at an offset from an address stored in a register. 57 | RegisterOffset { 58 | /// The register number. 59 | register: Register, 60 | /// The offset. 61 | offset: i64, 62 | }, 63 | /// The value is stored in memory at an offset from the frame base. 64 | FrameOffset { 65 | /// The offset. 66 | offset: i64, 67 | }, 68 | /// The value is stored in memory at an offset from the CFA. 69 | CfaOffset { 70 | /// The offset. 71 | offset: i64, 72 | }, 73 | /// The value is stored in memory at an address. This address may need relocation. 74 | Address { 75 | /// The address. 76 | address: Address, 77 | }, 78 | /// The value is stored in memory at an offset within TLS. 79 | TlsOffset { 80 | /// The offset. 81 | offset: u64, 82 | }, 83 | /// The value is more complex than any of the above variants. 
84 | Other, 85 | } 86 | 87 | pub(crate) fn registers<'a>( 88 | locations: &'a [(Range, Piece)], 89 | ) -> impl Iterator + 'a { 90 | locations.iter().filter_map(|(range, piece)| { 91 | if piece.is_value { 92 | return None; 93 | } 94 | match piece.location { 95 | Location::Register { register } => Some((*range, register)), 96 | _ => None, 97 | } 98 | }) 99 | } 100 | 101 | pub(crate) fn frame_locations<'a>( 102 | locations: &'a [(Range, Piece)], 103 | ) -> impl Iterator + 'a { 104 | locations.iter().filter_map(|(_, piece)| { 105 | if piece.is_value { 106 | return None; 107 | } 108 | match piece.location { 109 | // TODO: do we need to distinguish between these? 110 | Location::FrameOffset { offset } | Location::CfaOffset { offset } => { 111 | Some(FrameLocation { 112 | offset, 113 | bit_size: piece.bit_size, 114 | }) 115 | } 116 | _ => None, 117 | } 118 | }) 119 | } 120 | 121 | pub(crate) fn register_offsets<'a>( 122 | locations: &'a [(Range, Piece)], 123 | ) -> impl Iterator + 'a { 124 | locations.iter().filter_map(|(range, piece)| { 125 | if piece.is_value { 126 | return None; 127 | } 128 | match piece.location { 129 | Location::RegisterOffset { register, offset } => Some((*range, register, offset)), 130 | _ => None, 131 | } 132 | }) 133 | } 134 | -------------------------------------------------------------------------------- /parser/src/unit.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | use std::ops::Deref; 3 | 4 | use gimli; 5 | 6 | use crate::file::FileHash; 7 | use crate::function::Function; 8 | use crate::range::RangeList; 9 | use crate::types::Type; 10 | use crate::variable::Variable; 11 | use crate::Id; 12 | 13 | /// A compilation unit. 
14 | #[derive(Debug, Default)] 15 | pub struct Unit<'input> { 16 | pub(crate) id: Id, 17 | pub(crate) dir: Option>, 18 | pub(crate) name: Option>, 19 | pub(crate) language: Option, 20 | pub(crate) address_size: Option, 21 | pub(crate) low_pc: Option, 22 | pub(crate) ranges: RangeList, 23 | pub(crate) types: Vec>, 24 | pub(crate) functions: Vec>, 25 | pub(crate) variables: Vec>, 26 | } 27 | 28 | impl<'input> Unit<'input> { 29 | /// The user defined id for this unit. 30 | #[inline] 31 | pub fn id(&self) -> usize { 32 | self.id.get() 33 | } 34 | 35 | /// Set a user defined id for this unit. 36 | #[inline] 37 | pub fn set_id(&self, id: usize) { 38 | self.id.set(id) 39 | } 40 | 41 | /// The working directory when the unit was compiled. 42 | pub fn dir(&self) -> Option<&str> { 43 | self.dir.as_ref().map(Cow::deref) 44 | } 45 | 46 | /// The path of the primary source file. 47 | pub fn name(&self) -> Option<&str> { 48 | self.name.as_ref().map(Cow::deref) 49 | } 50 | 51 | /// The source language. 52 | // TODO: avoid gimli dependency. 53 | #[inline] 54 | pub fn language(&self) -> Option { 55 | self.language 56 | } 57 | 58 | /// The base address. 59 | #[inline] 60 | pub fn address(&self) -> Option { 61 | self.low_pc 62 | } 63 | 64 | /// The address ranges covered by functions and variables in the unit. 65 | /// 66 | /// Does not include unknown ranges. 67 | pub fn ranges(&self, hash: &FileHash) -> RangeList { 68 | let mut ranges = RangeList::default(); 69 | for function in &self.functions { 70 | for range in function.ranges() { 71 | ranges.push(*range); 72 | } 73 | } 74 | for variable in &self.variables { 75 | if let Some(range) = variable.range(hash) { 76 | ranges.push(range); 77 | } 78 | } 79 | ranges.sort(); 80 | ranges 81 | } 82 | 83 | /// The address ranges that are covered by the unit, but which 84 | /// are not known to be associated with any functions or variables. 
85 | pub fn unknown_ranges(&self, hash: &FileHash) -> RangeList { 86 | let mut ranges = RangeList::default(); 87 | for range in self.ranges.list() { 88 | ranges.push(*range); 89 | } 90 | ranges.sort(); 91 | ranges.subtract(&self.ranges(hash)) 92 | } 93 | 94 | /// The total size of all functions and variables. 95 | pub fn size(&self, hash: &FileHash) -> u64 { 96 | // TODO: account for padding and overlap between functions and variables? 97 | self.function_size() + self.variable_size(hash) 98 | } 99 | 100 | /// The total size of all functions. 101 | pub fn function_size(&self) -> u64 { 102 | let mut ranges = RangeList::default(); 103 | for function in &self.functions { 104 | for range in function.ranges() { 105 | ranges.push(*range); 106 | } 107 | } 108 | ranges.sort(); 109 | ranges.size() 110 | } 111 | 112 | /// The total size of all variables. 113 | pub fn variable_size(&self, hash: &FileHash) -> u64 { 114 | let mut ranges = RangeList::default(); 115 | for variable in &self.variables { 116 | if let Some(range) = variable.range(hash) { 117 | ranges.push(range); 118 | } 119 | } 120 | ranges.sort(); 121 | ranges.size() 122 | } 123 | 124 | /// The types declared or defined by this unit. 125 | #[inline] 126 | pub fn types(&self) -> &[Type<'input>] { 127 | &self.types 128 | } 129 | 130 | /// The functions declared or defined by this unit. 131 | #[inline] 132 | pub fn functions(&self) -> &[Function<'input>] { 133 | &self.functions 134 | } 135 | 136 | /// The variables declared or defined by this unit. 137 | #[inline] 138 | pub fn variables(&self) -> &[Variable<'input>] { 139 | &self.variables 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | clayout, translate C++/Rust type into C type with the same memory layout. Generally, clayout is used together with bpftrace. 
2 | 3 | clayout is developed on [ddbug](https://github.com/gimli-rs/ddbug). THANKS FOR ddbug! 4 | 5 | # Usage 6 | 7 | Imagine a scenario where you want to use bpftrace to track the value of `S::x` during the running of the following program. 8 | 9 | ```c++ 10 | #include 11 | #include 12 | 13 | struct X { 14 | virtual ~X() {} 15 | 16 | int x1; 17 | }; 18 | 19 | struct S : public X { 20 | S() : x(0) {} 21 | 22 | S(const S &other) : x(other.x) {} 23 | 24 | S f(int y, int z) { 25 | printf("output from a.out: this.x=%d y=%d z=%d\n", x, y, z); 26 | x += (y + z); 27 | return *this; 28 | } 29 | 30 | int x; 31 | }; 32 | 33 | int main(int argc, char **argv) { 34 | S s; 35 | int i = 0; 36 | while (1) { 37 | s.f(i, i); 38 | ++i; 39 | sleep(1); 40 | // break; 41 | } 42 | return 0; 43 | } 44 | ``` 45 | 46 | clayout can translate `S` into a C structure with the same memory layout: 47 | 48 | ``` 49 | # clayout will generate struct.h, struct.c 50 | $ clayout -i ${binary path} -o struct S 51 | ``` 52 | 53 | ```C 54 | // struct.h 55 | // Generated by hidva/clayout! 大吉大利! 56 | #pragma once 57 | #include 58 | struct HidvaStruct2 { 59 | void** __mem1; 60 | int x1; 61 | } __attribute__((__packed__)); 62 | 63 | 64 | struct S { 65 | struct HidvaStruct2 __parent0; 66 | int x; 67 | } __attribute__((__packed__)); 68 | ``` 69 | 70 | So you can easily write the following bpftrace script: 71 | 72 | ```bpftrace 73 | #include "struct.h" 74 | 75 | u:/apsara/zhanyi.ww/tmp/bphtrace/x/trace:_ZN1S1fEii { 76 | printf("output from bpftrace: ret=%p this.x=%d y=%d z=%d\n", (int32*)arg0, ((struct S*)arg1)->x, arg2, arg3) 77 | } 78 | ``` 79 | 80 | ``` 81 | $ bpftrace -c ./trace t.bt 82 | Attaching 1 probe... 
83 | output from a.out: this.x=0 y=0 z=0 84 | output from bpftrace: ret=0x7ffff3044610 this.x=0 y=0 z=0 85 | output from a.out: this.x=0 y=1 z=1 86 | output from bpftrace: ret=0x7ffff3044610 this.x=0 y=1 z=1 87 | ``` 88 | 89 | Please note that you may intuitively think that the layout of S is as follows: 90 | 91 | ```C 92 | struct X { 93 | void** __mem1; 94 | int x1; 95 | } 96 | 97 | struct S { 98 | struct X __parent0; 99 | int x; 100 | } 101 | ``` 102 | 103 | But actually it is wrong! `S::x` will reuse the padding part of `X` in C++! 104 | 105 | ## multi input 106 | 107 | clayout supports multiple input files, and type references across files. 108 | 109 | ```C++ 110 | // x.h 111 | struct X { 112 | virtual ~X(); 113 | 114 | int x1; 115 | }; 116 | 117 | struct S : public X { 118 | S(); 119 | 120 | S(const S &other); 121 | 122 | S f(int y, int z); 123 | 124 | int x; 125 | }; 126 | 127 | // X.cc 128 | #include 129 | #include "x.h" 130 | 131 | X::~X() {} 132 | 133 | S::S(): x(0) {} 134 | 135 | S::S(const S &other) : x(other.x) {} 136 | 137 | S S::f(int y, int z) { 138 | printf("output from a.out: this.x=%d y=%d z=%d\n", x, y, z); 139 | x += (y + z); 140 | return *this; 141 | } 142 | 143 | // trace.cc 144 | #include 145 | #include "x.h" 146 | 147 | int main(int argc, char **argv) { 148 | S s; 149 | int i = 0; 150 | while (1) { 151 | s.f(i, i); 152 | ++i; 153 | sleep(1); 154 | } 155 | return 0; 156 | } 157 | ``` 158 | 159 | ``` 160 | $ clang++ -fPIC -shared -g -O0 X.cc -o libzh_x.so 161 | $ clang++ -g -O0 trace.cc -o trace -L. 
-lzh_x 162 | ``` 163 | 164 | Because of [-fstandalone-debug](https://clang.llvm.org/docs/UsersManual.html#controlling-size-of-debug-information), the trace binary file does not contain any debugging information for `X`: 165 | 166 | ``` 167 | $ readelf --debug-dump=info trace 168 | <1>: Abbrev Number: 13 (DW_TAG_structure_type) 169 | DW_AT_name : X 170 | DW_AT_declaration : 1 171 | ``` 172 | 173 | Because there is no debugging information for `X` in the trace binary file, a placeholder `__u8 __unknown_type1[12]` is used. 174 | 175 | ``` 176 | $ clayout -i trace -o output S 177 | ``` 178 | 179 | ```C++ 180 | // output.h 181 | // Generated by hidva/clayout! 大吉大利! 182 | #pragma once 183 | #include 184 | 185 | struct S { 186 | __u8 __unknown_type1[12]; 187 | int x; 188 | } __attribute__((__packed__)); 189 | ``` 190 | 191 | We can pass multiple input files to get the details of `X`: 192 | 193 | ```bash 194 | $ clayout -i trace -i libzh_x.so -o output S 195 | ``` 196 | 197 | ```C++ 198 | // output.h 199 | // Generated by hidva/clayout! 大吉大利! 200 | #pragma once 201 | #include 202 | struct HidvaStruct2 { 203 | void** __mem1; 204 | int x1; 205 | } __attribute__((__packed__)); 206 | 207 | 208 | struct S { 209 | struct HidvaStruct2 __parent0; 210 | int x; 211 | } __attribute__((__packed__)); 212 | ``` 213 | 214 | -------------------------------------------------------------------------------- /parser/src/range.rs: -------------------------------------------------------------------------------- 1 | use std::mem; 2 | 3 | /// An address range. 4 | #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 5 | pub struct Range { 6 | /// The beginning of the address range (inclusive). 7 | pub begin: u64, 8 | 9 | /// The end of the address range (exclusive). 10 | pub end: u64, 11 | } 12 | 13 | impl Range { 14 | /// A range that covers everything. 15 | pub fn all() -> Self { 16 | Range { begin: 0, end: !0 } 17 | } 18 | 19 | /// The size of the address range. 
20 | #[inline] 21 | pub fn size(&self) -> u64 { 22 | self.end - self.begin 23 | } 24 | 25 | /// Return true if the range contains the value. 26 | #[inline] 27 | pub fn contains(&self, addr: u64) -> bool { 28 | self.begin <= addr && addr < self.end 29 | } 30 | } 31 | 32 | /// A list of address ranges. 33 | #[derive(Debug, Default, Clone)] 34 | pub struct RangeList { 35 | ranges: Vec, 36 | } 37 | 38 | impl RangeList { 39 | /// The ranges in the list. 40 | #[inline] 41 | pub fn list(&self) -> &[Range] { 42 | &self.ranges 43 | } 44 | 45 | /// The total size of the ranges in the list. 46 | pub fn size(&self) -> u64 { 47 | let mut size = 0; 48 | for range in &self.ranges { 49 | size += range.size(); 50 | } 51 | size 52 | } 53 | 54 | /// Append a range, combining with previous range if possible. Ranges with `end <= begin` are logged at debug level and dropped. 55 | pub fn push(&mut self, range: Range) { 56 | if range.end <= range.begin { 57 | debug!("invalid range: {:?}", range); 58 | return; 59 | } 60 | if let Some(prev) = self.ranges.last_mut() { 61 | // Assume up to 15 bytes of padding if range.begin is 16-byte aligned. 62 | // (This may be a wrong assumption, but does it matter and 63 | // how do we do better?) 64 | // TODO: make alignment configurable 65 | let padding = if range.begin == range.begin & !15 { 66 | 15 67 | } else { 68 | 0 69 | }; 70 | // Merge ranges if new range begins in or after previous range. 71 | // We don't care about merging in opposite order (that'll happen 72 | // when sorting). NOTE(review): `prev.end + padding` can overflow when `prev.end` is within 15 of `u64::MAX` (e.g. after `Range::all()` was pushed); consider `prev.end.saturating_add(padding)`. 73 | if range.begin >= prev.begin && range.begin <= prev.end + padding { 74 | if prev.end < range.end { 75 | prev.end = range.end; 76 | } 77 | return; 78 | } 79 | } 80 | self.ranges.push(range); 81 | } 82 | 83 | /// Sort the ranges by beginning address, and combine ranges where possible. 84 | pub fn sort(&mut self) { 85 | self.ranges.sort_by(|a, b| a.begin.cmp(&b.begin)); 86 | // Combine ranges by adding to a new list. 
87 | let mut ranges = Vec::new(); 88 | mem::swap(&mut ranges, &mut self.ranges); 89 | for range in ranges { 90 | self.push(range); 91 | } 92 | } 93 | 94 | /// Return a new list containing the parts of this list that are not covered by `other`; neither input list is modified. 95 | /// 96 | /// This handles ranges that only partially overlap with existing ranges. `other` is walked forward only once, so both lists are presumably expected to be sorted by beginning address (TODO confirm all callers sort first). 97 | pub fn subtract(&self, other: &Self) -> Self { 98 | let mut ranges = RangeList::default(); 99 | let mut other_ranges = other.ranges.iter(); 100 | let mut other_range = other_ranges.next(); 101 | for range in &*self.ranges { 102 | let mut range = *range; 103 | loop { 104 | match other_range { 105 | Some(r) => { 106 | // Is r completely before range? 107 | if r.end <= range.begin { 108 | other_range = other_ranges.next(); 109 | continue; 110 | } 111 | // Is r completely after range? 112 | if r.begin >= range.end { 113 | ranges.push(range); 114 | break; 115 | } 116 | // Do we need to keep the head of the range? 117 | if r.begin > range.begin { 118 | ranges.push(Range { 119 | begin: range.begin, 120 | end: r.begin, 121 | }); 122 | } 123 | // Do we need to keep the tail of the range? 124 | if r.end < range.end { 125 | range.begin = r.end; 126 | other_range = other_ranges.next(); 127 | continue; 128 | } 129 | break; 130 | } 131 | None => { 132 | ranges.push(range); 133 | break; 134 | } 135 | } 136 | } 137 | } 138 | ranges.sort(); 139 | ranges 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /parser/src/namespace.rs: -------------------------------------------------------------------------------- 1 | use std::cmp; 2 | use std::sync::Arc; 3 | 4 | /// A namespace kind. 5 | #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] 6 | pub enum NamespaceKind { 7 | /// An explicit namespace. 8 | Namespace, 9 | /// A namespace for items defined within a function. 10 | Function, 11 | /// A namespace for items defined within a type. 12 | Type, 13 | } 14 | 15 | /// A nestable namespace. 
16 | #[derive(Debug)] 17 | pub struct Namespace<'input> { 18 | pub(crate) parent: Option>>, 19 | pub(crate) name: Option<&'input str>, 20 | pub(crate) kind: NamespaceKind, 21 | } 22 | 23 | impl<'input> Namespace<'input> { 24 | pub(crate) fn new( 25 | parent: &Option>>, 26 | name: Option<&'input str>, 27 | kind: NamespaceKind, 28 | ) -> Arc> { 29 | Arc::new(Namespace { 30 | parent: parent.clone(), 31 | name, 32 | kind, 33 | }) 34 | } 35 | 36 | /// The parent namespace. 37 | pub fn parent(&self) -> Option<&Namespace<'input>> { 38 | self.parent.as_ref().map(|x| &**x) 39 | } 40 | 41 | /// The namespace name. 42 | #[inline] 43 | pub fn name(&self) -> Option<&str> { 44 | self.name 45 | } 46 | 47 | /// The namespace kind. 48 | #[inline] 49 | pub fn kind(&self) -> NamespaceKind { 50 | self.kind 51 | } 52 | 53 | fn len(&self) -> usize { 54 | match self.parent { 55 | Some(ref parent) => parent.len() + 1, 56 | None => 1, 57 | } 58 | } 59 | 60 | fn up(&self, len: usize) -> &Namespace { 61 | if len == 0 { 62 | self 63 | } else { 64 | match self.parent { 65 | Some(ref parent) => parent.up(len - 1), 66 | None => self, 67 | } 68 | } 69 | } 70 | 71 | pub(crate) fn is_anon_type(namespace: &Option>) -> bool { 72 | match *namespace { 73 | Some(ref namespace) => { 74 | namespace.kind == NamespaceKind::Type 75 | && (namespace.name.is_none() || Namespace::is_anon_type(&namespace.parent)) 76 | } 77 | None => false, 78 | } 79 | } 80 | 81 | fn _is_within>(&self, namespace: &[T]) -> (bool, usize) { 82 | let (ret, offset) = match self.parent { 83 | Some(ref parent) => parent._is_within(namespace), 84 | None => (true, 0), 85 | }; 86 | 87 | if ret { 88 | if offset < namespace.len() { 89 | match self.name() { 90 | Some(name) => (name == namespace[offset].as_ref(), offset + 1), 91 | None => (false, offset + 1), 92 | } 93 | } else { 94 | (true, offset) 95 | } 96 | } else { 97 | (false, 0) 98 | } 99 | } 100 | 101 | /// Return true if this namespace is within the given namespace. 
102 | /// 103 | /// `namespace` is a slice of names, starting with the root namespace name. 104 | pub fn is_within>(&self, namespace: &[T]) -> bool { 105 | self._is_within(namespace) == (true, namespace.len()) 106 | } 107 | 108 | fn _cmp(a: &Namespace, b: &Namespace) -> cmp::Ordering { 109 | debug_assert_eq!(a.len(), b.len()); 110 | match (a.parent.as_ref(), b.parent.as_ref()) { 111 | (Some(p1), Some(p2)) => { 112 | let ord = Self::_cmp(p1, p2); 113 | if ord != cmp::Ordering::Equal { 114 | return ord; 115 | } 116 | } 117 | _ => {} 118 | } 119 | a.name.cmp(&b.name) 120 | } 121 | 122 | fn cmp(a: &Namespace, b: &Namespace) -> cmp::Ordering { 123 | let len_a = a.len(); 124 | let len_b = b.len(); 125 | match len_a.cmp(&len_b) { 126 | cmp::Ordering::Equal => Self::_cmp(a, b), 127 | cmp::Ordering::Less => { 128 | let b = b.up(len_b - len_a); 129 | match Self::_cmp(a, b) { 130 | cmp::Ordering::Equal => cmp::Ordering::Less, 131 | other => other, 132 | } 133 | } 134 | cmp::Ordering::Greater => { 135 | let a = a.up(len_a - len_b); 136 | match Self::_cmp(a, b) { 137 | cmp::Ordering::Equal => cmp::Ordering::Greater, 138 | other => other, 139 | } 140 | } 141 | } 142 | } 143 | 144 | pub(crate) fn cmp_ns_and_name( 145 | ns1: Option<&Namespace>, 146 | name1: Option<&str>, 147 | ns2: Option<&Namespace>, 148 | name2: Option<&str>, 149 | ) -> cmp::Ordering { 150 | match (ns1, ns2) { 151 | (Some(ns1), Some(ns2)) => match Namespace::cmp(ns1, ns2) { 152 | cmp::Ordering::Equal => name1.cmp(&name2), 153 | o => o, 154 | }, 155 | (Some(_), None) => cmp::Ordering::Greater, 156 | (None, Some(_)) => cmp::Ordering::Less, 157 | (None, None) => name1.cmp(&name2), 158 | } 159 | } 160 | } 161 | 162 | #[cfg(test)] 163 | mod test { 164 | use super::*; 165 | 166 | #[test] 167 | fn cmp() { 168 | let ns1 = Namespace::new(&None, Some("a".into()), NamespaceKind::Namespace); 169 | let ns2 = Namespace::new(&None, Some("b".into()), NamespaceKind::Namespace); 170 | assert_eq!(Namespace::cmp(&ns1, &ns2), 
cmp::Ordering::Less); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /parser/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A library for parsing debuginfo. 2 | //! 3 | //! ## Example usage 4 | //! 5 | //! ```rust,no_run 6 | //! # fn main() -> Result<(), Box> { 7 | //! # let a_file_path = String::new(); 8 | //! let ctx = ddbug_parser::File::parse(a_file_path)?; 9 | //! let file = ctx.file(); 10 | //! for unit in file.units() { 11 | //! for function in unit.functions() { 12 | //! if let Some(name) = function.name() { 13 | //! println!("{}", name); 14 | //! } 15 | //! } 16 | //! } 17 | //! Ok(()) 18 | //! } 19 | //! ``` 20 | // Enable some rust 2018 idioms. 21 | #![warn(bare_trait_objects)] 22 | #![warn(unused_extern_crates)] 23 | // Calm down clippy. 24 | #![allow(clippy::new_ret_no_self)] 25 | #![allow(clippy::single_match)] 26 | #![allow(clippy::too_many_arguments)] 27 | #![allow(clippy::type_complexity)] 28 | 29 | #[macro_use] 30 | extern crate log; 31 | 32 | mod cfi; 33 | mod file; 34 | mod function; 35 | mod location; 36 | mod namespace; 37 | mod range; 38 | mod source; 39 | mod types; 40 | mod unit; 41 | mod variable; 42 | 43 | pub use crate::cfi::*; 44 | pub use crate::file::*; 45 | pub use crate::function::*; 46 | pub use crate::location::*; 47 | pub use crate::namespace::*; 48 | pub use crate::range::*; 49 | pub use crate::source::*; 50 | pub use crate::types::*; 51 | pub use crate::unit::*; 52 | pub use crate::variable::*; 53 | 54 | use std::borrow::{Borrow, Cow}; 55 | use std::error; 56 | use std::fmt; 57 | use std::io; 58 | use std::result; 59 | use std::sync::atomic::{AtomicUsize, Ordering}; 60 | 61 | /// A parsing error. 
62 | #[derive(Debug)] 63 | pub struct Error(pub Cow<'static, str>); 64 | 65 | impl error::Error for Error { 66 | fn description(&self) -> &str { 67 | self.0.borrow() 68 | } 69 | } 70 | 71 | impl fmt::Display for Error { 72 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 73 | write!(f, "{}", self.0) 74 | } 75 | } 76 | 77 | impl From<&'static str> for Error { 78 | fn from(s: &'static str) -> Error { 79 | Error(Cow::Borrowed(s)) 80 | } 81 | } 82 | 83 | impl From for Error { 84 | fn from(s: String) -> Error { 85 | Error(Cow::Owned(s)) 86 | } 87 | } 88 | 89 | impl From for Error { 90 | fn from(e: io::Error) -> Error { 91 | Error(Cow::Owned(format!("IO error: {}", e))) 92 | } 93 | } 94 | 95 | impl From for Error { 96 | fn from(e: gimli::Error) -> Error { 97 | Error(Cow::Owned(format!("DWARF error: {}", e))) 98 | } 99 | } 100 | 101 | impl From for Error { 102 | fn from(e: object::Error) -> Error { 103 | Error(Cow::Owned(format!("object error: {}", e))) 104 | } 105 | } 106 | 107 | /* 108 | impl From for Error { 109 | fn from(e: crate_pdb::Error) -> Error { 110 | Error(Cow::Owned(format!("PDB error: {}", e))) 111 | } 112 | } 113 | */ 114 | 115 | /// A parsing result. 116 | pub type Result = result::Result; 117 | 118 | mod address { 119 | use std::u64; 120 | 121 | /// An optional address. 122 | /// 123 | /// This is similar to `Option`, but uses `!0` to encode the `None` case. 124 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 125 | pub struct Address(u64); 126 | 127 | impl Address { 128 | /// Create a known address value. 129 | #[inline] 130 | pub fn new(address: u64) -> Address { 131 | debug_assert!(Address(address) != Address::none()); 132 | Address(address) 133 | } 134 | 135 | /// Create an unknown or absent address value. 136 | #[inline] 137 | pub fn none() -> Address { 138 | Address(!0) 139 | } 140 | 141 | /// Return true if the address is unknown or absent. 
142 | #[inline] 143 | pub fn is_none(self) -> bool { 144 | self == Self::none() 145 | } 146 | 147 | /// Return true if the address is known. 148 | #[inline] 149 | pub fn is_some(self) -> bool { 150 | self != Self::none() 151 | } 152 | 153 | /// Return the address. 154 | #[inline] 155 | pub fn get(self) -> Option { 156 | if self.is_none() { 157 | None 158 | } else { 159 | Some(self.0) 160 | } 161 | } 162 | } 163 | 164 | impl Default for Address { 165 | #[inline] 166 | fn default() -> Self { 167 | Address::none() 168 | } 169 | } 170 | } 171 | 172 | pub use crate::address::Address; 173 | 174 | mod size { 175 | use std::u64; 176 | 177 | /// An optional size. 178 | /// 179 | /// This is similar to `Option`, but uses `u64::MAX` to encode the `None` case. 180 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 181 | pub struct Size(u64); 182 | 183 | impl Size { 184 | /// Create a known size value. 185 | #[inline] 186 | pub fn new(size: u64) -> Size { 187 | debug_assert!(Size(size) != Size::none()); 188 | Size(size) 189 | } 190 | 191 | /// Create an unknown or absent size value. 192 | #[inline] 193 | pub fn none() -> Size { 194 | Size(u64::MAX) 195 | } 196 | 197 | /// Return true if the size is unknown or absent. 198 | #[inline] 199 | pub fn is_none(self) -> bool { 200 | self == Self::none() 201 | } 202 | 203 | /// Return true if the size is known. 204 | #[inline] 205 | pub fn is_some(self) -> bool { 206 | self != Self::none() 207 | } 208 | 209 | /// Return the size. 
210 | #[inline] 211 | pub fn get(self) -> Option { 212 | if self.is_none() { 213 | None 214 | } else { 215 | Some(self.0) 216 | } 217 | } 218 | } 219 | 220 | impl Default for Size { 221 | #[inline] 222 | fn default() -> Self { 223 | Size::none() 224 | } 225 | } 226 | 227 | impl From> for Size { 228 | fn from(size: Option) -> Size { 229 | match size { 230 | Some(size) => Size::new(size), 231 | None => Size::none(), 232 | } 233 | } 234 | } 235 | } 236 | 237 | pub use crate::size::Size; 238 | 239 | #[derive(Debug, Default)] 240 | struct Id(AtomicUsize); 241 | 242 | impl Clone for Id { 243 | fn clone(&self) -> Self { 244 | Id(AtomicUsize::new(self.get())) 245 | } 246 | } 247 | 248 | impl Id { 249 | fn new(id: usize) -> Self { 250 | Id(AtomicUsize::new(id)) 251 | } 252 | 253 | fn get(&self) -> usize { 254 | self.0.load(Ordering::Acquire) 255 | } 256 | 257 | fn set(&self, id: usize) { 258 | self.0.store(id, Ordering::Release) 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /parser/src/variable.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | use std::cmp; 3 | use std::sync::Arc; 4 | use std::usize; 5 | 6 | use crate::file::FileHash; 7 | use crate::location::{self, FrameLocation, Location, Piece, Register}; 8 | use crate::namespace::Namespace; 9 | use crate::range::Range; 10 | use crate::source::Source; 11 | use crate::types::{Type, TypeOffset}; 12 | use crate::{Address, Id, Size}; 13 | 14 | /// The debuginfo offset of a variable. 15 | /// 16 | /// This is unique for all variables in a file. 
17 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 18 | pub struct VariableOffset(usize); 19 | 20 | impl VariableOffset { 21 | #[inline] 22 | pub(crate) fn new(offset: usize) -> VariableOffset { 23 | debug_assert!(VariableOffset(offset) != VariableOffset::none()); 24 | VariableOffset(offset) 25 | } 26 | 27 | #[inline] 28 | pub(crate) fn none() -> VariableOffset { 29 | VariableOffset(usize::MAX) 30 | } 31 | } 32 | 33 | impl Default for VariableOffset { 34 | #[inline] 35 | fn default() -> Self { 36 | VariableOffset::none() 37 | } 38 | } 39 | 40 | /// A global variable. 41 | #[derive(Debug, Default)] 42 | pub struct Variable<'input> { 43 | pub(crate) id: Id, 44 | pub(crate) offset: VariableOffset, 45 | pub(crate) namespace: Option>>, 46 | pub(crate) name: Option<&'input str>, 47 | pub(crate) linkage_name: Option<&'input str>, 48 | pub(crate) symbol_name: Option<&'input str>, 49 | pub(crate) ty: TypeOffset, 50 | pub(crate) source: Source<'input>, 51 | pub(crate) address: Address, 52 | pub(crate) size: Size, 53 | pub(crate) declaration: bool, 54 | } 55 | 56 | impl<'input> Variable<'input> { 57 | /// The user defined id for this variable. 58 | #[inline] 59 | pub fn id(&self) -> usize { 60 | self.id.get() 61 | } 62 | 63 | /// Set a user defined id for this variable. 64 | #[inline] 65 | pub fn set_id(&self, id: usize) { 66 | self.id.set(id) 67 | } 68 | 69 | /// The namespace of the variable. 70 | pub fn namespace(&self) -> Option<&Namespace> { 71 | self.namespace.as_ref().map(|x| &**x) 72 | } 73 | 74 | /// The name of the variable. 75 | #[inline] 76 | pub fn name(&self) -> Option<&str> { 77 | self.name 78 | } 79 | 80 | /// The linkage name of the variable. 81 | #[inline] 82 | pub fn linkage_name(&self) -> Option<&str> { 83 | self.linkage_name 84 | } 85 | 86 | /// The symbol name of the variable. 87 | /// 88 | /// This is determined from a symbol table entry with a matching address. 
89 | #[inline] 90 | pub fn symbol_name(&self) -> Option<&str> { 91 | self.symbol_name 92 | } 93 | 94 | /// The type of the variable. 95 | /// 96 | /// Returns `None` if the type is invalid. 97 | #[inline] 98 | pub fn ty<'a>(&self, hash: &'a FileHash<'input>) -> Option>> { 99 | Type::from_offset(hash, self.ty) 100 | } 101 | 102 | /// The source information for the variable. 103 | #[inline] 104 | pub fn source(&self) -> &Source<'input> { 105 | &self.source 106 | } 107 | 108 | /// The address of the variable. 109 | #[inline] 110 | pub fn address(&self) -> Option { 111 | self.address.get() 112 | } 113 | 114 | /// The size in bytes of the variable. 115 | pub fn byte_size(&self, hash: &FileHash) -> Option { 116 | if self.size.is_some() { 117 | self.size.get() 118 | } else { 119 | self.ty(hash).and_then(|t| t.byte_size(hash)) 120 | } 121 | } 122 | 123 | /// The address range of the variable. 124 | pub fn range(&self, hash: &FileHash) -> Option { 125 | match (self.address(), self.byte_size(hash)) { 126 | (Some(begin), Some(size)) => { 127 | if size != 0 { 128 | Some(Range { 129 | begin, 130 | end: begin + size, 131 | }) 132 | } else { 133 | None 134 | } 135 | } 136 | _ => None, 137 | } 138 | } 139 | 140 | /// Return true if this is a declaration. 141 | #[inline] 142 | pub fn is_declaration(&self) -> bool { 143 | self.declaration 144 | } 145 | 146 | /// Compare the identifying information of two variables. 147 | /// 148 | /// Variables are equal if they have the same namespace and name. 149 | /// 150 | /// This can be used to sort, and to determine if two variables refer to the same definition 151 | /// (even if there are differences in the definitions). 152 | pub fn cmp_id( 153 | _hash_a: &FileHash, 154 | a: &Variable, 155 | _hash_b: &FileHash, 156 | b: &Variable, 157 | ) -> cmp::Ordering { 158 | Namespace::cmp_ns_and_name(a.namespace(), a.name(), b.namespace(), b.name()) 159 | } 160 | } 161 | 162 | /// A local variable. 
163 | #[derive(Debug, Default, Clone)] 164 | pub struct LocalVariable<'input> { 165 | pub(crate) offset: VariableOffset, 166 | pub(crate) name: Option<&'input str>, 167 | pub(crate) ty: TypeOffset, 168 | pub(crate) source: Source<'input>, 169 | pub(crate) address: Address, 170 | pub(crate) size: Size, 171 | pub(crate) locations: Vec<(Range, Piece)>, 172 | } 173 | 174 | impl<'input> LocalVariable<'input> { 175 | /// The name of the variable. 176 | #[inline] 177 | pub fn name(&self) -> Option<&'input str> { 178 | self.name 179 | } 180 | 181 | /// The type offset of the variable. 182 | /// 183 | /// A type offset is unique for all types in a file. 184 | #[inline] 185 | pub fn type_offset(&self) -> TypeOffset { 186 | self.ty 187 | } 188 | 189 | /// The type of the variable. 190 | /// 191 | /// Returns `None` if the type is invalid. 192 | #[inline] 193 | pub fn ty<'a>(&self, hash: &'a FileHash<'input>) -> Option>> { 194 | Type::from_offset(hash, self.ty) 195 | } 196 | 197 | /// The source information for the variable. 198 | #[inline] 199 | pub fn source(&self) -> &Source<'input> { 200 | &self.source 201 | } 202 | 203 | /// The address of the variable. 204 | /// 205 | /// This will only be known for static variables. 206 | #[inline] 207 | pub fn address(&self) -> Option { 208 | self.address.get() 209 | } 210 | 211 | /// The size in bytes of the variable. 212 | pub fn byte_size(&self, hash: &FileHash) -> Option { 213 | if self.size.is_some() { 214 | self.size.get() 215 | } else { 216 | self.ty(hash).and_then(|t| t.byte_size(hash)) 217 | } 218 | } 219 | 220 | /// The registers in which this variable is stored. 221 | pub fn registers<'a>(&'a self) -> impl Iterator + 'a { 222 | location::registers(&self.locations) 223 | } 224 | 225 | /// The registers pointing to where this variable is stored. 
226 | pub fn register_offsets<'a>(&'a self) -> impl Iterator + 'a { 227 | location::register_offsets(&self.locations) 228 | } 229 | 230 | /// The stack frame locations at which this variable is stored. 231 | pub fn frame_locations<'a>(&'a self) -> impl Iterator + 'a { 232 | self.locations.iter().filter_map(|(_, piece)| { 233 | if piece.is_value { 234 | return None; 235 | } 236 | match piece.location { 237 | // TODO: do we need to distinguish between these? 238 | Location::FrameOffset { offset } | Location::CfaOffset { offset } => { 239 | Some(FrameLocation { 240 | offset, 241 | bit_size: piece.bit_size, 242 | }) 243 | } 244 | _ => None, 245 | } 246 | }) 247 | } 248 | 249 | /// Compare the identifying information of two variables. 250 | /// 251 | /// Variables are considered equal if their names are equal. 252 | /// 253 | /// This can be used to sort, and to determine if two variables refer to the same definition 254 | /// (even if there are differences in the definitions). 255 | pub fn cmp_id(_hash_a: &FileHash, a: &Self, _hash_b: &FileHash, b: &Self) -> cmp::Ordering { 256 | a.name.cmp(&b.name) 257 | } 258 | } 259 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "aho-corasick" 13 | version = "0.7.19" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" 16 | dependencies = [ 17 | "memchr", 18 | ] 19 | 20 | [[package]] 21 | name = "anyhow" 22 | version = "1.0.66" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" 25 | 26 | [[package]] 27 | name = "atty" 28 | version = "0.2.14" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 31 | dependencies = [ 32 | "hermit-abi", 33 | "libc", 34 | "winapi", 35 | ] 36 | 37 | [[package]] 38 | name = "autocfg" 39 | version = "1.1.0" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 42 | 43 | [[package]] 44 | name = "bitflags" 45 | version = "1.3.2" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 48 | 49 | [[package]] 50 | name = "cfg-if" 51 | version = "1.0.0" 52 | source = "registry+https://github.com/rust-lang/crates.io-index" 53 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 54 | 55 | [[package]] 56 | name = "clap" 57 | version = "4.0.19" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "8e67816e006b17427c9b4386915109b494fec2d929c63e3bd3561234cbf1bf1e" 60 | dependencies = [ 61 | "atty", 62 | "bitflags", 63 | "clap_derive", 64 | "clap_lex", 65 | "once_cell", 66 
| "strsim", 67 | "termcolor", 68 | ] 69 | 70 | [[package]] 71 | name = "clap_derive" 72 | version = "4.0.18" 73 | source = "registry+https://github.com/rust-lang/crates.io-index" 74 | checksum = "16a1b0f6422af32d5da0c58e2703320f379216ee70198241c84173a8c5ac28f3" 75 | dependencies = [ 76 | "heck", 77 | "proc-macro-error", 78 | "proc-macro2", 79 | "quote", 80 | "syn", 81 | ] 82 | 83 | [[package]] 84 | name = "clap_lex" 85 | version = "0.3.0" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8" 88 | dependencies = [ 89 | "os_str_bytes", 90 | ] 91 | 92 | [[package]] 93 | name = "clayout" 94 | version = "0.3.3" 95 | dependencies = [ 96 | "anyhow", 97 | "clap", 98 | "ddbug_parser", 99 | "env_logger", 100 | "log", 101 | ] 102 | 103 | [[package]] 104 | name = "crc32fast" 105 | version = "1.3.2" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" 108 | dependencies = [ 109 | "cfg-if", 110 | ] 111 | 112 | [[package]] 113 | name = "ddbug_parser" 114 | version = "0.3.0" 115 | dependencies = [ 116 | "fnv", 117 | "gimli", 118 | "log", 119 | "memmap", 120 | "object", 121 | ] 122 | 123 | [[package]] 124 | name = "env_logger" 125 | version = "0.9.1" 126 | source = "registry+https://github.com/rust-lang/crates.io-index" 127 | checksum = "c90bf5f19754d10198ccb95b70664fc925bd1fc090a0fd9a6ebc54acc8cd6272" 128 | dependencies = [ 129 | "atty", 130 | "humantime", 131 | "log", 132 | "regex", 133 | "termcolor", 134 | ] 135 | 136 | [[package]] 137 | name = "fallible-iterator" 138 | version = "0.2.0" 139 | source = "registry+https://github.com/rust-lang/crates.io-index" 140 | checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" 141 | 142 | [[package]] 143 | name = "flate2" 144 | version = "1.0.22" 145 | source = "registry+https://github.com/rust-lang/crates.io-index" 
146 | checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" 147 | dependencies = [ 148 | "cfg-if", 149 | "crc32fast", 150 | "libc", 151 | "miniz_oxide", 152 | ] 153 | 154 | [[package]] 155 | name = "fnv" 156 | version = "1.0.7" 157 | source = "registry+https://github.com/rust-lang/crates.io-index" 158 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 159 | 160 | [[package]] 161 | name = "gimli" 162 | version = "0.26.1" 163 | source = "registry+https://github.com/rust-lang/crates.io-index" 164 | checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4" 165 | dependencies = [ 166 | "fallible-iterator", 167 | "indexmap", 168 | "stable_deref_trait", 169 | ] 170 | 171 | [[package]] 172 | name = "hashbrown" 173 | version = "0.11.2" 174 | source = "registry+https://github.com/rust-lang/crates.io-index" 175 | checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" 176 | 177 | [[package]] 178 | name = "heck" 179 | version = "0.4.0" 180 | source = "registry+https://github.com/rust-lang/crates.io-index" 181 | checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" 182 | 183 | [[package]] 184 | name = "hermit-abi" 185 | version = "0.1.19" 186 | source = "registry+https://github.com/rust-lang/crates.io-index" 187 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 188 | dependencies = [ 189 | "libc", 190 | ] 191 | 192 | [[package]] 193 | name = "humantime" 194 | version = "2.1.0" 195 | source = "registry+https://github.com/rust-lang/crates.io-index" 196 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 197 | 198 | [[package]] 199 | name = "indexmap" 200 | version = "1.8.0" 201 | source = "registry+https://github.com/rust-lang/crates.io-index" 202 | checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223" 203 | dependencies = [ 204 | "autocfg", 205 | "hashbrown", 206 | ] 207 | 208 | 
[[package]] 209 | name = "libc" 210 | version = "0.2.119" 211 | source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4" 213 | 214 | [[package]] 215 | name = "log" 216 | version = "0.4.17" 217 | source = "registry+https://github.com/rust-lang/crates.io-index" 218 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 219 | dependencies = [ 220 | "cfg-if", 221 | ] 222 | 223 | [[package]] 224 | name = "memchr" 225 | version = "2.4.1" 226 | source = "registry+https://github.com/rust-lang/crates.io-index" 227 | checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" 228 | 229 | [[package]] 230 | name = "memmap" 231 | version = "0.7.0" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" 234 | dependencies = [ 235 | "libc", 236 | "winapi", 237 | ] 238 | 239 | [[package]] 240 | name = "miniz_oxide" 241 | version = "0.4.4" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" 244 | dependencies = [ 245 | "adler", 246 | "autocfg", 247 | ] 248 | 249 | [[package]] 250 | name = "object" 251 | version = "0.28.3" 252 | source = "registry+https://github.com/rust-lang/crates.io-index" 253 | checksum = "40bec70ba014595f99f7aa110b84331ffe1ee9aece7fe6f387cc7e3ecda4d456" 254 | dependencies = [ 255 | "flate2", 256 | "memchr", 257 | ] 258 | 259 | [[package]] 260 | name = "once_cell" 261 | version = "1.16.0" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" 264 | 265 | [[package]] 266 | name = "os_str_bytes" 267 | version = "6.0.0" 268 | source = "registry+https://github.com/rust-lang/crates.io-index" 269 | checksum = 
"8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64" 270 | 271 | [[package]] 272 | name = "proc-macro-error" 273 | version = "1.0.4" 274 | source = "registry+https://github.com/rust-lang/crates.io-index" 275 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 276 | dependencies = [ 277 | "proc-macro-error-attr", 278 | "proc-macro2", 279 | "quote", 280 | "syn", 281 | "version_check", 282 | ] 283 | 284 | [[package]] 285 | name = "proc-macro-error-attr" 286 | version = "1.0.4" 287 | source = "registry+https://github.com/rust-lang/crates.io-index" 288 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 289 | dependencies = [ 290 | "proc-macro2", 291 | "quote", 292 | "version_check", 293 | ] 294 | 295 | [[package]] 296 | name = "proc-macro2" 297 | version = "1.0.47" 298 | source = "registry+https://github.com/rust-lang/crates.io-index" 299 | checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" 300 | dependencies = [ 301 | "unicode-ident", 302 | ] 303 | 304 | [[package]] 305 | name = "quote" 306 | version = "1.0.21" 307 | source = "registry+https://github.com/rust-lang/crates.io-index" 308 | checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" 309 | dependencies = [ 310 | "proc-macro2", 311 | ] 312 | 313 | [[package]] 314 | name = "regex" 315 | version = "1.6.0" 316 | source = "registry+https://github.com/rust-lang/crates.io-index" 317 | checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" 318 | dependencies = [ 319 | "aho-corasick", 320 | "memchr", 321 | "regex-syntax", 322 | ] 323 | 324 | [[package]] 325 | name = "regex-syntax" 326 | version = "0.6.27" 327 | source = "registry+https://github.com/rust-lang/crates.io-index" 328 | checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" 329 | 330 | [[package]] 331 | name = "stable_deref_trait" 332 | version = "1.2.0" 333 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 334 | checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" 335 | 336 | [[package]] 337 | name = "strsim" 338 | version = "0.10.0" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 341 | 342 | [[package]] 343 | name = "syn" 344 | version = "1.0.103" 345 | source = "registry+https://github.com/rust-lang/crates.io-index" 346 | checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" 347 | dependencies = [ 348 | "proc-macro2", 349 | "quote", 350 | "unicode-ident", 351 | ] 352 | 353 | [[package]] 354 | name = "termcolor" 355 | version = "1.1.2" 356 | source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" 358 | dependencies = [ 359 | "winapi-util", 360 | ] 361 | 362 | [[package]] 363 | name = "unicode-ident" 364 | version = "1.0.5" 365 | source = "registry+https://github.com/rust-lang/crates.io-index" 366 | checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" 367 | 368 | [[package]] 369 | name = "version_check" 370 | version = "0.9.4" 371 | source = "registry+https://github.com/rust-lang/crates.io-index" 372 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 373 | 374 | [[package]] 375 | name = "winapi" 376 | version = "0.3.9" 377 | source = "registry+https://github.com/rust-lang/crates.io-index" 378 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 379 | dependencies = [ 380 | "winapi-i686-pc-windows-gnu", 381 | "winapi-x86_64-pc-windows-gnu", 382 | ] 383 | 384 | [[package]] 385 | name = "winapi-i686-pc-windows-gnu" 386 | version = "0.4.0" 387 | source = "registry+https://github.com/rust-lang/crates.io-index" 388 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 
389 | 390 | [[package]] 391 | name = "winapi-util" 392 | version = "0.1.5" 393 | source = "registry+https://github.com/rust-lang/crates.io-index" 394 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 395 | dependencies = [ 396 | "winapi", 397 | ] 398 | 399 | [[package]] 400 | name = "winapi-x86_64-pc-windows-gnu" 401 | version = "0.4.0" 402 | source = "registry+https://github.com/rust-lang/crates.io-index" 403 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 404 | -------------------------------------------------------------------------------- /parser/src/function.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | use std::cmp; 3 | use std::sync::Arc; 4 | use std::usize; 5 | 6 | use crate::file::FileHash; 7 | use crate::location::{self, FrameLocation, Piece, Register}; 8 | use crate::namespace::Namespace; 9 | use crate::range::Range; 10 | use crate::source::Source; 11 | use crate::types::{ParameterType, Type, TypeOffset}; 12 | use crate::variable::LocalVariable; 13 | use crate::{Address, Id, Size}; 14 | 15 | /// The debuginfo offset of a function. 16 | /// 17 | /// This is unique for all functions in a file. 
18 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 19 | pub struct FunctionOffset(usize); 20 | 21 | impl FunctionOffset { 22 | #[inline] 23 | pub(crate) fn new(offset: usize) -> FunctionOffset { 24 | debug_assert!(FunctionOffset(offset) != FunctionOffset::none()); 25 | FunctionOffset(offset) 26 | } 27 | 28 | #[inline] 29 | pub(crate) fn none() -> FunctionOffset { 30 | FunctionOffset(usize::MAX) 31 | } 32 | 33 | #[inline] 34 | pub(crate) fn is_none(self) -> bool { 35 | self == Self::none() 36 | } 37 | 38 | #[inline] 39 | pub(crate) fn is_some(self) -> bool { 40 | self != Self::none() 41 | } 42 | 43 | #[inline] 44 | pub(crate) fn get(self) -> Option { 45 | if self.is_none() { 46 | None 47 | } else { 48 | Some(self.0) 49 | } 50 | } 51 | } 52 | 53 | impl Default for FunctionOffset { 54 | #[inline] 55 | fn default() -> Self { 56 | FunctionOffset::none() 57 | } 58 | } 59 | 60 | /// A function. 61 | #[derive(Debug, Default)] 62 | pub struct Function<'input> { 63 | pub(crate) id: Id, 64 | pub(crate) offset: FunctionOffset, 65 | pub(crate) namespace: Option>>, 66 | pub(crate) name: Option<&'input str>, 67 | pub(crate) linkage_name: Option<&'input str>, 68 | pub(crate) symbol_name: Option<&'input str>, 69 | pub(crate) source: Source<'input>, 70 | pub(crate) address: Address, 71 | pub(crate) size: Size, 72 | pub(crate) ranges: Vec, 73 | pub(crate) inline: bool, 74 | pub(crate) declaration: bool, 75 | pub(crate) parameters: Vec>, 76 | pub(crate) return_type: TypeOffset, 77 | } 78 | 79 | /// Extra function details. 80 | /// 81 | /// These are kept separate from `Function` so that they can be loaded only when needed. 
82 | #[derive(Debug, Default)] 83 | pub struct FunctionDetails<'input> { 84 | pub(crate) parameters: Vec>, 85 | pub(crate) variables: Vec>, 86 | pub(crate) inlined_functions: Vec>, 87 | } 88 | 89 | impl<'input> Function<'input> { 90 | pub(crate) fn from_offset<'a>( 91 | hash: &'a FileHash<'input>, 92 | offset: FunctionOffset, 93 | ) -> Option<&'a Function<'input>> { 94 | if offset.is_none() { 95 | return None; 96 | } 97 | hash.functions_by_offset.get(&offset).cloned() 98 | } 99 | 100 | /// The user defined id for this function. 101 | #[inline] 102 | pub fn id(&self) -> usize { 103 | self.id.get() 104 | } 105 | 106 | /// Set a user defined id for this function. 107 | #[inline] 108 | pub fn set_id(&self, id: usize) { 109 | self.id.set(id) 110 | } 111 | 112 | /// The namespace of the function. 113 | pub fn namespace(&self) -> Option<&Namespace> { 114 | self.namespace.as_ref().map(|x| &**x) 115 | } 116 | 117 | /// The name of the function. 118 | #[inline] 119 | pub fn name(&self) -> Option<&str> { 120 | self.name 121 | } 122 | 123 | /// The linkage name of the variable. 124 | #[inline] 125 | pub fn linkage_name(&self) -> Option<&str> { 126 | self.linkage_name 127 | } 128 | 129 | /// The symbol name of the function. 130 | /// 131 | /// This is determined from a symbol table entry with a matching address. 132 | #[inline] 133 | pub fn symbol_name(&self) -> Option<&str> { 134 | self.symbol_name 135 | } 136 | 137 | /// The source information for the function. 138 | #[inline] 139 | pub fn source(&self) -> &Source<'input> { 140 | &self.source 141 | } 142 | 143 | /// The address of the function. 144 | #[inline] 145 | pub fn address(&self) -> Option { 146 | self.address.get() 147 | } 148 | 149 | /// The size in bytes of the function. 150 | /// 151 | /// This may exclude padding, and may be non-contiguous. 152 | #[inline] 153 | pub fn size(&self) -> Option { 154 | self.size.get() 155 | } 156 | 157 | /// The address ranges of the function. 
158 | pub fn ranges(&self) -> &[Range] { 159 | &self.ranges 160 | } 161 | 162 | /// Return true if this is an inlined function. 163 | #[inline] 164 | pub fn is_inline(&self) -> bool { 165 | self.inline 166 | } 167 | 168 | /// Return true if this is a declaration. 169 | #[inline] 170 | pub fn is_declaration(&self) -> bool { 171 | self.declaration 172 | } 173 | 174 | /// The function parameter types. 175 | #[inline] 176 | pub fn parameters(&self) -> &[ParameterType<'input>] { 177 | &self.parameters 178 | } 179 | 180 | /// The return type. 181 | /// 182 | /// Returns `None` if the return type is invalid. 183 | #[inline] 184 | pub fn return_type<'a>(&self, hash: &'a FileHash<'input>) -> Option>> { 185 | Type::from_offset(hash, self.return_type) 186 | } 187 | 188 | /// Extra function details. 189 | pub fn details(&self, hash: &FileHash<'input>) -> FunctionDetails<'input> { 190 | hash.file.get_function_details(self.offset, hash) 191 | } 192 | 193 | /// Compare the identifying information of two functions. 194 | /// 195 | /// Functions are equal if they have the same namespace and name. 196 | /// 197 | /// This can be used to sort, and to determine if two functions refer to the same definition 198 | /// (even if there are differences in the definitions). 199 | pub fn cmp_id( 200 | _hash_a: &FileHash, 201 | a: &Function, 202 | _hash_b: &FileHash, 203 | b: &Function, 204 | ) -> cmp::Ordering { 205 | Namespace::cmp_ns_and_name(a.namespace(), a.name(), b.namespace(), b.name()) 206 | } 207 | } 208 | 209 | impl<'input> FunctionDetails<'input> { 210 | /// The function parameters. 211 | #[inline] 212 | pub fn parameters(&self) -> &[Parameter<'input>] { 213 | &self.parameters 214 | } 215 | 216 | /// The local variables. 217 | #[inline] 218 | pub fn variables(&self) -> &[LocalVariable<'input>] { 219 | &self.variables 220 | } 221 | 222 | /// The inlined functions. 
223 | #[inline] 224 | pub fn inlined_functions(&self) -> &[InlinedFunction<'input>] { 225 | &self.inlined_functions 226 | } 227 | } 228 | 229 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 230 | pub(crate) struct ParameterOffset(usize); 231 | 232 | impl ParameterOffset { 233 | #[inline] 234 | pub(crate) fn new(offset: usize) -> ParameterOffset { 235 | debug_assert!(ParameterOffset(offset) != ParameterOffset::none()); 236 | ParameterOffset(offset) 237 | } 238 | 239 | #[inline] 240 | pub(crate) fn none() -> ParameterOffset { 241 | ParameterOffset(usize::MAX) 242 | } 243 | } 244 | 245 | impl Default for ParameterOffset { 246 | #[inline] 247 | fn default() -> Self { 248 | ParameterOffset::none() 249 | } 250 | } 251 | 252 | /// A function parameter. 253 | #[derive(Debug, Default, Clone)] 254 | pub struct Parameter<'input> { 255 | pub(crate) offset: ParameterOffset, 256 | pub(crate) name: Option<&'input str>, 257 | pub(crate) ty: TypeOffset, 258 | // TODO: move this to ParameterDetails 259 | pub(crate) locations: Vec<(Range, Piece)>, 260 | } 261 | 262 | impl<'input> Parameter<'input> { 263 | /// The name of the parameter. 264 | #[inline] 265 | pub fn name(&self) -> Option<&'input str> { 266 | self.name 267 | } 268 | 269 | /// The type offset of the parameter. 270 | /// 271 | /// A type offset is unique for all types in a file. 272 | #[inline] 273 | pub fn type_offset(&self) -> TypeOffset { 274 | self.ty 275 | } 276 | 277 | /// The type of the parameter. 278 | #[inline] 279 | pub fn ty<'a>(&self, hash: &'a FileHash<'input>) -> Option>> { 280 | Type::from_offset(hash, self.ty) 281 | } 282 | 283 | /// The size in bytes of the parameter. 284 | pub fn byte_size(&self, hash: &FileHash) -> Option { 285 | self.ty(hash).and_then(|v| v.byte_size(hash)) 286 | } 287 | 288 | /// The registers in which this parameter is stored. 
289 | pub fn registers<'a>(&'a self) -> impl Iterator + 'a { 290 | location::registers(&self.locations) 291 | } 292 | 293 | /// The registers pointing to where this variable is stored. 294 | pub fn register_offsets<'a>(&'a self) -> impl Iterator + 'a { 295 | location::register_offsets(&self.locations) 296 | } 297 | 298 | /// The stack frame locations at which this parameter is stored. 299 | pub fn frame_locations<'a>(&'a self) -> impl Iterator + 'a { 300 | location::frame_locations(&self.locations) 301 | } 302 | 303 | /// Compare the identifying information of two parameters. 304 | /// 305 | /// Parameters are considered equal if their name and type are equal. 306 | /// 307 | /// This can be used to sort, and to determine if two types refer to the same definition 308 | /// (even if there are differences in the definitions). 309 | #[allow(dead_code)] 310 | fn cmp_id(hash_a: &FileHash, a: &Parameter, hash_b: &FileHash, b: &Parameter) -> cmp::Ordering { 311 | let ord = Self::cmp_type(hash_a, a, hash_b, b); 312 | if ord != cmp::Ordering::Equal { 313 | return ord; 314 | } 315 | a.name.cmp(&b.name) 316 | } 317 | 318 | /// Compare the types of two parameters. 319 | pub fn cmp_type( 320 | hash_a: &FileHash, 321 | a: &Parameter, 322 | hash_b: &FileHash, 323 | b: &Parameter, 324 | ) -> cmp::Ordering { 325 | match (a.ty(hash_a), b.ty(hash_b)) { 326 | (Some(ref ty_a), Some(ref ty_b)) => Type::cmp_id(hash_a, ty_a, hash_b, ty_b), 327 | (Some(_), None) => cmp::Ordering::Less, 328 | (None, Some(_)) => cmp::Ordering::Greater, 329 | (None, None) => cmp::Ordering::Equal, 330 | } 331 | } 332 | } 333 | 334 | /// An inlined instance of a function. 
335 | #[derive(Debug, Default)] 336 | pub struct InlinedFunction<'input> { 337 | pub(crate) abstract_origin: FunctionOffset, 338 | pub(crate) size: Size, 339 | pub(crate) parameters: Vec>, 340 | pub(crate) variables: Vec>, 341 | pub(crate) inlined_functions: Vec>, 342 | pub(crate) call_source: Source<'input>, 343 | } 344 | 345 | impl<'input> InlinedFunction<'input> { 346 | /// The function that this is an inlined instance of. 347 | #[inline] 348 | pub fn abstract_origin<'a>(&self, hash: &'a FileHash<'input>) -> Option<&'a Function<'input>> { 349 | Function::from_offset(hash, self.abstract_origin) 350 | } 351 | 352 | /// The size of the inlined function. 353 | #[inline] 354 | pub fn size(&self) -> Option { 355 | self.size.get() 356 | } 357 | 358 | /// The source information for call location. 359 | #[inline] 360 | pub fn call_source(&self) -> &Source<'input> { 361 | &self.call_source 362 | } 363 | 364 | /// The function parameters. 365 | #[inline] 366 | pub fn parameters(&self) -> &[Parameter<'input>] { 367 | &self.parameters 368 | } 369 | 370 | /// The local variables. 371 | #[inline] 372 | pub fn variables(&self) -> &[LocalVariable<'input>] { 373 | &self.variables 374 | } 375 | 376 | /// The functions inlined within this inlined function. 
377 | #[inline] 378 | pub fn inlined_functions(&self) -> &[InlinedFunction<'input>] { 379 | &self.inlined_functions 380 | } 381 | } 382 | -------------------------------------------------------------------------------- /parser/src/file/pdb.rs: -------------------------------------------------------------------------------- 1 | use std::cell::Cell; 2 | use std::collections::BTreeMap; 3 | use std::io; 4 | use std::sync::Arc; 5 | 6 | use crate_pdb as pdb; 7 | use crate_pdb::FallibleIterator; 8 | 9 | use Result; 10 | use file::File; 11 | use function::{Function, FunctionOffset, Parameter}; 12 | use namespace::Namespace; 13 | use types::{ArrayType, BaseType, EnumerationType, Enumerator, FunctionType, Member, StructType, 14 | Type, TypeKind, TypeModifier, TypeModifierKind, TypeOffset, UnionType}; 15 | use unit::Unit; 16 | 17 | pub(crate) fn parse( 18 | input: &[u8], 19 | path: &str, 20 | cb: &mut FnMut(&mut File) -> Result<()>, 21 | ) -> Result<()> { 22 | let mut cursor = io::Cursor::new(input); 23 | let mut pdb = pdb::PDB::open(&mut cursor)?; 24 | let type_information = pdb.type_information()?; 25 | let symbol_table = pdb.global_symbols()?; 26 | 27 | let mut member_lists = BTreeMap::new(); 28 | let mut enumerator_lists = BTreeMap::new(); 29 | let mut argument_lists = BTreeMap::new(); 30 | let mut bitfields = BTreeMap::new(); 31 | 32 | let mut unit = Unit::default(); 33 | let namespace = None; 34 | 35 | let mut types = type_information.iter(); 36 | add_primitive_types(&mut unit.types); 37 | while let Some(ty) = types.next()? 
{ 38 | let index = ty.type_index() as usize; 39 | // debug!("Type: {} {:?}", index, ty.parse()); 40 | match ty.parse() { 41 | Ok(pdb::TypeData::Class(ref data)) => { 42 | parse_class(&mut unit, &member_lists, &namespace, index, data)?; 43 | } 44 | Ok(pdb::TypeData::Union(ref data)) => { 45 | parse_union(&mut unit, &member_lists, &namespace, index, data)?; 46 | } 47 | Ok(pdb::TypeData::Enumeration(ref data)) => { 48 | parse_enumeration(&mut unit, &enumerator_lists, &namespace, index, data)?; 49 | } 50 | Ok(pdb::TypeData::Procedure(ref data)) => { 51 | parse_procedure(&mut unit, &argument_lists, index, data)?; 52 | } 53 | Ok(pdb::TypeData::MemberFunction(ref data)) => { 54 | parse_member_function(&mut unit, &argument_lists, index, data)?; 55 | } 56 | Ok(pdb::TypeData::Pointer(ref data)) => { 57 | let underlying_type = parse_type_index(data.underlying_type); 58 | let byte_size = u64::from(data.attributes.size()); 59 | let byte_size = if byte_size == 0 { 60 | None 61 | } else { 62 | Some(byte_size) 63 | }; 64 | unit.types.push(Type { 65 | id: Cell::new(0), 66 | offset: TypeOffset(index), 67 | kind: TypeKind::Modifier(TypeModifier { 68 | kind: TypeModifierKind::Pointer, 69 | ty: underlying_type, 70 | name: None, 71 | byte_size, 72 | address_size: None, 73 | }), 74 | }); 75 | } 76 | Ok(pdb::TypeData::Modifier(ref data)) => { 77 | let underlying_type = parse_type_index(data.underlying_type); 78 | // TODO: volatile, unaligned 79 | let kind = if data.constant { 80 | TypeModifierKind::Const 81 | } else { 82 | TypeModifierKind::Other 83 | }; 84 | unit.types.push(Type { 85 | id: Cell::new(0), 86 | offset: TypeOffset(index), 87 | kind: TypeKind::Modifier(TypeModifier { 88 | kind, 89 | ty: underlying_type, 90 | name: None, 91 | byte_size: None, 92 | address_size: None, 93 | }), 94 | }); 95 | } 96 | Ok(pdb::TypeData::Bitfield(data)) => { 97 | bitfields.insert(index, data); 98 | } 99 | Ok(pdb::TypeData::Array(ref data)) => { 100 | parse_array(&mut unit, index, data)?; 101 | } 102 
| Ok(pdb::TypeData::FieldList(ref data)) => { 103 | parse_field_list( 104 | &mut member_lists, 105 | &mut enumerator_lists, 106 | &bitfields, 107 | index, 108 | data, 109 | )?; 110 | } 111 | Ok(pdb::TypeData::ArgumentList(data)) => { 112 | argument_lists.insert(index, data.arguments); 113 | } 114 | Ok(other) => { 115 | debug!("PDB unimplemented type {} {:?}", index, other); 116 | } 117 | Err(pdb::Error::UnimplementedTypeKind(kind)) => { 118 | debug!("PDB unimplemented type {} {}", index, kind); 119 | } 120 | Err(e) => { 121 | return Err(e.into()); 122 | } 123 | } 124 | } 125 | 126 | let mut symbols = symbol_table.iter(); 127 | let mut symbol_index = 0; 128 | while let Some(symbol) = symbols.next()? { 129 | match symbol.parse()? { 130 | pdb::SymbolData::PublicSymbol(data) => if data.function { 131 | unit.functions.push(Function { 132 | id: Cell::new(0), 133 | offset: Some(FunctionOffset(symbol_index)), 134 | namespace: namespace.clone(), 135 | name: Some(symbol.name()?.as_bytes()), 136 | symbol_name: None, 137 | linkage_name: None, 138 | source: Default::default(), 139 | address: Some(u64::from(data.offset)), 140 | size: None, 141 | inline: false, 142 | declaration: false, 143 | parameters: Vec::new(), 144 | return_type: None, 145 | inlined_functions: Vec::new(), 146 | variables: Vec::new(), 147 | }); 148 | symbol_index += 1; 149 | }, 150 | _ => {} 151 | } 152 | } 153 | 154 | let mut units = Vec::new(); 155 | units.push(unit); 156 | 157 | let mut file = File { 158 | path, 159 | // TODO 160 | code: None, 161 | // TODO 162 | sections: Vec::new(), 163 | // TODO 164 | symbols: Vec::new(), 165 | units, 166 | }; 167 | file.normalize(); 168 | cb(&mut file) 169 | } 170 | 171 | fn add_primitive_types<'input>(types: &mut Vec>) { 172 | add_primitive_type(types, 0x00, b"NoType", 4); 173 | add_primitive_type(types, 0x03, b"void", 0); 174 | add_primitive_type(types, 0x10, b"i8", 1); // signed char 175 | add_primitive_type(types, 0x11, b"i16", 2); // short 176 | 
add_primitive_type(types, 0x12, b"i32", 4); // long 177 | add_primitive_type(types, 0x13, b"i64", 8); 178 | add_primitive_type(types, 0x20, b"u8", 1); // unsigned char 179 | add_primitive_type(types, 0x21, b"u16", 2); // unsigned short 180 | add_primitive_type(types, 0x22, b"u32", 4); // unsigned long 181 | add_primitive_type(types, 0x23, b"u64", 8); 182 | add_primitive_type(types, 0x30, b"bool", 1); 183 | add_primitive_type(types, 0x40, b"f32", 4); // float 184 | add_primitive_type(types, 0x41, b"f64", 8); // double 185 | add_primitive_type(types, 0x68, b"i8", 1); // int8_t 186 | add_primitive_type(types, 0x69, b"u8", 1); // uint8_t 187 | add_primitive_type(types, 0x70, b"i8", 1); // char 188 | add_primitive_type(types, 0x71, b"wchar_t", 2); // wchar_t 189 | add_primitive_type(types, 0x72, b"i16", 2); // int16_t (2 bytes, same as 0x11) 190 | add_primitive_type(types, 0x73, b"u16", 2); // uint16_t (2 bytes, same as 0x21) 191 | add_primitive_type(types, 0x74, b"i32", 4); // int32_t 192 | add_primitive_type(types, 0x75, b"u32", 4); // uint32_t 193 | add_primitive_type(types, 0x76, b"i64", 8); // int64_t 194 | add_primitive_type(types, 0x77, b"u64", 8); // uint64_t 195 | } 196 | 197 | fn add_primitive_type<'input>( 198 | types: &mut Vec>, 199 | index: usize, 200 | name: &'static [u8], 201 | size: u64, 202 | ) { 203 | types.push(Type { 204 | id: Cell::new(0), 205 | offset: TypeOffset(index), 206 | kind: TypeKind::Base(BaseType { 207 | name: Some(name), 208 | byte_size: Some(size), 209 | }), 210 | }); 211 | 212 | types.push(Type { 213 | id: Cell::new(0), 214 | offset: TypeOffset(0x400 + index), 215 | kind: TypeKind::Modifier(TypeModifier { 216 | kind: TypeModifierKind::Pointer, 217 | ty: Some(TypeOffset(index)), 218 | name: None, 219 | byte_size: Some(4), 220 | address_size: None, 221 | }), 222 | }); 223 | 224 | types.push(Type { 225 | id: Cell::new(0), 226 | offset: TypeOffset(0x600 + index), 227 | kind: TypeKind::Modifier(TypeModifier { 228 | kind: TypeModifierKind::Pointer, 229 | ty: Some(TypeOffset(index)), 230 | 
name: None, 231 | byte_size: Some(8), 232 | address_size: None, 233 | }), 234 | }); 235 | } 236 | 237 | fn parse_class<'input>( 238 | unit: &mut Unit<'input>, 239 | member_lists: &BTreeMap>>, 240 | namespace: &Option>>, 241 | index: usize, 242 | data: &pdb::ClassType<'input>, 243 | ) -> Result<()> { 244 | // TODO: derived_from, vtable_shape 245 | let fields = data.fields.and_then(parse_type_index); 246 | let declaration = data.properties.forward_reference(); 247 | let byte_size = if declaration { 248 | None 249 | } else { 250 | Some(u64::from(data.size)) 251 | }; 252 | let mut members = match fields { 253 | Some(ref fields) => match member_lists.get(&fields.0) { 254 | Some(members) => members.clone(), 255 | None => return Err(format!("Missing field list for index {}", fields.0).into()), 256 | }, 257 | None => Vec::new(), 258 | }; 259 | let mut bit_offset = byte_size.map(|v| v * 8); 260 | for member in members.iter_mut().rev() { 261 | member.next_bit_offset = bit_offset; 262 | bit_offset = Some(member.bit_offset); 263 | } 264 | unit.types.push(Type { 265 | id: Cell::new(0), 266 | offset: TypeOffset(index), 267 | kind: TypeKind::Struct(StructType { 268 | namespace: namespace.clone(), 269 | name: Some(data.name.as_bytes()), 270 | source: Default::default(), 271 | byte_size, 272 | declaration, 273 | members, 274 | }), 275 | }); 276 | Ok(()) 277 | } 278 | 279 | fn parse_union<'input>( 280 | unit: &mut Unit<'input>, 281 | member_lists: &BTreeMap>>, 282 | namespace: &Option>>, 283 | index: usize, 284 | data: &pdb::UnionType<'input>, 285 | ) -> Result<()> { 286 | let fields = parse_type_index(data.fields); 287 | let declaration = data.properties.forward_reference(); 288 | let byte_size = if declaration { 289 | None 290 | } else { 291 | Some(u64::from(data.size)) 292 | }; 293 | let mut members = match fields { 294 | Some(fields) => match member_lists.get(&fields.0) { 295 | Some(members) => members.clone(), 296 | None => return Err(format!("Missing field list for index {}", 
fields.0).into()), 297 | }, 298 | None => Vec::new(), 299 | }; 300 | let mut bit_offset = byte_size.map(|v| v * 8); 301 | for member in members.iter_mut().rev() { 302 | member.next_bit_offset = bit_offset; 303 | bit_offset = Some(member.bit_offset); 304 | } 305 | unit.types.push(Type { 306 | id: Cell::new(0), 307 | offset: TypeOffset(index), 308 | kind: TypeKind::Union(UnionType { 309 | namespace: namespace.clone(), 310 | name: Some(data.name.as_bytes()), 311 | source: Default::default(), 312 | byte_size, 313 | declaration, 314 | members, 315 | }), 316 | }); 317 | Ok(()) 318 | } 319 | 320 | fn parse_enumeration<'input>( 321 | unit: &mut Unit<'input>, 322 | enumerator_lists: &BTreeMap>>, 323 | namespace: &Option>>, 324 | index: usize, 325 | data: &pdb::EnumerationType<'input>, 326 | ) -> Result<()> { 327 | let underlying_type = parse_type_index(data.underlying_type); 328 | let fields = parse_type_index(data.fields); 329 | let declaration = data.properties.forward_reference(); 330 | let enumerators = match fields { 331 | Some(ref fields) => match enumerator_lists.get(&fields.0) { 332 | Some(enumerators) => enumerators.clone(), 333 | None => return Err(format!("Missing field list for index {}", fields.0).into()), 334 | }, 335 | None => Vec::new(), 336 | }; 337 | unit.types.push(Type { 338 | id: Cell::new(0), 339 | offset: TypeOffset(index), 340 | kind: TypeKind::Enumeration(EnumerationType { 341 | namespace: namespace.clone(), 342 | name: Some(data.name.as_bytes()), 343 | source: Default::default(), 344 | declaration, 345 | ty: underlying_type, 346 | byte_size: None, 347 | enumerators, 348 | }), 349 | }); 350 | Ok(()) 351 | } 352 | 353 | fn parse_procedure<'input>( 354 | unit: &mut Unit<'input>, 355 | argument_lists: &BTreeMap>, 356 | index: usize, 357 | data: &pdb::ProcedureType, 358 | ) -> Result<()> { 359 | let return_type = data.return_type.and_then(parse_type_index); 360 | let argument_list = parse_type_index(data.argument_list); 361 | let parameter_count = 
data.parameter_count as usize; 362 | let parameters = match argument_list { 363 | Some(ref argument_list) => match argument_lists.get(&argument_list.0) { 364 | Some(arguments) => { 365 | if arguments.len() != parameter_count { 366 | debug!( 367 | "PDB parameter count mismatch {}, {}", 368 | arguments.len(), 369 | parameter_count 370 | ); 371 | } 372 | arguments 373 | .iter() 374 | .map(|argument| Parameter { 375 | offset: None, 376 | name: None, 377 | ty: parse_type_index(*argument), 378 | }) 379 | .collect() 380 | } 381 | None => return Err(format!("Missing argument list {}", argument_list.0).into()), 382 | }, 383 | None => Vec::new(), 384 | }; 385 | 386 | unit.types.push( 387 | // TODO: attributes 388 | Type { 389 | id: Cell::new(0), 390 | offset: TypeOffset(index), 391 | kind: TypeKind::Function(FunctionType { 392 | parameters, 393 | return_type, 394 | byte_size: None, 395 | }), 396 | }, 397 | ); 398 | Ok(()) 399 | } 400 | 401 | fn parse_member_function<'input>( 402 | unit: &mut Unit<'input>, 403 | argument_lists: &BTreeMap>, 404 | index: usize, 405 | data: &pdb::MemberFunctionType, 406 | ) -> Result<()> { 407 | let return_type = parse_type_index(data.return_type); 408 | //let class_type = parse_type_index(data.class_type); 409 | let this_pointer_type = data.this_pointer_type.and_then(parse_type_index); 410 | let argument_list = parse_type_index(data.argument_list); 411 | let parameter_count = data.parameter_count as usize; 412 | let mut parameters = Vec::with_capacity(parameter_count + 1); 413 | match this_pointer_type { 414 | None | Some(TypeOffset(3)) => {} 415 | ty => { 416 | parameters.push(Parameter { 417 | offset: None, 418 | name: None, 419 | ty, 420 | }); 421 | } 422 | } 423 | if let Some(ref argument_list) = argument_list { 424 | match argument_lists.get(&argument_list.0) { 425 | Some(arguments) => { 426 | if arguments.len() != parameter_count { 427 | debug!( 428 | "PDB parameter count mismatch {}, {}", 429 | arguments.len(), 430 | parameter_count 431 
| ); 432 | } 433 | for argument in arguments { 434 | parameters.push(Parameter { 435 | offset: None, 436 | name: None, 437 | ty: parse_type_index(*argument), 438 | }); 439 | } 440 | } 441 | None => return Err(format!("Missing argument list {}", argument_list.0).into()), 442 | } 443 | }; 444 | 445 | unit.types.push( 446 | // TODO: class_type, attributes, this_adjustment 447 | Type { 448 | id: Cell::new(0), 449 | offset: TypeOffset(index), 450 | kind: TypeKind::Function(FunctionType { 451 | parameters, 452 | return_type, 453 | byte_size: None, 454 | }), 455 | }, 456 | ); 457 | Ok(()) 458 | } 459 | 460 | fn parse_array<'input>(unit: &mut Unit<'input>, index: usize, data: &pdb::ArrayType) -> Result<()> { 461 | if data.dimensions.len() != 1 { 462 | return Err("Unsupported multi-dimensional array".into()); 463 | } 464 | let element_type = parse_type_index(data.element_type); 465 | //let indexing_type = parse_type_index(indexing_type); 466 | let byte_size = Some(u64::from(data.dimensions[0])); 467 | unit.types.push( 468 | // TODO: indexing_type, stride 469 | Type { 470 | id: Cell::new(0), 471 | offset: TypeOffset(index), 472 | kind: TypeKind::Array(ArrayType { 473 | ty: element_type, 474 | byte_size, 475 | ..Default::default() 476 | }), 477 | }, 478 | ); 479 | Ok(()) 480 | } 481 | 482 | fn parse_field_list<'input>( 483 | member_lists: &mut BTreeMap>>, 484 | enumerator_lists: &mut BTreeMap>>, 485 | bitfields: &BTreeMap, 486 | index: usize, 487 | data: &pdb::FieldList<'input>, 488 | ) -> Result<()> { 489 | let continuation = data.continuation.and_then(parse_type_index); 490 | if continuation.is_some() { 491 | return Err("Unsupported PDB field list continuation".into()); 492 | } 493 | let mut members = Vec::new(); 494 | let mut enumerators = Vec::new(); 495 | for field in &data.fields { 496 | match *field { 497 | pdb::TypeData::Member(ref member) => { 498 | let mut ty = parse_type_index(member.field_type); 499 | let mut bit_offset = u64::from(member.offset) * 8; 500 | let 
mut bit_size = None; 501 | match bitfields.get(&(member.field_type as usize)) { 502 | Some(bitfield) => { 503 | ty = parse_type_index(bitfield.underlying_type); 504 | bit_offset += u64::from(bitfield.position); 505 | bit_size = Some(u64::from(bitfield.length)); 506 | } 507 | None => {} 508 | } 509 | members.push(Member { 510 | name: Some(member.name.as_bytes()), 511 | ty, 512 | bit_offset, 513 | bit_size, 514 | next_bit_offset: None, 515 | }); 516 | } 517 | pdb::TypeData::Enumerate(ref enumerate) => { 518 | let value = match enumerate.value { 519 | pdb::Variant::U8(val) => i64::from(val), 520 | pdb::Variant::U16(val) => i64::from(val), 521 | pdb::Variant::U32(val) => i64::from(val), 522 | pdb::Variant::U64(val) => val as i64, 523 | pdb::Variant::I8(val) => i64::from(val), 524 | pdb::Variant::I16(val) => i64::from(val), 525 | pdb::Variant::I32(val) => i64::from(val), 526 | pdb::Variant::I64(val) => val, 527 | }; 528 | enumerators.push(Enumerator { 529 | name: Some(enumerate.name.as_bytes()), 530 | value: Some(value), 531 | }); 532 | } 533 | _ => { 534 | debug!("PDB unimplemented field type {:?}", field); 535 | } 536 | } 537 | } 538 | member_lists.insert(index, members); 539 | enumerator_lists.insert(index, enumerators); 540 | Ok(()) 541 | } 542 | 543 | fn parse_type_index(index: pdb::TypeIndex) -> Option { 544 | if index == 0 { 545 | None 546 | } else { 547 | Some(TypeOffset(index as usize)) 548 | } 549 | } 550 | -------------------------------------------------------------------------------- /parser/src/file/mod.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | use std::default::Default; 3 | use std::fs; 4 | use std::mem; 5 | use std::ops::Deref; 6 | use std::sync::Mutex; 7 | 8 | mod dwarf; 9 | 10 | use fnv::FnvHashMap as HashMap; 11 | use gimli; 12 | use memmap; 13 | use object::{self, Object, ObjectSection, ObjectSegment, ObjectSymbol, ObjectSymbolTable}; 14 | 15 | use crate::cfi::Cfi; 16 | use 
crate::function::{Function, FunctionDetails, FunctionOffset}; 17 | use crate::location::Register; 18 | use crate::range::{Range, RangeList}; 19 | use crate::types::{Enumerator, Type, TypeOffset}; 20 | use crate::unit::Unit; 21 | use crate::variable::Variable; 22 | use crate::{Address, Result, Size}; 23 | 24 | pub(crate) enum DebugInfo<'input, Endian> 25 | where 26 | Endian: gimli::Endianity + 'input, 27 | { 28 | Dwarf(dwarf::DwarfDebugInfo<'input, Endian>), 29 | } 30 | 31 | impl<'input, Endian> DebugInfo<'input, Endian> 32 | where 33 | Endian: gimli::Endianity + 'input, 34 | { 35 | fn get_type(&self, offset: TypeOffset) -> Option> { 36 | match self { 37 | DebugInfo::Dwarf(dwarf) => dwarf.get_type(offset), 38 | } 39 | } 40 | 41 | fn get_enumerators(&self, offset: TypeOffset) -> Vec> { 42 | match self { 43 | DebugInfo::Dwarf(dwarf) => dwarf.get_enumerators(offset), 44 | } 45 | } 46 | 47 | fn get_function_details( 48 | &self, 49 | offset: FunctionOffset, 50 | hash: &FileHash<'input>, 51 | ) -> Option> { 52 | match self { 53 | DebugInfo::Dwarf(dwarf) => dwarf.get_function_details(offset, hash), 54 | } 55 | } 56 | 57 | fn get_cfi(&self, range: Range) -> Vec { 58 | match self { 59 | DebugInfo::Dwarf(dwarf) => dwarf.get_cfi(range), 60 | } 61 | } 62 | 63 | fn get_register_name(&self, machine: Architecture, register: Register) -> Option<&'static str> { 64 | match self { 65 | DebugInfo::Dwarf(dwarf) => dwarf.get_register_name(machine, register), 66 | } 67 | } 68 | } 69 | 70 | pub(crate) struct Arena { 71 | // TODO: can these be a single `Vec>`? 
72 | buffers: Mutex>>, 73 | strings: Mutex>, 74 | relocations: Mutex>>, 75 | } 76 | 77 | impl Arena { 78 | fn new() -> Self { 79 | Arena { 80 | buffers: Mutex::new(Vec::new()), 81 | strings: Mutex::new(Vec::new()), 82 | relocations: Mutex::new(Vec::new()), 83 | } 84 | } 85 | 86 | fn add_buffer<'input>(&'input self, bytes: Vec) -> &'input [u8] { 87 | let mut buffers = self.buffers.lock().unwrap(); 88 | let i = buffers.len(); 89 | buffers.push(bytes); 90 | let b = &buffers[i]; 91 | unsafe { mem::transmute::<&[u8], &'input [u8]>(b) } 92 | } 93 | 94 | fn add_string<'input>(&'input self, bytes: &'input [u8]) -> &'input str { 95 | // FIXME: this is effectively leaking strings that require lossy conversion, 96 | // fix by avoiding duplicates 97 | match String::from_utf8_lossy(bytes) { 98 | Cow::Borrowed(s) => s, 99 | Cow::Owned(s) => { 100 | let mut strings = self.strings.lock().unwrap(); 101 | let i = strings.len(); 102 | strings.push(s); 103 | let s = &strings[i]; 104 | unsafe { mem::transmute::<&str, &'input str>(s) } 105 | } 106 | } 107 | } 108 | 109 | fn add_relocations<'input>( 110 | &'input self, 111 | entry: Box, 112 | ) -> &'input dwarf::RelocationMap { 113 | let mut relocations = self.relocations.lock().unwrap(); 114 | let i = relocations.len(); 115 | relocations.push(entry); 116 | let entry = &relocations[i]; 117 | unsafe { mem::transmute::<&dwarf::RelocationMap, &'input dwarf::RelocationMap>(entry) } 118 | } 119 | } 120 | 121 | pub use object::Architecture; 122 | 123 | /// The context needed for a parsed file. 124 | /// 125 | /// The parsed file references the context, so it is included here as well. 126 | pub struct FileContext { 127 | // Self-referential, not actually `static. 
128 | file: File<'static>, 129 | _map: memmap::Mmap, 130 | _arena: Box, 131 | } 132 | 133 | impl FileContext { 134 | fn new(map: memmap::Mmap, f: F) -> Result 135 | where 136 | F: for<'a> FnOnce(&'a [u8], &'a Arena) -> Result>, 137 | { 138 | let arena = Box::new(Arena::new()); 139 | let file = f(&map, &arena)?; 140 | Ok(FileContext { 141 | // `file` only borrows from `map` and `arena`, which we are preserving 142 | // without moving. 143 | file: unsafe { mem::transmute::, File<'static>>(file) }, 144 | _map: map, 145 | _arena: arena, 146 | }) 147 | } 148 | 149 | /// Return the parsed debuginfo for the file. 150 | pub fn file<'a>(&'a self) -> &'a File<'a> { 151 | unsafe { mem::transmute::<&'a File<'static>, &'a File<'a>>(&self.file) } 152 | } 153 | } 154 | 155 | /// The parsed debuginfo for a single file. 156 | pub struct File<'input> { 157 | pub(crate) path: String, 158 | pub(crate) machine: Architecture, 159 | pub(crate) segments: Vec>, 160 | pub(crate) sections: Vec>, 161 | pub(crate) symbols: Vec>, 162 | pub(crate) relocations: Vec>, 163 | pub(crate) units: Vec>, 164 | debug_info: DebugInfo<'input, gimli::RunTimeEndian>, 165 | } 166 | 167 | impl<'input> File<'input> { 168 | pub(crate) fn get_type(&self, offset: TypeOffset) -> Option> { 169 | self.debug_info.get_type(offset) 170 | } 171 | 172 | pub(crate) fn get_enumerators(&self, offset: TypeOffset) -> Vec> { 173 | self.debug_info.get_enumerators(offset) 174 | } 175 | 176 | pub(crate) fn get_function_details( 177 | &self, 178 | offset: FunctionOffset, 179 | hash: &FileHash<'input>, 180 | ) -> FunctionDetails<'input> { 181 | self.debug_info 182 | .get_function_details(offset, hash) 183 | .unwrap_or_default() 184 | } 185 | 186 | pub(crate) fn get_register_name(&self, register: Register) -> Option<&'static str> { 187 | self.debug_info.get_register_name(self.machine, register) 188 | } 189 | 190 | /// Parse the file with the given path. 
191 | pub fn parse(path: String) -> Result { 192 | let handle = match fs::File::open(&path) { 193 | Ok(handle) => handle, 194 | Err(e) => { 195 | return Err(format!("open failed: {}", e).into()); 196 | } 197 | }; 198 | 199 | let map = match unsafe { memmap::Mmap::map(&handle) } { 200 | Ok(map) => map, 201 | Err(e) => { 202 | return Err(format!("memmap failed: {}", e).into()); 203 | } 204 | }; 205 | 206 | // TODO: split DWARF 207 | // TODO: PDB 208 | FileContext::new(map, |data, strings| { 209 | let object = object::File::parse(data)?; 210 | File::parse_object(&object, &object, path, strings) 211 | }) 212 | } 213 | 214 | fn parse_object( 215 | object: &object::File<'input>, 216 | debug_object: &object::File<'input>, 217 | path: String, 218 | arena: &'input Arena, 219 | ) -> Result> { 220 | let machine = object.architecture(); 221 | let mut segments = Vec::new(); 222 | for segment in object.segments() { 223 | if let Ok(bytes) = segment.data() { 224 | segments.push(Segment { 225 | address: segment.address(), 226 | bytes, 227 | }); 228 | } 229 | } 230 | 231 | let mut sections = Vec::new(); 232 | for section in object.sections() { 233 | let name = Some(section.name()?).map(|x| Cow::Owned(x.to_string())); 234 | let segment = section.segment_name()?.map(|x| Cow::Owned(x.to_string())); 235 | let address = if section.address() != 0 { 236 | Some(section.address()) 237 | } else { 238 | None 239 | }; 240 | let size = section.size(); 241 | if size != 0 { 242 | sections.push(Section { 243 | name, 244 | segment, 245 | address, 246 | size, 247 | }); 248 | } 249 | } 250 | 251 | // TODO: symbols from debug_object too? 
252 | let mut symbols = Vec::new(); 253 | for symbol in object.symbols() { 254 | // TODO: handle relocatable objects 255 | let address = symbol.address(); 256 | if address == 0 { 257 | continue; 258 | } 259 | 260 | let size = symbol.size(); 261 | if size == 0 { 262 | continue; 263 | } 264 | 265 | // TODO: handle SymbolKind::File 266 | let kind = match symbol.kind() { 267 | object::SymbolKind::Text => SymbolKind::Function, 268 | object::SymbolKind::Data | object::SymbolKind::Unknown => SymbolKind::Variable, 269 | _ => continue, 270 | }; 271 | 272 | let name = Some(symbol.name()?); 273 | 274 | symbols.push(Symbol { 275 | name, 276 | kind, 277 | address, 278 | size, 279 | }); 280 | } 281 | 282 | let mut relocations = Vec::new(); 283 | if let (Some(dynamic_symbols), Some(dynamic_relocations)) = 284 | (object.dynamic_symbol_table(), object.dynamic_relocations()) 285 | { 286 | for (address, relocation) in dynamic_relocations { 287 | let size = relocation.size(); 288 | match relocation.target() { 289 | object::RelocationTarget::Symbol(index) => { 290 | if let Ok(symbol) = dynamic_symbols.symbol_by_index(index) { 291 | relocations.push(Relocation { 292 | address, 293 | size, 294 | symbol: symbol.name()?, 295 | }); 296 | } 297 | } 298 | _ => {} 299 | } 300 | } 301 | } 302 | 303 | let endian = if debug_object.is_little_endian() { 304 | gimli::RunTimeEndian::Little 305 | } else { 306 | gimli::RunTimeEndian::Big 307 | }; 308 | 309 | let (units, debug_info) = dwarf::parse(endian, debug_object, arena)?; 310 | let mut file = File { 311 | path, 312 | machine, 313 | segments, 314 | sections, 315 | symbols, 316 | relocations, 317 | units, 318 | debug_info, 319 | }; 320 | file.normalize(); 321 | Ok(file) 322 | } 323 | 324 | fn normalize(&mut self) { 325 | self.symbols.sort_by(|a, b| a.address.cmp(&b.address)); 326 | let mut used_symbols = vec![false; self.symbols.len()]; 327 | 328 | // Set symbol names on functions/variables. 
329 | for unit in &mut self.units { 330 | for function in &mut unit.functions { 331 | if let Some(address) = function.address() { 332 | if let Some(symbol) = Self::get_symbol( 333 | &*self.symbols, 334 | &mut used_symbols, 335 | address, 336 | function.linkage_name().or_else(|| function.name()), 337 | ) { 338 | function.symbol_name = symbol.name; 339 | } 340 | // If there are multiple ranges for the function, 341 | // mark any symbols for the remaining ranges as used. 342 | // TODO: change `Function::symbol_name` to a list instead? 343 | for range in function.ranges().iter().skip(1) { 344 | Self::get_symbol(&*self.symbols, &mut used_symbols, range.begin, None); 345 | } 346 | } 347 | } 348 | 349 | for variable in &mut unit.variables { 350 | if let Some(address) = variable.address() { 351 | if let Some(symbol) = Self::get_symbol( 352 | &*self.symbols, 353 | &mut used_symbols, 354 | address, 355 | variable.linkage_name().or_else(|| variable.name()), 356 | ) { 357 | variable.symbol_name = symbol.name; 358 | } 359 | } 360 | } 361 | } 362 | 363 | // Create a unit for symbols that don't have debuginfo. 
364 | let mut unit = Unit::default(); 365 | unit.name = Some(Cow::Borrowed("")); 366 | for (symbol, used) in self.symbols.iter().zip(used_symbols.iter()) { 367 | if *used { 368 | continue; 369 | } 370 | unit.ranges.push(Range { 371 | begin: symbol.address, 372 | end: symbol.address + symbol.size, 373 | }); 374 | match symbol.kind() { 375 | SymbolKind::Variable => { 376 | unit.variables.push(Variable { 377 | name: symbol.name, 378 | linkage_name: symbol.name, 379 | address: Address::new(symbol.address), 380 | size: Size::new(symbol.size), 381 | ..Default::default() 382 | }); 383 | } 384 | SymbolKind::Function => { 385 | let mut ranges = Vec::new(); 386 | if symbol.size > 0 { 387 | ranges.push(Range { 388 | begin: symbol.address, 389 | end: symbol.address + symbol.size, 390 | }); 391 | } 392 | unit.functions.push(Function { 393 | name: symbol.name, 394 | linkage_name: symbol.name, 395 | address: Address::new(symbol.address), 396 | size: Size::new(symbol.size), 397 | ranges, 398 | ..Default::default() 399 | }); 400 | } 401 | } 402 | } 403 | unit.ranges.sort(); 404 | self.units.push(unit); 405 | 406 | // Create a unit for all remaining address ranges. 407 | let mut unit = Unit::default(); 408 | unit.name = Some(Cow::Borrowed("")); 409 | unit.ranges = self.unknown_ranges(); 410 | self.units.push(unit); 411 | } 412 | 413 | // Determine if the symbol at the given address has the given name. 414 | // There may be multiple symbols for the same address. 415 | // If none match the given name, then return the first one. 
416 | fn get_symbol<'sym>( 417 | symbols: &'sym [Symbol<'input>], 418 | used_symbols: &mut [bool], 419 | address: u64, 420 | name: Option<&str>, 421 | ) -> Option<&'sym Symbol<'input>> { 422 | if let Ok(mut index) = symbols.binary_search_by(|x| x.address.cmp(&address)) { 423 | while index > 0 && symbols[index - 1].address == address { 424 | index -= 1; 425 | } 426 | let mut found = false; 427 | for (symbol, used_symbol) in (&symbols[index..]) 428 | .iter() 429 | .zip((&mut used_symbols[index..]).iter_mut()) 430 | { 431 | if symbol.address != address { 432 | break; 433 | } 434 | *used_symbol = true; 435 | if symbol.name() == name { 436 | found = true; 437 | } 438 | } 439 | if found { 440 | None 441 | } else { 442 | Some(&symbols[index]) 443 | } 444 | } else { 445 | None 446 | } 447 | } 448 | 449 | /// The file path. 450 | #[inline] 451 | pub fn path(&self) -> &str { 452 | &self.path 453 | } 454 | 455 | /// The machine type that the file contains debuginfo for. 456 | #[inline] 457 | pub fn machine(&self) -> Architecture { 458 | self.machine 459 | } 460 | 461 | /// Find the segment data for the given address range. 462 | pub fn segment_bytes(&self, range: Range) -> Option<&'input [u8]> { 463 | for segment in &self.segments { 464 | if range.begin >= segment.address 465 | && range.end <= segment.address + segment.bytes.len() as u64 466 | { 467 | let begin = (range.begin - segment.address) as usize; 468 | let len = (range.end - range.begin) as usize; 469 | return Some(&segment.bytes[begin..][..len]); 470 | } 471 | } 472 | None 473 | } 474 | 475 | /// A list of segments in the file. 476 | #[inline] 477 | pub fn segments(&self) -> &[Segment<'input>] { 478 | &self.segments 479 | } 480 | 481 | /// A list of sections in the file. 482 | #[inline] 483 | pub fn sections(&self) -> &[Section<'input>] { 484 | &self.sections 485 | } 486 | 487 | /// A list of symbols in the file. 
488 | #[inline] 489 | pub fn symbols(&self) -> &[Symbol<'input>] { 490 | &self.symbols 491 | } 492 | 493 | /// A list of relocations in the file. 494 | #[inline] 495 | pub fn relocations(&self) -> &[Relocation<'input>] { 496 | &self.relocations 497 | } 498 | 499 | /// A list of compilation units in the file. 500 | #[inline] 501 | pub fn units(&self) -> &[Unit<'input>] { 502 | &self.units 503 | } 504 | 505 | /// A list of address ranges covered by the compilation units. 506 | /// 507 | /// This includes both `Unit::ranges` and `Unit::unknown_ranges`. 508 | pub fn ranges(&self, hash: &FileHash) -> RangeList { 509 | let mut ranges = RangeList::default(); 510 | for unit in &self.units { 511 | for range in unit.ranges(hash).list() { 512 | ranges.push(*range); 513 | } 514 | for range in unit.unknown_ranges(hash).list() { 515 | ranges.push(*range); 516 | } 517 | } 518 | ranges.sort(); 519 | ranges 520 | } 521 | 522 | // Used to create unit. After creation of that unit 523 | // this will return an empty range list. 524 | fn unknown_ranges(&self) -> RangeList { 525 | // FIXME: don't create this hash twice 526 | let hash = FileHash::new(self); 527 | let unit_ranges = self.ranges(&hash); 528 | 529 | let mut ranges = RangeList::default(); 530 | for section in &self.sections { 531 | if let Some(range) = section.address() { 532 | ranges.push(range); 533 | } 534 | } 535 | ranges.sort(); 536 | ranges.subtract(&unit_ranges) 537 | } 538 | 539 | /// The total size of functions in all compilation units. 540 | pub fn function_size(&self) -> u64 { 541 | let mut size = 0; 542 | for unit in &self.units { 543 | size += unit.function_size(); 544 | } 545 | size 546 | } 547 | 548 | /// The total size of variables in all compilation units. 549 | pub fn variable_size(&self, hash: &FileHash) -> u64 { 550 | let mut size = 0; 551 | for unit in &self.units { 552 | size += unit.variable_size(hash); 553 | } 554 | size 555 | } 556 | 557 | /// Call frame information for the given address range. 
558 | pub fn cfi(&self, range: Range) -> Vec { 559 | self.debug_info.get_cfi(range) 560 | } 561 | } 562 | 563 | /// An index of functions and types within a file. 564 | pub struct FileHash<'input> { 565 | /// The file being indexed. 566 | pub file: &'input File<'input>, 567 | /// All functions by address. 568 | pub functions_by_address: HashMap>, 569 | /// All functions by offset. 570 | pub functions_by_offset: HashMap>, 571 | /// All variables by address. 572 | pub variables_by_address: HashMap>, 573 | /// All types by offset. 574 | pub types: HashMap>, 575 | // The type corresponding to `TypeOffset::none()`. 576 | pub(crate) void: Type<'input>, 577 | } 578 | 579 | impl<'input> FileHash<'input> { 580 | /// Create a new `FileHash` for the given `File`. 581 | pub fn new(file: &'input File<'input>) -> Self { 582 | FileHash { 583 | file, 584 | functions_by_address: FileHash::functions_by_address(file), 585 | functions_by_offset: FileHash::functions_by_offset(file), 586 | variables_by_address: FileHash::variables_by_address(file), 587 | types: FileHash::types(file), 588 | void: Type::void(), 589 | } 590 | } 591 | 592 | /// Returns a map from address to function for all functions in the file. 593 | fn functions_by_address<'a>(file: &'a File<'input>) -> HashMap> { 594 | let mut functions = HashMap::default(); 595 | for unit in &file.units { 596 | for function in &unit.functions { 597 | if let Some(address) = function.address() { 598 | // TODO: handle duplicate addresses 599 | functions.insert(address, function); 600 | } 601 | } 602 | } 603 | functions 604 | } 605 | 606 | /// Returns a map from offset to function for all functions in the file. 
607 | fn functions_by_offset<'a>( 608 | file: &'a File<'input>, 609 | ) -> HashMap> { 610 | let mut functions = HashMap::default(); 611 | for unit in &file.units { 612 | for function in &unit.functions { 613 | functions.insert(function.offset, function); 614 | } 615 | } 616 | functions 617 | } 618 | 619 | /// Returns a map from address to variable for all variables in the file. 620 | fn variables_by_address<'a>(file: &'a File<'input>) -> HashMap> { 621 | let mut variables = HashMap::default(); 622 | for unit in &file.units { 623 | for variable in &unit.variables { 624 | if let Some(address) = variable.address() { 625 | // TODO: handle duplicate addresses 626 | variables.insert(address, variable); 627 | } 628 | } 629 | } 630 | variables 631 | } 632 | 633 | /// Returns a map from offset to type for all types in the file. 634 | fn types<'a>(file: &'a File<'input>) -> HashMap> { 635 | let mut types = HashMap::default(); 636 | for unit in &file.units { 637 | for ty in &unit.types { 638 | types.insert(ty.offset, ty); 639 | } 640 | } 641 | types 642 | } 643 | } 644 | 645 | /// A loadable range of bytes. 646 | #[derive(Debug)] 647 | pub struct Segment<'input> { 648 | /// The address that the bytes should be loaded at. 649 | pub address: u64, 650 | /// The bytes, which may be code or data. 651 | pub bytes: &'input [u8], 652 | } 653 | 654 | /// A named section. 655 | #[derive(Debug)] 656 | pub struct Section<'input> { 657 | pub(crate) name: Option>, 658 | pub(crate) segment: Option>, 659 | pub(crate) address: Option, 660 | pub(crate) size: u64, 661 | } 662 | 663 | impl<'input> Section<'input> { 664 | /// The name of this section. 665 | pub fn name(&self) -> Option<&str> { 666 | self.name.as_ref().map(Cow::deref) 667 | } 668 | 669 | /// The name of the segment containing this section, if applicable. 670 | pub fn segment(&self) -> Option<&str> { 671 | self.segment.as_ref().map(Cow::deref) 672 | } 673 | 674 | /// The address range covered by this section if it is loadable.
675 | pub fn address(&self) -> Option { 676 | self.address.map(|address| Range { 677 | begin: address, 678 | end: address + self.size, 679 | }) 680 | } 681 | 682 | /// The size of the section. 683 | #[inline] 684 | pub fn size(&self) -> u64 { 685 | self.size 686 | } 687 | } 688 | 689 | /// A symbol kind. 690 | #[derive(Debug, Clone, Copy)] 691 | pub enum SymbolKind { 692 | /// The symbol is a variable. 693 | Variable, 694 | /// The symbol is a function. 695 | Function, 696 | } 697 | 698 | /// A symbol. 699 | #[derive(Debug, Clone)] 700 | pub struct Symbol<'input> { 701 | pub(crate) name: Option<&'input str>, 702 | pub(crate) kind: SymbolKind, 703 | pub(crate) address: u64, 704 | pub(crate) size: u64, 705 | } 706 | 707 | impl<'input> Symbol<'input> { 708 | /// The symbol name. 709 | #[inline] 710 | pub fn name(&self) -> Option<&str> { 711 | self.name 712 | } 713 | 714 | /// The symbol kind. 715 | #[inline] 716 | pub fn kind(&self) -> SymbolKind { 717 | self.kind 718 | } 719 | 720 | /// The symbol address range. 721 | #[inline] 722 | pub fn address(&self) -> Range { 723 | Range { 724 | begin: self.address, 725 | end: self.address + self.size, 726 | } 727 | } 728 | 729 | /// The symbol size. 730 | #[inline] 731 | pub fn size(&self) -> u64 { 732 | self.size 733 | } 734 | } 735 | 736 | /// A relocation. 737 | #[derive(Debug, Clone)] 738 | pub struct Relocation<'input> { 739 | pub(crate) address: u64, 740 | pub(crate) size: u8, 741 | pub(crate) symbol: &'input str, 742 | } 743 | 744 | impl<'input> Relocation<'input> { 745 | /// The relocation address. 746 | #[inline] 747 | pub fn address(&self) -> u64 { 748 | self.address 749 | } 750 | 751 | /// The relocation size. 752 | #[inline] 753 | pub fn size(&self) -> u8 { 754 | self.size 755 | } 756 | 757 | /// The name of the symbol referenced by the relocation.
758 | #[inline] 759 | pub fn symbol(&self) -> &'input str { 760 | self.symbol 761 | } 762 | } 763 | -------------------------------------------------------------------------------- /main/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::ensure; 2 | use clap::Parser; 3 | use log::{info, warn}; 4 | use std::borrow::Cow; 5 | use std::collections::HashMap; 6 | use std::io::{self, BufRead, Write}; 7 | use std::rc::Rc; 8 | use std::sync::atomic::{AtomicU64, Ordering::Relaxed}; 9 | 10 | const BITS_PER_BYTE: u64 = 8; 11 | 12 | fn bit2byte(input: u64) -> u64 { 13 | (input + BITS_PER_BYTE - 1) / BITS_PER_BYTE 14 | } 15 | static ID_GENERATOR: AtomicU64 = AtomicU64::new(0); 16 | fn uniq_id() -> u64 { 17 | ID_GENERATOR.fetch_add(1, Relaxed) 18 | } 19 | 20 | fn is_ident_char(ch: char) -> bool { 21 | return ch == '_' || ch.is_alphanumeric(); 22 | } 23 | fn ident_part(name: &str) -> &str { 24 | let Some(bad_idx) = name.find(|c|!is_ident_char(c)) else { 25 | return name; 26 | }; 27 | return &name[0..bad_idx]; 28 | } 29 | 30 | fn consume_ident_chars(out: &mut String, input: &mut impl Iterator) -> Option { 31 | while let Some(ch) = input.next() { 32 | if is_ident_char(ch) { 33 | out.push(ch); 34 | } else { 35 | return Some(ch); 36 | } 37 | } 38 | return None; 39 | } 40 | 41 | // '::std::_Rb_tree_key_compare >' -> ['std', '_Rb_tree_key_compare >']. 
42 | fn parse_typename(input: &str) -> anyhow::Result> { 43 | let mut ret = Vec::::new(); 44 | let mut add_part = |p: String| { 45 | if !p.is_empty() { 46 | ret.push(p); 47 | } 48 | }; 49 | let mut iter = input.chars(); 50 | loop { 51 | let mut part = String::new(); 52 | let next_ch = consume_ident_chars(&mut part, &mut iter); 53 | let Some(next_ch) = next_ch else { 54 | add_part(part); 55 | return Ok(ret); 56 | }; 57 | if next_ch == ':' { 58 | add_part(part); 59 | let next_ch = iter.next(); 60 | ensure!(next_ch == Some(':'), "invalid input symbol"); 61 | continue; 62 | } 63 | part.push(next_ch); 64 | while let Some(ch) = iter.next() { 65 | part.push(ch); 66 | } 67 | add_part(part); 68 | return Ok(ret); 69 | } 70 | } 71 | 72 | #[derive(Parser)] 73 | #[clap(author, version, about)] 74 | struct Args { 75 | /// input so path, can specify more than once 76 | #[arg(short = 'i')] 77 | so_path: Vec, 78 | 79 | /// input file path, each line represents a so path, can specify more than once 80 | #[arg(short = 'I')] 81 | so_file_path: Vec, 82 | 83 | /// output file path 84 | #[arg(short)] 85 | out_path: String, 86 | 87 | /// type name, such as 'namespace1::namespace2::TypeName' 88 | #[arg(value_parser=parse_typename)] 89 | dest: Vec>, 90 | } 91 | 92 | impl Args { 93 | fn is_dest(&self, tyn: &parser::TypeName) -> bool { 94 | for d in &self.dest { 95 | if tyn.ends_with(&d) { 96 | return true; 97 | } 98 | } 99 | return false; 100 | } 101 | } 102 | 103 | fn read_lines

(filename: P) -> io::Result>> 104 | where 105 | P: AsRef, 106 | { 107 | let file = std::fs::File::open(filename)?; 108 | Ok(io::BufReader::new(file).lines()) 109 | } 110 | 111 | // is_declaration means that ty does not carry any useful information. 112 | fn is_declaration(ty: &parser::Type) -> bool { 113 | match ty.kind() { 114 | parser::TypeKind::Struct(s) => s.is_declaration(), 115 | parser::TypeKind::Union(s) => s.is_declaration(), 116 | parser::TypeKind::Enumeration(s) => s.is_declaration(), 117 | parser::TypeKind::Unspecified(_) => true, 118 | _ => false, 119 | } 120 | } 121 | 122 | #[derive(Clone, Copy, Eq, Hash, PartialEq, Debug)] 123 | struct TypeIndex { 124 | // input_id is the offset into inputs_hash. 125 | input_id: usize, 126 | typoff: parser::TypeOffset, 127 | } 128 | 129 | // We handle padding ourselves: every struct/union is emitted with the packed __attribute__, 130 | // for the reason shown in the following example: 131 | // struct S1218 { 132 | // long l; 133 | // ch c; 134 | // }; 135 | // 136 | // struct A1218: public S1218 { 137 | // // Yes, A.i is actually placed inside the padding part of S. 138 | // // S off=0, size=16 139 | // // i off=12, size=4 140 | // int i; 141 | // } 142 | #[derive(Debug)] 143 | struct TypeInfo { 144 | // name can be used in C as the type name of a variable. 145 | // Its format must match `[struct|union|enum] identifier[*]*`. 146 | name: String, 147 | // packed_size is the size after applying attribute packed, 148 | // size is the type size recorded in DWARF. 149 | // Taking S1218 as an example, packed_size = 9, size = 16.
150 | packed_size: u64, 151 | size: u64, 152 | } 153 | 154 | impl TypeInfo { 155 | fn ident(&self) -> &str { 156 | ident_part(self.name.split_whitespace().last().unwrap()) 157 | } 158 | } 159 | 160 | type ProcessState = HashMap>>; 161 | 162 | struct Printer { 163 | h_file: std::fs::File, 164 | c_file: std::fs::File, 165 | used_idents: HashMap, 166 | } 167 | 168 | impl Printer { 169 | fn do_add_eq_assert(&mut self, expr: &str, size: u64) -> io::Result<()> { 170 | writeln!(self.c_file, " ZHANYI_HIDVA_ASSERT_EQ({}, {});", expr, size) 171 | } 172 | } 173 | impl Printer { 174 | fn try_open(path: &str) -> io::Result { 175 | const ASSERT_EQ_DEF: &'static str = r###" 176 | #define ZHANYI_HIDVA_ASSERT_EQ(a, e) do { \ 177 | int actual_size = (a); \ 178 | int expect_size = (e); \ 179 | if (actual_size != expect_size) { \ 180 | fprintf(stderr, "ASSERT FAILED! actual: %s, which is %d; expect: %s, which is %d\n", #a, actual_size, #e, expect_size); \ 181 | abort(); \ 182 | } \ 183 | } while(0) 184 | "###; 185 | let mut h_file_name = path.to_string(); 186 | h_file_name.push_str(".h"); 187 | let mut c_file_name = path.to_string(); 188 | c_file_name.push_str(".c"); 189 | let mut h_file = std::fs::File::create(&h_file_name)?; 190 | let mut c_file = std::fs::File::create(c_file_name)?; 191 | writeln!(h_file, "// Generated by hidva/clayout! 大吉大利!")?; 192 | writeln!(h_file, "#pragma once")?; 193 | writeln!(h_file, "#include ")?; 194 | writeln!(c_file, "// Generated by hidva/clayout! 
大吉大利!")?; 195 | writeln!(c_file, "#include ")?; 196 | writeln!(c_file, "#include ")?; 197 | writeln!(c_file, "#include \"{}\"", &h_file_name)?; 198 | writeln!(c_file, "\n\n\n")?; 199 | writeln!(c_file, "{}", ASSERT_EQ_DEF)?; 200 | writeln!(c_file, "\n\n\n")?; 201 | writeln!(c_file, "int main() {{")?; 202 | Ok(Printer { 203 | h_file, 204 | c_file, 205 | used_idents: HashMap::new(), 206 | }) 207 | } 208 | 209 | fn add_eq_assert(&mut self, expr: &str, size: u64) -> io::Result<()> { 210 | self.do_add_eq_assert(expr, size)?; 211 | writeln!(self.c_file, "") 212 | } 213 | 214 | fn add_eq_asserts(&mut self, asserts: &[EqAssert]) -> io::Result<()> { 215 | for eq_assert in asserts { 216 | self.do_add_eq_assert(&eq_assert.expr, eq_assert.val)?; 217 | } 218 | writeln!(self.c_file, "") 219 | } 220 | 221 | // 输出到 .h 文件的所有标识符, 都是经过 alloc_ident 生成的. 比如 add_type 就是如此. 222 | fn alloc_ident(&mut self, tyname: &parser::TypeName) -> String { 223 | // return val may be empty 224 | fn get_ident_part(name: Option<&str>) -> &str { 225 | name.map(|v| ident_part(v)).unwrap_or("") 226 | } 227 | let mut idents = 'get_idents: { 228 | let mut idents = Vec::new(); 229 | let ident_part = get_ident_part(tyname.name); 230 | if ident_part.is_empty() { 231 | break 'get_idents idents; 232 | } 233 | idents.push(ident_part); 234 | 235 | let mut ns_opt = tyname.namespace; 236 | while let Some(ns) = ns_opt { 237 | let ident = get_ident_part(ns.name()); 238 | if !ident.is_empty() { 239 | idents.push(ident); 240 | } 241 | ns_opt = ns.parent(); 242 | } 243 | break 'get_idents idents; 244 | }; 245 | if idents.is_empty() { 246 | return format!("AnonType{}", uniq_id()); 247 | } 248 | idents.reverse(); 249 | 250 | let mut test_idx = idents.len() - 1; 251 | loop { 252 | let test_ident = idents[test_idx..].join("_"); 253 | let Some(used) = self.used_idents.get_mut(&test_ident) else { 254 | self.used_idents.insert(test_ident.clone(), 0); 255 | return test_ident; 256 | }; 257 | if test_idx == 0 { 258 | *used += 1; 
259 | return format!("{}_{}", test_ident, *used); 260 | } 261 | test_idx -= 1; 262 | } 263 | } 264 | 265 | fn add_type(&mut self, lines: &[String]) -> io::Result<()> { 266 | for l in lines { 267 | writeln!(self.h_file, "{}", l)?; 268 | } 269 | writeln!(self.h_file, "")?; 270 | writeln!(self.h_file, "")?; 271 | return Ok(()); 272 | } 273 | 274 | fn finish(&mut self) -> io::Result<()> { 275 | writeln!(self.c_file, " return 0;")?; 276 | writeln!(self.c_file, "}}")?; 277 | Ok(()) 278 | } 279 | } 280 | 281 | struct Member { 282 | off: u64, 283 | len: u64, 284 | // def looks like `__u8 __padding33[3]`; field_name looks like __padding33. 285 | field_name: String, 286 | // Does not include the trailing semicolon. 287 | def: String, 288 | is_padding: bool, // Only new_padding() may set this to true. 289 | } 290 | 291 | impl Member { 292 | // Lines pushed to def are indented by 2 spaces. 293 | fn print(&self, tyname: &str, def: &mut Vec, asserts: &mut Vec) { 294 | def.push(format!(" {};", &self.def)); 295 | 296 | asserts.push(EqAssert { 297 | expr: format!("(long int)(&((({}*)0)->{}))", tyname, self.field_name), 298 | val: self.off, 299 | }); 300 | asserts.push(EqAssert { 301 | expr: format!("sizeof((({}*)0)->{})", tyname, self.field_name), 302 | val: self.len, 303 | }); 304 | } 305 | 306 | fn new_padding(off: u64, len: u64) -> Self { 307 | let field_name = format!("__padding{}", uniq_id()); 308 | Self { 309 | off, 310 | len, 311 | def: format!("__u8 {}[{}]", &field_name, len), 312 | field_name, 313 | is_padding: true, 314 | } 315 | } 316 | 317 | fn new_placeholder(off: u64, len: u64, name: &str) -> Self { 318 | let field_name = format!("{}{}", name, uniq_id()); 319 | Self { 320 | off, 321 | len, 322 | def: format!("__u8 {}[{}]", &field_name, len), 323 | field_name, 324 | is_padding: false, 325 | } 326 | } 327 | } 328 | 329 | // This function should be the entry point for every query of the process state.
330 | fn get_type_info( 331 | processed: &mut ProcessState, 332 | printer: &mut Printer, 333 | tyidx: TypeIndex, 334 | ty_max_size: Option, 335 | inputs_hash: &[parser::FileHash], 336 | type_db: &HashMap, 337 | ) -> io::Result>> { 338 | let tyinfo = match processed.get(&tyidx).map(|v| v.clone()) { 339 | Some(i) => i, 340 | None => { 341 | process_type(processed, printer, tyidx, ty_max_size, inputs_hash, type_db)?; 342 | processed.get(&tyidx).unwrap().clone() 343 | } 344 | }; 345 | debug_assert!(tyinfo 346 | .as_ref() 347 | .map(|v| v.packed_size <= v.size) 348 | .unwrap_or(true)); 349 | if let (Some(tyinfo), Some(max_size)) = (&tyinfo, ty_max_size) { 350 | assert!(tyinfo.packed_size <= max_size); 351 | } 352 | return Ok(tyinfo); 353 | } 354 | 355 | // 这里 tyidx 是 real_tyidx 的符号链接, tyidx ---> real_tyidx. 356 | fn handle_sym_link( 357 | processed: &mut ProcessState, 358 | printer: &mut Printer, 359 | tyidx: TypeIndex, 360 | real_tyidx: TypeIndex, 361 | ty_max_size: Option, 362 | inputs_hash: &[parser::FileHash], 363 | type_db: &HashMap, 364 | ) -> io::Result<()> { 365 | let tyinfo = get_type_info( 366 | processed, 367 | printer, 368 | real_tyidx, 369 | ty_max_size, 370 | inputs_hash, 371 | type_db, 372 | )?; 373 | processed.insert(tyidx, tyinfo); 374 | return Ok(()); 375 | } 376 | 377 | // 找到 >= start 之后, 第一个 bit_offset 是 BYTE 边界的元素的下标, 不存在则返回 None. 378 | fn find_next_idx(tylayout: &Vec, start: usize) -> Option { 379 | for idx in start..tylayout.len() { 380 | if (tylayout[idx].bit_offset % BITS_PER_BYTE) == 0 { 381 | return Some(idx); 382 | } 383 | } 384 | return None; 385 | } 386 | 387 | // check ok return bit size. 388 | // #1 处对应的 C++ 示例: 389 | // struct S {}; 390 | // struct F: public S { int i ; }; // 这里 F.i 与 S 具有相同的起始地址. 391 | // #2 处对应 C++ 示例: 392 | // struct S { 393 | // long l; 394 | // char ch[0]; // 这里 ch bit_size None. 
395 | // }; 396 | // fn check_layout(tylayout: &Vec) -> Option { 397 | // let iter = tylayout.iter(); 398 | // let Some(mut prev) = iter.next() else { 399 | // return None; 400 | // }; 401 | // while let Some(curr) = iter.next() { 402 | // let Some(prevsize) = prev.bit_size.get() else { 403 | // return None; 404 | // }; 405 | // if curr.bit_offset == prev.bit_offset || // #1 406 | // curr.bit_offset == prev.bit_offset + prevsize 407 | // { 408 | // prev = curr; 409 | // continue; 410 | // } 411 | // return None; 412 | // } 413 | // let prevsize = match prev.bit_size.get() { 414 | // Some(v) => v, 415 | // None => 0, // #2 416 | // }; 417 | // return Some(prev.bit_offset + prevsize); 418 | // } 419 | 420 | // 暂时还不支持 `long l:32` 这种情况... 421 | fn is_bitfield(l: &parser::Layout) -> bool { 422 | let Some(s) = l.bit_size.get() else { 423 | return false; 424 | }; 425 | return s % BITS_PER_BYTE != 0; 426 | } 427 | 428 | fn is_valid_ident(input: &str) -> bool { 429 | let ret = input.trim_end_matches(is_ident_char); 430 | return ret.is_empty(); 431 | } 432 | 433 | fn member_name(input: Option<&str>) -> Cow { 434 | let Some(input) = input else { 435 | return Cow::Owned(format!("__anon{}", uniq_id())); 436 | }; 437 | if is_valid_ident(input) { 438 | return Cow::Borrowed(input); 439 | } 440 | return Cow::Owned(format!("__mem{}", uniq_id())); 441 | } 442 | 443 | struct EqAssert { 444 | expr: String, 445 | val: u64, 446 | } 447 | 448 | // tydef, 形如 `union U`, `struct S` 这种, 449 | // tymems 最后一个 tymem off + len 为 ty_size. 
450 | fn process_members( 451 | processed: &mut ProcessState, 452 | printer: &mut Printer, 453 | tyidx: TypeIndex, 454 | tyname: &parser::TypeName, 455 | tymems: &[Member], 456 | tydef: &str, 457 | tysize: Option, 458 | ) -> io::Result<()> { 459 | let mut asserts = Vec::::new(); 460 | let mut struct_def = Vec::::new(); 461 | 462 | struct_def.push(format!("// tyname={} tyidx={:?}", tyname, tyidx)); 463 | struct_def.push(format!("{} {{", tydef)); 464 | for tymem in tymems { 465 | tymem.print(tydef, &mut struct_def, &mut asserts); 466 | } 467 | struct_def.push("} __attribute__((__packed__));".to_string()); 468 | 469 | let Some(packed_size) = tymems.last().map(|v|v.off + v.len) else { 470 | return Ok(()); 471 | }; 472 | asserts.push(EqAssert { 473 | expr: format!("sizeof({})", tydef), 474 | val: packed_size, 475 | }); 476 | 477 | printer.add_type(&struct_def)?; 478 | printer.add_eq_asserts(&asserts)?; 479 | if let Some(tysize) = tysize { 480 | processed.insert( 481 | tyidx, 482 | Some(Rc::new(TypeInfo { 483 | name: tydef.to_string(), 484 | packed_size, 485 | size: tysize, 486 | })), 487 | ); 488 | } 489 | return Ok(()); 490 | } 491 | 492 | fn process_union_type( 493 | processed: &mut ProcessState, 494 | printer: &mut Printer, 495 | tyidx: TypeIndex, 496 | ty: &parser::UnionType, 497 | // ty_max_size 是用来处理 C++ 中 A1218, S1218 示例展示的重用 padding 问题, 498 | // union 没有这种问题, 所以可以忽略 ty_max_size. 499 | ty_max_size: Option, 500 | inputs_hash: &[parser::FileHash], 501 | type_db: &HashMap, 502 | ) -> io::Result<()> { 503 | let tyname = ty.type_name(); 504 | if ty.is_declaration() { 505 | let Some(&real_tyidx) = type_db.get(&tyname) else { 506 | warn!("process_union_type: unknown declaration union. 
typidx={:?} typname={}", tyidx, &tyname); 507 | return Ok(()); 508 | }; 509 | return handle_sym_link( 510 | processed, 511 | printer, 512 | tyidx, 513 | real_tyidx, 514 | ty_max_size, 515 | inputs_hash, 516 | type_db, 517 | ); 518 | } 519 | let Some(ty_size) = ty.byte_size() else { 520 | warn!("process_union_type: unknown byte size typidx={:?} typname={}", tyidx, ty.type_name()); 521 | return Ok(()); 522 | }; 523 | if let Some(ty_max_size) = ty_max_size { 524 | if ty_size > ty_max_size { 525 | warn!( 526 | "process_union_type: invalid byte size typidx={:?} typname={} expect={} actual={}", 527 | tyidx, 528 | ty.type_name(), 529 | ty_max_size, 530 | ty_size 531 | ); 532 | return Ok(()); 533 | } 534 | } 535 | 536 | let mut tymems = Vec::::new(); 537 | for union_mem in ty.members() { 538 | if union_mem.bit_offset() != 0 { 539 | warn!( 540 | "process_union_type: union_mem.bit_offset != 0! typidx={:?} typname={} member={:?}", 541 | tyidx, 542 | ty.type_name(), 543 | union_mem 544 | ); 545 | return Ok(()); 546 | } 547 | let Some(union_mem_bit_size) = union_mem.bit_size(&inputs_hash[tyidx.input_id]) else { 548 | warn!("process_union_type: unknown member size! 
typidx={:?} typname={} member={:?}", tyidx, ty.type_name(), union_mem); 549 | return Ok(()); 550 | }; 551 | let member_size = bit2byte(union_mem_bit_size); 552 | let tylayout = &parser::Layout { 553 | bit_offset: 0, 554 | bit_size: parser::Size::new(union_mem_bit_size), 555 | item: parser::LayoutItem::Member(union_mem), 556 | }; 557 | 558 | if is_bitfield(tylayout) { 559 | tymems.push(Member::new_placeholder(0, member_size, "__bitfield")); 560 | continue; 561 | } 562 | 563 | let member_tyoff = union_mem.type_offset(); 564 | let member_name = member_name(union_mem.name()); 565 | let mem_tyidx = TypeIndex { 566 | input_id: tyidx.input_id, 567 | typoff: member_tyoff, 568 | }; 569 | let mem_tyinfo = get_type_info( 570 | processed, 571 | printer, 572 | mem_tyidx, 573 | Some(member_size), 574 | inputs_hash, 575 | type_db, 576 | )?; 577 | let Some(mem_tyinfo) = mem_tyinfo else { 578 | tymems.push(Member::new_placeholder(0, member_size, "__unknown_type")); 579 | continue; 580 | }; 581 | debug_assert!(mem_tyinfo.packed_size <= member_size); 582 | 583 | tymems.push(Member { 584 | off: 0, 585 | len: mem_tyinfo.packed_size, 586 | def: format!("{} {}", &mem_tyinfo.name, &member_name), 587 | field_name: member_name.into_owned(), 588 | is_padding: false, 589 | }); 590 | } 591 | tymems.push(Member::new_placeholder(0, ty_size, "__HIDVA_dont_use")); 592 | 593 | let tydef = format!("union {}", printer.alloc_ident(&tyname)); 594 | return process_members( 595 | processed, 596 | printer, 597 | tyidx, 598 | &ty.type_name(), 599 | &tymems, 600 | &tydef, 601 | Some(ty_size), 602 | ); 603 | } 604 | 605 | fn process_struct_type( 606 | processed: &mut ProcessState, 607 | printer: &mut Printer, 608 | tyidx: TypeIndex, 609 | ty: &parser::StructType, 610 | ty_max_size: Option, 611 | inputs_hash: &[parser::FileHash], 612 | type_db: &HashMap, 613 | ) -> io::Result<()> { 614 | let tyname = ty.type_name(); 615 | if ty.is_declaration() { 616 | let Some(&real_tyidx) = type_db.get(&tyname) else { 617 | 
warn!("process_struct_type: unknown declaration struct. typidx={:?} typname={}", tyidx, tyname); 618 | return Ok(()); 619 | }; 620 | return handle_sym_link( 621 | processed, 622 | printer, 623 | tyidx, 624 | real_tyidx, 625 | ty_max_size, 626 | inputs_hash, 627 | type_db, 628 | ); 629 | } 630 | 631 | let Some(mut ty_bit_size) = ty.bit_size() else { 632 | warn!("process_struct_type: unknown type size: tyidx={:?} tyname={}", tyidx, ty.type_name()); 633 | return Ok(()); 634 | }; 635 | let ty_dwarf_size = ty.byte_size().unwrap(); 636 | let mut tylayout = ty.layout(&inputs_hash[tyidx.input_id]); 637 | while let Some(lastlayout) = tylayout.last() { 638 | if let parser::LayoutItem::Padding = lastlayout.item { 639 | let s = lastlayout.bit_size.get().unwrap(); 640 | debug_assert!(ty_bit_size >= s); 641 | ty_bit_size -= s; 642 | tylayout.pop(); 643 | } else { 644 | break; 645 | } 646 | } 647 | let tylayout = tylayout; 648 | let tysize = bit2byte(ty_bit_size); 649 | let ty_max_size = match ty_max_size { 650 | Some(v) => { 651 | if v > tysize { 652 | tysize 653 | } else { 654 | v 655 | } 656 | } 657 | None => tysize, 658 | }; 659 | 660 | let mut tymems = Vec::::with_capacity(tylayout.len()); 661 | let mut next_idx = find_next_idx(&tylayout, 0); 662 | debug_assert_eq!(next_idx.unwrap_or(0), 0); 663 | while let Some(item_idx) = next_idx { 664 | debug_assert_eq!(tylayout[item_idx].bit_offset % BITS_PER_BYTE, 0); 665 | debug_assert!(item_idx != 0 || tylayout[item_idx].bit_offset == 0); // layout() 函数会确保从 offset: 0 开始. 666 | next_idx = find_next_idx(&tylayout, item_idx + 1); 667 | 668 | let member_off = tylayout[item_idx].bit_offset / BITS_PER_BYTE; 669 | // member_size item_idx 占用的空间, 字节为单位. 670 | // 这里以 member_size 为准, 而不是 tylayout[item_idx].bit_size. 详见 S1218, A1218 例子. 
671 | let member_size = next_idx 672 | .map(|v| tylayout[v].bit_offset / BITS_PER_BYTE) 673 | .unwrap_or(ty_max_size) 674 | - member_off; 675 | // debug_assert!(tylayout[item_idx].bit_size.get().map(|v| v <= member_size).unwrap_or(true)); 676 | if member_size <= 0 && next_idx.is_some() { 677 | continue; 678 | } 679 | // member_size == 0 && next_idx.is_none() 意味着 member 是最后一个元素, 如下示例所示: 680 | // struct S {int i; char ch[0];} 681 | // 此时 ch member_size = 0. 682 | 683 | if is_bitfield(&tylayout[item_idx]) { 684 | // 更合适的做法, 是将 tymem 拆分字段以及 padding 部分, 685 | // struct S { long i: 2; }; 686 | // struct A: public S {char ch;}; 687 | // 不过实测这里 A.ch 并不会塞到 S padding 中, 所以拆不拆都行. 688 | tymems.push(Member::new_placeholder( 689 | member_off, 690 | member_size, 691 | "_bitfield", 692 | )); 693 | continue; 694 | } 695 | 696 | let (member_tyoff, member_name) = match tylayout[item_idx].item { 697 | parser::LayoutItem::Padding => { 698 | tymems.push(Member::new_padding(member_off, member_size)); 699 | continue; 700 | } 701 | parser::LayoutItem::Member(mem) => (mem.type_offset(), member_name(mem.name())), 702 | parser::LayoutItem::Inherit(mem) => ( 703 | mem.type_offset(), 704 | Cow::Owned(format!("__parent{}", uniq_id())), 705 | ), 706 | parser::LayoutItem::VariantPart(_) => { 707 | tymems.push(Member::new_placeholder( 708 | member_off, 709 | member_size, 710 | "__variant_part", 711 | )); 712 | continue; 713 | } 714 | }; 715 | let mem_tyidx = TypeIndex { 716 | input_id: tyidx.input_id, 717 | typoff: member_tyoff, 718 | }; 719 | let mem_tyinfo = get_type_info( 720 | processed, 721 | printer, 722 | mem_tyidx, 723 | Some(member_size), 724 | inputs_hash, 725 | type_db, 726 | )?; 727 | let Some(mem_tyinfo) = mem_tyinfo else { 728 | tymems.push(Member::new_placeholder(member_off, member_size, "__unknown_type")); 729 | continue; 730 | }; 731 | debug_assert!(mem_tyinfo.packed_size <= member_size); 732 | 733 | tymems.push(Member { 734 | off: member_off, 735 | len: mem_tyinfo.packed_size, 736 
| def: format!("{} {}", &mem_tyinfo.name, &member_name), 737 | field_name: member_name.into_owned(), 738 | is_padding: false, 739 | }); 740 | if mem_tyinfo.packed_size < member_size { 741 | tymems.push(Member::new_padding( 742 | member_off + mem_tyinfo.packed_size, 743 | member_size - mem_tyinfo.packed_size, 744 | )); 745 | } 746 | } 747 | while let Some(member) = tymems.last() { 748 | if member.is_padding { 749 | tymems.pop(); 750 | } else { 751 | break; 752 | } 753 | } 754 | 755 | let tydef = format!("struct {}", printer.alloc_ident(&tyname)); 756 | return process_members( 757 | processed, 758 | printer, 759 | tyidx, 760 | &ty.type_name(), 761 | &tymems, 762 | &tydef, 763 | Some(ty_dwarf_size), 764 | ); 765 | } 766 | 767 | fn process_enum_type( 768 | processed: &mut ProcessState, 769 | printer: &mut Printer, 770 | tyidx: TypeIndex, 771 | ty: &parser::EnumerationType, 772 | ty_max_size: Option, 773 | inputs_hash: &[parser::FileHash], 774 | type_db: &HashMap, 775 | ) -> io::Result<()> { 776 | let tyname = ty.type_name(); 777 | if ty.is_declaration() { 778 | let Some(&real_tyidx) = type_db.get(&tyname) else { 779 | warn!("process_enum_type: unknown declaration. typidx={:?} typname={}", tyidx, &tyname); 780 | return Ok(()); 781 | }; 782 | return handle_sym_link( 783 | processed, 784 | printer, 785 | tyidx, 786 | real_tyidx, 787 | ty_max_size, 788 | inputs_hash, 789 | type_db, 790 | ); 791 | } 792 | let Some(ty_size) = ty.byte_size(&inputs_hash[tyidx.input_id]) else { 793 | warn!("process_enum_type: unknown byte size. typidx={:?} typname={}", tyidx, ty.type_name()); 794 | return Ok(()); 795 | }; 796 | if let Some(ty_max_size) = ty_max_size { 797 | if ty_size > ty_max_size { 798 | warn!( 799 | "process_enum_type: invalid byte size. typidx={:?} typname={} expect={} actual={}", 800 | tyidx, 801 | ty.type_name(), 802 | ty_max_size, 803 | ty_size 804 | ); 805 | return Ok(()); 806 | } 807 | } 808 | 809 | // EnumerationType::ty may be none, 所以我们自己选择类型吧. 
810 | let ty_repr = if ty_size == 8 { 811 | "__s64" 812 | } else if ty_size == 4 { 813 | "__s32" 814 | } else if ty_size == 2 { 815 | "__s16" 816 | } else if ty_size == 1 { 817 | "__s8" 818 | } else { 819 | warn!( 820 | "process_enum_type: invalid byte size. typidx={:?} typname={} expect=8/4/2/1 actual={}", 821 | tyidx, 822 | ty.type_name(), 823 | ty_size 824 | ); 825 | return Ok(()); 826 | }; 827 | 828 | let mut asserts = Vec::::new(); 829 | let mut struct_def = Vec::::new(); 830 | asserts.push(EqAssert { 831 | expr: format!("sizeof({})", ty_repr), 832 | val: ty_size, 833 | }); 834 | let tydef = printer.alloc_ident(&tyname); 835 | struct_def.push(format!("// --- enum {} begin ---", &tydef)); 836 | for enum_item in &ty.enumerators(&inputs_hash[tyidx.input_id]) { 837 | struct_def.push(format!( 838 | "// {}={}", 839 | enum_item.name().unwrap_or(""), 840 | enum_item.value().unwrap_or(-20181218), 841 | )); 842 | } 843 | struct_def.push(format!("// --- enum {} end ---", &tydef)); 844 | struct_def.push(format!("typedef {} {};", ty_repr, &tydef)); 845 | 846 | processed.insert( 847 | tyidx, 848 | Some(Rc::new(TypeInfo { 849 | name: tydef, 850 | packed_size: ty_size, 851 | size: ty_size, 852 | })), 853 | ); 854 | printer.add_type(&struct_def)?; 855 | printer.add_eq_asserts(&asserts)?; 856 | return Ok(()); 857 | } 858 | 859 | fn process_array_type( 860 | processed: &mut ProcessState, 861 | printer: &mut Printer, 862 | tyidx: TypeIndex, 863 | ty: &parser::ArrayType, 864 | ty_max_size: Option, 865 | inputs_hash: &[parser::FileHash], 866 | type_db: &HashMap, 867 | ) -> io::Result<()> { 868 | let mem_tyidx = TypeIndex { 869 | input_id: tyidx.input_id, 870 | typoff: ty.ty, 871 | }; 872 | let mem_tyinfo = get_type_info(processed, printer, mem_tyidx, None, inputs_hash, type_db)?; 873 | let Some(mem_tyinfo) = mem_tyinfo else { 874 | warn!("process_array_type: unknown element type: tyidx={:?} ty={:?}", tyidx, ty); 875 | return Ok(()); 876 | }; 877 | 878 | let mem_tyident = 
mem_tyinfo.ident(); 879 | let mut mem_tyname = mem_tyinfo.name.clone(); 880 | if mem_tyinfo.size > mem_tyinfo.packed_size { 881 | // 以 S1218 为例, 我们在生成 S1218 是 packed 的, 但如果 S1218 作为 array element, 则 882 | // 需要保持其原有 padding. 883 | mem_tyname = { 884 | let name = format!("{}_Padded", mem_tyident); 885 | let n = printer.alloc_ident(&parser::TypeName { 886 | namespace: None, 887 | name: Some(&name), 888 | }); 889 | format!("struct {}", n) 890 | }; 891 | let mut members = Vec::::new(); 892 | let data_name = "data"; 893 | members.push(Member { 894 | off: 0, 895 | len: mem_tyinfo.packed_size, 896 | field_name: data_name.to_string(), 897 | def: format!("{} {}", mem_tyinfo.name, data_name), 898 | is_padding: false, 899 | }); 900 | members.push(Member::new_padding( 901 | mem_tyinfo.packed_size, 902 | mem_tyinfo.size - mem_tyinfo.packed_size, 903 | )); 904 | process_members( 905 | processed, 906 | printer, 907 | tyidx, 908 | &parser::TypeName { 909 | namespace: None, 910 | name: Some("padding struct"), 911 | }, 912 | &members, 913 | &mem_tyname, 914 | None, 915 | )?; 916 | } 917 | 918 | let ele_cnt = if ty_max_size == Some(0) { 919 | 0 920 | } else { 921 | let Some(array_byte_size) = ty.byte_size(&inputs_hash[tyidx.input_id]) else { 922 | warn!("process_array_type: unknown array size: tyidx={:?} ty={:?}", tyidx, ty); 923 | return Ok(()); 924 | }; 925 | if let Some(max_size) = ty_max_size { 926 | if max_size < array_byte_size { 927 | return Ok(()); 928 | } 929 | } 930 | let Some(ele_count) = ty.count(&inputs_hash[tyidx.input_id]) else { 931 | warn!("process_array_type: unknown element count: tyidx={:?} ty={:?}", tyidx, ty); 932 | return Ok(()); 933 | }; 934 | if array_byte_size % ele_count != 0 || mem_tyinfo.size != array_byte_size / ele_count { 935 | warn!( 936 | "process_array_type: invalid array def: tyidx={:?} ty={:?}", 937 | tyidx, ty 938 | ); 939 | return Ok(()); 940 | } 941 | ele_count 942 | }; 943 | let array_name = { 944 | let name = format!("{}_Array{}", 
mem_tyident, ele_cnt); 945 | let tyname = parser::TypeName { 946 | namespace: None, 947 | name: Some(&name), 948 | }; 949 | printer.alloc_ident(&tyname) 950 | }; 951 | let array_size = mem_tyinfo.size * ele_cnt; 952 | 953 | printer.add_type(&[format!( 954 | "typedef {} {}[{}];", 955 | mem_tyname, array_name, ele_cnt 956 | )])?; 957 | printer.add_eq_assert(&format!("sizeof({})", array_name), array_size)?; 958 | processed.insert( 959 | tyidx, 960 | Some(Rc::new(TypeInfo { 961 | name: array_name.clone(), 962 | packed_size: array_size, 963 | size: array_size, 964 | })), 965 | ); 966 | return Ok(()); 967 | } 968 | 969 | fn process_modifier_type( 970 | processed: &mut ProcessState, 971 | printer: &mut Printer, 972 | tyidx: TypeIndex, 973 | ty: &parser::TypeModifier, 974 | ty_max_size: Option, 975 | inputs_hash: &[parser::FileHash], 976 | type_db: &HashMap, 977 | ) -> io::Result<()> { 978 | let real_tyidx = TypeIndex { 979 | input_id: tyidx.input_id, 980 | typoff: ty.ty, 981 | }; 982 | match ty.kind() { 983 | parser::TypeModifierKind::Const 984 | | parser::TypeModifierKind::Packed 985 | | parser::TypeModifierKind::Volatile 986 | | parser::TypeModifierKind::Restrict 987 | | parser::TypeModifierKind::Shared 988 | | parser::TypeModifierKind::Atomic 989 | | parser::TypeModifierKind::Other => { 990 | return handle_sym_link( 991 | processed, 992 | printer, 993 | tyidx, 994 | real_tyidx, 995 | ty_max_size, 996 | inputs_hash, 997 | type_db, 998 | ); 999 | } 1000 | parser::TypeModifierKind::Pointer 1001 | | parser::TypeModifierKind::Reference 1002 | | parser::TypeModifierKind::RvalueReference => { 1003 | let Some(tysize) = ty.byte_size(&inputs_hash[tyidx.input_id]) else { 1004 | warn!("process_modifier_type: unknown byte size: tyidx={:?}", tyidx); 1005 | return Ok(()); 1006 | }; 1007 | if let Some(maxsize) = ty_max_size { 1008 | if maxsize < tysize { 1009 | warn!( 1010 | "process_modifier_type: invalid byte size: tyidx={:?} maxsize={} size={}", 1011 | tyidx, maxsize, tysize 1012 | 
); 1013 | return Ok(()); 1014 | } 1015 | } 1016 | 1017 | let real_tyinfo = 1018 | get_type_info(processed, printer, real_tyidx, None, inputs_hash, type_db)?; 1019 | let real_tyname = if let Some(tyinfo) = &real_tyinfo { 1020 | &tyinfo.name 1021 | } else { 1022 | "void" 1023 | }; 1024 | let tyname = format!("{}*", real_tyname); 1025 | 1026 | printer.add_eq_assert(&format!("sizeof({})", tyname), tysize)?; 1027 | processed.insert( 1028 | tyidx, 1029 | Some(Rc::new(TypeInfo { 1030 | name: tyname, 1031 | packed_size: tysize, 1032 | size: tysize, 1033 | })), 1034 | ); 1035 | } 1036 | } 1037 | return Ok(()); 1038 | } 1039 | 1040 | // process_type 之后, ty 一定存在于 processed 之中, 1041 | // processed[ty] 为 None, 意味着没有有效信息. 1042 | // 由于 typedef 的存在, 可能会出现多个 tyidx 对应着相同的 TypeInfo, 所以使用 Rc. 1043 | // 1044 | // 该函数调用时, ty 一定不在 processed 中. 1045 | fn process_type( 1046 | processed: &mut ProcessState, 1047 | printer: &mut Printer, 1048 | tyidx: TypeIndex, 1049 | ty_max_size: Option, 1050 | inputs_hash: &[parser::FileHash], 1051 | type_db: &HashMap, 1052 | ) -> io::Result<()> { 1053 | debug_assert!(!processed.contains_key(&tyidx)); 1054 | processed.insert(tyidx, None); // 先占个坑, 1055 | 1056 | let typ = parser::Type::from_offset(&inputs_hash[tyidx.input_id], tyidx.typoff); 1057 | let Some(typ) = typ else { 1058 | warn!("process_type: unknown type. tyidx={:?}", tyidx); 1059 | return Ok(()); 1060 | }; 1061 | let typ = typ.as_ref(); 1062 | 1063 | match typ.kind() { 1064 | parser::TypeKind::Void 1065 | | parser::TypeKind::Function(_) 1066 | | parser::TypeKind::PointerToMember(_) 1067 | | parser::TypeKind::Subrange(_) 1068 | | parser::TypeKind::Unspecified(_) => {} 1069 | parser::TypeKind::Base(ty) => { 1070 | let (Some(tyname), Some(tysize)) = (ty.name(), ty.byte_size()) else { 1071 | warn!("process_type: base type has no name. 
tyidx={:?}", tyidx); 1072 | return Ok(()); 1073 | }; 1074 | processed.insert( 1075 | tyidx, 1076 | Some(Rc::new(TypeInfo { 1077 | name: tyname.to_string(), 1078 | packed_size: tysize, 1079 | size: tysize, 1080 | })), 1081 | ); 1082 | printer.add_eq_assert(&format!("sizeof({})", tyname), tysize)?; 1083 | } 1084 | parser::TypeKind::Def(ty) => { 1085 | let real_typidx = TypeIndex { 1086 | input_id: tyidx.input_id, 1087 | typoff: ty.ty, 1088 | }; 1089 | handle_sym_link( 1090 | processed, 1091 | printer, 1092 | tyidx, 1093 | real_typidx, 1094 | ty_max_size, 1095 | inputs_hash, 1096 | type_db, 1097 | )?; 1098 | } 1099 | parser::TypeKind::Struct(ty) => { 1100 | return process_struct_type( 1101 | processed, 1102 | printer, 1103 | tyidx, 1104 | ty, 1105 | ty_max_size, 1106 | inputs_hash, 1107 | type_db, 1108 | ); 1109 | } 1110 | parser::TypeKind::Union(ty) => { 1111 | return process_union_type( 1112 | processed, 1113 | printer, 1114 | tyidx, 1115 | ty, 1116 | ty_max_size, 1117 | inputs_hash, 1118 | type_db, 1119 | ); 1120 | } 1121 | parser::TypeKind::Enumeration(ty) => { 1122 | return process_enum_type( 1123 | processed, 1124 | printer, 1125 | tyidx, 1126 | ty, 1127 | ty_max_size, 1128 | inputs_hash, 1129 | type_db, 1130 | ); 1131 | } 1132 | parser::TypeKind::Array(ty) => { 1133 | return process_array_type( 1134 | processed, 1135 | printer, 1136 | tyidx, 1137 | ty, 1138 | ty_max_size, 1139 | inputs_hash, 1140 | type_db, 1141 | ); 1142 | } 1143 | parser::TypeKind::Modifier(ty) => { 1144 | return process_modifier_type( 1145 | processed, 1146 | printer, 1147 | tyidx, 1148 | ty, 1149 | ty_max_size, 1150 | inputs_hash, 1151 | type_db, 1152 | ); 1153 | } 1154 | } 1155 | return Ok(()); 1156 | } 1157 | 1158 | fn main() -> anyhow::Result<()> { 1159 | env_logger::init(); 1160 | let args = Args::parse(); 1161 | 1162 | let mut inputs = Vec::new(); 1163 | let mut inputs_hash = Vec::new(); 1164 | for input_path in &args.so_path { 1165 | info!("load so. 
path={}", input_path); 1166 | inputs.push(parser::File::parse(input_path.clone())?); 1167 | } 1168 | for input_path in &args.so_file_path { 1169 | for line in read_lines(input_path)? { 1170 | let line = line?; 1171 | info!("load so. path={}", &line); 1172 | inputs.push(parser::File::parse(line)?); 1173 | } 1174 | } 1175 | info!("build input file hash"); 1176 | for input in &inputs { 1177 | inputs_hash.push(parser::FileHash::new(input.file())); 1178 | } 1179 | 1180 | info!("build type db"); 1181 | let mut dest = Vec::new(); 1182 | let mut type_db = HashMap::new(); 1183 | for (input_id, hash) in inputs_hash.iter().enumerate() { 1184 | for (&typoff, &typ) in hash.types.iter() { 1185 | if is_declaration(typ) { 1186 | continue; 1187 | } 1188 | if let Some(typname) = parser::TypeName::try_from(typ) { 1189 | let typidx = TypeIndex { input_id, typoff }; 1190 | if args.is_dest(&typname) { 1191 | dest.push(typidx); 1192 | } 1193 | // type_db 存放着哪些可能会被跨 so file 引用的符号, 很显然 anon ty 1194 | // 不在这种. 1195 | if typ.is_anon() { 1196 | continue; 1197 | } 1198 | // typname.is_anon() may be true 1199 | type_db.insert(typname, typidx); 1200 | } 1201 | } 1202 | } 1203 | 1204 | let mut printer = Printer::try_open(&args.out_path)?; 1205 | let mut processed = ProcessState::new(); 1206 | for dest_ty in &dest { 1207 | process_type( 1208 | &mut processed, 1209 | &mut printer, 1210 | *dest_ty, 1211 | None, 1212 | &inputs_hash, 1213 | &type_db, 1214 | )?; 1215 | } 1216 | printer.finish()?; 1217 | return Ok(()); 1218 | } 1219 | --------------------------------------------------------------------------------