├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ └── release.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── llvm-bitcursor ├── Cargo.toml ├── README.md └── src │ ├── error.rs │ └── lib.rs ├── llvm-bitstream ├── Cargo.toml ├── README.md ├── examples │ └── dump-bitstream.rs └── src │ ├── abbrev.rs │ ├── error.rs │ ├── lib.rs │ ├── parser.rs │ └── record.rs ├── llvm-constants ├── Cargo.toml ├── README.md ├── build.rs └── src │ ├── constants.rs │ ├── enums.rs │ └── lib.rs ├── llvm-mapper ├── Cargo.toml ├── README.md ├── examples │ └── unroll-bitstream.rs └── src │ ├── block │ ├── attributes.rs │ ├── function │ │ ├── basic_block.rs │ │ ├── instruction.rs │ │ └── mod.rs │ ├── identification.rs │ ├── mod.rs │ ├── module.rs │ ├── strtab.rs │ ├── symtab.rs │ ├── type_table.rs │ └── vst.rs │ ├── error.rs │ ├── lib.rs │ ├── map.rs │ ├── record │ ├── alias.rs │ ├── comdat.rs │ ├── datalayout.rs │ ├── function.rs │ └── mod.rs │ └── unroll.rs ├── llvm-support ├── Cargo.toml ├── README.md ├── build.rs └── src │ ├── align.rs │ ├── attribute.rs │ ├── bitcodes.rs │ ├── lib.rs │ ├── opcode.rs │ └── ty.rs └── release.toml /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "cargo" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Format 15 | run: cargo fmt && git diff --exit-code 16 | - name: Lint 17 | run: | 18 | rustup update 19 | rustup component add clippy 20 | cargo clippy -- \ 21 | -D warnings -D clippy::expect_used -D clippy::unwrap_used -D 
clippy::panic 22 | test: 23 | strategy: 24 | matrix: 25 | platform: ["ubuntu-latest", "macos-latest"] 26 | runs-on: ${{ matrix.platform }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | 30 | - name: Build 31 | run: cargo build 32 | 33 | - name: Test 34 | run: | 35 | cargo test 36 | cargo test --no-default-features 37 | 38 | - name: Doc 39 | run: cargo doc 40 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | tags: 4 | # This is slightly annoying. Is there a better way to do this? 5 | - 'llvm-bitcursor-v*' 6 | - 'llvm-bitstream-v*' 7 | - 'llvm-constants-v*' 8 | - 'llvm-support-v*' 9 | - 'llvm-mapper-v*' 10 | 11 | name: release 12 | 13 | jobs: 14 | release: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: create release 18 | id: create_release 19 | uses: actions/create-release@v1 20 | env: 21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | with: 23 | tag_name: ${{ github.ref }} 24 | release_name: Release ${{ github.ref }} 25 | draft: false 26 | prerelease: ${{ contains(github.ref, 'pre') || contains(github.ref, 'rc') }} 27 | 28 | publish: 29 | runs-on: ubuntu-latest 30 | steps: 31 | - uses: actions/checkout@v2 32 | 33 | - name: publish release 34 | run: | 35 | echo ${{ secrets.CRATES_IO_TOKEN }} | cargo login 36 | tag="${GITHUB_REF#refs/tags/}" 37 | package_being_published="${tag%-v*}" 38 | cd "${package_being_published}" && cargo publish 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | *.bc 4 | *.ll 5 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | members = [ 4 | "llvm-bitcursor", 5 | 
"llvm-bitstream", 6 | # Subsumed within llvm-support. 7 | # "llvm-constants", 8 | "llvm-support", 9 | "llvm-mapper", 10 | ] 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 William Woodruff 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | HAS_NIGHTLY := $(shell rustup toolchain list | grep nightly) 2 | ifeq ($(HAS_NIGHTLY),) 3 | FMT_FLAG := 4 | else 5 | FMT_FLAG := +nightly 6 | endif 7 | 8 | .PHONY: all 9 | all: 10 | @echo "This is not a real build system." 
11 | 12 | .PHONY: fmt 13 | fmt: 14 | cargo $(FMT_FLAG) fmt 15 | 16 | .PHONY: lint 17 | lint: 18 | cargo clippy -- \ 19 | -D warnings \ 20 | -D clippy::expect_used \ 21 | -D clippy::unwrap_used \ 22 | -D clippy::panic 23 | @# NOTE(ww): run docs here too, since they can fail the CI when links are broken 24 | cargo doc 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | mollusc 2 | ======= 3 | 4 | [![CI](https://github.com/woodruffw/mollusc/actions/workflows/ci.yml/badge.svg)](https://github.com/woodruffw/mollusc/actions/workflows/ci.yml) 5 | 6 | ⚠️This is a work in progress! Many parts are incomplete or only partially functional!⚠️ 7 | 8 | *mollusc* is a collection of pure-Rust libraries for parsing, interpreting, and analyzing LLVM. 9 | 10 | | Crate | Version | Description | 11 | | ------- | ------- | ----------- | 12 | | [`llvm-bitcursor`](./llvm-bitcursor) | [![Crates.io](https://img.shields.io/crates/v/llvm-bitcursor)](https://crates.io/crates/llvm-bitcursor) | A no-frills cursor library for reading fields from a bitstream. | 13 | | [`llvm-bitstream`](./llvm-bitstream) | [![Crates.io](https://img.shields.io/crates/v/llvm-bitstream)](https://crates.io/crates/llvm-bitstream) | A content-agnostic parser for LLVM's bitstream container format. | 14 | | [`llvm-constants`](./llvm-constants) | [![Crates.io](https://img.shields.io/crates/v/llvm-constants)](https://crates.io/crates/llvm-constants) | **Unused**. A collection of numeric and enum constants useful across multiple crates in the *mollusc* ecosystem. | 15 | | [`llvm-support`](./llvm-support) | [![Crates.io](https://img.shields.io/crates/v/llvm-support)](https://crates.io/crates/llvm-support) | Support types and routines for parsing LLVM's bitcode. 
| 16 | | [`llvm-mapper`](./llvm-mapper) | [![Crates.io](https://img.shields.io/crates/v/llvm-mapper)](https://crates.io/crates/llvm-mapper) | A library for mapping the contents of bitstreams into LLVM IR models. | 17 | | **Not implemented.** | N/A | A high level interface for interacting with LLVM IR. | 18 | -------------------------------------------------------------------------------- /llvm-bitcursor/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "llvm-bitcursor" 3 | description = "A no-frills bitstream cursor library for Rust" 4 | license = "MIT" 5 | homepage = "https://github.com/woodruffw/mollusc/tree/main/llvm-bitcursor" 6 | repository = "https://github.com/woodruffw/mollusc" 7 | authors = ["William Woodruff "] 8 | readme = "README.md" 9 | keywords = ["llvm", "parsing", "binary", "encoding"] 10 | categories = ["compilers", "encoding", "parsing"] 11 | edition = "2018" 12 | version = "0.0.3" 13 | 14 | [features] 15 | default = ["vbr"] 16 | vbr = [] 17 | 18 | [dependencies] 19 | log = "0.4" 20 | num = "0.4" 21 | thiserror = "1.0" 22 | -------------------------------------------------------------------------------- /llvm-bitcursor/README.md: -------------------------------------------------------------------------------- 1 | llvm-bitcursor 2 | ============== 3 | 4 | [![Crates.io](https://img.shields.io/crates/v/llvm-bitcursor)](https://crates.io/crates/llvm-bitcursor) 5 | [![Documentation](https://docs.rs/llvm-bitcursor/badge.svg)](https://docs.rs/llvm-bitcursor) 6 | 7 | A no-frills cursor library that supports reading unaligned fields from 8 | a bitstream. 9 | 10 | This library primarily exists to provide low-level support for the task 11 | of parsing LLVM's [bitstream format](https://llvm.org/docs/BitCodeFormat.html). 12 | If you're looking for a general purpose bitvector handling library, try 13 | [bitvec](https://crates.io/crates/bitvec) or [bit-vec](https://crates.io/crates/bit-vec). 
14 | 15 | Features: 16 | 17 | * No-copy, all cursor state is internal 18 | * Support for LLVM's [VBR](https://llvm.org/docs/BitCodeFormat.html#variable-width-value) encoding 19 | (requires the `vbr` feature) 20 | * 100% safe Rust, with `#![forbid(unsafe_code)]` 21 | * No use of `unwrap`, `expect`, or `panic` 22 | 23 | Anti-features: 24 | 25 | * Not a general purpose bitvector/bitstring handling library 26 | * Probably not very fast 27 | * Doesn't care about bit order (always LSB-first) 28 | -------------------------------------------------------------------------------- /llvm-bitcursor/src/error.rs: -------------------------------------------------------------------------------- 1 | //! Error management for `llvm-bitcursor`. 2 | 3 | use thiserror::Error as ThisError; 4 | 5 | /// All errors potentially produced by `llvm-bitcursor` APIs. 6 | /// Consumers should *not* attempt to match specific variants of this error type. 7 | #[non_exhaustive] 8 | #[derive(Debug, ThisError)] 9 | pub enum Error { 10 | /// A read or other I/O operation encountered the end of the inner buffer. 11 | #[error("EOF while reading")] 12 | Eof, 13 | /// A user attempted to call [`BitCursor::new_with_len`](crate::BitCursor::new_with_len) with 14 | /// an impossible length (larger than the supplied buffer). 15 | #[error("invalid length for buffer supplied to cursor")] 16 | InvalidLength, 17 | /// A generic API (e.g. [`BitCursor::read_as`](crate::BitCursor::read_as)) was asked to 18 | /// read a value larger than the requested type could represent. 19 | #[error("loss of data with cast")] 20 | BadCast, 21 | /// A read API was called with an invalid bitsize (too small or large). 22 | #[error("invalid read size (zero or too large)")] 23 | InvalidReadSize, 24 | /// A VBR read API was called with an invalid VBR width.
25 | #[cfg(any(feature = "vbr", doc))] 26 | #[error("invalid VBR width (must be > 1 but <= system word width)")] 27 | InvalidVbrWidth, 28 | /// An I/O operation completed partially, but the inner buffer ended before its full completion. 29 | #[error("too little data to service request")] 30 | Short, 31 | } 32 | -------------------------------------------------------------------------------- /llvm-bitstream/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "llvm-bitstream" 3 | description = "A content-agnostic parser for LLVM's bitstream container format" 4 | license = "MIT" 5 | homepage = "https://github.com/woodruffw/mollusc/tree/main/llvm-bitstream" 6 | repository = "https://github.com/woodruffw/mollusc" 7 | authors = ["William Woodruff "] 8 | readme = "README.md" 9 | keywords = ["llvm", "parsing", "binary", "encoding"] 10 | categories = ["compilers", "encoding", "parsing"] 11 | edition = "2018" 12 | version = "0.0.3" 13 | 14 | [[example]] 15 | name = "dump-bitstream" 16 | 17 | [dependencies] 18 | llvm-bitcursor = { version = "0.0.3", path = "../llvm-bitcursor" } 19 | llvm-support = { version = "0.0.3", path = "../llvm-support" } 20 | log = "0.4" 21 | num = "0.4" 22 | num_enum = "0.6" 23 | thiserror = "1.0" 24 | 25 | [dev-dependencies] 26 | anyhow = "1.0" 27 | clap = "4.0" 28 | env_logger = "0.10" 29 | -------------------------------------------------------------------------------- /llvm-bitstream/README.md: -------------------------------------------------------------------------------- 1 | llvm-bitstream 2 | ============== 3 | 4 | [![Crates.io](https://img.shields.io/crates/v/llvm-bitstream)](https://crates.io/crates/llvm-bitstream) 5 | [![Documentation](https://docs.rs/llvm-bitstream/badge.svg)](https://docs.rs/llvm-bitstream) 6 | 7 | A content-agnostic parser for LLVM's [bitstream container format](https://llvm.org/docs/BitCodeFormat.html).
8 | 9 | Conceptually, this library is one step below a full LLVM bitcode parser: 10 | it can interpret the entries in a bitstream, but isn't aware of their semantics 11 | and isn't responsible for composing them into an LLVM IR 12 | program (or any other concrete structure that's been serialized as a bitstream). 13 | 14 | This library uses [`llvm-bitcursor`](https://crates.io/crates/llvm-bitcursor) under the hood. 15 | -------------------------------------------------------------------------------- /llvm-bitstream/examples/dump-bitstream.rs: -------------------------------------------------------------------------------- 1 | use std::fs; 2 | 3 | use anyhow::Result; 4 | use clap::{Arg, Command}; 5 | use llvm_bitstream::parser::StreamEntry; 6 | use llvm_bitstream::Bitstream; 7 | 8 | fn app() -> Command { 9 | Command::new(env!("CARGO_PKG_NAME")) 10 | .version(env!("CARGO_PKG_VERSION")) 11 | .about(env!("CARGO_PKG_DESCRIPTION")) 12 | .arg( 13 | Arg::new("input") 14 | .help("the bitstream input to dump") 15 | .index(1) 16 | .required(true), 17 | ) 18 | } 19 | 20 | fn main() -> Result<()> { 21 | env_logger::init(); 22 | let matches = app().get_matches(); 23 | 24 | let input = { 25 | let input = matches.get_one::("input").unwrap(); 26 | fs::read(input)? 27 | }; 28 | 29 | let (wrapper, bitstream) = Bitstream::from(&input)?; 30 | 31 | if let Some(wrapper) = wrapper { 32 | println!("Wrapper: {:#?}", wrapper); 33 | } 34 | 35 | println!("Entered bitstream; magic: {:#X}", bitstream.magic); 36 | 37 | let mut scope = 0; 38 | for entry in bitstream { 39 | match entry? 
{ 40 | StreamEntry::SubBlock(block) => { 41 | println!("{}BLOCK {} {{", "\t".repeat(scope), block.block_id); 42 | scope += 1; 43 | } 44 | StreamEntry::EndBlock => { 45 | scope -= 1; 46 | println!("{}}}", "\t".repeat(scope)); 47 | } 48 | StreamEntry::Record(record) => { 49 | println!( 50 | "{}RECORD {{ code: {}, fields: {:?} }}", 51 | "\t".repeat(scope), 52 | record.code, 53 | record.fields 54 | ) 55 | } 56 | }; 57 | } 58 | 59 | Ok(()) 60 | } 61 | -------------------------------------------------------------------------------- /llvm-bitstream/src/abbrev.rs: -------------------------------------------------------------------------------- 1 | //! Abbreviation definition and abbreviated record parsing and handling for `llvm-bitstream`. 2 | 3 | use std::convert::{From, TryFrom, TryInto}; 4 | 5 | use llvm_bitcursor::BitCursor; 6 | use llvm_support::bitcodes::{AbbrevOpEnc, ReservedAbbrevId}; 7 | use llvm_support::CHAR6_ALPHABET; 8 | 9 | use crate::error::Error; 10 | use crate::record::Fields; 11 | 12 | /// An abbreviation ID, whether reserved or defined by the stream itself. 13 | #[derive(Clone, Copy, Debug)] 14 | pub enum AbbrevId { 15 | /// A reserved abbreviation ID. 16 | Reserved(ReservedAbbrevId), 17 | /// An abbreviation ID that's been defined within the stream. 18 | Defined(u64), 19 | } 20 | 21 | impl From for AbbrevId { 22 | fn from(value: u64) -> Self { 23 | ReservedAbbrevId::try_from(value) 24 | .map_or_else(|_| AbbrevId::Defined(value), AbbrevId::Reserved) 25 | } 26 | } 27 | 28 | /// The valid abbreviation operand forms. 29 | #[derive(Clone, Debug, PartialEq)] 30 | pub enum AbbrevOp { 31 | /// A literal, constant operand. 32 | Literal(u64), 33 | /// A VBR whose width is associated as extra data. 34 | Vbr(u64), 35 | /// A fixed-width field whose width is associated as extra data. 36 | Fixed(u64), 37 | /// A fixed-length array whose member elements are specified. 38 | Array(Box), 39 | /// A single Char6. 40 | Char6, 41 | /// A fixed-length blob of bytes.
42 | Blob, 43 | } 44 | 45 | impl AbbrevOp { 46 | /// Given a Char6 value, map it back to its ASCII printable equivalent. 47 | /// 48 | /// This function is private because it requires caller-upheld invariants 49 | /// for panic safety. 50 | fn decode_char6(char6: u8) -> u8 { 51 | // Panic safety: the caller is expected to constrain char6 to a valid 52 | // index within CHAR6_ALPHABET. 53 | CHAR6_ALPHABET[char6 as usize] 54 | } 55 | 56 | /// Parse a single abbreviation operand from the stream, returning a 57 | /// vector of one or more fields for that operand. 58 | pub(self) fn parse>(&self, cur: &mut BitCursor) -> Result { 59 | // A sad thing happens in this function: we parse by iterating over 60 | // each operand, collecting the field(s) in the bitstream that correspond to it. 61 | // Operands are typed and carry detailed information about their semantics: 62 | // for example, an `AbbrevOp::Char6` is exactly 6 bits and maps directly 63 | // to a printable character. It would be really nice if we could expose this structure 64 | // at a higher level, i.e. by returning a `Value` enum with different variants 65 | // for each operand, and higher levels could take advantage of it. 66 | // Unfortunately, LLVM does not let us do this: bitstream consumers **must** 67 | // be agnostic to how the bitstream is emitted, which means that an emitter's 68 | // decision to use a Char6 vs. a VBR6 cannot affect later, higher-level interpretation. 69 | // As a result, we have to discard all of our nice structure here in favor of 70 | // sequences of "fields," which are really just individual `u64`s. 71 | Ok(match self { 72 | AbbrevOp::Literal(val) => vec![*val], 73 | AbbrevOp::Vbr(width) => vec![cur.read_vbr(*width as usize)?], 74 | AbbrevOp::Fixed(width) => vec![cur.read_as::(*width as usize)?], 75 | AbbrevOp::Array(elem) => { 76 | // An array operand is encoded as a length (VBR6), followed by 77 | // each encoded element of the array. 
78 | // TODO(ww): Sanity check array_len here. 79 | let array_len = cur.read_vbr(6)? as usize; 80 | 81 | let mut fields: Fields = Vec::with_capacity(array_len); 82 | for _ in 0..array_len { 83 | fields.extend(elem.parse(cur)?); 84 | } 85 | 86 | fields 87 | } 88 | AbbrevOp::Char6 => vec![Self::decode_char6(cur.read_as::(6)?).into()], 89 | AbbrevOp::Blob => { 90 | // A blob operand is encoded as a length (VBR6), followed by a 32-bit aligned 91 | // sequence of bytes, followed by another alignment back to 32 bits. 92 | 93 | // TODO(ww): Sanity check blob_len here: it probably shouldn't be 0, 94 | // and it definitely can't be longer than the stream. 95 | let blob_len = cur.read_vbr(6)? as usize; 96 | cur.align32(); 97 | 98 | // TODO(ww): This read loop is probably slower than it needs to be; 99 | // `BitCursor` could probably learn a `read_bytes` API that's 100 | // only allowed when the stream is byte-aligned. 101 | let mut fields: Fields = Vec::with_capacity(blob_len); 102 | for _ in 0..blob_len { 103 | fields.push(cur.read_exact::()?.into()); 104 | } 105 | cur.align32(); 106 | 107 | fields 108 | } 109 | }) 110 | } 111 | } 112 | 113 | /// Represents a defined abbreviation, as specified by a `DEFINE_ABBREV` record. 114 | #[derive(Clone, Debug)] 115 | pub struct Abbrev { 116 | /// The abstract operands for this abbreviation definition. 117 | pub operands: Vec, 118 | } 119 | 120 | impl Abbrev { 121 | /// Parse a new `Abbrev` from the stream. 122 | /// 123 | /// Assumes that the `DEFINE_ABBREV` ID has already been consumed. 124 | pub fn new>(cur: &mut BitCursor) -> Result { 125 | // TODO(ww): This and other structures should probably implement a `FromStream` 126 | // trait instead, for construction. 127 | 128 | // Per the LLVM docs: abbreviation records look like this: 129 | // [DEFINE_ABBREV, VBR5:numabbrevops, abbrevop0, abbrevop1, ...] 130 | // Our surrounding parse context should have consumed the DEFINE_ABBREV 131 | // already, so we start with numabbrevops. 
132 | let num_abbrev_opnds = cur.read_vbr(5)?; 133 | if num_abbrev_opnds < 1 { 134 | return Err(Error::AbbrevParse( 135 | "expected at least one abbrev operand".into(), 136 | )); 137 | } 138 | 139 | log::debug!("expecting {} operands", num_abbrev_opnds); 140 | 141 | // Abbreviated records must have at least one operand. 142 | if num_abbrev_opnds < 1 { 143 | return Err(Error::AbbrevParse( 144 | "expected abbrev operand count to be nonzero".into(), 145 | )); 146 | } 147 | 148 | // Decode each abbreviation operand. 149 | let mut operands = vec![]; 150 | let mut done_early = false; 151 | for idx in 0..num_abbrev_opnds { 152 | // Each operand starts with a single bit that indicates whether 153 | // the operand is "literal" (i.e., a VBR8) or an "encoded" operand. 154 | let operand_kind = cur.read(1)?; 155 | 156 | // If this operand is a literal, then we read it as a VBR8. 157 | if operand_kind == 1 { 158 | let val = cur.read_vbr(8)?; 159 | 160 | // NOTE(ww): This error is exceedingly unlikely (usize would have to be larger 161 | // than u64). But you never know. 162 | operands.push(AbbrevOp::Literal(val)); 163 | 164 | continue; 165 | } 166 | 167 | // Otherwise, we need to suss the encoding representation out of it. 168 | // This is always a 3-bit field (**not** a VBR3), which in turn tells us whether the 169 | // operand encoding includes extra data. 170 | let enc: AbbrevOpEnc = cur.read(3)?.try_into()?; 171 | let opnd = match enc { 172 | AbbrevOpEnc::Fixed => AbbrevOp::Fixed(cur.read_vbr(5)?), 173 | AbbrevOpEnc::Vbr => AbbrevOp::Vbr(cur.read_vbr(5)?), 174 | AbbrevOpEnc::Array => { 175 | // There is only ever one array operand in an abbreviation definition, 176 | // and it is always the second-to-last operand. Anything else is an error. 
177 | if idx != num_abbrev_opnds - 2 { 178 | return Err(Error::AbbrevParse("array operand at invalid index".into())); 179 | } 180 | 181 | // NOTE(ww): We get a little clever here: instead of parsing 182 | // the inner array operand on its own, we steal it here and set 183 | // `done_early` to indicate that we're done with operand parsing. 184 | // This works since array operands are guaranteed to be second-to-last, 185 | // followed only by their element operand encoding. 186 | cur.read(1)?; 187 | let elem_enc: AbbrevOpEnc = cur.read(3)?.try_into()?; 188 | done_early = true; 189 | 190 | let elem = match elem_enc { 191 | AbbrevOpEnc::Fixed => AbbrevOp::Fixed(cur.read_vbr(5)?), 192 | AbbrevOpEnc::Vbr => AbbrevOp::Vbr(cur.read_vbr(5)?), 193 | AbbrevOpEnc::Char6 => AbbrevOp::Char6, 194 | _ => { 195 | // Blobs and arrays cannot themselves be member types. 196 | return Err(Error::AbbrevParse(format!( 197 | "invalid element type for an array: {:?}", 198 | elem_enc 199 | ))); 200 | } 201 | }; 202 | 203 | AbbrevOp::Array(Box::new(elem)) 204 | } 205 | AbbrevOpEnc::Char6 => AbbrevOp::Char6, 206 | AbbrevOpEnc::Blob => { 207 | // Similarly to arrays: there is only ever one blob operand. 208 | // Blobs don't have an element type, so they're always the last operand. 209 | if idx != num_abbrev_opnds - 1 { 210 | return Err(Error::AbbrevParse("blob operand at invalid index".into())); 211 | } 212 | 213 | AbbrevOp::Blob 214 | } 215 | }; 216 | 217 | operands.push(opnd); 218 | 219 | // See above: don't complete the entire operand parsing loop if we've successfully 220 | // stolen the last operand as part of an array. 221 | if done_early { 222 | break; 223 | } 224 | } 225 | 226 | Ok(Self { operands: operands }) 227 | } 228 | 229 | /// Parse an abbreviated record from this stream, returning its fields. 230 | pub fn parse>(&self, cur: &mut BitCursor) -> Result { 231 | Ok(self 232 | .operands 233 | .iter() 234 | .map(|opnd| opnd.parse(cur)) 235 | .collect::, _>>()? 
236 | .into_iter() 237 | .flatten() 238 | .collect()) 239 | } 240 | } 241 | -------------------------------------------------------------------------------- /llvm-bitstream/src/error.rs: -------------------------------------------------------------------------------- 1 | //! Errors for `llvm-bitstream`. 2 | 3 | use llvm_bitcursor::error::Error as CursorError; 4 | use llvm_support::bitcodes::{AbbrevOpEnc, BlockInfoCode}; 5 | use num_enum::TryFromPrimitiveError; 6 | use thiserror::Error as ThisError; 7 | 8 | /// All possible errors that can occur while parsing a bitstream. 9 | #[derive(Debug, ThisError)] 10 | pub enum Error { 11 | /// The underlying bitstream has no more data to parse. 12 | #[error("bitstream has been exhausted")] 13 | Exhausted, 14 | /// The underlying [`BitCursor`](llvm_bitcursor::BitCursor) returned an error 15 | /// that we couldn't specialize. 16 | #[error("underlying bitcursor error")] 17 | Cursor(#[from] CursorError), 18 | /// We couldn't parse the wrapper structure or other data that precedes the actual bitstream. 19 | #[error("couldn't parse bitstream container: {0}")] 20 | BadContainer(String), 21 | /// A record in the `BLOCKINFO` block has a code that we don't know. 22 | /// `BLOCKINFO` must be fully interpreted in order to correctly parse the remainder of 23 | /// the bitstream, so this is a hard error. 24 | #[error("bad record code for BLOCKINFO block")] 25 | BadBlockInfoCode(#[from] TryFromPrimitiveError), 26 | /// An operand in a `DEFINE_ABBREV` definition has a code that we don't know. 27 | /// This indicates either a malformed bitstream or a new operand format that 28 | /// we don't yet support, so it's a hard error. 29 | #[error("bad operand code for DEFINE_ABBREV operand")] 30 | BadAbbrevOpEnc(#[from] TryFromPrimitiveError), 31 | /// A generic error occurred while parsing the bitstream. 32 | #[error("error while parsing stream: {0}")] 33 | StreamParse(String), 34 | /// An error occurred while interpreting a `DEFINE_ABBREV` record. 
35 | #[error("error while parsing abbrev record: {0}")] 36 | AbbrevParse(String), 37 | /// An error occurred while mapping an abbreviated record back to its abbreviation definition. 38 | #[error("unknown abbreviation for ID: {0}")] 39 | BadAbbrev(u64), 40 | /// An error occurred during block scope entrance or exit. 41 | #[error("error while parsing block scope: {0}")] 42 | BadScope(String), 43 | } 44 | -------------------------------------------------------------------------------- /llvm-bitstream/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `llvm-bitstream` is a library for interpreting files in LLVM's 2 | //! [bitstream format](https://llvm.org/docs/BitCodeFormat.html). 3 | 4 | #![deny(rustdoc::broken_intra_doc_links)] 5 | #![deny(missing_docs)] 6 | #![allow(clippy::redundant_field_names)] 7 | #![forbid(unsafe_code)] 8 | 9 | pub mod abbrev; 10 | pub mod error; 11 | pub mod parser; 12 | pub mod record; 13 | 14 | use std::io::{Seek, SeekFrom}; 15 | 16 | use llvm_bitcursor::BitCursor; 17 | use llvm_support::BITCODE_WRAPPER_MAGIC; 18 | 19 | use crate::error::Error; 20 | use crate::parser::StreamEntry; 21 | 22 | /// A representation of the wrapper structure for a bitstream. 23 | #[derive(Debug)] 24 | pub struct BitcodeWrapper { 25 | /// The magic for this wrapper. 26 | pub magic: u32, 27 | /// The version for this wrapper. 28 | pub version: u32, 29 | /// The offset to the actual bitstream. 30 | pub offset: u32, 31 | /// The size of the wrapped bitstream. 32 | pub size: u32, 33 | /// A target-specific value that encodes the CPU type. 34 | pub cpu_type: u32, 35 | } 36 | 37 | /// Represents an overarching bitstream container. 38 | /// 39 | /// This struct is responsible for managing two pieces of state: 40 | /// 1. The application-specific magic that identifies the input 41 | /// 2. 
An underlying [`StreamParser`](crate::parser::StreamParser) that can 42 | /// be advanced to produce individual blocks and records within the bitstream. 43 | #[derive(Debug)] 44 | pub struct Bitstream> { 45 | /// The application-specific magic associated with this bitstream. 46 | pub magic: u32, 47 | parser: parser::StreamParser, 48 | } 49 | 50 | impl> Bitstream { 51 | fn from_cursor(mut cur: BitCursor) -> Result { 52 | // This isn't documented anywhere, but LLVM's BitcodeReader requires 53 | // all inputs to be 4-byte aligned. 54 | // See: `llvm::initStream` in `Bitcode/Reader/BitcodeReader.cpp`. 55 | if cur.byte_len() % 4 != 0 { 56 | return Err(Error::BadContainer("input is not 4-byte aligned".into())); 57 | } 58 | 59 | // Every bitstream starts with an aligned, 32-bit magic field. 60 | // There's absolutely no point in continuing the parse if we fail here. 61 | Ok(Self { 62 | magic: cur.read_exact::().map_err(|e| { 63 | Error::BadContainer(format!( 64 | "bitstream should have begun with magic, but errored: {:?}", 65 | e 66 | )) 67 | })?, 68 | parser: parser::StreamParser::new(cur), 69 | }) 70 | } 71 | 72 | /// Intelligently create a new `Bitstream` from the given source, parsing 73 | /// the bitcode wrapper if necessary. 74 | pub fn from(inner: T) -> Result<(Option, Self), Error> { 75 | log::debug!("beginning intelligent parse"); 76 | let mut cur = BitCursor::new(&inner); 77 | 78 | // Read the magic to determine which parse strategy to use. 79 | let magic = cur.read_exact::()?; 80 | 81 | // The only wrapper we currently know is the bitcode wrapper. 82 | // If our magic doesn't match that, then we try the raw parser. 
83 | if magic == BITCODE_WRAPPER_MAGIC { 84 | log::debug!("input looks like a bitcode wrapper!"); 85 | let (wrapper, parser) = Self::from_wrapped(inner)?; 86 | Ok((Some(wrapper), parser)) 87 | } else { 88 | log::debug!("input is probably a raw bitstream!"); 89 | Ok((None, Self::from_raw(inner)?)) 90 | } 91 | } 92 | 93 | /// Create a new `Bitstream` from the given source. 94 | /// 95 | /// **NOTE**: This function assumes that it's being given a "raw" bitstream, 96 | /// i.e. not one that's been wrapped with another container (such as the 97 | /// bitcode wrapper format). To parse a wrapped bitstream, use the 98 | /// [`from_wrapped`](Bitstream::from_wrapped) API. 99 | pub fn from_raw(inner: T) -> Result { 100 | let cur = BitCursor::new(inner); 101 | Self::from_cursor(cur) 102 | } 103 | 104 | /// Create a new `Bitstream` from the given wrapped source. 105 | /// 106 | /// The source is parsed as if it begins with a 107 | /// [bitcode wrapper](https://llvm.org/docs/BitCodeFormat.html#bitcode-wrapper-format). 108 | /// "Raw" inputs should be parsed with [`from_raw`](Bitstream::from_raw) instead. 109 | pub fn from_wrapped(inner: T) -> Result<(BitcodeWrapper, Self), Error> { 110 | let mut cur = BitCursor::new(&inner); 111 | 112 | let wrapper = BitcodeWrapper { 113 | magic: cur.read_exact::()?, 114 | version: cur.read_exact::()?, 115 | offset: cur.read_exact::()?, 116 | size: cur.read_exact::()?, 117 | cpu_type: cur.read_exact::()?, 118 | }; 119 | 120 | // NOTE(ww): The `new_with_len` API is a little bit silly -- ideally we'd just 121 | // take a slice of `inner` and create a new `BitCursor` with it, but we can't do 122 | // that while preserving the generic `T` bound. 123 | // The manual fixup (+ 20) is another artifact of this -- we keep the wrapper header 124 | // in the new cursor to make the offsets more intelligible, which means that we 125 | // also need to extend the end of our cursor's buffer. 
126 | let actual_length = (wrapper.size as usize) + 20; 127 | let mut cur = BitCursor::new_with_len(inner, actual_length)?; 128 | 129 | cur.seek(SeekFrom::Start(wrapper.offset.into())) 130 | .map_err(|e| { 131 | Error::StreamParse(format!("couldn't seek past bitcode wrapper: {:?}", e)) 132 | })?; 133 | Ok((wrapper, Self::from_cursor(cur)?)) 134 | } 135 | 136 | /// Advance the underlying bitstream parser by one entry. 137 | /// 138 | /// NOTE: Most users should prefer the iterator implementation. 139 | pub fn advance(&mut self) -> Result { 140 | self.parser.advance() 141 | } 142 | } 143 | 144 | impl> Iterator for Bitstream { 145 | type Item = Result; 146 | 147 | fn next(&mut self) -> Option { 148 | match self.advance() { 149 | Ok(entry) => Some(Ok(entry)), 150 | Err(Error::Exhausted) => None, 151 | Err(e) => Some(Err(e)), 152 | } 153 | } 154 | } 155 | 156 | #[cfg(test)] 157 | mod tests {} 158 | -------------------------------------------------------------------------------- /llvm-bitstream/src/parser.rs: -------------------------------------------------------------------------------- 1 | //! Core parsing functionality for `llvm-bitstream`. 2 | 3 | use std::collections::HashMap; 4 | use std::convert::TryInto; 5 | use std::iter; 6 | 7 | use llvm_bitcursor::BitCursor; 8 | use llvm_support::bitcodes::{BlockInfoCode, ReservedAbbrevId, ReservedBlockId}; 9 | use llvm_support::{FIRST_APPLICATION_ABBREV_ID, INITIAL_ABBREV_ID_WIDTH}; 10 | 11 | use crate::abbrev::{self, AbbrevId}; 12 | use crate::error::Error; 13 | use crate::record::{Block, Fields, Record}; 14 | 15 | /// The kinds of entries we can see while advancing through the bitstream. 16 | /// Abbreviations are handled transparently by the parser, and thus are 17 | /// never surfaced as `StreamEntry` values. 18 | #[derive(Debug)] 19 | pub enum StreamEntry { 20 | /// The end of a block scope. 21 | EndBlock, 22 | /// The beginning of a new block scope, for a block with the given ID. 
23 | SubBlock(Block), 24 | /// The beginning of a new record within the current scope, with the given 25 | /// abbreviation ID. 26 | Record(Record), 27 | } 28 | 29 | impl StreamEntry { 30 | /// Consumes this `StreamEntry` and returns its inner [Block](crate::record::Block), if it is 31 | /// in fact a block. 32 | /// 33 | /// If the entry is not a block, returns `None`. 34 | pub fn as_block(self) -> Option { 35 | match self { 36 | StreamEntry::SubBlock(block) => Some(block), 37 | _ => None, 38 | } 39 | } 40 | } 41 | 42 | /// Represents the necessary parse state for a particular scope in the bitstream. 43 | /// 44 | /// Note that a scope does not *necessarily* correspond to a block: every 45 | /// parser begins with an initial non-block scope before the first block is encountered. 46 | #[derive(Debug)] 47 | enum Scope { 48 | Initial, 49 | Block { 50 | abbrev_id_width: u64, 51 | block_id: u64, 52 | blockinfo_block_id: Option, 53 | abbrevs: Vec, 54 | }, 55 | } 56 | 57 | impl Default for Scope { 58 | fn default() -> Self { 59 | Self::Initial 60 | } 61 | } 62 | 63 | impl Scope { 64 | /// Returns a new (block) scope. 65 | pub(self) fn new(abbrev_id_width: u64, block_id: u64) -> Self { 66 | Self::Block { 67 | abbrev_id_width: abbrev_id_width, 68 | block_id: block_id, 69 | blockinfo_block_id: None, 70 | abbrevs: vec![], 71 | } 72 | } 73 | 74 | /// Returns the current width used for abbreviation IDs. 75 | pub(self) fn abbrev_id_width(&self) -> u64 { 76 | match self { 77 | Scope::Initial => INITIAL_ABBREV_ID_WIDTH, 78 | Scope::Block { 79 | abbrev_id_width, .. 80 | } => *abbrev_id_width, 81 | } 82 | } 83 | 84 | /// Extend the current (block) scope's abbreviation definition list with the given 85 | /// iterator. 86 | /// 87 | /// Returns an error if used on a non-block scope.
88 | pub(self) fn extend_abbrevs( 89 | &mut self, 90 | new_abbrevs: impl iter::IntoIterator, 91 | ) -> Result<(), Error> { 92 | match self { 93 | Scope::Initial => Err(Error::BadScope( 94 | "non-block scope cannot reference abbreviations".into(), 95 | )), 96 | Scope::Block { abbrevs, .. } => { 97 | abbrevs.extend(new_abbrevs); 98 | Ok(()) 99 | } 100 | } 101 | } 102 | 103 | /// Return a reference to the abbreviation definition with the given `abbrev_id`. 104 | /// 105 | /// Returns an error if the scope cannot contain abbreviation definitions or does 106 | /// not have one for the given ID. 107 | pub(self) fn get_abbrev(&self, abbrev_id: u64) -> Result<&abbrev::Abbrev, Error> { 108 | match self { 109 | Scope::Initial => Err(Error::BadScope( 110 | "non-block scope cannot contain records".into(), 111 | )), 112 | Scope::Block { abbrevs, .. } => { 113 | let idx = (abbrev_id as usize) - FIRST_APPLICATION_ABBREV_ID; 114 | abbrevs.get(idx).ok_or(Error::BadAbbrev(abbrev_id)) 115 | } 116 | } 117 | } 118 | 119 | /// Returns `true` if this scope corresponds to a `BLOCKINFO` block. 120 | /// 121 | /// This keeps the [`StreamParser`](StreamParser) honest when determining 122 | /// which blocks and/or records to emit entries for. 123 | pub(self) fn is_blockinfo(&self) -> bool { 124 | match self { 125 | Scope::Initial => false, 126 | Scope::Block { block_id, .. } => *block_id == ReservedBlockId::BlockInfo as u64, 127 | } 128 | } 129 | 130 | /// Returns the last block ID recorded with `SETBID` in the `BLOCKINFO` block. 131 | /// 132 | /// This function's return is only sensible in the context of a scope corresponding 133 | /// to `BLOCKINFO`. Use on any other scope constitutes API misuse. 134 | pub(self) fn blockinfo_block_id(&self) -> Option { 135 | match self { 136 | Scope::Initial => None, 137 | Scope::Block { 138 | blockinfo_block_id, .. 139 | } => *blockinfo_block_id, 140 | } 141 | } 142 | 143 | /// Sets the current block ID for the `BLOCKINFO` block's state machine. 
144 | /// 145 | /// Returns an error if requested in a nonsense context, such as on any 146 | /// non-`BLOCKINFO` scope. 147 | pub(self) fn set_blockinfo_block_id(&mut self, new_bid: u64) -> Result<(), Error> { 148 | if let Scope::Block { 149 | blockinfo_block_id, .. 150 | } = self 151 | { 152 | *blockinfo_block_id = Some(new_bid); 153 | return Ok(()); 154 | } 155 | 156 | Err(Error::BadScope( 157 | "can't set BLOCKINFO block ID for non-BLOCKINFO scope".into(), 158 | )) 159 | } 160 | } 161 | 162 | /// A parser for individual bitstream entries. 163 | /// 164 | /// This structure is **not** a general-purpose parser for bitstream inputs: 165 | /// it expects to be given a prepared [`BitCursor`](BitCursor) whose internal 166 | /// state is correct (i.e., has been advanced past the initial input magic). 167 | /// 168 | /// For a general-purpose parser with the correct state management, see 169 | /// [`Bitstream`](crate::Bitstream). 170 | #[derive(Debug)] 171 | pub struct StreamParser> { 172 | cursor: BitCursor, 173 | scopes: Vec, 174 | blockinfo: HashMap>, 175 | } 176 | 177 | impl> StreamParser { 178 | /// Create a new `StreamParser` from the given `BitCursor`. 179 | /// 180 | /// See the struct-level documentation for caveats. 181 | pub(crate) fn new(cur: BitCursor) -> Self { 182 | Self { 183 | cursor: cur, 184 | scopes: vec![Scope::default()], 185 | blockinfo: Default::default(), 186 | } 187 | } 188 | 189 | /// Returns the current scope. 190 | fn scope(&self) -> &Scope { 191 | // Unwrap safety: `scopes` is always created with at least one scope, so 192 | // `last()` cannot fail. 193 | #[allow(clippy::unwrap_used)] 194 | self.scopes.last().unwrap() 195 | } 196 | 197 | /// Returns the current scope as a mutable reference. 198 | fn scope_mut(&mut self) -> &mut Scope { 199 | // Unwrap safety: `scopes` is always created with at least one scope, so 200 | // `last()` cannot fail. 
201 | #[allow(clippy::unwrap_used)] 202 | self.scopes.last_mut().unwrap() 203 | } 204 | 205 | /// Enter a block, creating the appropriate scope state for interpreting 206 | /// records within the block. 207 | /// 208 | /// If this block is a "metadata" one (e.g., `BLOCKINFO`), returns `None`. 209 | fn enter_block(&mut self) -> Result, Error> { 210 | let block_id = self.cursor.read_vbr(8)?; 211 | let new_width = self.cursor.read_vbr(4)?; 212 | 213 | self.cursor.align32(); 214 | 215 | if new_width < 1 { 216 | return Err(Error::BadScope(format!( 217 | "can't enter block: invalid code side: {}", 218 | new_width 219 | ))); 220 | } 221 | 222 | // The encoded block length is measured in 32-bit words, so our 223 | // actual block length in bytes is the word count times the bytesize 224 | // of each word. 225 | let block_len = self.cursor.read(32)? * 4; 226 | log::debug!( 227 | "entered block: ID={}, new abbrev width={}, block_len={} @ bit position {}", 228 | block_id, 229 | new_width, 230 | block_len, 231 | self.cursor.tell_bit() 232 | ); 233 | 234 | // Create a new scope for the block we've just entered. 235 | self.scopes.push(Scope::new(new_width, block_id)); 236 | 237 | // If our blockinfo map contains any abbrevs for the current block ID, add them here. 238 | if let Some(abbrevs) = self.blockinfo.get(&block_id).map(|a| a.to_vec()) { 239 | self.scope_mut().extend_abbrevs(abbrevs)?; 240 | } 241 | 242 | // If we've just entered a BLOCKINFO block, return `None` to avoid 243 | // surfacing parse details to the `advance()` API. 244 | if self.scope().is_blockinfo() { 245 | return Ok(None); 246 | } 247 | 248 | // Otherwise, return an appropriate entry. 249 | Ok(Some(StreamEntry::SubBlock(Block { 250 | block_id: block_id, 251 | len: block_len, 252 | }))) 253 | } 254 | 255 | /// Exit a block, returning the scope to the appropriate state for the parent block. 256 | fn exit_block(&mut self) -> Result, Error> { 257 | // An END_BLOCK record just aligns the stream. 
258 | self.cursor.align32(); 259 | 260 | // NOTE(ww): We never allow an END_BLOCK to pop the last scope, 261 | // since the last scope is synthetic and does not correspond to a real block. 262 | if self.scopes.len() <= 1 { 263 | return Err(Error::BadScope( 264 | "malformed stream: cannot perform END_BLOCK because scope stack is empty".into(), 265 | )); 266 | } 267 | 268 | // Unwrap safety: we check for at least one scope above, so this cannot fail. 269 | #[allow(clippy::unwrap_used)] 270 | let scope = self.scopes.pop().unwrap(); 271 | 272 | log::debug!("exit_block: new active scope is {:?}", self.scope()); 273 | 274 | // If we're exiting a BLOCKINFO, we have nothing to return. 275 | if scope.is_blockinfo() { 276 | return Ok(None); 277 | } 278 | 279 | Ok(Some(StreamEntry::EndBlock)) 280 | } 281 | 282 | /// Interpret a `DEFINE_ABBREV` record. 283 | fn define_abbrev(&mut self) -> Result<(), Error> { 284 | let abbrev = abbrev::Abbrev::new(&mut self.cursor)?; 285 | log::debug!("new abbrev: {:?}", abbrev); 286 | 287 | // `DEFINE_ABBREV` occurs in two contexts: either in a `BLOCKINFO` 288 | // block (where it affects all blocks with block ID defined by the current `SETBID`), 289 | // or in any other block, where it affects only the current scope. 290 | // For the latter case we assume that any `BLOCKINFO`-defined abbrevs have 291 | // already been loaded into the current scope. 292 | if self.scope().is_blockinfo() { 293 | let block_id = self.scope().blockinfo_block_id().ok_or_else(|| { 294 | Error::StreamParse("DEFINE_ABBREV in BLOCKINFO but no preceding SETBID".into()) 295 | })?; 296 | self.blockinfo 297 | .entry(block_id) 298 | .or_insert_with(Vec::new) 299 | .push(abbrev); 300 | } else { 301 | self.scope_mut().extend_abbrevs(iter::once(abbrev))?; 302 | } 303 | 304 | Ok(()) 305 | } 306 | 307 | /// Interpret an `UNABBREV_RECORD` record. 
308 | fn parse_unabbrev(&mut self) -> Result, Error> { 309 | // Sanity check: `UNABBREV_RECORD` can only occur inside a block, 310 | // so the current scope must be a block. 311 | if matches!(self.scope(), Scope::Initial) { 312 | return Err(Error::StreamParse( 313 | "UNABBREV_RECORD outside of any block scope".into(), 314 | )); 315 | } 316 | 317 | // An unabbrev record looks like this: 318 | // [code:VBR6, numops:VBR6, op0:VBR6, op1:VBR6, ...] 319 | // This isn't worth generalizing, so do it all in the body here. 320 | let code: u64 = self.cursor.read_vbr(6)?; 321 | let num_opnds = self.cursor.read_vbr(6)?; 322 | 323 | log::debug!("unabbrev record code={}, num_opnds={}", code, num_opnds); 324 | 325 | let mut fields: Fields = Vec::with_capacity(num_opnds as usize); 326 | for _ in 0..num_opnds { 327 | fields.push(self.cursor.read_vbr(6)?); 328 | } 329 | 330 | let record = Record::from_unabbrev(code, fields); 331 | if self.scope().is_blockinfo() { 332 | let code: BlockInfoCode = record.code.try_into()?; 333 | match code { 334 | BlockInfoCode::SetBid => { 335 | let block_id: u64 = record.fields[0]; 336 | log::debug!("SETBID: BLOCKINFO block ID is now {}", block_id); 337 | self.scope_mut().set_blockinfo_block_id(block_id)?; 338 | } 339 | BlockInfoCode::BlockName => log::debug!("skipping BLOCKNAME code in BLOCKINFO"), 340 | BlockInfoCode::SetRecordName => { 341 | log::debug!("skipping SETRECORDNAME code in BLOCKINFO") 342 | } 343 | o => log::debug!("skipping unsupported record {:?} in BLOCKINFO", o), 344 | }; 345 | return Ok(None); 346 | } 347 | 348 | Ok(Some(StreamEntry::Record(record))) 349 | } 350 | 351 | /// Interpret a record using its corresponding abbreviation definition. 352 | fn parse_with_abbrev(&mut self, abbrev_id: u64) -> Result, Error> { 353 | // To parse a record according to an abbreviation definition, we 354 | // fetch the corresponding abbreviation (failing if we don't have one), 355 | // then use the abbreviation for the parse. 
356 | // TODO(ww): The clone at the end here is a little annoying, but we 357 | // need it to avoid mixing mutable and immutable borrows here. 358 | // There is absolutely a better way to do that. 359 | let abbrev = self.scope().get_abbrev(abbrev_id)?.clone(); 360 | 361 | let mut fields = abbrev.parse(&mut self.cursor)?; 362 | log::debug!("parsed fields: {:?}", fields); 363 | 364 | // Panic safety: every abbrev contains at least one operand, so this cannot panic. 365 | // We also expect the first operand to always be a u64, indicating the record code. 366 | let code: u64 = fields.remove(0); 367 | 368 | if self.scope().is_blockinfo() { 369 | return Ok(None); 370 | } 371 | 372 | Ok(Some(StreamEntry::Record(Record { 373 | abbrev_id: Some(abbrev_id), 374 | code: code, 375 | fields: fields, 376 | }))) 377 | } 378 | 379 | /// Return the next [`StreamEntry`](StreamEntry) in this bitstream. 380 | /// 381 | /// Returns an error on any parsing error, *or* the special 382 | /// [`Error::Exhausted`](Error::Exhausted) if the bitstream has 383 | /// been fully consumed. 384 | pub fn advance(&mut self) -> Result { 385 | if self.cursor.exhausted() { 386 | return Err(Error::Exhausted); 387 | } 388 | 389 | log::debug!( 390 | "advancing, current scope: {:?} @ bit position {}", 391 | self.scope(), 392 | self.cursor.tell_bit() 393 | ); 394 | 395 | // To return the next stream entry, we read the next abbreviation ID using 396 | // our current width. The abbreviation ID we read determines our subsequent 397 | // parse strategy and the kind of entry we return. 398 | let id: abbrev::AbbrevId = self 399 | .cursor 400 | .read(self.scope().abbrev_id_width() as usize)? 
401 | .into(); 402 | log::debug!("next entry ID: {:?}", id); 403 | 404 | // NOTE(ww): The strange `map` + `unwrap_or_else` pattern below is to keep the parser 405 | // generalized without having to return `StreamEntries` that correspond to 406 | // parse details that a stream consumer shouldn't have to be aware of 407 | // (such as abbrev definitions and the BLOCKINFO block). 408 | match id { 409 | AbbrevId::Reserved(ReservedAbbrevId::EndBlock) => { 410 | self.exit_block()?.map(Ok).unwrap_or_else(|| self.advance()) 411 | } 412 | AbbrevId::Reserved(ReservedAbbrevId::EnterSubBlock) => self 413 | .enter_block()? 414 | .map(Ok) 415 | .unwrap_or_else(|| self.advance()), 416 | AbbrevId::Reserved(ReservedAbbrevId::DefineAbbrev) => { 417 | // DEFINE_ABBREV is always a parse detail, so we don't even bother 418 | // trying to return a StreamEntry for it. 419 | self.define_abbrev()?; 420 | self.advance() 421 | } 422 | AbbrevId::Reserved(ReservedAbbrevId::UnabbrevRecord) => self 423 | .parse_unabbrev()? 424 | .map(Ok) 425 | .unwrap_or_else(|| self.advance()), 426 | AbbrevId::Defined(abbrev_id) => self 427 | .parse_with_abbrev(abbrev_id)? 428 | .map(Ok) 429 | .unwrap_or_else(|| self.advance()), 430 | } 431 | } 432 | } 433 | -------------------------------------------------------------------------------- /llvm-bitstream/src/record.rs: -------------------------------------------------------------------------------- 1 | //! Record parsing and handling functionality for `llvm-bitstream`. 2 | 3 | /// A convenience alias for the fields of a record. 4 | pub type Fields = Vec; 5 | 6 | /// Represents a single bitstream record. 7 | #[derive(Clone, Debug)] 8 | pub struct Record { 9 | /// The abbreviation ID that was used to parse this record, or `None` if 10 | /// this record was parsed from an `UNABBREV_RECORD` encoding. 11 | pub abbrev_id: Option, 12 | 13 | /// The code that identifies the record's kind. 14 | pub code: u64, 15 | 16 | /// The fields of this record. 
17 | pub fields: Fields, 18 | } 19 | 20 | impl Record { 21 | /// Creates a new `Record` from the given code and fields. 22 | pub fn from_unabbrev(code: u64, fields: Fields) -> Self { 23 | Self { 24 | abbrev_id: None, 25 | code: code, 26 | fields: fields, 27 | } 28 | } 29 | 30 | /// Creates a new `Record` from the given abbreviation ID, code, and fields. 31 | pub fn from_abbrev(abbrev_id: u64, code: u64, fields: Fields) -> Self { 32 | Self { 33 | abbrev_id: Some(abbrev_id), 34 | code: code, 35 | fields: fields, 36 | } 37 | } 38 | } 39 | 40 | /// Represents a single block scope in the bitstream. 41 | #[derive(Debug)] 42 | pub struct Block { 43 | /// The ID of the block. 44 | pub block_id: u64, 45 | /// The length of the block, in bytes. Blocks are always 32-bit-word-aligned. 46 | pub len: u64, 47 | } 48 | -------------------------------------------------------------------------------- /llvm-constants/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "llvm-constants" 3 | description = "Numeric and enum constants for interacting with LLVM bitstreams and IR" 4 | license = "MIT" 5 | homepage = "https://github.com/woodruffw/mollusc/tree/main/llvm-constants" 6 | repository = "https://github.com/woodruffw/mollusc" 7 | authors = ["William Woodruff "] 8 | readme = "README.md" 9 | keywords = ["llvm", "parsing", "binary", "encoding"] 10 | categories = ["compilers", "encoding", "parsing"] 11 | edition = "2018" 12 | version = "0.0.2" 13 | 14 | [dependencies] 15 | num_enum = "0.5.3" 16 | -------------------------------------------------------------------------------- /llvm-constants/README.md: -------------------------------------------------------------------------------- 1 | llvm-constants 2 | ============== 3 | 4 | [![Crates.io](https://img.shields.io/crates/v/llvm-constants)](https://crates.io/crates/llvm-constants) 5 | 
[![Documentation](https://docs.rs/llvm-constants/badge.svg)](https://docs.rs/llvm-constants) 6 | 7 | Numeric and enum constants for interpreting LLVM bitstreams and IR. 8 | 9 | This library contains only definitions; it is not useful on its own. 10 | -------------------------------------------------------------------------------- /llvm-constants/build.rs: -------------------------------------------------------------------------------- 1 | // This is an ugly little hack to get access to a reasonable "default" 2 | // target triple when loading bitcode inputs that don't mention their triple. 3 | // Based on: https://stackoverflow.com/a/51311222 4 | // Unwrap safety: None. If this fails, the build fails, and that's intended. 5 | #[allow(clippy::unwrap_used)] 6 | fn main() { 7 | println!( 8 | "cargo:rustc-env=TARGET_TRIPLE={}", 9 | std::env::var("TARGET").unwrap() 10 | ); 11 | } 12 | -------------------------------------------------------------------------------- /llvm-constants/src/constants.rs: -------------------------------------------------------------------------------- 1 | //! Numeric constants for `llvm-constants`. 2 | 3 | /// The 32-bit magic that indicates a raw LLVM IR bitcode stream. 4 | pub const LLVM_IR_MAGIC: u32 = 0xdec04342; 5 | 6 | /// The 32-bit magic that indicates a bitcode wrapper, which in 7 | /// turn points to the start of the actual bitcode stream. 8 | pub const BITCODE_WRAPPER_MAGIC: u32 = 0x0b17c0de; 9 | 10 | /// The initial abbreviation ID width in a bitstream. 11 | pub const INITIAL_ABBREV_ID_WIDTH: u64 = 2; 12 | 13 | /// All abbreviation IDs before this are defined by the bitstream format, 14 | /// rather than the stream itself. 15 | pub const FIRST_APPLICATION_ABBREV_ID: usize = 4; 16 | 17 | /// All block IDs before this have their semantics defined by the bitstream 18 | /// format, rather than the stream itself. 19 | pub const FIRST_APPLICATION_BLOCK_ID: u64 = 8; 20 | 21 | /// The lookup alphabet for the Char6 operand encoding. 
22 | pub const CHAR6_ALPHABET: &[u8] = 23 | b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"; 24 | 25 | /// The current toolchain's target triple. 26 | pub const TARGET_TRIPLE: &str = env!("TARGET_TRIPLE"); 27 | 28 | #[cfg(test)] 29 | mod tests { 30 | use super::*; 31 | 32 | #[test] 33 | fn test_target_triple() { 34 | assert!(!TARGET_TRIPLE.is_empty()); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /llvm-constants/src/enums.rs: -------------------------------------------------------------------------------- 1 | //! Enum constants for `llvm-constants`. 2 | 3 | use num_enum::{IntoPrimitive, TryFromPrimitive}; 4 | 5 | use crate::constants::FIRST_APPLICATION_BLOCK_ID; 6 | 7 | /// Block IDs that are reserved by LLVM. 8 | // NOTE(ww): Block IDs 0 through 7 are reserved, but only 0 (BLOCKINFO) 9 | // is actually currently used. 10 | #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, TryFromPrimitive)] 11 | #[repr(u64)] 12 | pub enum ReservedBlockId { 13 | /// The `BLOCKINFO` block ID. 14 | BlockInfo = 0, 15 | /// Reserved; no semantics. 16 | Reserved1 = 1, 17 | /// Reserved; no semantics. 18 | Reserved2 = 2, 19 | /// Reserved; no semantics. 20 | Reserved3 = 3, 21 | /// Reserved; no semantics. 22 | Reserved4 = 4, 23 | /// Reserved; no semantics. 24 | Reserved5 = 5, 25 | /// Reserved; no semantics. 26 | Reserved6 = 6, 27 | /// Reserved; no semantics. 28 | Reserved7 = 7, 29 | } 30 | 31 | /// Block IDs that are used by LLVM for bitcode (i.e., IR bitstreams). 
32 | /// See: `enum BlockIDs` in `Bitcode/LLVMBitCodes.h`. 33 | #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, TryFromPrimitive)] 34 | #[repr(u64)] 35 | pub enum IrBlockId { 36 | /// `MODULE_BLOCK_ID` 37 | Module = FIRST_APPLICATION_BLOCK_ID, 38 | /// `PARAM_ATTR_BLOCK_ID` 39 | ParamAttr, 40 | /// `PARAM_ATTR_GROUP_BLOCK_ID` 41 | ParamAttrGroup, 42 | /// `CONSTANTS_BLOCK_ID` 43 | Constants, 44 | /// `FUNCTION_BLOCK_ID` 45 | Function, 46 | /// `IDENTIFICATION_BLOCK_ID`. 47 | Identification, 48 | /// `VALUE_SYMTAB_BLOCK_ID`. 49 | ValueSymtab, 50 | /// `METADATA_BLOCK_ID`. 51 | Metadata, 52 | /// `METADATA_ATTACHMENT_BLOCK_ID`. 53 | MetadataAttachment, 54 | /// `TYPE_BLOCK_ID_NEW`. 55 | Type, 56 | /// `USELIST_BLOCK_ID`. 57 | Uselist, 58 | /// `MODULE_STRTAB_BLOCK_ID`. 59 | ModuleStrtab, 60 | /// `GLOBAL_VAL_SUMMARY_BLOCK_ID`. 61 | GlobalValSummary, 62 | /// `OPERAND_BUNDLE_TAGS_BLOCK_ID`. 63 | OperandBundleTags, 64 | /// `METADATA_KIND_BLOCK_ID`. 65 | MetadataKind, 66 | /// `STRTAB_BLOCK_ID`. 67 | Strtab, 68 | /// `FULL_LTO_GLOBAL_VAL_SUMMARY_BLOCK_ID`. 69 | FullLtoGlobalValSummary, 70 | /// `SYMTAB_BLOCK_ID`. 71 | Symtab, 72 | /// `SYNC_SCOPE_NAMES_BLOCK_ID`. 73 | SyncScopeNames, 74 | } 75 | 76 | /// Abbreviation IDs that are reserved by LLVM. 77 | #[derive(Clone, Copy, Debug, PartialEq, TryFromPrimitive)] 78 | #[repr(u64)] 79 | pub enum ReservedAbbrevId { 80 | /// Identifies an `END_BLOCK` record. 81 | EndBlock = 0, 82 | /// Identifies an `ENTER_SUBBLOCK` record. 83 | EnterSubBlock, 84 | /// Identifies a `DEFINE_ABBREV` record. 85 | DefineAbbrev, 86 | /// Identifies an `UNABBREV_RECORD` record. 87 | UnabbrevRecord, 88 | } 89 | 90 | /// Codes for each operand encoding type supported by `DEFINE_ABBREV`. 91 | #[derive(Clone, Copy, Debug, PartialEq, TryFromPrimitive)] 92 | #[repr(u64)] 93 | pub enum AbbrevOpEnc { 94 | /// A fixed-length, unsigned operand. 95 | Fixed = 1, 96 | /// A variable-length, unsigned operand. 97 | Vbr, 98 | /// An array of values.
99 | Array, 100 | /// A single 6-bit-encoded character. 101 | Char6, 102 | /// A blob of bytes. 103 | Blob, 104 | } 105 | 106 | /// Calling conventions supported by LLVM. 107 | #[non_exhaustive] 108 | #[derive(Debug, PartialEq, TryFromPrimitive)] 109 | #[repr(u64)] 110 | #[allow(missing_docs)] 111 | pub enum CallingConvention { 112 | C = 0, 113 | Fast = 8, 114 | Cold = 9, 115 | GHC = 10, 116 | HiPE = 11, 117 | WebKitJS = 12, 118 | AnyReg = 13, 119 | PreserveMost = 14, 120 | PreserveAll = 15, 121 | Swift = 16, 122 | CXXFASTTLS = 17, 123 | X86Stdcall = 64, 124 | X86Fastcall = 65, 125 | ARMAPCS = 66, 126 | ARMAAPCS = 67, 127 | ARMAAPCSVFP = 68, 128 | MSP430INTR = 69, 129 | X86ThisCall = 70, 130 | PTXKernel = 71, 131 | PTXDevice = 72, 132 | SPIRFUNC = 75, 133 | SPIRKERNEL = 76, 134 | IntelOCLBI = 77, 135 | X8664SysV = 78, 136 | Win64 = 79, 137 | X86VectorCall = 80, 138 | HHVM = 81, 139 | HHVMC = 82, 140 | X86INTR = 83, 141 | AVRINTR = 84, 142 | AVRSIGNAL = 85, 143 | AVRBUILTIN = 86, 144 | AMDGPUVS = 87, 145 | AMDGPUGS = 88, 146 | AMDGPUPS = 89, 147 | AMDGPUCS = 90, 148 | AMDGPUKERNEL = 91, 149 | X86RegCall = 92, 150 | AMDGPUHS = 93, 151 | MSP430BUILTIN = 94, 152 | AMDGPULS = 95, 153 | AMDGPUES = 96, 154 | } 155 | 156 | /// Codes for each `UNABBREV_RECORD` in `BLOCKINFO`. 157 | #[non_exhaustive] 158 | #[derive(Debug, PartialEq, TryFromPrimitive)] 159 | #[repr(u64)] 160 | pub enum BlockInfoCode { 161 | /// SETBID: `[blockid]` 162 | SetBid = 1, 163 | /// BLOCKNAME: `[...name...]` 164 | BlockName, 165 | /// SETRECORDNAME: `[recordid, ...name...]` 166 | SetRecordName, 167 | } 168 | 169 | /// Codes for each record in `IDENTIFICATION_BLOCK`. 170 | #[non_exhaustive] 171 | #[derive(Debug, PartialEq, TryFromPrimitive)] 172 | #[repr(u64)] 173 | pub enum IdentificationCode { 174 | /// IDENTIFICATION_CODE_STRING: `[...string...]` 175 | ProducerString = 1, 176 | /// IDENTIFICATION_CODE_EPOCH: `[epoch]` 177 | Epoch, 178 | } 179 | 180 | /// Codes for each record in `MODULE_BLOCK`. 
181 | #[non_exhaustive] 182 | #[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)] 183 | #[repr(u64)] 184 | pub enum ModuleCode { 185 | /// MODULE_CODE_VERSION: `[version#]` 186 | Version = 1, 187 | /// MODULE_CODE_TRIPLE: `[...string...]` 188 | Triple = 2, 189 | /// MODULE_CODE_DATALAYOUT: `[...string...]` 190 | DataLayout = 3, 191 | /// MODULE_CODE_ASM: `[...string...]` 192 | Asm = 4, 193 | /// MODULE_CODE_SECTIONNAME: `[...string...]` 194 | SectionName = 5, 195 | /// MODULE_CODE_DEPLIB: `[...string...]` 196 | DepLib = 6, 197 | /// MODULE_CODE_GLOBALVAR: `[...fields...]` 198 | /// See: 199 | GlobalVar = 7, 200 | /// MODULE_CODE_FUNCTION: `[...fields...]` 201 | /// See: 202 | Function = 8, 203 | /// MODULE_CODE_ALIAS_OLD: `[...fields...]` 204 | /// See: 205 | AliasOld = 9, 206 | /// MODULE_CODE_GCNAME: `[...string...]` 207 | GcName = 11, 208 | /// MODULE_CODE_COMDAT 209 | /// v1: `[selection_kind, name]` 210 | /// v2: `[strtab_offset, strtab_size, selection_kind]` 211 | /// Only `v2` is currently supported. 212 | Comdat = 12, 213 | /// MODULE_CODE_VSTOFFSET: `[offset]` 214 | VstOffset = 13, 215 | /// MODULE_CODE_ALIAS: `[...fields...]` 216 | /// Not well documented; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`. 217 | Alias = 14, 218 | /// MODULE_CODE_METADATA_VALUES_UNUSED 219 | /// Not documented at all; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`. 220 | MetadataValuesUnused = 15, 221 | /// MODULE_CODE_SOURCE_FILENAME: `[...string...]` 222 | SourceFilename = 16, 223 | /// MODULE_CODE_HASH: `[5*i32]` 224 | Hash = 17, 225 | /// MODULE_CODE_IFUNC: `[...fields...]` 226 | /// Not well documented; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`. 227 | IFunc = 18, 228 | } 229 | 230 | /// Codes for each record in `TYPE_BLOCK` (i.e., `TYPE_BLOCK_ID_NEW`). 
231 | #[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)] 232 | #[repr(u64)] 233 | pub enum TypeCode { 234 | /// TYPE_CODE_NUMENTRY: `[numentries]` 235 | NumEntry = 1, 236 | /// TYPE_CODE_VOID 237 | Void, 238 | /// TYPE_CODE_FLOAT 239 | Float, 240 | /// TYPE_CODE_DOUBLE 241 | Double, 242 | /// TYPE_CODE_LABEL 243 | Label, 244 | /// TYPE_CODE_OPAQUE 245 | Opaque, 246 | /// TYPE_CODE_INTEGER: `[width]` 247 | Integer, 248 | /// TYPE_CODE_POINTER: `[pointee type]` 249 | Pointer, 250 | /// TYPE_CODE_FUNCTION_OLD: `[vararg, attrid, retty, paramty x N]` 251 | FunctionOld, 252 | /// TYPE_CODE_HALF 253 | Half, 254 | /// TYPE_CODE_ARRAY: `[numelts, eltty]` 255 | Array, 256 | /// TYPE_CODE_VECTOR: `[numelts, eltty]` 257 | Vector, 258 | /// TYPE_CODE_X86_FP80 259 | X86Fp80, 260 | /// TYPE_CODE_FP128 261 | Fp128, 262 | /// TYPE_CODE_PPC_FP128 263 | PpcFp128, 264 | /// TYPE_CODE_METADATA 265 | Metadata, 266 | /// TYPE_CODE_X86_MMX 267 | X86Mmx, 268 | /// TYPE_CODE_STRUCT_ANON: `[ispacked, eltty x N]` 269 | StructAnon, 270 | /// TYPE_CODE_STRUCT_NAME: `[strchr x N]` 271 | StructName, 272 | /// TYPE_CODE_STRUCT_NAMED: `[ispacked, eltty x N]` 273 | StructNamed, 274 | /// TYPE_CODE_FUNCTION: `[vararg, retty, paramty x N]` 275 | Function, 276 | /// TYPE_CODE_TOKEN 277 | Token, 278 | /// TYPE_CODE_BFLOAT 279 | BFloat, 280 | /// TYPE_CODE_X86_AMX 281 | X86Amx, 282 | /// TYPE_CODE_OPAQUE_POINTER: `[addrspace]` 283 | OpaquePointer, 284 | } 285 | 286 | /// Codes for each record in `STRTAB_BLOCK`. 287 | #[non_exhaustive] 288 | #[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)] 289 | #[repr(u64)] 290 | pub enum StrtabCode { 291 | /// STRTAB_BLOB: `[...string...]` 292 | Blob = 1, 293 | } 294 | 295 | /// Codes for each record in `SYMTAB_BLOCK`.
296 | #[non_exhaustive] 297 | #[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)] 298 | #[repr(u64)] 299 | pub enum SymtabCode { 300 | /// SYMTAB_BLOB: `[...data...]` 301 | Blob = 1, 302 | } 303 | 304 | /// Codes for each record in `PARAMATTR_BLOCK` or `PARAMATTR_GROUP_BLOCK`. 305 | // NOTE(ww): For whatever reason, these two blocks share the same enum for 306 | // record codes. 307 | #[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)] 308 | #[repr(u64)] 309 | pub enum AttributeCode { 310 | /// PARAMATTR_CODE_ENTRY_OLD: `[paramidx0, attr0, paramidx1, attr1...]` 311 | EntryOld = 1, 312 | /// PARAMATTR_CODE_ENTRY: `[attrgrp0, attrgrp1, ...]` 313 | Entry, 314 | /// PARAMATTR_GRP_CODE_ENTRY: `[grpid, idx, attr0, attr1, ...]` 315 | GroupCodeEntry, 316 | } 317 | -------------------------------------------------------------------------------- /llvm-constants/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `llvm-constants` contains numeric and enum constants for interacting with LLVM 2 | //! bitstreams and IR.
3 | 4 | #![deny(rustdoc::broken_intra_doc_links)] 5 | #![deny(missing_docs)] 6 | #![allow(clippy::redundant_field_names)] 7 | #![forbid(unsafe_code)] 8 | 9 | mod constants; 10 | mod enums; 11 | 12 | pub use crate::constants::*; 13 | pub use crate::enums::*; 14 | -------------------------------------------------------------------------------- /llvm-mapper/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "llvm-mapper" 3 | description = "A library for mapping the contents of bitstreams into LLVM IR models" 4 | license = "MIT" 5 | homepage = "https://github.com/woodruffw/mollusc/tree/main/llvm-mapper" 6 | repository = "https://github.com/woodruffw/mollusc" 7 | authors = ["William Woodruff "] 8 | readme = "README.md" 9 | keywords = ["llvm", "parsing", "binary", "encoding"] 10 | categories = ["compilers", "encoding", "parsing"] 11 | edition = "2018" 12 | version = "0.0.4" 13 | 14 | [[example]] 15 | name = "unroll-bitstream" 16 | 17 | [dependencies] 18 | indexmap = "2.0" 19 | hashbrown = "0.14" 20 | llvm-bitstream = { version = "0.0.3", path = "../llvm-bitstream" } 21 | llvm-support = { version = "0.0.3", path = "../llvm-support" } 22 | log = "0.4" 23 | num_enum = "0.6" 24 | thiserror = "1.0" 25 | 26 | [dev-dependencies] 27 | anyhow = "1.0" 28 | clap = "4.0" 29 | env_logger = "0.10" 30 | -------------------------------------------------------------------------------- /llvm-mapper/README.md: -------------------------------------------------------------------------------- 1 | llvm-mapper 2 | =========== 3 | 4 | [![Crates.io](https://img.shields.io/crates/v/llvm-mapper)](https://crates.io/crates/llvm-mapper) 5 | [![Documentation](https://docs.rs/llvm-mapper/badge.svg)](https://docs.rs/llvm-mapper) 6 | 7 | A library for mapping the contents of bitstreams into LLVM IR models. 
8 | 9 | This library produces a "full-featured" view of a particular LLVM IR program by mapping 10 | blocks and records in the underlying bitstream into their appropriate LLVM models. 11 | 12 | This library uses [`llvm-bitstream`](https://crates.io/crates/llvm-bitstream) under the hood. 13 | -------------------------------------------------------------------------------- /llvm-mapper/examples/unroll-bitstream.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryFrom; 2 | use std::fs; 3 | 4 | use anyhow::Result; 5 | use clap::{Arg, Command}; 6 | use llvm_bitstream::Bitstream; 7 | use llvm_mapper::unroll::Bitcode; 8 | 9 | fn app() -> Command { 10 | Command::new(env!("CARGO_PKG_NAME")) 11 | .version(env!("CARGO_PKG_VERSION")) 12 | .about(env!("CARGO_PKG_DESCRIPTION")) 13 | .arg( 14 | Arg::new("input") 15 | .help("the bitstream input to unroll") 16 | .index(1) 17 | .required(true), 18 | ) 19 | } 20 | 21 | fn main() -> Result<()> { 22 | env_logger::init(); 23 | let matches = app().get_matches(); 24 | 25 | let input = { 26 | let input = matches.get_one::("input").unwrap(); 27 | fs::read(input)? 28 | }; 29 | 30 | let (_, bitstream) = Bitstream::from(&input)?; 31 | 32 | let unrolled = Bitcode::try_from(bitstream)?; 33 | println!("{:#?}", unrolled); 34 | 35 | Ok(()) 36 | } 37 | -------------------------------------------------------------------------------- /llvm-mapper/src/block/function/basic_block.rs: -------------------------------------------------------------------------------- 1 | //! Models and functionality for basic blocks. 2 | 3 | use super::Instruction; 4 | 5 | /// Represents a basic block. 6 | #[non_exhaustive] 7 | #[derive(Debug, Default)] 8 | pub struct BasicBlock { 9 | /// The instructions of this basic block. 
10 | pub instructions: Vec, 11 | } 12 | -------------------------------------------------------------------------------- /llvm-mapper/src/block/function/instruction.rs: -------------------------------------------------------------------------------- 1 | //! Models and functionality for individual LLVM IR instructions. 2 | 3 | use llvm_support::{BinaryOp, CastOp, UnaryOp}; 4 | 5 | /// Represents an LLVM instruction. 6 | #[derive(Debug)] 7 | pub enum Instruction { 8 | /// Unary instructions. 9 | Unary { 10 | /// The opcode. 11 | op: UnaryOp, 12 | }, 13 | /// Binary instructions. 14 | Binary { 15 | /// The opcode. 16 | op: BinaryOp, 17 | // TODO: lhs, rhs 18 | }, 19 | /// Cast instructions. 20 | Cast { 21 | /// The opcode. 22 | op: CastOp, 23 | // TODO: srcval, srcty, dstty 24 | }, 25 | /// `getelementptr` 26 | GetElementPtr, 27 | /// `extractvalue` 28 | ExtractValue, 29 | /// `insertvalue` 30 | InsertValue, 31 | /// `select` 32 | Select, 33 | /// `extractelement` 34 | ExtractElement, 35 | /// `insertelement` 36 | InsertElement, 37 | /// `shufflevector` 38 | ShuffleVector, 39 | /// `cmp` 40 | Cmp, 41 | /// `ret` 42 | Ret, 43 | /// `br` 44 | Br, 45 | /// `cleanupret` 46 | CleanupRet, 47 | /// `catchret` 48 | CatchRet, 49 | /// `catchswitch` 50 | CatchSwitch, 51 | /// `catchpad` 52 | CatchPad, 53 | /// `switch` 54 | Switch, 55 | /// `indirectbr` 56 | IndirectBr, 57 | /// `invoke` 58 | Invoke, 59 | /// `resume` 60 | Resume, 61 | /// `callbr` 62 | CallBr, 63 | /// `unreachable` 64 | Unreachable, 65 | /// `landingpad` 66 | LandingPad, 67 | /// `alloca` 68 | Alloca, 69 | /// `load` 70 | Load, 71 | /// `store` 72 | Store, 73 | /// `cmpxchg` 74 | CmpXchg, 75 | /// `atomicrmw` 76 | AtomicRMW, 77 | /// `fence` 78 | Fence, 79 | /// `call` 80 | Call, 81 | /// `va_arg` 82 | VAArg, 83 | /// `freeze` 84 | Freeze, 85 | } 86 | -------------------------------------------------------------------------------- /llvm-mapper/src/block/function/mod.rs: 
-------------------------------------------------------------------------------- 1 | //! Functionality for mapping `FUNCTION_BLOCK` blocks. 2 | 3 | mod basic_block; 4 | mod instruction; 5 | 6 | use std::convert::TryFrom; 7 | 8 | pub use basic_block::*; 9 | pub use instruction::*; 10 | use llvm_support::bitcodes::FunctionCode; 11 | use llvm_support::{BinaryOp, BinaryOpError, UnaryOp, UnaryOpError}; 12 | use num_enum::TryFromPrimitiveError; 13 | use thiserror::Error; 14 | 15 | use crate::map::{MapCtx, MapError}; 16 | use crate::unroll::Block; 17 | 18 | /// Errors that can occur when mapping function blocks. 19 | #[derive(Debug, Error)] 20 | pub enum FunctionError { 21 | /// `FUNC_CODE_DECLAREBLOCKS` is either missing or zero. 22 | #[error("function does not declare block count or has zero blocks")] 23 | InvalidBlockCount, 24 | 25 | /// An unknown record code was seen. 26 | #[error("unknown function code")] 27 | UnknownFunctionCode(#[from] TryFromPrimitiveError), 28 | 29 | /// An invalid instruction encoding was seen. 30 | #[error("invalid instruction encoding: {0}")] 31 | BadInst(String), 32 | 33 | /// An invalid unary opcode was seen. 34 | #[error("invalid unary opcode")] 35 | BadUnOp(#[from] UnaryOpError), 36 | 37 | /// An invalid binary opcode was seen. 38 | #[error("invalid binary opcode")] 39 | BadBinOp(#[from] BinaryOpError), 40 | 41 | /// A generic mapping error occurred. 42 | #[error("generic mapping error")] 43 | Map(#[from] MapError), 44 | } 45 | 46 | /// Models the `MODULE_CODE_FUNCTION` record. 47 | #[non_exhaustive] 48 | #[derive(Debug)] 49 | pub struct Function { 50 | /// The basic blocks of this function. 51 | pub blocks: Vec, 52 | } 53 | 54 | impl TryFrom<(&'_ Block, &'_ MapCtx<'_>)> for Function { 55 | type Error = FunctionError; 56 | 57 | fn try_from((block, ctx): (&'_ Block, &'_ MapCtx)) -> Result { 58 | // TODO: Handle each `MODULE_CODE_FUNCTION`'s sub-blocks. 59 | 60 | // A function block should have exactly one DECLAREBLOCKS record. 
61 | let nblocks = { 62 | let declareblocks = block 63 | .records 64 | .exactly_one(FunctionCode::DeclareBlocks) 65 | .map_err(MapError::Inconsistent)?; 66 | 67 | *declareblocks 68 | .fields() 69 | .first() 70 | .ok_or(FunctionError::InvalidBlockCount)? 71 | }; 72 | 73 | // Like the type table, we need a little bit of a state machine to 74 | // construct each function's basic blocks and constituent instructions. 75 | let mut _bbs: Vec = Vec::with_capacity(nblocks as usize); 76 | let mut _bb = BasicBlock::default(); 77 | 78 | for record in block.records.into_iter() { 79 | let code = FunctionCode::try_from(record.code())?; 80 | 81 | macro_rules! unpack_fields { 82 | ($n:literal) => { 83 | <[u64; $n]>::try_from(record.fields()).map_err(|_| { 84 | FunctionError::BadInst(format!( 85 | "bad {code:?}: expected {} fields, got {}", 86 | $n, 87 | record.fields().len() 88 | )) 89 | }) 90 | }; 91 | } 92 | 93 | macro_rules! get_type { 94 | ($ty:ident) => { 95 | // TODO: This is wrong; the lookup here needs to be 96 | // aware of forward references. 97 | ctx.type_table.get($ty).ok_or_else(|| { 98 | FunctionError::BadInst(format!( 99 | "bad {code:?}: invalid type table reference: {}", 100 | $ty 101 | )) 102 | }) 103 | }; 104 | } 105 | 106 | // Function codes fall into a few general categories: 107 | // 108 | // * State machine management (`DECLAREBLOCKS`) 109 | // * Instruction declaration (`INST_*`) 110 | // * Debug state (`DEBUG_LOC`, `DEBUG_LOC_AGAIN`) 111 | // * Operand bundles (`OPERAND_BUNDLE`) 112 | // 113 | // Each category is grouped below, with the smaller ones first. 114 | match code { 115 | // Handled above. 116 | FunctionCode::DeclareBlocks => continue, 117 | 118 | // Operand bundles. 119 | FunctionCode::OperandBundle => unimplemented!(), 120 | 121 | // Debug state. 122 | FunctionCode::DebugLoc => unimplemented!(), 123 | FunctionCode::DebugLocAgain => unimplemented!(), 124 | 125 | // The big one: all instructions. 
126 | FunctionCode::InstBinop => { 127 | // [opval, ty, opval, opcode] 128 | let [_lhs, ty, _rhs, opcode] = unpack_fields!(4)?; 129 | let ty = get_type!(ty)?; 130 | let _opcode = BinaryOp::try_from((opcode, ty))?; 131 | } 132 | FunctionCode::InstCast => { 133 | // [opval, opty, destty, castopc] 134 | let [_opval, _opty, _destty, _castopc] = unpack_fields!(4)?; 135 | } 136 | FunctionCode::InstGepOld => todo!(), 137 | FunctionCode::InstSelect => todo!(), 138 | FunctionCode::InstExtractelt => todo!(), 139 | FunctionCode::InstInsertelt => todo!(), 140 | FunctionCode::InstShufflevec => todo!(), 141 | FunctionCode::InstCmp => todo!(), 142 | FunctionCode::InstRet => todo!(), 143 | FunctionCode::InstBr => todo!(), 144 | FunctionCode::InstSwitch => todo!(), 145 | FunctionCode::InstInvoke => todo!(), 146 | FunctionCode::InstUnreachable => todo!(), 147 | FunctionCode::InstPhi => todo!(), 148 | FunctionCode::InstAlloca => { 149 | // [instty, opty, op, align] 150 | let [_instty, _opty, _op, _align] = unpack_fields!(4)?; 151 | } 152 | FunctionCode::InstLoad => { 153 | // [opty, op, align, vol] 154 | let [_opty, _op, _align, _vol] = unpack_fields!(4)?; 155 | } 156 | FunctionCode::InstVaarg => todo!(), 157 | FunctionCode::InstStoreOld => todo!(), 158 | FunctionCode::InstExtractval => todo!(), 159 | FunctionCode::InstInsertval => todo!(), 160 | FunctionCode::InstCmp2 => todo!(), 161 | FunctionCode::InstVselect => todo!(), 162 | FunctionCode::InstInboundsGepOld => todo!(), 163 | FunctionCode::InstIndirectbr => todo!(), 164 | FunctionCode::InstCall => todo!(), 165 | FunctionCode::InstFence => todo!(), 166 | FunctionCode::InstCmpxchgOld => todo!(), 167 | FunctionCode::InstAtomicrmwOld => todo!(), 168 | FunctionCode::InstResume => todo!(), 169 | FunctionCode::InstLandingpadOld => todo!(), 170 | FunctionCode::InstLoadatomic => todo!(), 171 | FunctionCode::InstStoreatomicOld => todo!(), 172 | FunctionCode::InstGep => todo!(), 173 | FunctionCode::InstStore => { 174 | // [ptrty, ptr, 
valty, val, align, vol] 175 | let [_ptrty, _ptr, _valty, _val] = unpack_fields!(4)?; 176 | 177 | // NOTE: Two more optional fields: align and vol. 178 | } 179 | FunctionCode::InstStoreatomic => todo!(), 180 | FunctionCode::InstCmpxchg => todo!(), 181 | FunctionCode::InstLandingpad => todo!(), 182 | FunctionCode::InstCleanupret => todo!(), 183 | FunctionCode::InstCatchret => todo!(), 184 | FunctionCode::InstCatchpad => todo!(), 185 | FunctionCode::InstCleanuppad => todo!(), 186 | FunctionCode::InstCatchswitch => todo!(), 187 | FunctionCode::InstUnop => { 188 | // [opval, ty, opcode] 189 | let [_opval, ty, opcode] = unpack_fields!(3)?; 190 | let _ty = get_type!(ty)?; 191 | let _opcode = UnaryOp::try_from(opcode)?; 192 | } 193 | FunctionCode::Instcallbr => todo!(), 194 | FunctionCode::InstFreeze => todo!(), 195 | FunctionCode::InstAtomicrmw => todo!(), 196 | } 197 | } 198 | 199 | unimplemented!() 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /llvm-mapper/src/block/identification.rs: -------------------------------------------------------------------------------- 1 | //! Functionality for mapping the `IDENTIFICATION_BLOCK` block. 2 | 3 | use std::convert::TryFrom; 4 | 5 | use llvm_support::bitcodes::IdentificationCode; 6 | use thiserror::Error; 7 | 8 | use crate::map::MapError; 9 | use crate::unroll::Block; 10 | 11 | /// Errors that can occur while mapping the identification block. 12 | #[derive(Debug, Error)] 13 | pub enum IdentificationError { 14 | /// The `IDENTIFICATION_CODE_PRODUCER` couldn't be found. 15 | #[error("identification block has no producer")] 16 | MissingProducer, 17 | 18 | /// The producer string is malformed. 19 | #[error("malformed producer string")] 20 | BadProducer, 21 | 22 | /// The `IDENTIFICATION_CODE_EPOCH` couldn't be found. 23 | #[error("identification block has no epoch")] 24 | MissingEpoch, 25 | 26 | /// A generic mapping error occured. 
27 | #[error("mapping error in string table")] 28 | Map(#[from] MapError), 29 | } 30 | 31 | /// Models the `IDENTIFICATION_BLOCK` block. 32 | #[non_exhaustive] 33 | #[derive(Debug)] 34 | pub struct Identification { 35 | /// The name of the "producer" for this bitcode. 36 | pub producer: String, 37 | /// The compatibility epoch. 38 | pub epoch: u64, 39 | } 40 | 41 | impl TryFrom<&'_ Block> for Identification { 42 | type Error = IdentificationError; 43 | 44 | fn try_from(block: &'_ Block) -> Result { 45 | let producer = block 46 | .records 47 | .one(IdentificationCode::ProducerString as u64) 48 | .ok_or(IdentificationError::MissingProducer) 49 | .and_then(|r| { 50 | r.try_string(0) 51 | .map_err(|_| IdentificationError::BadProducer) 52 | })?; 53 | 54 | let epoch = *block 55 | .records 56 | .one(IdentificationCode::Epoch as u64) 57 | .ok_or(IdentificationError::MissingEpoch) 58 | .and_then(|r| r.fields().first().ok_or(IdentificationError::MissingEpoch))?; 59 | 60 | Ok(Self { producer, epoch }) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /llvm-mapper/src/block/mod.rs: -------------------------------------------------------------------------------- 1 | //! Functionality for mapping individual blocks. 2 | 3 | pub mod attributes; 4 | pub mod function; 5 | pub mod identification; 6 | pub mod module; 7 | pub mod strtab; 8 | pub mod symtab; 9 | pub mod type_table; 10 | pub mod vst; 11 | 12 | use std::convert::TryFrom; 13 | 14 | use llvm_support::bitcodes::{IrBlockId, ReservedBlockId}; 15 | use thiserror::Error; 16 | 17 | pub use self::attributes::*; 18 | pub use self::identification::*; 19 | pub use self::module::*; 20 | pub use self::strtab::*; 21 | pub use self::symtab::*; 22 | pub use self::type_table::*; 23 | 24 | /// Potential errors when mapping a single bitstream block. 25 | #[non_exhaustive] 26 | #[derive(Debug, Error)] 27 | pub enum BlockMapError { 28 | /// We couldn't map the identification block. 
29 | #[error("error while mapping identification block")] 30 | Identification(#[from] IdentificationError), 31 | 32 | /// We couldn't map the module block. 33 | #[error("error while mapping module")] 34 | Module(#[from] ModuleError), 35 | 36 | /// We couldn't map the string table. 37 | #[error("error while mapping string table")] 38 | Strtab(#[from] StrtabError), 39 | 40 | /// We couldn't map the symbol table. 41 | #[error("error while mapping symbol table")] 42 | Symtab(#[from] SymtabError), 43 | } 44 | 45 | /// A holistic model of all possible block IDs, spanning reserved, IR, and unknown IDs. 46 | #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)] 47 | pub enum BlockId { 48 | /// A block ID that's been reserved by LLVM. Reserved IDs are internal, and cannot be mapped here. 49 | Reserved(ReservedBlockId), 50 | /// A block ID used by LLVM IR. 51 | Ir(IrBlockId), 52 | /// An unknown block ID. Unknown IDs cannot be mapped. 53 | Unknown(u64), 54 | } 55 | 56 | impl From for BlockId { 57 | fn from(v: ReservedBlockId) -> Self { 58 | Self::Reserved(v) 59 | } 60 | } 61 | 62 | impl From for BlockId { 63 | fn from(v: IrBlockId) -> Self { 64 | Self::Ir(v) 65 | } 66 | } 67 | 68 | impl From for BlockId { 69 | fn from(value: u64) -> Self { 70 | // Try to turn `value` into each of our known kinds of block IDs, in order 71 | // of precedence. 
72 | ReservedBlockId::try_from(value).map_or_else( 73 | |_| IrBlockId::try_from(value).map_or_else(|_| BlockId::Unknown(value), BlockId::Ir), 74 | BlockId::Reserved, 75 | ) 76 | } 77 | } 78 | 79 | #[cfg(test)] 80 | mod tests { 81 | use super::*; 82 | 83 | #[test] 84 | fn test_blockid_from_u64() { 85 | assert_eq!( 86 | BlockId::from(0), 87 | BlockId::Reserved(ReservedBlockId::BlockInfo) 88 | ); 89 | assert_eq!( 90 | BlockId::from(7), 91 | BlockId::Reserved(ReservedBlockId::Reserved7) 92 | ); 93 | assert_eq!(BlockId::from(8), BlockId::Ir(IrBlockId::Module)); 94 | assert_eq!(BlockId::from(2384629342), BlockId::Unknown(2384629342)); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /llvm-mapper/src/block/module.rs: -------------------------------------------------------------------------------- 1 | //! Functionality for mapping the `MODULE_BLOCK` block. 2 | 3 | use std::convert::TryFrom; 4 | 5 | use llvm_support::bitcodes::{IrBlockId, ModuleCode}; 6 | use llvm_support::TARGET_TRIPLE; 7 | use thiserror::Error; 8 | 9 | use crate::block::attributes::{AttributeError, AttributeGroups, Attributes}; 10 | use crate::block::function::{Function as FunctionBlock, FunctionError as FunctionBlockError}; 11 | use crate::block::type_table::{TypeTable, TypeTableError}; 12 | use crate::block::vst::{ModuleStyleVst, Vst, VstError}; 13 | use crate::map::{CtxMappable, MapError, PartialCtxMappable, PartialMapCtx}; 14 | use crate::record::{ 15 | Alias, AliasError, Comdat, ComdatError, DataLayout, DataLayoutError, 16 | Function as FunctionRecord, FunctionError as FunctionRecordError, 17 | }; 18 | use crate::unroll::Block; 19 | 20 | /// Errors that can occur while mapping a module. 21 | #[derive(Debug, Error)] 22 | pub enum ModuleError { 23 | /// The `MODULE_CODE_VERSION` couldn't be found. 24 | #[error("bitcode module has no version")] 25 | MissingVersion, 26 | 27 | /// An error occured while mapping the datalayout record. 
28 | #[error("invalid datalayout record")] 29 | DataLayoutRecord(#[from] DataLayoutError), 30 | 31 | /// An error occurred while mapping the type table block. 32 | #[error("invalid type table block")] 33 | TypeTableBlock(#[from] TypeTableError), 34 | 35 | /// An error occurred while mapping a value symbol table. 36 | #[error("invalid value symbol table")] 37 | VstBlock(#[from] VstError), 38 | 39 | /// An error occurred while mapping one of the attribute blocks. 40 | #[error("invalid attribute block")] 41 | AttributeBlock(#[from] AttributeError), 42 | 43 | /// An error occurred while mapping a COMDAT record. 44 | #[error("invalid COMDAT record")] 45 | ComdatRecord(#[from] ComdatError), 46 | 47 | /// An error occurred while mapping a function record. 48 | #[error("invalid function record")] 49 | FunctionRecord(#[from] FunctionRecordError), 50 | 51 | /// An error occurred while mapping a function block. 52 | #[error("invalid function block")] 53 | FunctionBlock(#[from] FunctionBlockError), 54 | 55 | /// An error occurred while mapping an alias record. 56 | #[error("invalid alias record")] 57 | Alias(#[from] AliasError), 58 | 59 | /// A generic mapping error occurred. 60 | #[error("generic mapping error")] 61 | Map(#[from] MapError), 62 | } 63 | 64 | /// Models the `MODULE_BLOCK` block. 65 | #[non_exhaustive] 66 | #[derive(Debug)] 67 | pub struct Module { 68 | /// The target triple specification. 69 | pub triple: String, 70 | /// Any assembly block lines in the module. 71 | pub asm: Vec, 72 | /// Any dependent libraries listed in the module. 73 | pub deplibs: Vec, 74 | } 75 | 76 | impl TryFrom<(&'_ Block, &'_ mut PartialMapCtx)> for Module { 77 | type Error = ModuleError; 78 | 79 | fn try_from((block, ctx): (&'_ Block, &'_ mut PartialMapCtx)) -> Result { 80 | // Mapping the module requires us to fill in the `PartialMapCtx` first, 81 | // so we can reify it into a `MapCtx` for subsequent steps. 
82 | ctx.version = Some({ 83 | let version = block 84 | .records 85 | .exactly_one(ModuleCode::Version) 86 | .map_err(MapError::Inconsistent)?; 87 | 88 | *version 89 | .fields() 90 | .first() 91 | .ok_or(ModuleError::MissingVersion)? 92 | }); 93 | 94 | // Each module *should* have a datalayout record, but doesn't necessarily. 95 | if let Some(record) = block 96 | .records 97 | .one_or_none(ModuleCode::DataLayout) 98 | .map_err(MapError::Inconsistent)? 99 | { 100 | ctx.datalayout = DataLayout::try_map(record, ctx)?; 101 | } 102 | 103 | // Build the section table. We'll reference this later. 104 | ctx.section_table = block 105 | .records 106 | .by_code(ModuleCode::SectionName) 107 | .map(|rec| rec.try_string(0)) 108 | .collect::, _>>() 109 | .map_err(MapError::RecordString)?; 110 | 111 | // Build the GC table. We'll reference this later. 112 | ctx.gc_table = block 113 | .records 114 | .by_code(ModuleCode::GcName) 115 | .map(|rec| rec.try_string(0)) 116 | .collect::, _>>() 117 | .map_err(MapError::RecordString)?; 118 | 119 | // Build the type table. 120 | ctx.type_table = Some(TypeTable::try_from( 121 | block 122 | .blocks 123 | .exactly_one(IrBlockId::Type) 124 | .map_err(MapError::Inconsistent)?, 125 | )?); 126 | 127 | // Build the module-level VST. We'll reference this later. 128 | Vst::try_from(( 129 | block 130 | .blocks 131 | .exactly_one(IrBlockId::ValueSymtab) 132 | .map_err(MapError::Inconsistent)?, 133 | ModuleStyleVst {}, 134 | ))?; 135 | 136 | // Collect all attribute groups and individual attribute references. 137 | // The order here is important: attribute groups must be mapped 138 | // and stored in the `PartialMapCtx` before the attribute block itself can be mapped. 139 | // Neither block is mandatory. 140 | if let Some(attribute_groups) = block 141 | .blocks 142 | .one_or_none(IrBlockId::ParamAttrGroup) 143 | .map_err(MapError::Inconsistent)? 144 | .map(AttributeGroups::try_from) 145 | .transpose()? 
146 | { 147 | ctx.attribute_groups = attribute_groups; 148 | } 149 | 150 | if let Some(attributes) = block 151 | .blocks 152 | .one_or_none(IrBlockId::ParamAttr) 153 | .map_err(MapError::Inconsistent)? 154 | .map(|b| Attributes::try_from((b, &*ctx))) 155 | .transpose()? 156 | { 157 | ctx.attributes = attributes; 158 | } 159 | 160 | // Build the list of COMDATs. We'll reference this later. 161 | ctx.comdats = block 162 | .records 163 | .by_code(ModuleCode::Comdat) 164 | .map(|rec| Comdat::try_map(rec, ctx)) 165 | .collect::, _>>()?; 166 | 167 | // After this point, `ctx` refers to a fully reified `MapCtx`. 168 | let ctx = ctx.reify().map_err(MapError::Context)?; 169 | 170 | // Each module *should* have a target triple, but doesn't necessarily. 171 | let triple = match block 172 | .records 173 | .one_or_none(ModuleCode::Triple) 174 | .map_err(MapError::Inconsistent)? 175 | { 176 | Some(record) => record.try_string(0).map_err(MapError::RecordString)?, 177 | None => TARGET_TRIPLE.into(), 178 | }; 179 | 180 | // Each module has zero or exactly one MODULE_CODE_ASM records. 181 | let asm = match block 182 | .records 183 | .one_or_none(ModuleCode::Asm) 184 | .map_err(MapError::Inconsistent)? 185 | { 186 | None => Vec::new(), 187 | Some(record) => record 188 | .try_string(0) 189 | .map_err(MapError::RecordString)? 190 | .split('\n') 191 | .map(String::from) 192 | .collect::>(), 193 | }; 194 | 195 | // Deplib records are deprecated, but we might be parsing an older bitstream. 196 | let deplibs = block 197 | .records 198 | .by_code(ModuleCode::DepLib) 199 | .map(|rec| rec.try_string(0)) 200 | .collect::, _>>() 201 | .map_err(MapError::RecordString)?; 202 | 203 | // Collect the function records and blocks in this module. 
204 | let function_records = block 205 | .records 206 | .by_code(ModuleCode::Function) 207 | .map(|rec| FunctionRecord::try_map(rec, &ctx)) 208 | .collect::, _>>()?; 209 | 210 | let _function_blocks = block 211 | .blocks 212 | .by_id(IrBlockId::Function) 213 | .map(|block| FunctionBlock::try_from((block, &ctx))) 214 | .collect::, _>>()?; 215 | 216 | // TODO: Handle function blocks as well. 217 | log::debug!("functions: {:?}", function_records); 218 | 219 | let aliases = block 220 | .records 221 | .by_code(ModuleCode::Alias) 222 | .map(|rec| Alias::try_map(rec, &ctx)) 223 | .collect::, _>>()?; 224 | 225 | log::debug!("aliases: {:?}", aliases); 226 | 227 | Ok(Self { 228 | triple, 229 | asm, 230 | deplibs, 231 | }) 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /llvm-mapper/src/block/strtab.rs: -------------------------------------------------------------------------------- 1 | //! Functionality for mapping the `STRTAB_BLOCK` block. 2 | 3 | use std::convert::TryFrom; 4 | use std::str::Utf8Error; 5 | 6 | use llvm_support::bitcodes::StrtabCode; 7 | use llvm_support::StrtabRef; 8 | use thiserror::Error; 9 | 10 | use crate::map::MapError; 11 | use crate::record::RecordBlobError; 12 | use crate::unroll::{Block, Record}; 13 | 14 | /// Errors that can occur when accessing a string table. 15 | #[derive(Debug, Error)] 16 | pub enum StrtabError { 17 | /// The string table is missing its blob. 18 | #[error("malformed string table: missing blob")] 19 | MissingBlob, 20 | 21 | /// The blob containing the string table is invalid. 22 | #[error("invalid string table: {0}")] 23 | BadBlob(#[from] RecordBlobError), 24 | 25 | /// The requested range is invalid. 26 | #[error("requested range in string table is invalid")] 27 | BadRange, 28 | 29 | /// The requested string is not UTF-8. 30 | #[error("could not decode range into a UTF-8 string: {0}")] 31 | BadString(#[from] Utf8Error), 32 | 33 | /// A generic mapping error occured. 
34 | #[error("mapping error in string table")] 35 | Map(#[from] MapError), 36 | } 37 | 38 | /// Models the `STRTAB_BLOCK` block. 39 | #[derive(Clone, Debug, Default)] 40 | pub struct Strtab(Vec); 41 | 42 | impl AsRef<[u8]> for Strtab { 43 | fn as_ref(&self) -> &[u8] { 44 | &self.0 45 | } 46 | } 47 | 48 | impl TryFrom<&'_ Block> for Strtab { 49 | type Error = StrtabError; 50 | 51 | fn try_from(block: &'_ Block) -> Result { 52 | // TODO(ww): The docs also claim that there's only one STRTAB_BLOB per STRTAB_BLOCK, 53 | // but at least one person has reported otherwise here: 54 | // https://lists.llvm.org/pipermail/llvm-dev/2020-August/144327.html 55 | // Needs investigation. 56 | let strtab = block 57 | .records 58 | .one(StrtabCode::Blob as u64) 59 | .ok_or(StrtabError::MissingBlob) 60 | .and_then(|r| r.try_blob(0).map_err(StrtabError::from))?; 61 | 62 | Ok(Self(strtab)) 63 | } 64 | } 65 | 66 | impl Strtab { 67 | /// Get a string in the string table by its index and length. 68 | /// 69 | /// Returns `None` on all of the error conditions associated with 70 | /// [`try_get`](Strtab::try_get). 71 | pub fn get(&self, sref: &StrtabRef) -> Option<&str> { 72 | self.try_get(sref).ok() 73 | } 74 | 75 | /// Get a string in the string table by its index and length. 76 | /// 77 | /// Returns an error if the requested span is invalid, or if the extracted 78 | /// slice isn't a valid string. 79 | pub fn try_get(&self, sref: &StrtabRef) -> Result<&str, StrtabError> { 80 | let inner = self.as_ref(); 81 | 82 | if sref.size == 0 || sref.offset >= inner.len() || sref.offset + sref.size > inner.len() { 83 | return Err(StrtabError::BadRange); 84 | } 85 | 86 | Ok(std::str::from_utf8( 87 | &inner[sref.offset..sref.offset + sref.size], 88 | )?) 89 | } 90 | 91 | /// Attempts to read a record's name from the string table. 92 | /// 93 | /// Adheres to the convention that the first two fields in the record are 94 | /// the string's offset and length into the string table. 
95 | /// 96 | /// Panic safety: precondition: `record.fields().len() >= 2` 97 | pub(crate) fn read_name(&self, record: &Record) -> Result<&str, StrtabError> { 98 | let fields = record.fields(); 99 | 100 | self.try_get(&(fields[0], fields[1]).into()) 101 | } 102 | } 103 | 104 | #[cfg(test)] 105 | mod tests { 106 | use super::*; 107 | 108 | fn sref(tup: (usize, usize)) -> StrtabRef { 109 | tup.into() 110 | } 111 | 112 | #[test] 113 | fn test_strtab() { 114 | let inner = "this is a string table"; 115 | let strtab = Strtab(inner.into()); 116 | assert_eq!(strtab.get(&sref((0, 4))).unwrap(), "this"); 117 | assert_eq!(strtab.get(&sref((0, 7))).unwrap(), "this is"); 118 | assert_eq!(strtab.get(&sref((8, 14))).unwrap(), "a string table"); 119 | assert_eq!( 120 | strtab.get(&sref((0, inner.len()))).unwrap(), 121 | "this is a string table" 122 | ); 123 | 124 | assert!(strtab.get(&sref((inner.len(), 0))).is_none()); 125 | assert!(strtab.get(&sref((0, inner.len() + 1))).is_none()); 126 | assert!(strtab.get(&sref((0, 0))).is_none()); 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /llvm-mapper/src/block/symtab.rs: -------------------------------------------------------------------------------- 1 | //! Functionality for mapping the `SYMTAB_BLOCK` block. 2 | 3 | use std::convert::TryFrom; 4 | 5 | use llvm_support::bitcodes::SymtabCode; 6 | use thiserror::Error; 7 | 8 | use crate::map::MapError; 9 | use crate::record::RecordBlobError; 10 | use crate::unroll::Block; 11 | 12 | /// Errors that can occur when accessing a symbol table. 13 | #[derive(Debug, Error)] 14 | pub enum SymtabError { 15 | /// The symbol table is missing its blob. 16 | #[error("malformed symbol table: missing blob")] 17 | MissingBlob, 18 | 19 | /// The blob containing the symbol table is invalid. 20 | #[error("invalid string table: {0}")] 21 | InvalidBlob(#[from] RecordBlobError), 22 | 23 | /// A generic mapping error occured. 
24 | #[error("mapping error in string table")] 25 | Map(#[from] MapError), 26 | } 27 | 28 | /// Models the `SYMTAB_BLOCK` block. 29 | /// 30 | /// For now, this is an opaque block: it's really only used to accelerate LTO, 31 | /// so we don't attempt to expand its fields here. 32 | #[derive(Debug)] 33 | pub struct Symtab(Vec); 34 | 35 | impl AsRef<[u8]> for Symtab { 36 | fn as_ref(&self) -> &[u8] { 37 | &self.0 38 | } 39 | } 40 | 41 | impl TryFrom<&'_ Block> for Symtab { 42 | type Error = SymtabError; 43 | 44 | fn try_from(block: &'_ Block) -> Result { 45 | let symtab = block 46 | .records 47 | .one(SymtabCode::Blob as u64) 48 | .ok_or(SymtabError::MissingBlob) 49 | .and_then(|r| r.try_blob(0).map_err(SymtabError::from))?; 50 | 51 | Ok(Self(symtab)) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /llvm-mapper/src/block/type_table.rs: -------------------------------------------------------------------------------- 1 | //! Functionality for mapping the `TYPE_BLOCK_ID_NEW` block. 2 | 3 | use std::convert::TryFrom; 4 | 5 | use llvm_support::bitcodes::TypeCode; 6 | use llvm_support::{ 7 | AddressSpace, ArrayTypeError, FunctionTypeError, IntegerTypeError, PointerTypeError, 8 | StructTypeError, Type, VectorTypeError, 9 | }; 10 | use num_enum::TryFromPrimitiveError; 11 | use thiserror::Error; 12 | 13 | use crate::map::MapError; 14 | use crate::unroll::Block; 15 | 16 | /// Errors that can occur when mapping the type table. 17 | #[derive(Debug, Error)] 18 | pub enum TypeTableError { 19 | /// The size of the type table is invalid. 20 | #[error("invalid type table size (expected {0} elements, got {1})")] 21 | BadSize(usize, usize), 22 | 23 | /// An invalid type index was requested. 24 | #[error("invalid type table index: {0}")] 25 | BadIndex(usize), 26 | 27 | /// An unknown record code was seen. 
28 | #[error("unknown type code")] 29 | UnknownTypeCode(#[from] TryFromPrimitiveError), 30 | 31 | /// The layout of the table itself (i.e., the record structures) is invalid. 32 | #[error("invalid type table structure (broken records)")] 33 | BadTable, 34 | 35 | /// An invalid integer type was seen. 36 | #[error("invalid integer type")] 37 | InvalidIntegerType(#[from] IntegerTypeError), 38 | 39 | /// An invalid pointer type was seen. 40 | #[error("invalid pointer type")] 41 | InvalidPointerType(#[from] PointerTypeError), 42 | 43 | /// An invalid array type was seen. 44 | #[error("invalid array type")] 45 | InvalidArrayType(#[from] ArrayTypeError), 46 | 47 | /// An invalid vector type was seen. 48 | #[error("invalid vector type")] 49 | InvalidVectorType(#[from] VectorTypeError), 50 | 51 | /// An invalid structure type was seen. 52 | #[error("invalid structure type")] 53 | InvalidStructType(#[from] StructTypeError), 54 | 55 | /// An invalid function type was seen. 56 | #[error("invalid function type")] 57 | InvalidFunctionType(#[from] FunctionTypeError), 58 | 59 | /// A generic mapping error occured. 60 | #[error("mapping error in string table")] 61 | Map(#[from] MapError), 62 | } 63 | 64 | /// A symbolic type reference, which is really just an index into some 65 | /// unspecified type table. 66 | #[derive(Debug)] 67 | pub(crate) struct TypeRef(pub(crate) usize); 68 | 69 | impl From for TypeRef { 70 | fn from(value: usize) -> TypeRef { 71 | TypeRef(value) 72 | } 73 | } 74 | 75 | impl From for TypeRef { 76 | fn from(value: u64) -> TypeRef { 77 | TypeRef::from(value as usize) 78 | } 79 | } 80 | 81 | /// Represents a "partial type," i.e. a type whose subtypes may be symbolic 82 | /// and not fully resolved against a type table. 
83 | #[derive(Debug)] 84 | enum PartialType { 85 | Half, 86 | BFloat, 87 | Float, 88 | Double, 89 | Metadata, 90 | X86Fp80, 91 | Fp128, 92 | PpcFp128, 93 | Void, 94 | Label, 95 | X86Mmx, 96 | X86Amx, 97 | Token, 98 | Integer(PartialIntegerType), 99 | Function(PartialFunctionType), 100 | Pointer(PartialPointerType), 101 | OpaquePointer(AddressSpace), 102 | Struct(PartialStructType), 103 | Array(PartialArrayType), 104 | FixedVector(PartialVectorType), 105 | ScalableVector(PartialVectorType), 106 | } 107 | 108 | impl PartialType { 109 | /// Fallibly convert this `PartialType` into a `Type`, using the given 110 | /// `PartialTypeTable` as a reference. 111 | fn resolve(&self, partials: &PartialTypeTable) -> Result { 112 | match self { 113 | PartialType::Half => Ok(Type::Half), 114 | PartialType::BFloat => Ok(Type::BFloat), 115 | PartialType::Float => Ok(Type::Float), 116 | PartialType::Double => Ok(Type::Double), 117 | PartialType::Metadata => Ok(Type::Metadata), 118 | PartialType::X86Fp80 => Ok(Type::X86Fp80), 119 | PartialType::Fp128 => Ok(Type::Fp128), 120 | PartialType::PpcFp128 => Ok(Type::PpcFp128), 121 | PartialType::Void => Ok(Type::Void), 122 | PartialType::Label => Ok(Type::Label), 123 | PartialType::X86Mmx => Ok(Type::X86Mmx), 124 | PartialType::X86Amx => Ok(Type::X86Amx), 125 | PartialType::Token => Ok(Type::Token), 126 | PartialType::Integer(ity) => Ok(Type::new_integer(ity.bit_width)?), 127 | PartialType::Function(fty) => { 128 | let return_type = partials.resolve(&fty.return_type)?; 129 | let param_types = fty 130 | .param_types 131 | .iter() 132 | .map(|ty_ref| partials.resolve(ty_ref)) 133 | .collect::, _>>()?; 134 | 135 | Ok(Type::new_function(return_type, param_types, fty.is_vararg)?) 136 | } 137 | PartialType::Pointer(pty) => { 138 | let pointee = partials.resolve(&pty.pointee)?; 139 | 140 | Ok(Type::new_pointer(pointee, pty.address_space)?) 
141 | } 142 | PartialType::OpaquePointer(oty) => Ok(Type::OpaquePointer(*oty)), 143 | PartialType::Struct(sty) => { 144 | let field_types = sty 145 | .field_types 146 | .iter() 147 | .map(|fty| partials.resolve(fty)) 148 | .collect::, _>>()?; 149 | 150 | Ok(Type::new_struct( 151 | sty.name.clone(), 152 | field_types, 153 | sty.is_packed, 154 | )?) 155 | } 156 | PartialType::Array(aty) => { 157 | let element_type = partials.resolve(&aty.element_type)?; 158 | 159 | Ok(Type::new_array(aty.num_elements, element_type)?) 160 | } 161 | PartialType::FixedVector(vty) => { 162 | log::debug!("vty: {:?}", vty); 163 | 164 | let element_type = partials.resolve(&vty.element_type)?; 165 | log::debug!("element_type: {:?}", partials.get(&vty.element_type)); 166 | 167 | Ok(Type::new_vector(vty.num_elements, element_type)?) 168 | } 169 | PartialType::ScalableVector(vty) => { 170 | let element_type = partials.resolve(&vty.element_type)?; 171 | 172 | Ok(Type::new_scalable_vector(vty.num_elements, element_type)?) 173 | } 174 | } 175 | } 176 | } 177 | 178 | #[derive(Debug)] 179 | struct PartialIntegerType { 180 | bit_width: u32, 181 | } 182 | 183 | /// Represents an (unresolved) function type. 184 | #[derive(Debug)] 185 | struct PartialFunctionType { 186 | return_type: TypeRef, 187 | param_types: Vec, 188 | is_vararg: bool, 189 | } 190 | 191 | /// Represents an (unresolved) pointer type. 192 | #[derive(Debug)] 193 | struct PartialPointerType { 194 | pointee: TypeRef, 195 | address_space: AddressSpace, 196 | } 197 | 198 | #[derive(Debug)] 199 | struct PartialStructType { 200 | name: Option, 201 | field_types: Vec, 202 | is_packed: bool, 203 | } 204 | 205 | #[derive(Debug)] 206 | struct PartialArrayType { 207 | num_elements: u64, 208 | element_type: TypeRef, 209 | } 210 | 211 | #[derive(Debug)] 212 | struct PartialVectorType { 213 | num_elements: u64, 214 | element_type: TypeRef, 215 | } 216 | 217 | /// Represents a partial type table. 
218 | /// 219 | /// Every partial type table starts out empty (but with an expected ultimate size), 220 | /// and is incrementally updated as records within the type block are visited. 221 | #[derive(Debug)] 222 | struct PartialTypeTable { 223 | numentries: usize, 224 | inner: Vec, 225 | } 226 | 227 | impl PartialTypeTable { 228 | fn new(numentries: usize) -> Self { 229 | Self { 230 | numentries: numentries, 231 | inner: Vec::with_capacity(numentries), 232 | } 233 | } 234 | 235 | fn add(&mut self, ty: PartialType) { 236 | self.inner.push(ty) 237 | } 238 | 239 | fn last_mut(&mut self) -> Option<&mut PartialType> { 240 | self.inner.last_mut() 241 | } 242 | 243 | /// Fallibly convert a `TypeRef` into its `PartialType` in this partial type table. 244 | fn get(&self, ty_ref: &TypeRef) -> Result<&PartialType, TypeTableError> { 245 | self.inner 246 | .get(ty_ref.0) 247 | .ok_or(TypeTableError::BadIndex(ty_ref.0)) 248 | } 249 | 250 | /// Fallibly converts the given `TypeRef` into a fully owned `Type`. 251 | fn resolve(&self, ty_ref: &TypeRef) -> Result { 252 | // `TypeRef` resolution happens in two steps: we grab the corresponding 253 | // `PartialType`, and then resolve its subtypes. 254 | let pty = self.get(ty_ref)?; 255 | 256 | log::debug!("type ref {} resolves to {:?}", ty_ref.0, pty); 257 | 258 | pty.resolve(self) 259 | } 260 | 261 | /// Fallibly converts this `PartialTypeTable` into a `TypeTable`. 262 | fn reify(self) -> Result { 263 | if self.inner.len() != self.numentries { 264 | return Err(TypeTableError::BadSize(self.numentries, self.inner.len())); 265 | } 266 | 267 | // Walk the partial type table, resolving each partial type 268 | // into a fully owned `Type`. 269 | let types = self 270 | .inner 271 | .iter() 272 | .map(|pty| pty.resolve(&self)) 273 | .collect::, _>>()?; 274 | 275 | Ok(TypeTable(types)) 276 | } 277 | } 278 | 279 | /// Models the `TYPE_BLOCK_ID_NEW` block. 
280 | #[derive(Clone, Debug)] 281 | pub struct TypeTable(Vec); 282 | 283 | impl TypeTable { 284 | pub(crate) fn get(&self, ty_ref: impl Into) -> Option<&Type> { 285 | let ty_ref = ty_ref.into(); 286 | self.0.get(ty_ref.0) 287 | } 288 | } 289 | 290 | impl TryFrom<&'_ Block> for TypeTable { 291 | type Error = TypeTableError; 292 | 293 | fn try_from(block: &Block) -> Result { 294 | // Figure out how many type entries we have, and reserve the space for them up-front. 295 | let numentries = *block 296 | .records 297 | .one(TypeCode::NumEntry) 298 | .ok_or(TypeTableError::BadTable) 299 | .and_then(|r| r.fields().first().ok_or(TypeTableError::BadTable))? 300 | as usize; 301 | 302 | // To map the type table, we perform two passes: 303 | // 1. We iterate over all type records, building an initial table of "partial" 304 | // types that contain only symbolic references to other types. 305 | // This pass allows us to fully resolve e.g. forward-declared types 306 | // without having to perform a more expensive visiting pass later. 307 | // 2. We iterate over all of the partial types, resolving them into 308 | // fully owned and expanded `Type`s. 309 | let mut partial_types = PartialTypeTable::new(numentries); 310 | let mut last_type_name = String::new(); 311 | for record in &block.records { 312 | // A convenience macro for turning a type record field access into an error on failure. 313 | macro_rules! type_field { 314 | ($n:literal) => { 315 | record 316 | .fields() 317 | .get($n) 318 | .copied() 319 | .ok_or(TypeTableError::BadTable)? 320 | }; 321 | } 322 | 323 | let code = TypeCode::try_from(record.code()).map_err(TypeTableError::from)?; 324 | 325 | match code { 326 | // Already visited; nothing to do. 
327 | TypeCode::NumEntry => continue, 328 | TypeCode::Void => partial_types.add(PartialType::Void), 329 | TypeCode::Half => partial_types.add(PartialType::Half), 330 | TypeCode::BFloat => partial_types.add(PartialType::BFloat), 331 | TypeCode::Float => partial_types.add(PartialType::Float), 332 | TypeCode::Double => partial_types.add(PartialType::Double), 333 | TypeCode::Label => partial_types.add(PartialType::Label), 334 | TypeCode::Opaque => { 335 | // NOTE(ww): LLVM's BitcodeReader checks that the 336 | // TYPE_CODE_OPAQUE record has exactly one field, but 337 | // doesn't seem to use that field for anything. 338 | // Not sure what's up with that. 339 | 340 | if last_type_name.is_empty() { 341 | return Err(MapError::Invalid( 342 | "opaque type but no preceding type name".into(), 343 | ) 344 | .into()); 345 | } 346 | 347 | // Our opaque type might be forward-referenced. If so, we 348 | // fill in the previous type's name. Otherwise, we create 349 | // a new structure type with no body. 
350 | if let Some(PartialType::Struct(s)) = partial_types.last_mut() { 351 | if s.name.is_some() { 352 | return Err(MapError::Invalid( 353 | "forward-declared opaque type already has name".into(), 354 | ) 355 | .into()); 356 | } 357 | 358 | s.name = Some(last_type_name.clone()); 359 | } else { 360 | partial_types.add(PartialType::Struct(PartialStructType { 361 | name: Some(last_type_name.clone()), 362 | field_types: vec![], 363 | is_packed: false, 364 | })); 365 | } 366 | 367 | last_type_name.clear(); 368 | } 369 | TypeCode::Integer => { 370 | let bit_width = type_field!(0) as u32; 371 | partial_types.add(PartialType::Integer(PartialIntegerType { bit_width })); 372 | } 373 | TypeCode::Pointer => { 374 | let pointee = TypeRef(type_field!(0) as usize); 375 | 376 | let address_space = AddressSpace::try_from(type_field!(1)).map_err(|e| { 377 | MapError::Invalid(format!("bad address space for pointer type: {:?}", e)) 378 | })?; 379 | 380 | partial_types.add(PartialType::Pointer(PartialPointerType { 381 | pointee, 382 | address_space, 383 | })); 384 | } 385 | TypeCode::FunctionOld => { 386 | // TODO(ww): These only show up in older bitcode, so don't bother with them for now. 387 | return Err(MapError::Unsupported( 388 | "unsupported: old function type codes; please implement!".into(), 389 | ) 390 | .into()); 391 | } 392 | TypeCode::Array => { 393 | let num_elements = type_field!(0); 394 | 395 | let element_type = TypeRef(type_field!(1) as usize); 396 | 397 | partial_types.add(PartialType::Array(PartialArrayType { 398 | num_elements, 399 | element_type, 400 | })); 401 | } 402 | TypeCode::Vector => { 403 | let num_elements = type_field!(0); 404 | 405 | let element_type = TypeRef(type_field!(1) as usize); 406 | 407 | // A vector type is either fixed or scalable, depending on the 408 | // third field (which can also be absent, indicating fixed). 
409 | let scalable = record.fields().get(2).map_or_else(|| false, |f| *f > 0); 410 | let new_type = match scalable { 411 | true => PartialType::ScalableVector(PartialVectorType { 412 | num_elements, 413 | element_type, 414 | }), 415 | false => PartialType::FixedVector(PartialVectorType { 416 | num_elements, 417 | element_type, 418 | }), 419 | }; 420 | 421 | partial_types.add(new_type); 422 | } 423 | TypeCode::X86Fp80 => partial_types.add(PartialType::X86Fp80), 424 | TypeCode::Fp128 => partial_types.add(PartialType::Fp128), 425 | TypeCode::PpcFp128 => partial_types.add(PartialType::PpcFp128), 426 | TypeCode::Metadata => partial_types.add(PartialType::Metadata), 427 | TypeCode::X86Mmx => partial_types.add(PartialType::X86Mmx), 428 | TypeCode::StructAnon => { 429 | let is_packed = type_field!(0) > 0; 430 | 431 | let field_types = record.fields()[1..] 432 | .iter() 433 | .map(|f| TypeRef(*f as usize)) 434 | .collect::>(); 435 | 436 | partial_types.add(PartialType::Struct(PartialStructType { 437 | name: None, 438 | field_types, 439 | is_packed, 440 | })); 441 | } 442 | TypeCode::StructName => { 443 | // A `TYPE_CODE_STRUCT_NAME` is not a type in its own right; it merely 444 | // supplies the name for a future type record. 445 | last_type_name.push_str(&record.try_string(0).map_err(MapError::RecordString)?); 446 | continue; 447 | } 448 | TypeCode::StructNamed => { 449 | // TODO(ww): Should probably be deduped with StructAnon above, 450 | // since they're 90% identical. 451 | 452 | let is_packed = type_field!(0) > 0; 453 | 454 | let field_types = record.fields()[1..] 455 | .iter() 456 | .map(|f| TypeRef(*f as usize)) 457 | .collect::>(); 458 | 459 | // Like with opaque types, we might be forward-referenced here. 460 | // If so, we update our pre-existing structure type with its 461 | // correct name and fields. 
462 | if let Some(PartialType::Struct(s)) = partial_types.last_mut() { 463 | if s.name.is_some() || !s.field_types.is_empty() { 464 | return Err(MapError::Invalid( 465 | "forward-declared struct type already has name and/or type fields" 466 | .into(), 467 | ) 468 | .into()); 469 | } 470 | 471 | s.name = Some(last_type_name.clone()); 472 | s.field_types = field_types; 473 | } else { 474 | partial_types.add(PartialType::Struct(PartialStructType { 475 | name: Some(last_type_name.clone()), 476 | field_types, 477 | is_packed, 478 | })); 479 | } 480 | 481 | last_type_name.clear(); 482 | } 483 | TypeCode::Function => { 484 | let is_vararg = type_field!(0) > 0; 485 | let return_type = TypeRef(type_field!(1) as usize); 486 | 487 | let param_types = record.fields()[2..] 488 | .iter() 489 | .map(|f| TypeRef(*f as usize)) 490 | .collect::>(); 491 | 492 | partial_types.add(PartialType::Function(PartialFunctionType { 493 | return_type, 494 | param_types, 495 | is_vararg, 496 | })); 497 | } 498 | TypeCode::Token => partial_types.add(PartialType::Token), 499 | TypeCode::X86Amx => partial_types.add(PartialType::X86Amx), 500 | TypeCode::OpaquePointer => { 501 | let address_space = AddressSpace::try_from(type_field!(0)).map_err(|e| { 502 | MapError::Invalid(format!("bad address space in type: {:?}", e)) 503 | })?; 504 | 505 | partial_types.add(PartialType::OpaquePointer(address_space)) 506 | } 507 | } 508 | } 509 | 510 | partial_types.reify() 511 | } 512 | } 513 | -------------------------------------------------------------------------------- /llvm-mapper/src/block/vst.rs: -------------------------------------------------------------------------------- 1 | //! Functionality for mapping `VALUE_SYMTAB_BLOCK_ID` blocks. 2 | //! 3 | //! These blocks contain "value symbol tables," which are effectively 4 | //! mappings between strings and value models (in LLVM, `llvm::Value`s). 
5 | 6 | use std::convert::TryFrom; 7 | 8 | use thiserror::Error; 9 | 10 | use crate::unroll::Block; 11 | 12 | /// Errors that can occur when mapping or accessing a VST. 13 | #[derive(Debug, Error)] 14 | pub enum VstError {} 15 | 16 | /// A ZST representing a "module-style" VST. 17 | /// 18 | /// This is a ZST instead of an enum variant to make dispatch on the "style" of VST 19 | /// being parsed slightly more static and readable. 20 | pub struct ModuleStyleVst; 21 | 22 | /// A ZST reprsenting a "function-style" VST. 23 | /// 24 | /// See [`ModuleStyleVst`] for the design justification here. 25 | pub struct FunctionStyleVst; 26 | 27 | /// Represents a single value symbol table ("VST") in a bitcode module. 28 | pub struct Vst {} 29 | 30 | impl TryFrom<(&'_ Block, ModuleStyleVst)> for Vst { 31 | type Error = VstError; 32 | 33 | fn try_from((_block, _): (&'_ Block, ModuleStyleVst)) -> Result { 34 | Ok(Vst {}) 35 | // unimplemented!(); 36 | } 37 | } 38 | 39 | impl TryFrom<(&'_ Block, FunctionStyleVst)> for Vst { 40 | type Error = VstError; 41 | 42 | fn try_from((_block, _): (&'_ Block, FunctionStyleVst)) -> Result { 43 | Ok(Vst {}) 44 | // unimplemented!(); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /llvm-mapper/src/error.rs: -------------------------------------------------------------------------------- 1 | //! Errors for `llvm-mapper`. 2 | 3 | use llvm_bitstream::error::Error as BitstreamError; 4 | use thiserror::Error as ThisError; 5 | 6 | use crate::block::BlockMapError; 7 | 8 | /// All possible errors that can occur while mapping a bitstream. 9 | /// 10 | /// The error variants here are deeply nested. 11 | #[non_exhaustive] 12 | #[derive(Debug, ThisError)] 13 | pub enum Error { 14 | /// We encountered an error while performing the underlying bitstream parse. 
15 | #[error("error while parsing the bitstream")] 16 | Parse(#[from] BitstreamError), 17 | 18 | /// We couldn't unroll the stream because of a structural error. 19 | #[error("error while unrolling the bitstream: {0}")] 20 | Unroll(String), 21 | 22 | /// We couldn't perform the bitstream map. 23 | #[error("error while mapping the bitsteam")] 24 | Map(#[from] BlockMapError), 25 | } 26 | -------------------------------------------------------------------------------- /llvm-mapper/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `llvm-mapper` is a library for mapping entities in LLVM's bitstream 2 | //! format into higher-level IR and bitcode metadata models. 3 | 4 | #![deny(rustdoc::broken_intra_doc_links)] 5 | #![deny(missing_docs)] 6 | #![allow(clippy::redundant_field_names)] 7 | #![forbid(unsafe_code)] 8 | 9 | pub mod block; 10 | pub mod error; 11 | pub mod map; 12 | pub mod record; 13 | pub mod unroll; 14 | 15 | pub use unroll::Bitcode; 16 | -------------------------------------------------------------------------------- /llvm-mapper/src/map.rs: -------------------------------------------------------------------------------- 1 | //! Traits for mapping bitstream types to models. 2 | 3 | use thiserror::Error; 4 | 5 | use crate::block::Strtab; 6 | use crate::block::{AttributeGroups, Attributes, TypeTable}; 7 | use crate::record::{Comdat, DataLayout, RecordStringError}; 8 | use crate::unroll::ConsistencyError; 9 | 10 | /// Generic errors that can occur when mapping. 11 | #[derive(Debug, Error)] 12 | pub enum MapError { 13 | /// We couldn't map a block, for any number of reasons. 14 | #[error("error while mapping block: {0}")] 15 | BadBlockMap(String), 16 | 17 | /// We encountered an inconsistent block or record state. 18 | #[error("inconsistent block or record state")] 19 | Inconsistent(#[from] ConsistencyError), 20 | 21 | /// We encountered an unsupported feature or layout. 
/// Errors that can occur when accessing a [`MapCtx`](MapCtx).
///
/// Each variant names a piece of module state that a mapping step needed
/// but that had not been recorded in the context.
#[derive(Debug, Error)]
pub enum MapCtxError {
    /// The version field is needed, but unavailable.
    #[error("mapping context requires a version for disambiguation, but none is available")]
    NoVersion,

    /// The type table is needed, but unavailable.
    #[error("mapping context requires types, but none are available")]
    NoTypeTable,
}
57 | #[non_exhaustive] 58 | #[derive(Debug, Default)] 59 | pub(crate) struct PartialMapCtx { 60 | pub(crate) version: Option, 61 | pub(crate) datalayout: DataLayout, 62 | pub(crate) section_table: Vec, 63 | pub(crate) gc_table: Vec, 64 | pub(crate) strtab: Strtab, 65 | pub(crate) attribute_groups: AttributeGroups, 66 | pub(crate) attributes: Attributes, 67 | pub(crate) type_table: Option, 68 | pub(crate) comdats: Vec, 69 | } 70 | 71 | impl PartialMapCtx { 72 | pub(crate) fn reify(&self) -> Result { 73 | log::debug!("reifying {self:?}"); 74 | Ok(MapCtx { 75 | version: self.version.ok_or(MapCtxError::NoVersion)?, 76 | datalayout: &self.datalayout, 77 | section_table: &self.section_table, 78 | gc_table: &self.gc_table, 79 | strtab: &self.strtab, 80 | attribute_groups: &self.attribute_groups, 81 | attributes: &self.attributes, 82 | type_table: self.type_table.as_ref().ok_or(MapCtxError::NoTypeTable)?, 83 | comdats: &self.comdats, 84 | }) 85 | } 86 | 87 | /// A helper function for whether or not to use an associated string table for string lookups. 88 | /// 89 | /// This corresponds to `MODULE_CODE_VERSION`s of 2 and higher. 90 | pub fn use_strtab(&self) -> Result { 91 | self.version.map(|v| v >= 2).ok_or(MapCtxError::NoVersion) 92 | } 93 | 94 | /// Returns the attribute groups stored in this context, or an error if not available. 95 | pub fn attribute_groups(&self) -> &AttributeGroups { 96 | &self.attribute_groups 97 | } 98 | } 99 | 100 | /// A handle for various bits of state that are necessary for correct block 101 | /// and record mapping in the context of a particular IR module. 102 | /// 103 | /// Block and record mapping operations are expected to update the supplied context, 104 | /// as appropriate. 105 | #[non_exhaustive] 106 | #[derive(Debug)] 107 | pub struct MapCtx<'ctx> { 108 | /// The `MODULE_CODE_VERSION` for the IR module being mapped. 109 | pub version: u64, 110 | 111 | /// The datalayout specification. 
112 | pub datalayout: &'ctx DataLayout, 113 | 114 | /// The section table. 115 | pub section_table: &'ctx [String], 116 | 117 | /// The GC table. 118 | pub gc_table: &'ctx [String], 119 | 120 | /// The string table. 121 | pub strtab: &'ctx Strtab, 122 | 123 | /// Any attribute groups. 124 | pub attribute_groups: &'ctx AttributeGroups, 125 | 126 | /// Any raw attributes. 127 | pub attributes: &'ctx Attributes, 128 | 129 | /// The type table. 130 | pub type_table: &'ctx TypeTable, 131 | 132 | /// The COMDAT list. 133 | pub comdats: &'ctx [Comdat], 134 | // TODO(ww): Maybe symtab and identification in here? 135 | } 136 | 137 | impl MapCtx<'_> { 138 | /// A helper function for whether or not to use an associated string table for string lookups. 139 | /// 140 | /// This corresponds to `MODULE_CODE_VERSION`s of 2 and higher. 141 | pub fn use_strtab(&self) -> bool { 142 | self.version >= 2 143 | } 144 | 145 | /// A helper function for determining how operands are encoded. 146 | /// 147 | /// This corresponds to `MODULE_CODE_VERSION`s of 1 and higher. 148 | pub fn use_relative_ids(&self) -> bool { 149 | self.version >= 1 150 | } 151 | } 152 | 153 | /// A trait for mapping some raw `T` into a model type. 154 | /// 155 | /// This trait allows an implementer to modify the given [`PartialMapCtx`](PartialMapCtx), 156 | /// filling it in with state before it's reified into a "real" [`MapCtx`](MapCtx). 157 | /// 158 | /// This two-stage process is designed to limit the number of invalid 159 | /// states that a `MapCtx` can be in, and to enable better lifetimes 160 | /// later in the IR module mapping process. 161 | pub(crate) trait PartialCtxMappable: Sized { 162 | type Error; 163 | 164 | /// Attempt to map `T` into `Self` using the given [`PartialMapCtx`](PartialMapCtx). 165 | fn try_map(raw: &T, ctx: &mut PartialMapCtx) -> Result; 166 | } 167 | 168 | /// A trait for mapping some raw `T` into a model type. 
169 | /// 170 | /// Implementing this trait is *almost* always preferable over 171 | /// [`PartialCtxMappable`](PartialCtxMappable) -- the former should really only 172 | /// be used when a mapping implementation **absolutely** must modify its 173 | /// [`MapCtx`](MapCtx), which should only happen early in IR module parsing. 174 | pub(crate) trait CtxMappable<'ctx, T>: Sized { 175 | type Error; 176 | 177 | /// Attempt to map `T` into `Self` using the given [`MapCtx`](MapCtx). 178 | fn try_map(raw: &T, ctx: &'ctx MapCtx) -> Result; 179 | } 180 | -------------------------------------------------------------------------------- /llvm-mapper/src/record/alias.rs: -------------------------------------------------------------------------------- 1 | //! Functionality for mapping the `MODULE_CODE_ALIAS` record. 2 | 3 | use std::convert::TryFrom; 4 | 5 | use llvm_support::{ 6 | DllStorageClass, Linkage, RuntimePreemption, ThreadLocalMode, Type, UnnamedAddr, Visibility, 7 | }; 8 | use num_enum::TryFromPrimitiveError; 9 | use thiserror::Error; 10 | 11 | use crate::map::{CtxMappable, MapCtx}; 12 | use crate::record::StrtabError; 13 | use crate::unroll::Record; 14 | 15 | /// Errors that can occur while mapping an alias record. 16 | #[derive(Debug, Error)] 17 | pub enum AliasError { 18 | /// The alias record is too short to be well-formed. 19 | #[error("alias record too short: {0} < 5 fields")] 20 | TooShort(usize), 21 | 22 | /// The alias record is in an old unsupported format. 23 | #[error("unsupported alias record format (v1)")] 24 | V1Unsupported, 25 | 26 | /// Retrieving a string from a string table failed. 27 | #[error("error while accessing string table")] 28 | Strtab(#[from] StrtabError), 29 | 30 | /// The alias has a bad or unknown type. 31 | #[error("invalid type table index: {0}")] 32 | Type(u64), 33 | 34 | /// The alias has an invalid visibility. 
35 | #[error("invalid visibility")] 36 | Visibility(#[from] TryFromPrimitiveError), 37 | 38 | /// The alias has an invalid DLL storage class. 39 | #[error("invalid storage class")] 40 | DllStorageClass(#[from] TryFromPrimitiveError), 41 | } 42 | 43 | /// Models the `MODULE_CODE_ALIAS` record. 44 | #[derive(Debug)] 45 | pub struct Alias<'ctx> { 46 | /// The alias's name. 47 | pub name: &'ctx str, 48 | 49 | /// The alias's type. 50 | pub ty: &'ctx Type, 51 | 52 | /// The aliasee value index. 53 | pub value_index: u64, 54 | 55 | /// The alias's linkage. 56 | pub linkage: Linkage, 57 | 58 | /// The alias's visibility. 59 | pub visibility: Visibility, 60 | 61 | /// The alias's storage class. 62 | pub storage_class: DllStorageClass, 63 | 64 | /// The alias's thread local storage mode. 65 | pub tls_mode: ThreadLocalMode, 66 | 67 | /// The alias's `unnamed_addr` specifier. 68 | pub unnamed_addr: UnnamedAddr, 69 | 70 | /// The alias's preemption specifier. 71 | pub preemption_specifier: RuntimePreemption, 72 | } 73 | 74 | impl<'ctx> CtxMappable<'ctx, Record> for Alias<'ctx> { 75 | type Error = AliasError; 76 | 77 | fn try_map(record: &Record, ctx: &'ctx MapCtx) -> Result { 78 | let fields = record.fields(); 79 | 80 | if !ctx.use_strtab() { 81 | return Err(AliasError::V1Unsupported); 82 | } 83 | 84 | // Every alias record has at least 5 fields, corresponding to 85 | // [strtab_offset, strtab_size, *v1], where v1 has 3 mandatory fields: 86 | // [alias type, aliasee value#, linkage, ...] 
87 | if fields.len() < 5 { 88 | return Err(AliasError::TooShort(fields.len())); 89 | } 90 | 91 | let name = ctx.strtab.read_name(record)?; 92 | let ty = ctx 93 | .type_table 94 | .get(fields[2]) 95 | .ok_or(AliasError::Type(fields[2]))?; 96 | let value_index = fields[3]; 97 | let linkage = Linkage::from(fields[4]); 98 | 99 | let visibility = fields 100 | .get(5) 101 | .map_or_else(|| Ok(Visibility::Default), |v| Visibility::try_from(*v))?; 102 | 103 | let storage_class = fields.get(6).map_or_else( 104 | || Ok(DllStorageClass::Default), 105 | |v| DllStorageClass::try_from(*v), 106 | )?; 107 | 108 | let tls_mode = fields 109 | .get(7) 110 | .copied() 111 | .map(ThreadLocalMode::from) 112 | .unwrap_or(ThreadLocalMode::NotThreadLocal); 113 | 114 | let unnamed_addr = fields 115 | .get(8) 116 | .copied() 117 | .map(UnnamedAddr::from) 118 | .unwrap_or(UnnamedAddr::None); 119 | 120 | let preemption_specifier = fields 121 | .get(9) 122 | .copied() 123 | .map(RuntimePreemption::from) 124 | .unwrap_or(RuntimePreemption::DsoPreemptable); 125 | 126 | Ok(Alias { 127 | name, 128 | ty, 129 | value_index, 130 | linkage, 131 | visibility, 132 | storage_class, 133 | tls_mode, 134 | unnamed_addr, 135 | preemption_specifier, 136 | }) 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /llvm-mapper/src/record/comdat.rs: -------------------------------------------------------------------------------- 1 | //! Functionality for mapping the `MODULE_CODE_COMDAT` record. 2 | 3 | use std::convert::TryInto; 4 | 5 | use llvm_support::StrtabRef; 6 | use num_enum::{TryFromPrimitive, TryFromPrimitiveError}; 7 | use thiserror::Error; 8 | 9 | use crate::block::strtab::StrtabError; 10 | use crate::map::{MapError, PartialCtxMappable, PartialMapCtx}; 11 | use crate::unroll::Record; 12 | 13 | /// Errors that can occur when mapping a COMDAT record. 
14 | #[non_exhaustive] 15 | #[derive(Debug, Error)] 16 | pub enum ComdatError { 17 | /// The COMDAT record is in an old unsupported format. 18 | #[error("unsupported COMDAT record format (v1)")] 19 | V1Unsupported, 20 | 21 | /// The COMDAT record is too short. 22 | #[error("COMDAT record doesn't have enough fields ({0} < 3)")] 23 | TooShort(usize), 24 | 25 | /// We couldn't get the COMDAT's name from the string table. 26 | #[error("error while accessing COMDAT name: {0}")] 27 | Name(#[from] StrtabError), 28 | 29 | /// The COMDAT's selection kind is invalid or unknown. 30 | #[error("unknown or invalid COMDAT selection kind: {0}")] 31 | SelectionKind(#[from] TryFromPrimitiveError), 32 | 33 | /// A generic mapping error occured. 34 | #[error("mapping error in comdat list")] 35 | Map(#[from] MapError), 36 | } 37 | 38 | /// The different kinds of COMDAT selections. 39 | /// 40 | /// This is a nearly direct copy of LLVM's `SelectionKind`; see `IR/Comdat.h`. 41 | #[non_exhaustive] 42 | #[derive(Debug, TryFromPrimitive)] 43 | #[repr(u64)] 44 | pub enum SelectionKind { 45 | /// The linker may choose any COMDAT. 46 | Any, 47 | /// The data referenced by the COMDAT must be the same. 48 | ExactMatch, 49 | /// The linker will choose the largest COMDAT. 50 | Largest, 51 | /// No deduplication is performed. 52 | NoDeduplicate, 53 | /// The data referenced by the COMDAT must be the same size. 54 | SameSize, 55 | } 56 | 57 | /// Models the `MODULE_CODE_COMDAT` record. 58 | #[non_exhaustive] 59 | #[derive(Debug)] 60 | pub struct Comdat { 61 | /// The selection kind for this COMDAT. 62 | pub selection_kind: SelectionKind, 63 | /// The COMDAT key. 64 | pub name: String, 65 | } 66 | 67 | impl PartialCtxMappable for Comdat { 68 | type Error = ComdatError; 69 | 70 | fn try_map(record: &Record, ctx: &mut PartialMapCtx) -> Result { 71 | if !ctx.use_strtab().map_err(MapError::Context)? 
{ 72 | return Err(ComdatError::V1Unsupported); 73 | } 74 | 75 | // v2: [strtab offset, strtab size, selection kind] 76 | if record.fields().len() != 3 { 77 | return Err(ComdatError::TooShort(record.fields().len())); 78 | } 79 | 80 | // Index safety: we check for at least 3 fields above. 81 | let name = { 82 | let sref: StrtabRef = (record.fields()[0], record.fields()[1]).into(); 83 | ctx.strtab.try_get(&sref)?.into() 84 | }; 85 | let selection_kind: SelectionKind = record.fields()[2].try_into()?; 86 | 87 | Ok(Self { 88 | selection_kind, 89 | name: name, 90 | }) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /llvm-mapper/src/record/datalayout.rs: -------------------------------------------------------------------------------- 1 | //! Functionality for mapping the `MODULE_CODE_DATALAYOUT` record. 2 | 3 | use std::convert::{TryFrom, TryInto}; 4 | use std::num::ParseIntError; 5 | use std::str::FromStr; 6 | 7 | use llvm_support::{ 8 | AddressSpace, AddressSpaceError, Align, AlignError, AlignSpecError, Endian, 9 | FunctionPointerAlign, Mangling, PointerAlignSpec, PointerAlignSpecs, TypeAlignSpec, 10 | TypeAlignSpecs, 11 | }; 12 | use thiserror::Error; 13 | 14 | use crate::map::{PartialCtxMappable, PartialMapCtx}; 15 | use crate::record::RecordStringError; 16 | use crate::unroll::Record; 17 | 18 | /// Potential errors when parsing an LLVM datalayout string. 19 | #[derive(Debug, Error)] 20 | pub enum DataLayoutError { 21 | /// The datalayout string can't be extracted from the record. 22 | #[error("malformed datalayout record: {0}")] 23 | BadString(#[from] RecordStringError), 24 | /// The specified alignment is invalid. 25 | #[error("bad alignment value: {0}")] 26 | BadAlign(#[from] AlignError), 27 | /// The specified address space is invalid. 28 | #[error("bad address space")] 29 | BadAddressSpace(#[from] AddressSpaceError), 30 | /// An unknown specification was encountered. 
31 | #[error("unknown datalayout specification: {0}")] 32 | UnknownSpec(char), 33 | /// An empty specification was encountered. 34 | #[error("empty specification in datalayout")] 35 | EmptySpec, 36 | /// The datalayout string isn't in ASCII. 37 | #[error("non-ASCII characters in datalayout string")] 38 | BadEncoding, 39 | /// We couldn't parse a field as an integer. 40 | #[error("couldn't parse spec field: {0}")] 41 | BadInt(#[from] ParseIntError), 42 | /// We couldn't parse an individual spec, for some reason. 43 | #[error("couldn't parse spec: {0}")] 44 | BadSpecParse(String), 45 | /// We couldn't parse an alignment spec. 46 | #[error("cou't parse alignment spec: {0}")] 47 | BadAlignSpec(#[from] AlignSpecError), 48 | } 49 | 50 | /// Models the `MODULE_CODE_DATALAYOUT` record. 51 | #[non_exhaustive] 52 | #[derive(Debug)] 53 | pub struct DataLayout { 54 | /// The endianness of the target. 55 | pub endianness: Endian, 56 | /// The target's natural stack alignment, if present. 57 | pub natural_stack_alignment: Option, 58 | /// The address space for program memory. 59 | pub program_address_space: AddressSpace, 60 | /// The address space for global variables. 61 | pub global_variable_address_space: AddressSpace, 62 | /// The address space for objects created by `alloca`. 63 | pub alloca_address_space: AddressSpace, 64 | /// Non-pointer type alignment specifications for the target. 65 | pub type_alignments: TypeAlignSpecs, 66 | /// Pointer alignment specifications for the target. 67 | pub pointer_alignments: PointerAlignSpecs, 68 | /// Aggregate alignment for the target. 69 | pub aggregate_alignment: Align, 70 | /// Function pointer alignment for the target, if present. 71 | pub function_pointer_alignment: Option, 72 | /// The target's symbol mangling discipline, if present. 73 | pub mangling: Option, 74 | /// A list of integer widths (in bits) that are efficiently supported by the target. 
75 | pub native_integer_widths: Vec, 76 | /// A list of address spaces that use non-integral pointers. 77 | pub non_integral_address_spaces: Vec, 78 | } 79 | 80 | impl Default for DataLayout { 81 | fn default() -> Self { 82 | Self { 83 | endianness: Endian::Big, 84 | natural_stack_alignment: None, 85 | program_address_space: Default::default(), 86 | global_variable_address_space: Default::default(), 87 | alloca_address_space: Default::default(), 88 | type_alignments: TypeAlignSpecs::default(), 89 | pointer_alignments: PointerAlignSpecs::default(), 90 | aggregate_alignment: Align::ALIGN8, 91 | function_pointer_alignment: None, 92 | mangling: None, 93 | native_integer_widths: vec![], 94 | non_integral_address_spaces: vec![], 95 | } 96 | } 97 | } 98 | 99 | impl FromStr for DataLayout { 100 | type Err = DataLayoutError; 101 | 102 | fn from_str(value: &str) -> Result { 103 | if !value.is_ascii() { 104 | return Err(DataLayoutError::BadEncoding); 105 | } 106 | 107 | let mut datalayout = Self::default(); 108 | for spec in value.split('-') { 109 | if spec.is_empty() { 110 | return Err(DataLayoutError::EmptySpec); 111 | } 112 | 113 | let body = &spec[1..]; 114 | 115 | // Unwrap safety: we check for a nonempty spec above. 116 | #[allow(clippy::unwrap_used)] 117 | match spec.chars().next().unwrap() { 118 | 'e' => datalayout.endianness = Endian::Little, 119 | 'E' => datalayout.endianness = Endian::Big, 120 | 'S' => { 121 | datalayout.natural_stack_alignment = 122 | Some(Align::from_bit_align(body.parse::()?)?); 123 | } 124 | 'P' => { 125 | datalayout.program_address_space = body.parse::()?.try_into()?; 126 | } 127 | 'G' => { 128 | datalayout.global_variable_address_space = body.parse::()?.try_into()?; 129 | } 130 | 'A' => { 131 | datalayout.alloca_address_space = body.parse::()?.try_into()?; 132 | } 133 | 'p' => { 134 | // Pass the entire spec in here, since we need the spec identifier as well. 
135 | let align_spec = spec.parse::()?; 136 | datalayout.pointer_alignments.update(align_spec); 137 | } 138 | 'i' | 'v' | 'f' | 'a' => { 139 | // Pass the entire spec in here, since we need the spec identifier as well. 140 | let align_spec = spec.parse::()?; 141 | datalayout.type_alignments.update(align_spec); 142 | } 143 | 'F' => match body.chars().next() { 144 | Some(id) => { 145 | let align = Align::from_bit_align(body[1..].parse::()?)?; 146 | let align = match id { 147 | 'i' => FunctionPointerAlign::Independent { 148 | abi_alignment: align, 149 | }, 150 | 'n' => FunctionPointerAlign::MultipleOfFunctionAlign { 151 | abi_alignment: align, 152 | }, 153 | o => { 154 | return Err(DataLayoutError::BadSpecParse(format!( 155 | "unknown function pointer alignment specifier: {}", 156 | o 157 | ))) 158 | } 159 | }; 160 | datalayout.function_pointer_alignment = Some(align); 161 | } 162 | None => { 163 | return Err(DataLayoutError::BadSpecParse( 164 | "function pointer alignment spec is empty".into(), 165 | )) 166 | } 167 | }, 168 | 'm' => { 169 | // The mangling spec is `m:X`, where `X` is the mangling kind. 170 | // We've already parsed `m`, so we expect exactly two characters. 171 | let mut mangling = body.chars().take(2); 172 | match mangling.next() { 173 | Some(':') => {} 174 | Some(u) => { 175 | return Err(DataLayoutError::BadSpecParse(format!( 176 | "bad separator for mangling spec: {}", 177 | u 178 | ))) 179 | } 180 | None => { 181 | return Err(DataLayoutError::BadSpecParse( 182 | "mangling spec is empty".into(), 183 | )) 184 | } 185 | } 186 | 187 | // TODO(ww): This could be FromStr on Mangling. 
188 | let kind = match mangling.next() { 189 | None => { 190 | return Err(DataLayoutError::BadSpecParse( 191 | "mangling spec has no mangling kind".into(), 192 | )) 193 | } 194 | Some('e') => Mangling::Elf, 195 | Some('m') => Mangling::Mips, 196 | Some('o') => Mangling::Macho, 197 | Some('x') => Mangling::WindowsX86Coff, 198 | Some('w') => Mangling::WindowsCoff, 199 | Some('a') => Mangling::XCoff, 200 | Some(u) => { 201 | return Err(DataLayoutError::BadSpecParse(format!( 202 | "unknown mangling kind in spec: {}", 203 | u 204 | ))) 205 | } 206 | }; 207 | 208 | datalayout.mangling = Some(kind); 209 | } 210 | 'n' => { 211 | // 'n' marks the start of either an 'n' or an 'ni' block. 212 | match body.chars().next() { 213 | Some('i') => { 214 | if body.len() <= 1 { 215 | return Err(DataLayoutError::BadSpecParse( 216 | "cannot find address space 0".into(), 217 | )); 218 | } 219 | datalayout.non_integral_address_spaces = body[2..] 220 | .split(':') 221 | .map(|s| { 222 | s.parse::() 223 | .map_err(DataLayoutError::from) 224 | .and_then(|a| AddressSpace::try_from(a).map_err(Into::into)) 225 | .and_then(|a| { 226 | if a == AddressSpace::default() { 227 | Err(DataLayoutError::BadSpecParse( 228 | "address space 0 cannot be non-integral".into(), 229 | )) 230 | } else { 231 | Ok(a) 232 | } 233 | }) 234 | }) 235 | .collect::>()? 
236 | } 237 | Some(_) => { 238 | datalayout.native_integer_widths = body 239 | .split(':') 240 | .map(|s| s.parse::()) 241 | .collect::>()?; 242 | } 243 | None => { 244 | return Err(DataLayoutError::BadSpecParse( 245 | "integer width spec is empty".into(), 246 | )) 247 | } 248 | } 249 | } 250 | u => return Err(DataLayoutError::UnknownSpec(u)), 251 | } 252 | } 253 | 254 | Ok(datalayout) 255 | } 256 | } 257 | 258 | impl PartialCtxMappable for DataLayout { 259 | type Error = DataLayoutError; 260 | 261 | fn try_map(record: &Record, _ctx: &mut PartialMapCtx) -> Result { 262 | let datalayout = record.try_string(0)?; 263 | datalayout.parse::() 264 | } 265 | } 266 | 267 | #[cfg(test)] 268 | mod tests { 269 | use super::*; 270 | 271 | #[test] 272 | fn test_datalayout_has_defaults() { 273 | let dl = DataLayout::default(); 274 | 275 | assert_eq!(dl.type_alignments, TypeAlignSpecs::default()); 276 | assert_eq!(dl.pointer_alignments, PointerAlignSpecs::default()); 277 | } 278 | 279 | #[test] 280 | fn test_datalayout_parses() { 281 | { 282 | assert_eq!( 283 | "not ascii ¬∫˙˚√∂∆˙√ß" 284 | .parse::() 285 | .unwrap_err() 286 | .to_string(), 287 | "non-ASCII characters in datalayout string" 288 | ); 289 | 290 | assert_eq!( 291 | "z".parse::().unwrap_err().to_string(), 292 | "unknown datalayout specification: z" 293 | ); 294 | } 295 | 296 | { 297 | let dl = "E-S64".parse::().unwrap(); 298 | 299 | assert_eq!(dl.endianness, Endian::Big); 300 | assert_eq!(dl.natural_stack_alignment.unwrap().byte_align(), 8); 301 | assert!(dl.mangling.is_none()); 302 | } 303 | 304 | { 305 | let dl = "e-S32".parse::().unwrap(); 306 | 307 | assert_eq!(dl.endianness, Endian::Little); 308 | assert_eq!(dl.natural_stack_alignment.unwrap().byte_align(), 4); 309 | } 310 | 311 | { 312 | let dl = "m:e".parse::().unwrap(); 313 | 314 | assert_eq!(dl.mangling, Some(Mangling::Elf)); 315 | } 316 | 317 | { 318 | assert_eq!( 319 | "m".parse::().unwrap_err().to_string(), 320 | "couldn't parse spec: mangling spec is empty" 
321 | ); 322 | 323 | assert_eq!( 324 | "m:".parse::().unwrap_err().to_string(), 325 | "couldn't parse spec: mangling spec has no mangling kind" 326 | ); 327 | 328 | assert_eq!( 329 | "m:?".parse::().unwrap_err().to_string(), 330 | "couldn't parse spec: unknown mangling kind in spec: ?" 331 | ); 332 | } 333 | 334 | { 335 | let dl = "Fi64".parse::().unwrap(); 336 | 337 | assert_eq!( 338 | dl.function_pointer_alignment, 339 | Some(FunctionPointerAlign::Independent { 340 | abi_alignment: Align::ALIGN64 341 | }) 342 | ); 343 | } 344 | 345 | { 346 | let dl = "Fn8".parse::().unwrap(); 347 | 348 | assert_eq!( 349 | dl.function_pointer_alignment, 350 | Some(FunctionPointerAlign::MultipleOfFunctionAlign { 351 | abi_alignment: Align::ALIGN8 352 | }) 353 | ); 354 | } 355 | 356 | { 357 | assert_eq!( 358 | "F".parse::().unwrap_err().to_string(), 359 | "couldn't parse spec: function pointer alignment spec is empty" 360 | ); 361 | 362 | assert_eq!( 363 | "Fn".parse::().unwrap_err().to_string(), 364 | "couldn't parse spec field: cannot parse integer from empty string" 365 | ); 366 | 367 | assert_eq!( 368 | "Fn123".parse::().unwrap_err().to_string(), 369 | "bad alignment value: supplied value is not a multiple of 8: 123" 370 | ); 371 | 372 | assert_eq!( 373 | "F?64".parse::().unwrap_err().to_string(), 374 | "couldn't parse spec: unknown function pointer alignment specifier: ?" 
375 | ); 376 | } 377 | 378 | { 379 | let dl = "n8:16:32:64".parse::().unwrap(); 380 | 381 | assert_eq!(dl.native_integer_widths, vec![8, 16, 32, 64]); 382 | } 383 | 384 | { 385 | let dl = "n64".parse::().unwrap(); 386 | 387 | assert_eq!(dl.native_integer_widths, vec![64]); 388 | } 389 | 390 | { 391 | assert_eq!( 392 | "n".parse::().unwrap_err().to_string(), 393 | "couldn't parse spec: integer width spec is empty" 394 | ); 395 | 396 | assert_eq!( 397 | "nx".parse::().unwrap_err().to_string(), 398 | "couldn't parse spec field: invalid digit found in string" 399 | ); 400 | 401 | assert_eq!( 402 | "n:".parse::().unwrap_err().to_string(), 403 | "couldn't parse spec field: cannot parse integer from empty string" 404 | ); 405 | 406 | assert_eq!( 407 | "n8:".parse::().unwrap_err().to_string(), 408 | "couldn't parse spec field: cannot parse integer from empty string" 409 | ); 410 | } 411 | 412 | { 413 | let dl = "ni:1:10:20".parse::().unwrap(); 414 | 415 | assert_eq!( 416 | dl.non_integral_address_spaces, 417 | vec![ 418 | AddressSpace::try_from(1_u32).unwrap(), 419 | AddressSpace::try_from(10_u32).unwrap(), 420 | AddressSpace::try_from(20_u32).unwrap() 421 | ] 422 | ); 423 | } 424 | 425 | { 426 | let dl = "ni:1".parse::().unwrap(); 427 | 428 | assert_eq!( 429 | dl.non_integral_address_spaces, 430 | vec![AddressSpace::try_from(1_u32).unwrap(),] 431 | ); 432 | } 433 | 434 | { 435 | assert_eq!( 436 | "ni".parse::().unwrap_err().to_string(), 437 | "couldn't parse spec: cannot find address space 0" 438 | ); 439 | 440 | assert_eq!( 441 | "ni0".parse::().unwrap_err().to_string(), 442 | "couldn't parse spec field: cannot parse integer from empty string" 443 | ); 444 | } 445 | } 446 | } 447 | -------------------------------------------------------------------------------- /llvm-mapper/src/record/function.rs: -------------------------------------------------------------------------------- 1 | //! Functionality for mapping the `MODULE_CODE_FUNCTION` record. 
2 | 3 | use std::convert::TryFrom; 4 | 5 | use llvm_support::{ 6 | AlignError, CallingConvention, DllStorageClass, FunctionType, Linkage, MaybeAlign, Type, 7 | UnnamedAddr, Visibility, 8 | }; 9 | use num_enum::TryFromPrimitiveError; 10 | use thiserror::Error; 11 | 12 | use crate::block::attributes::AttributeEntry; 13 | use crate::map::{CtxMappable, MapCtx}; 14 | use crate::record::StrtabError; 15 | use crate::unroll::Record; 16 | 17 | /// Errors that can occur when mapping a function record. 18 | #[derive(Debug, Error)] 19 | pub enum FunctionError { 20 | /// The function record is too short to be well-formed. 21 | #[error("function record too short: {0} < 10 fields")] 22 | TooShort(usize), 23 | 24 | /// The function record is in an old unsupported format. 25 | #[error("unsupported function record format (v1)")] 26 | V1Unsupported, 27 | 28 | /// Retrieving a string from a string table failed. 29 | #[error("error while accessing string table")] 30 | Strtab(#[from] StrtabError), 31 | 32 | /// This function has an unknown calling convention. 33 | #[error("unknown calling convention")] 34 | CallingConvention(#[from] TryFromPrimitiveError), 35 | 36 | /// The function has a bad or unknown type ID. 37 | #[error("invalid type table index: {0}")] 38 | TypeId(u64), 39 | 40 | /// The function has a non-function type. 41 | #[error("non-function type for function")] 42 | Type, 43 | 44 | /// The function has an invalid attribute entry ID. 45 | #[error("invalid attribute entry ID: {0}")] 46 | Attribute(u64), 47 | 48 | /// The function has an invalid alignment. 49 | #[error("invalid alignment")] 50 | Alignment(#[from] AlignError), 51 | 52 | /// The function has an invalid section table index. 53 | #[error("invalid section table index: {0}")] 54 | Section(usize), 55 | 56 | /// The function has an invalid visibility. 57 | #[error("invalid visibility")] 58 | Visibility(#[from] TryFromPrimitiveError), 59 | 60 | /// The function has an invalid GC table index. 
61 | #[error("invalid GC table index: {0}")] 62 | Gc(usize), 63 | 64 | /// The function has an invalid DLL storage class. 65 | #[error("invalid storage class")] 66 | DllStorageClass(#[from] TryFromPrimitiveError), 67 | } 68 | 69 | /// Models the `MODULE_CODE_FUNCTION` record. 70 | #[non_exhaustive] 71 | #[derive(Debug)] 72 | pub struct Function<'ctx> { 73 | /// The function's name. 74 | pub name: &'ctx str, 75 | 76 | /// A reference to the function's type in the type table. 77 | pub ty: &'ctx FunctionType, 78 | 79 | /// The function's calling convention. 80 | pub calling_convention: CallingConvention, 81 | 82 | /// Whether the function is a declaration, or a full definition (with body). 83 | pub is_declaration: bool, 84 | 85 | /// The function's linkage. 86 | pub linkage: Linkage, 87 | 88 | /// The function's attributes, if it has any. 89 | pub attributes: Option<&'ctx AttributeEntry>, 90 | 91 | /// The function's alignment. 92 | pub alignment: MaybeAlign, 93 | 94 | /// The function's custom section, if it has one. 95 | pub section: Option<&'ctx str>, 96 | 97 | /// The function's visibility. 98 | pub visibility: Visibility, 99 | 100 | /// The function's garbage collector, if it has one. 101 | pub gc_name: Option<&'ctx str>, 102 | 103 | /// The function's `unnamed_addr` specifier. 104 | pub unnamed_addr: UnnamedAddr, 105 | 106 | /// The function's DLL storage class. 107 | pub storage_class: DllStorageClass, 108 | } 109 | 110 | impl<'ctx> CtxMappable<'ctx, Record> for Function<'ctx> { 111 | type Error = FunctionError; 112 | 113 | fn try_map(record: &Record, ctx: &'ctx MapCtx) -> Result { 114 | let fields = record.fields(); 115 | 116 | if !ctx.use_strtab() { 117 | return Err(FunctionError::V1Unsupported); 118 | } 119 | 120 | // Every function record has at least 10 fields, corresponding to 121 | // [strtab_offset, strtab_size, *v1], where v1 has 8 mandatory fields: 122 | // [type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, ...] 
123 | if fields.len() < 10 { 124 | return Err(FunctionError::TooShort(fields.len())); 125 | } 126 | 127 | let name = ctx.strtab.read_name(record)?; 128 | let Type::Function(ty) = ctx 129 | .type_table 130 | .get(fields[2]) 131 | .ok_or(FunctionError::TypeId(fields[2]))? else { 132 | return Err(FunctionError::Type); 133 | }; 134 | let calling_convention = CallingConvention::try_from(fields[3])?; 135 | let is_declaration = fields[4] != 0; 136 | let linkage = Linkage::from(fields[5]); 137 | 138 | let attributes = { 139 | let paramattr = fields[6]; 140 | // An ID of 0 is a special sentinel for no attributes, 141 | // so any nonzero ID is a 1-based index. 142 | if paramattr == 0 { 143 | None 144 | } else { 145 | // NOTE(ww): This is more conservative than LLVM: LLVM treats an 146 | // unknown attribute ID as an empty set of attributes, 147 | // rather than a hard failure. 148 | Some( 149 | ctx.attributes 150 | .get(paramattr - 1) 151 | .ok_or(FunctionError::Attribute(paramattr))?, 152 | ) 153 | } 154 | }; 155 | 156 | // TODO: Upgrade attributes here? It's what LLVM does. 157 | 158 | let alignment = MaybeAlign::try_from(fields[7] as u8)?; 159 | 160 | let section = match fields[8] as usize { 161 | 0 => None, 162 | idx => Some( 163 | ctx.section_table 164 | .get(idx - 1) 165 | .map(AsRef::as_ref) 166 | .ok_or(FunctionError::Section(idx - 1))?, 167 | ), 168 | }; 169 | 170 | let visibility = Visibility::try_from(fields[9])?; 171 | 172 | // From here, all fields are optional and need to be guarded as such. 
173 | 174 | let gc_name = fields 175 | .get(10) 176 | .and_then(|idx| match *idx as usize { 177 | 0 => None, 178 | idx => Some( 179 | ctx.gc_table 180 | .get(idx - 1) 181 | .map(AsRef::as_ref) 182 | .ok_or(FunctionError::Gc(idx - 1)), 183 | ), 184 | }) 185 | .transpose()?; 186 | 187 | let unnamed_addr = fields 188 | .get(11) 189 | .copied() 190 | .map(UnnamedAddr::from) 191 | .unwrap_or(UnnamedAddr::None); 192 | 193 | // fields[12]: prologuedata 194 | 195 | let storage_class = fields.get(13).map_or_else( 196 | || Ok(DllStorageClass::Default), 197 | |v| DllStorageClass::try_from(*v), 198 | )?; 199 | 200 | // fields[14]: comdat 201 | // fields[15]: prefixdata 202 | // fields[16]: personalityfn 203 | // fields[16]: preemptionspecifier 204 | 205 | Ok(Self { 206 | name, 207 | ty, 208 | calling_convention, 209 | is_declaration, 210 | linkage, 211 | attributes, 212 | alignment, 213 | section, 214 | visibility, 215 | gc_name, 216 | unnamed_addr, 217 | storage_class, 218 | }) 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /llvm-mapper/src/record/mod.rs: -------------------------------------------------------------------------------- 1 | //! Structures for mapping from bitstream records to LLVM models. 2 | //! 3 | //! Depending on their importance or complexity, not every record is given a dedicated 4 | //! structure or mapping implementation. Simpler records are mapped inline within their 5 | //! blocks. 6 | 7 | pub mod alias; 8 | pub mod comdat; 9 | pub mod datalayout; 10 | pub mod function; 11 | 12 | use std::num::TryFromIntError; 13 | use std::string::FromUtf8Error; 14 | 15 | use thiserror::Error; 16 | 17 | pub use self::alias::*; 18 | pub use self::comdat::*; 19 | pub use self::datalayout::*; 20 | pub use self::function::*; 21 | use crate::block::StrtabError; 22 | 23 | /// Potential errors when trying to extract a string from a record. 
24 | #[non_exhaustive] 25 | #[derive(Debug, Error)] 26 | pub enum RecordStringError { 27 | /// The start index for the string is invalid. 28 | #[error("impossible string index: {0} >= {1} (field count)")] 29 | BadIndex(usize, usize), 30 | /// A field in the record is too large to fit in a byte. 31 | #[error("impossible character value in string: {0}")] 32 | BadCharacter(#[from] TryFromIntError), 33 | /// The string doesn't look like valid UTF-8. 34 | #[error("invalid string encoding: {0}")] 35 | BadEncoding(#[from] FromUtf8Error), 36 | } 37 | 38 | /// Potential errors when trying to extract a blob from a record. 39 | #[non_exhaustive] 40 | #[derive(Debug, Error)] 41 | pub enum RecordBlobError { 42 | /// The start index for the blob is invalid. 43 | #[error("impossible blob index: {0} >= {1} (field count)")] 44 | BadIndex(usize, usize), 45 | /// A field in the record is too large to fit in a byte. 46 | #[error("impossible byte value in blob: {0}")] 47 | BadByte(#[from] TryFromIntError), 48 | } 49 | -------------------------------------------------------------------------------- /llvm-support/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "llvm-support" 3 | description = "Support types and routines for parsing LLVM's bitcode" 4 | license = "MIT" 5 | homepage = "https://github.com/woodruffw/mollusc/tree/main/llvm-support" 6 | repository = "https://github.com/woodruffw/mollusc" 7 | authors = ["William Woodruff "] 8 | readme = "README.md" 9 | keywords = ["llvm", "parsing", "binary", "encoding"] 10 | categories = ["compilers", "encoding", "parsing"] 11 | edition = "2018" 12 | version = "0.0.3" 13 | 14 | [dependencies] 15 | paste = "1.0" 16 | thiserror = "1.0" 17 | num_enum = "0.6.0" 18 | -------------------------------------------------------------------------------- /llvm-support/README.md: -------------------------------------------------------------------------------- 1 | llvm-support 2 | 
============ 3 | 4 | [![Crates.io](https://img.shields.io/crates/v/llvm-support)](https://crates.io/crates/llvm-support) 5 | [![Documentation](https://docs.rs/llvm-support/badge.svg)](https://docs.rs/llvm-support) 6 | 7 | Support types and routines for parsing LLVM's bitcode. 8 | 9 | This crate is roughly analogous to LLVM's `libSupport` in scope: it provides 10 | some essential invariant-preserving types for parsing, mapping, and validating 11 | LLVM bitcode. 12 | -------------------------------------------------------------------------------- /llvm-support/build.rs: -------------------------------------------------------------------------------- 1 | // This is an ugly little hack to get access to a reasonable "default" 2 | // target triple when loading bitcode inputs that don't mention their triple. 3 | // Based on: https://stackoverflow.com/a/51311222 4 | // Unwrap safety: None. If this fails, the build fails, and that's intended. 5 | #[allow(clippy::unwrap_used)] 6 | fn main() { 7 | println!( 8 | "cargo:rustc-env=TARGET_TRIPLE={}", 9 | std::env::var("TARGET").unwrap() 10 | ); 11 | } 12 | -------------------------------------------------------------------------------- /llvm-support/src/attribute.rs: -------------------------------------------------------------------------------- 1 | //! Support code for LLVM attributes. 2 | 3 | use num_enum::TryFromPrimitive; 4 | 5 | /// Represents the different kinds of attributes. 6 | #[derive(Debug, PartialEq, Eq, TryFromPrimitive)] 7 | #[repr(u64)] 8 | pub enum AttributeKind { 9 | /// A well-known enum attribute. 10 | Enum = 0, 11 | /// A well-known integral attribute with an integer value. 12 | IntKeyValue = 1, 13 | /// A string attribute. 14 | StrKey = 3, 15 | /// A string attribute with a string value. 16 | StrKeyValue = 4, 17 | // TODO(ww): 5 and 6 are attribute kinds in the LLVM codebase, but aren't documented. 18 | } 19 | 20 | /// Represents the IDs of different specific attributes. 
21 | #[non_exhaustive] 22 | #[derive(Copy, Clone, Debug, PartialEq, Eq, TryFromPrimitive)] 23 | #[repr(u64)] 24 | pub enum AttributeId { 25 | /// `align()` 26 | Alignment = 1, 27 | /// `alwaysinline` 28 | AlwaysInline = 2, 29 | /// `byval` 30 | ByVal = 3, 31 | /// `inlinehint` 32 | InlineHint = 4, 33 | /// `inreg` 34 | InReg = 5, 35 | /// `minsize` 36 | MinSize = 6, 37 | /// `naked` 38 | Naked = 7, 39 | /// `nest` 40 | Nest = 8, 41 | /// `noalias` 42 | NoAlias = 9, 43 | /// `nobuiltin` 44 | NoBuiltin = 10, 45 | /// `nocapture` 46 | NoCapture = 11, 47 | /// `noduplicate` 48 | NoDuplicate = 12, 49 | /// `noimplicitfloat` 50 | NoImplicitFloat = 13, 51 | /// `noinline` 52 | NoInline = 14, 53 | /// `nonlazybind` 54 | NonLazyBind = 15, 55 | /// `noredzone` 56 | NoRedZone = 16, 57 | /// `noreturn` 58 | NoReturn = 17, 59 | /// `nounwind` 60 | NoUnwind = 18, 61 | /// `optsize` 62 | OptimizeForSize = 19, 63 | /// `readnone` 64 | ReadNone = 20, 65 | /// `readonly` 66 | ReadOnly = 21, 67 | /// `returned` 68 | Returned = 22, 69 | /// `returns_twice` 70 | ReturnsTwice = 23, 71 | /// `signext` 72 | SExt = 24, 73 | /// `alignstack()` 74 | StackAlignment = 25, 75 | /// `ssp` 76 | StackProtect = 26, 77 | /// `sspreq` 78 | StackProtectReq = 27, 79 | /// `sspstrong` 80 | StackProtectStrong = 28, 81 | /// `sret` 82 | StructRet = 29, 83 | /// `sanitize_address` 84 | SanitizeAddress = 30, 85 | /// `sanitize_thread` 86 | SanitizeThread = 31, 87 | /// `sanitize_memory` 88 | SanitizeMemory = 32, 89 | /// `uwtable ([variant])` 90 | UwTable = 33, 91 | /// `zeroext` 92 | ZExt = 34, 93 | /// `builtin` 94 | Builtin = 35, 95 | /// `cold` 96 | Cold = 36, 97 | /// `optnone` 98 | OptimizeNone = 37, 99 | /// `inalloca` 100 | InAlloca = 38, 101 | /// `nonnull` 102 | NonNull = 39, 103 | /// `jumptable` 104 | JumpTable = 40, 105 | /// `dereferenceable()` 106 | Dereferenceable = 41, 107 | /// `dereferenceable_or_null()` 108 | DereferenceableOrNull = 42, 109 | /// `convergent` 110 | Convergent = 43, 111 | 
/// `safestack` 112 | SafeStack = 44, 113 | /// `argmemonly` 114 | ArgMemOnly = 45, 115 | /// `swiftself` 116 | SwiftSelf = 46, 117 | /// `swifterror` 118 | SwiftError = 47, 119 | /// `norecurse` 120 | NoRecurse = 48, 121 | /// `inaccessiblememonly` 122 | InaccessiblememOnly = 49, 123 | /// `inaccessiblememonly_or_argmemonly` 124 | InaccessiblememOrArgmemonly = 50, 125 | /// `allocsize([, ])` 126 | AllocSize = 51, 127 | /// `writeonly` 128 | WriteOnly = 52, 129 | /// `speculatable` 130 | Speculatable = 53, 131 | /// `strictfp` 132 | StrictFp = 54, 133 | /// `sanitize_hwaddress` 134 | SanitizeHwAddress = 55, 135 | /// `nocf_check` 136 | NoCfCheck = 56, 137 | /// `optforfuzzing` 138 | OptForFuzzing = 57, 139 | /// `shadowcallstack` 140 | Shadowcallstack = 58, 141 | /// `speculative_load_hardening` 142 | SpeculativeLoadHardening = 59, 143 | /// `immarg` 144 | ImmArg = 60, 145 | /// `willreturn` 146 | WillReturn = 61, 147 | /// `nofree` 148 | NoFree = 62, 149 | /// `nosync` 150 | NoSync = 63, 151 | /// `sanitize_memtag` 152 | SanitizeMemtag = 64, 153 | /// `preallocated` 154 | Preallocated = 65, 155 | /// `no_merge` 156 | NoMerge = 66, 157 | /// `null_pointer_is_valid` 158 | NullPointerIsValid = 67, 159 | /// `noundef` 160 | NoUndef = 68, 161 | /// `byref` 162 | ByRef = 69, 163 | /// `mustprogress` 164 | MustProgress = 70, 165 | /// `no_callback` 166 | NoCallback = 71, 167 | /// `hot` 168 | Hot = 72, 169 | /// `no_profile` 170 | NoProfile = 73, 171 | /// `vscale_range([, ])` 172 | VScaleRange = 74, 173 | /// `swift_async` 174 | SwiftAsync = 75, 175 | /// `nosanitize_coverage` 176 | NoSanitizeCoverage = 76, 177 | /// `elementtype` 178 | ElementType = 77, 179 | /// `disable_sanitizer_instrumentation` 180 | DisableSanitizerInstrumentation = 78, 181 | /// `nosanitize_bounds` 182 | NoSanitizeBounds = 79, 183 | /// `allocalign` 184 | AllocAlign = 80, 185 | /// `allocptr` 186 | AllocatedPointer = 81, 187 | /// `allockind ()` 188 | AllocKind = 82, 189 | /// `presplitcoroutine` 
190 | PresplitCoroutine = 83, 191 | /// `fn_ret_thunk_extern` 192 | FnretthunkExtern = 84, 193 | /// `skipprofile` 194 | SkipProfile = 85, 195 | /// `memory ()` 196 | Memory = 86, 197 | } 198 | -------------------------------------------------------------------------------- /llvm-support/src/bitcodes.rs: -------------------------------------------------------------------------------- 1 | //! Core bitcode constants. 2 | //! 3 | //! These correspond directly to many of the block IDs, record codes, and 4 | //! other special constants in LLVM bitcode streams. 5 | 6 | use num_enum::{IntoPrimitive, TryFromPrimitive}; 7 | 8 | use crate::FIRST_APPLICATION_BLOCK_ID; 9 | 10 | /// Block IDs that are reserved by LLVM. 11 | // NOTE(ww): Block IDs 0 through 7 are reserved, but only 0 (BLOCKINFO) 12 | // is actually currently used. 13 | #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, TryFromPrimitive)] 14 | #[repr(u64)] 15 | pub enum ReservedBlockId { 16 | /// The `BLOCKINFO` block ID. 17 | BlockInfo = 0, 18 | /// Reserved; no semantics. 19 | Reserved1 = 1, 20 | /// Reserved; no semantics. 21 | Reserved2 = 2, 22 | /// Reserved; no semantics. 23 | Reserved3 = 3, 24 | /// Reserved; no semantics. 25 | Reserved4 = 4, 26 | /// Reserved; no semantics. 27 | Reserved5 = 5, 28 | /// Reserved; no semantics. 29 | Reserved6 = 6, 30 | /// Reserved; no semantics. 31 | Reserved7 = 7, 32 | } 33 | 34 | /// Block IDs that are used by LLVM for bitcode (i.e., IR bitstreams). 35 | /// See: `enum BlockIDs` in `Bitcode/LLVMBitCodes.h`, 36 | #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, TryFromPrimitive)] 37 | #[repr(u64)] 38 | pub enum IrBlockId { 39 | /// `MODULE_BLOCK_ID` 40 | Module = FIRST_APPLICATION_BLOCK_ID, 41 | /// `PARAM_ATTR_BLOCK_ID` 42 | ParamAttr, 43 | /// `PARAM_ATTR_GROUP_BLOCK_ID` 44 | ParamAttrGroup, 45 | /// `CONSTANTS_BLOCK_ID` 46 | Constants, 47 | /// `FUNCTION_BLOCK_ID` 48 | Function, 49 | /// `IDENTIFICATION_BLOCK_ID`. 50 | Identification, 51 | /// `VALUE_SYMTAB_BLOCK_ID`. 
52 | ValueSymtab, 53 | /// `METADATA_BLOCK_ID`. 54 | Metadata, 55 | /// `METADATA_ATTACHMENT_BLOCK_ID`. 56 | MetadataAttachment, 57 | /// `TYPE_BLOCK_ID_NEW`. 58 | Type, 59 | /// `USELIST_BLOCK_ID`. 60 | Uselist, 61 | /// `MODULE_STRTAB_BLOCK_ID`. 62 | ModuleStrtab, 63 | /// `GLOBAL_VAL_SUMMARY_BLOCK_ID`. 64 | GlobalValSummary, 65 | /// `OPERAND_BUNDLE_TAGS_BLOCK_ID`. 66 | OperandBundleTags, 67 | /// `METADATA_KIND_BLOCK_ID`. 68 | MetadataKind, 69 | /// `STRTAB_BLOCK_ID`. 70 | Strtab, 71 | /// `FULL_LTO_GLOBAL_VAL_SUMMARY_BLOCK_ID`. 72 | FullLtoGlobalValSummary, 73 | /// `SYMTAB_BLOCK_ID`. 74 | Symtab, 75 | /// `SYNC_SCOPE_NAMES_BLOCK_ID`. 76 | SyncScopeNames, 77 | } 78 | 79 | /// Abbreviation IDs that are reserved by LLVM. 80 | #[derive(Clone, Copy, Debug, PartialEq, Eq, TryFromPrimitive)] 81 | #[repr(u64)] 82 | pub enum ReservedAbbrevId { 83 | /// Identifies an `END_BLOCK` record. 84 | EndBlock = 0, 85 | /// Identifies an `ENTER_SUBBLOCK` record. 86 | EnterSubBlock, 87 | /// Identifies a `DEFINE_ABBREV` record. 88 | DefineAbbrev, 89 | /// Identifies an `UNABBREV_RECORD` record. 90 | UnabbrevRecord, 91 | } 92 | 93 | /// Codes for each operand encoding type supported by `DEFINE_ABBREV`. 94 | #[derive(Clone, Copy, Debug, PartialEq, Eq, TryFromPrimitive)] 95 | #[repr(u64)] 96 | pub enum AbbrevOpEnc { 97 | /// A fixed-length, unsigned operand. 98 | Fixed = 1, 99 | /// A variable-length, unsigned operand. 100 | Vbr, 101 | /// An array of values. 102 | Array, 103 | /// A single 6-bit-encoded character. 104 | Char6, 105 | /// A blob of bytes. 106 | Blob, 107 | } 108 | 109 | /// Codes for each `UNABBREV_RECORD` in `BLOCKINFO`. 
110 | #[non_exhaustive] 111 | #[derive(Debug, PartialEq, Eq, TryFromPrimitive)] 112 | #[repr(u64)] 113 | pub enum BlockInfoCode { 114 | /// SETBID: `[blockid]` 115 | SetBid = 1, 116 | /// BLOCKNAME: `[...name...]` 117 | BlockName, 118 | /// SETRECORDNAME: `[recordid, ...name...]` 119 | SetRecordName, 120 | } 121 | 122 | /// Codes for each record in `IDENTIFICATION_BLOCK`. 123 | #[non_exhaustive] 124 | #[derive(Debug, PartialEq, Eq, TryFromPrimitive)] 125 | #[repr(u64)] 126 | pub enum IdentificationCode { 127 | /// IDENTIFICATION_CODE_STRING: `[...string...]` 128 | ProducerString = 1, 129 | /// IDENTIFICATION_CODE_EPOCH: `[epoch]` 130 | Epoch, 131 | } 132 | 133 | /// Codes for each record in `MODULE_BLOCK`. 134 | #[non_exhaustive] 135 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] 136 | #[repr(u64)] 137 | pub enum ModuleCode { 138 | /// MODULE_CODE_VERSION: `[version#]` 139 | Version = 1, 140 | /// MODULE_CODE_TRIPLE: `[...string...]` 141 | Triple = 2, 142 | /// MODULE_CODE_DATALAYOUT: `[...string...]` 143 | DataLayout = 3, 144 | /// MODULE_CODE_ASM: `[...string...]` 145 | Asm = 4, 146 | /// MODULE_CODE_SECTIONNAME: `[...string...]` 147 | SectionName = 5, 148 | /// MODULE_CODE_DEPLIB: `[...string...]` 149 | DepLib = 6, 150 | /// MODULE_CODE_GLOBALVAR: `[...fields...]` 151 | /// See: <https://llvm.org/docs/BitCodeFormat.html#module-code-globalvar-record> 152 | GlobalVar = 7, 153 | /// MODULE_CODE_FUNCTION: `[...fields...]` 154 | /// See: <https://llvm.org/docs/BitCodeFormat.html#module-code-function-record> 155 | Function = 8, 156 | /// MODULE_CODE_ALIAS_OLD: `[...fields...]` 157 | /// See: <https://llvm.org/docs/BitCodeFormat.html#module-code-alias-record> 158 | AliasOld = 9, 159 | /// MODULE_CODE_GCNAME: `[...string...]` 160 | GcName = 11, 161 | /// MODULE_CODE_COMDAT 162 | /// v1: `[selection_kind, name]` 163 | /// v2: `[strtab_offset, strtab_size, selection_kind]` 164 | /// Only `v2` is currently supported. 165 | Comdat = 12, 166 | /// MODULE_CODE_VSTOFFSET: `[offset]` 167 | VstOffset = 13, 168 | /// MODULE_CODE_ALIAS: `[...fields...]` 169 | /// Not well documented; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`. 
170 | Alias = 14, 171 | /// MODULE_CODE_METADATA_VALUES_UNUSED 172 | /// Not documented at all; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`. 173 | MetadataValuesUnused = 15, 174 | /// MODULE_CODE_SOURCE_FILENAME: `[...string...]` 175 | SourceFilename = 16, 176 | /// MODULE_CODE_HASH: `[5*i32]` 177 | Hash = 17, 178 | /// MODULE_CODE_IFUNC: `[...fields...]` 179 | /// Not well documented; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`. 180 | IFunc = 18, 181 | } 182 | 183 | /// Codes for each record in `TYPE_BLOCK` (i.e., `TYPE_BLOCK_ID_NEW`). 184 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] 185 | #[repr(u64)] 186 | pub enum TypeCode { 187 | /// TYPE_CODE_NUMENTRY: `[numentries]` 188 | NumEntry = 1, 189 | /// TYPE_CODE_VOID 190 | Void, 191 | /// TYPE_CODE_FLOAT 192 | Float, 193 | /// TYPE_CODE_DOUBLE 194 | Double, 195 | /// TYPE_CODE_LABEL 196 | Label, 197 | /// TYPE_CODE_OPAQUE 198 | Opaque, 199 | /// TYPE_CODE_INTEGER: `[width]` 200 | Integer, 201 | /// TYPE_CODE_POINTER: `[pointee type]` 202 | Pointer, 203 | /// TYPE_CODE_FUNCTION_OLD: `[vararg, attrid, retty, paramty x N]` 204 | FunctionOld, 205 | /// TYPE_CODE_HALF 206 | Half, 207 | /// TYPE_CODE_ARRAY: `[numelts, eltty]` 208 | Array, 209 | /// TYPE_CODE_VECTOR: `[numelts, eltty]` 210 | Vector, 211 | /// TYPE_CODE_X86_FP80 212 | X86Fp80, 213 | /// TYPE_CODE_FP128 214 | Fp128, 215 | /// TYPE_CODE_PPC_FP128 216 | PpcFp128, 217 | /// TYPE_CODE_METADATA 218 | Metadata, 219 | /// TYPE_CODE_X86_MMX 220 | X86Mmx, 221 | /// TYPE_CODE_STRUCT_ANON: `[ispacked, eltty x N]` 222 | StructAnon, 223 | /// TYPE_CODE_STRUCT_NAME: `[strchr x N]` 224 | StructName, 225 | /// TYPE_CODE_STRUCT_NAMED: `[ispacked, eltty x N]` 226 | StructNamed, 227 | /// TYPE_CODE_FUNCTION: `[vararg, retty, paramty x N]` 228 | Function, 229 | /// TYPE_CODE_TOKEN 230 | Token, 231 | /// TYPE_CODE_BFLOAT 232 | BFloat, 233 | /// TYPE_CODE_X86_AMX 234 | X86Amx, 235 | /// TYPE_CODE_OPAQUE_POINTER: `[addrspace]` 236 | OpaquePointer, 237 | 
} 238 | 239 | /// Codes for each record in `STRTAB_BLOCK`. 240 | #[non_exhaustive] 241 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] 242 | #[repr(u64)] 243 | pub enum StrtabCode { 244 | /// STRTAB_BLOB: `[...string...]` 245 | Blob = 1, 246 | } 247 | 248 | /// Codes for each record in `SYMTAB_BLOCK`. 249 | #[non_exhaustive] 250 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] 251 | #[repr(u64)] 252 | pub enum SymtabCode { 253 | /// SYMTAB_BLOB: `[...data...]` 254 | Blob = 1, 255 | } 256 | 257 | /// Codes for each record in `PARAMATTR_BLOCK` or `PARAMATTR_GROUP_BLOCK`. 258 | // NOTE(ww): For whatever reason, these two blocks share the same enum for 259 | // record codes. 260 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] 261 | #[repr(u64)] 262 | pub enum AttributeCode { 263 | /// PARAMATTR_CODE_ENTRY_OLD: `[paramidx0, attr0, paramidx1, attr1...]` 264 | EntryOld = 1, 265 | /// PARAMATTR_CODE_ENTRY: `[attrgrp0, attrgrp1, ...]` 266 | Entry, 267 | /// PARAMATTR_GRP_CODE_ENTRY: `[grpid, idx, attr0, attr1, ...]` 268 | GroupCodeEntry, 269 | } 270 | 271 | /// Codes for each record in `FUNCTION_BLOCK`. 272 | /// 273 | /// See: `FunctionCodes` in `LLVMBitCodes.h`. 
274 | #[allow(missing_docs)] 275 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] 276 | #[repr(u64)] 277 | pub enum FunctionCode { 278 | DeclareBlocks = 1, 279 | InstBinop = 2, 280 | InstCast = 3, 281 | InstGepOld = 4, 282 | InstSelect = 5, 283 | InstExtractelt = 6, 284 | InstInsertelt = 7, 285 | InstShufflevec = 8, 286 | InstCmp = 9, 287 | InstRet = 10, 288 | InstBr = 11, 289 | InstSwitch = 12, 290 | InstInvoke = 13, 291 | InstUnreachable = 15, 292 | InstPhi = 16, 293 | InstAlloca = 19, 294 | InstLoad = 20, 295 | InstVaarg = 23, 296 | InstStoreOld = 24, 297 | InstExtractval = 26, 298 | InstInsertval = 27, 299 | InstCmp2 = 28, 300 | InstVselect = 29, 301 | InstInboundsGepOld = 30, 302 | InstIndirectbr = 31, 303 | DebugLocAgain = 33, 304 | InstCall = 34, 305 | DebugLoc = 35, 306 | InstFence = 36, 307 | InstCmpxchgOld = 37, 308 | InstAtomicrmwOld = 38, 309 | InstResume = 39, 310 | InstLandingpadOld = 40, 311 | InstLoadatomic = 41, 312 | InstStoreatomicOld = 42, 313 | InstGep = 43, 314 | InstStore = 44, 315 | InstStoreatomic = 45, 316 | InstCmpxchg = 46, 317 | InstLandingpad = 47, 318 | InstCleanupret = 48, 319 | InstCatchret = 49, 320 | InstCatchpad = 50, 321 | InstCleanuppad = 51, 322 | InstCatchswitch = 52, 323 | OperandBundle = 55, 324 | InstUnop = 56, 325 | Instcallbr = 57, 326 | InstFreeze = 58, 327 | InstAtomicrmw = 59, 328 | } 329 | 330 | /// Codes for each unary operation in unary instructions. 331 | /// 332 | /// See: `UnaryOpcodes` in `LLVMBitCodes.h`. 333 | #[allow(missing_docs)] 334 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] 335 | #[repr(u64)] 336 | pub enum UnaryOpcode { 337 | FNeg = 0, 338 | } 339 | 340 | /// Codes for each binary operation in binary instructions. Each code is shared by the integer and floating-point forms of an operation (e.g. `Add` decodes to either `add` or `fadd`, depending on the operand type). 341 | /// 342 | /// See: `BinaryOpcodes` in `LLVMBitCodes.h`. 
343 | #[allow(missing_docs)] 344 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] 345 | #[repr(u64)] 346 | pub enum BinaryOpcode { 347 | Add = 0, 348 | Sub, 349 | Mul, 350 | UDiv, 351 | SDiv, 352 | URem, 353 | SRem, 354 | Shl, 355 | LShr, 356 | AShr, 357 | And, 358 | Or, 359 | Xor, 360 | } 361 | 362 | /// Codes for each operation in `atomicrmw` instructions. 363 | /// See: `RMWOperations` in `LLVMBitCodes.h`. 364 | #[allow(missing_docs)] 365 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] 366 | #[repr(u64)] 367 | pub enum RMWOperation { 368 | Xchg = 0, 369 | Add, 370 | Sub, 371 | And, 372 | Nand, 373 | Or, 374 | Xor, 375 | Max, 376 | Min, 377 | UMax, 378 | UMin, 379 | FAdd, 380 | FSub, 381 | } 382 | -------------------------------------------------------------------------------- /llvm-support/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `llvm-support` provides support types to the other *mollusc* crates, 2 | //! in furtherance of the general task of parsing LLVM's bitcode. 3 | 4 | #![deny(rustdoc::broken_intra_doc_links)] 5 | #![deny(missing_docs)] 6 | #![allow(clippy::redundant_field_names)] 7 | #![forbid(unsafe_code)] 8 | 9 | pub mod align; 10 | pub mod attribute; 11 | pub mod bitcodes; 12 | pub mod opcode; 13 | pub mod ty; 14 | 15 | use num_enum::{IntoPrimitive, TryFromPrimitive}; 16 | 17 | pub use self::align::*; 18 | pub use self::attribute::*; 19 | pub use self::opcode::*; 20 | pub use self::ty::*; 21 | 22 | /// The 32-bit magic that indicates a raw LLVM IR bitcode stream. 23 | pub const LLVM_IR_MAGIC: u32 = 0xdec04342; 24 | 25 | /// The 32-bit magic that indicates a bitcode wrapper, which in 26 | /// turn points to the start of the actual bitcode stream. 27 | pub const BITCODE_WRAPPER_MAGIC: u32 = 0x0b17c0de; 28 | 29 | /// The initial abbreviation ID width in a bitstream. 
30 | pub const INITIAL_ABBREV_ID_WIDTH: u64 = 2; 31 | 32 | /// All abbreviation IDs before this are defined by the bitstream format, 33 | /// rather than the stream itself. 34 | pub const FIRST_APPLICATION_ABBREV_ID: usize = 4; 35 | 36 | /// All block IDs before this have their semantics defined by the bitstream 37 | /// format, rather than the stream itself. 38 | pub const FIRST_APPLICATION_BLOCK_ID: u64 = 8; 39 | 40 | /// The lookup alphabet for the Char6 operand encoding. 41 | pub const CHAR6_ALPHABET: &[u8] = 42 | b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"; 43 | 44 | /// The current toolchain's target triple, captured at compile time from the `TARGET_TRIPLE` environment variable. 45 | pub const TARGET_TRIPLE: &str = env!("TARGET_TRIPLE"); 46 | 47 | /// An exact endianness. 48 | /// 49 | /// For an inexact endianness model (i.e., one that supports a notion of "system" endianness), 50 | /// see [`InexactEndian`](InexactEndian). 51 | #[derive(Debug, PartialEq, Eq)] 52 | pub enum Endian { 53 | /// Little-endian. 54 | Little, 55 | /// Big-endian. 56 | Big, 57 | } 58 | 59 | /// An "inexact" endianness, i.e. one that supports an unspecified system endianness. 60 | #[derive(Debug)] 61 | pub enum InexactEndian { 62 | /// Either big-endian or little-endian. 63 | Exact(Endian), 64 | /// The host system's endianness, which may not be known immediately. 65 | System, 66 | } 67 | 68 | /// Symbol mangling styles supported by LLVM. 69 | #[derive(Debug, PartialEq, Eq)] 70 | pub enum Mangling { 71 | /// ELF-style mangling. 72 | Elf, 73 | /// MIPS-style mangling. 74 | Mips, 75 | /// Mach-O-style mangling. 76 | Macho, 77 | /// COFF on x86 Windows-style mangling. 78 | WindowsX86Coff, 79 | /// COFF on Windows-style mangling. 80 | WindowsCoff, 81 | /// XCOFF-style mangling. 82 | XCoff, 83 | } 84 | 85 | /// Global value linkage types. 
86 | /// 87 | /// See: <https://llvm.org/docs/LangRef.html#linkage-types> 88 | #[non_exhaustive] 89 | #[derive(Debug, PartialEq, Eq)] 90 | #[repr(u64)] 91 | #[allow(missing_docs)] 92 | pub enum Linkage { 93 | External, 94 | AvailableExternally, 95 | LinkOnceAny, 96 | LinkOnceOdr, 97 | WeakAny, 98 | WeakOdr, 99 | Appending, 100 | Internal, 101 | Private, 102 | ExternalWeak, 103 | Common, 104 | } 105 | 106 | impl From for Linkage { 107 | fn from(value: u64) -> Self { 108 | // See getDecodedLinkage in BitcodeReader.cpp. 109 | match value { 110 | 0 | 5 | 6 | 15 => Linkage::External, 111 | 1 | 16 => Linkage::WeakAny, 112 | 2 => Linkage::Appending, 113 | 3 => Linkage::Internal, 114 | 4 | 18 => Linkage::LinkOnceAny, 115 | 7 => Linkage::ExternalWeak, 116 | 8 => Linkage::Common, 117 | 9 | 13 | 14 => Linkage::Private, 118 | 10 | 17 => Linkage::WeakOdr, 119 | 11 | 19 => Linkage::LinkOnceOdr, 120 | 12 => Linkage::AvailableExternally, 121 | _ => Linkage::External, 122 | } 123 | } 124 | } 125 | 126 | /// An `(offset, size)` reference to a string within some string table. 127 | pub struct StrtabRef { 128 | /// The string's offset within its string table. 129 | pub offset: usize, 130 | /// The string's size, in bytes. 131 | pub size: usize, 132 | } 133 | 134 | impl From<(usize, usize)> for StrtabRef { 135 | fn from(value: (usize, usize)) -> Self { 136 | Self { 137 | offset: value.0, 138 | size: value.1, 139 | } 140 | } 141 | } 142 | 143 | impl From<(u64, u64)> for StrtabRef { 144 | fn from(value: (u64, u64)) -> Self { 145 | Self::from((value.0 as usize, value.1 as usize)) 146 | } 147 | } 148 | 149 | /// Valid visibility styles. 150 | /// 151 | /// See: <https://llvm.org/docs/LangRef.html#visibility-styles> 152 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] 153 | #[repr(u64)] 154 | pub enum Visibility { 155 | /// Default visibility. 156 | Default = 0, 157 | 158 | /// Hidden visibility. 159 | Hidden, 160 | 161 | /// Protected visibility. 162 | Protected, 163 | } 164 | 165 | /// DLL storage classes. 
166 | /// 167 | /// See: <https://llvm.org/docs/LangRef.html#dll-storage-classes> 168 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] 169 | #[repr(u64)] 170 | pub enum DllStorageClass { 171 | /// The default storage class. 172 | Default = 0, 173 | 174 | /// The `dllimport` storage class. 175 | Import, 176 | 177 | /// The `dllexport` storage class. 178 | Export, 179 | } 180 | 181 | /// Thread local storage modes. 182 | /// 183 | /// See: <https://llvm.org/docs/LangRef.html#thread-local-storage-models> 184 | /// See also: 185 | #[derive(Debug, PartialEq, Eq, IntoPrimitive)] 186 | #[repr(u64)] 187 | pub enum ThreadLocalMode { 188 | /// Not thread local. 189 | NotThreadLocal = 0, 190 | 191 | /// The general dynamic TLS model. 192 | GeneralDynamicTls, 193 | 194 | /// The local dynamic TLS model. 195 | LocalDynamicTls, 196 | 197 | /// The initial exec TLS model. 198 | InitialExecTls, 199 | 200 | /// The local exec TLS model. 201 | LocalExecTls, 202 | } 203 | 204 | impl From for ThreadLocalMode { 205 | fn from(value: u64) -> ThreadLocalMode { 206 | match value { 207 | 0 => ThreadLocalMode::NotThreadLocal, 208 | 1 => ThreadLocalMode::GeneralDynamicTls, 209 | 2 => ThreadLocalMode::LocalDynamicTls, 210 | 3 => ThreadLocalMode::InitialExecTls, 211 | 4 => ThreadLocalMode::LocalExecTls, 212 | // Unknown values are treated as general dynamic. 213 | _ => ThreadLocalMode::GeneralDynamicTls, 214 | } 215 | } 216 | } 217 | 218 | /// The `unnamed_addr` specifier. 219 | #[derive(Debug, PartialEq, Eq, IntoPrimitive)] 220 | #[repr(u64)] 221 | pub enum UnnamedAddr { 222 | /// No `unnamed_addr`. 223 | None = 0, 224 | 225 | /// The address of this variable is not significant. 226 | Global, 227 | 228 | /// The address of this variable is not significant, but only within the module. 229 | Local, 230 | } 231 | 232 | impl From for UnnamedAddr { 233 | fn from(value: u64) -> UnnamedAddr { 234 | match value { 235 | 0 => UnnamedAddr::None, 236 | 1 => UnnamedAddr::Global, 237 | 2 => UnnamedAddr::Local, 238 | // Unknown values are treated as having no `unnamed_addr` specifier. 
239 | _ => UnnamedAddr::None, 240 | } 241 | } 242 | } 243 | 244 | /// The runtime preemption specifier. 245 | /// 246 | /// See: <https://llvm.org/docs/LangRef.html#runtime-preemption-specifiers> 247 | #[derive(Debug, PartialEq, Eq, IntoPrimitive)] 248 | #[repr(u64)] 249 | pub enum RuntimePreemption { 250 | /// The function or variable may be replaced by a symbol from outside the linkage 251 | /// unit at runtime. 252 | DsoPreemptable, 253 | 254 | /// The compiler may assume that the function or variable will resolve to a symbol within 255 | /// the same linkage unit. 256 | DsoLocal, 257 | } 258 | 259 | impl From for RuntimePreemption { 260 | fn from(value: u64) -> RuntimePreemption { 261 | match value { 262 | 0 => RuntimePreemption::DsoPreemptable, 263 | 1 => RuntimePreemption::DsoLocal, 264 | // Unknown values are treated as `dso_preemptable`. 265 | _ => RuntimePreemption::DsoPreemptable, 266 | } 267 | } 268 | } 269 | 270 | /// Calling conventions supported by LLVM. 271 | #[non_exhaustive] 272 | #[derive(Debug, PartialEq, Eq, TryFromPrimitive)] 273 | #[repr(u64)] 274 | #[allow(missing_docs)] 275 | pub enum CallingConvention { 276 | C = 0, 277 | Fast = 8, 278 | Cold = 9, 279 | GHC = 10, 280 | HiPE = 11, 281 | WebKitJS = 12, 282 | AnyReg = 13, 283 | PreserveMost = 14, 284 | PreserveAll = 15, 285 | Swift = 16, 286 | CXXFASTTLS = 17, 287 | X86Stdcall = 64, 288 | X86Fastcall = 65, 289 | ARMAPCS = 66, 290 | ARMAAPCS = 67, 291 | ARMAAPCSVFP = 68, 292 | MSP430INTR = 69, 293 | X86ThisCall = 70, 294 | PTXKernel = 71, 295 | PTXDevice = 72, 296 | SPIRFUNC = 75, 297 | SPIRKERNEL = 76, 298 | IntelOCLBI = 77, 299 | X8664SysV = 78, 300 | Win64 = 79, 301 | X86VectorCall = 80, 302 | HHVM = 81, 303 | HHVMC = 82, 304 | X86INTR = 83, 305 | AVRINTR = 84, 306 | AVRSIGNAL = 85, 307 | AVRBUILTIN = 86, 308 | AMDGPUVS = 87, 309 | AMDGPUGS = 88, 310 | AMDGPUPS = 89, 311 | AMDGPUCS = 90, 312 | AMDGPUKERNEL = 91, 313 | X86RegCall = 92, 314 | AMDGPUHS = 93, 315 | MSP430BUILTIN = 94, 316 | AMDGPULS = 95, 317 | AMDGPUES = 96, 318 | } 319 | 320 | #[cfg(test)] 
321 | mod tests { 322 | use super::*; 323 | 324 | #[test] 325 | fn test_target_triple() { 326 | assert!(!TARGET_TRIPLE.is_empty()); 327 | } 328 | } 329 | -------------------------------------------------------------------------------- /llvm-support/src/opcode.rs: -------------------------------------------------------------------------------- 1 | //! Support code for instruction opcodes. 2 | 3 | use std::convert::TryFrom; 4 | 5 | use num_enum::TryFromPrimitiveError; 6 | use thiserror::Error; 7 | 8 | use crate::{ 9 | bitcodes::{BinaryOpcode, UnaryOpcode}, 10 | Type, 11 | }; 12 | 13 | /// Represents the different classes of LLVM opcodes. 14 | #[derive(Clone, Copy, Debug)] 15 | pub enum Opcode { 16 | /// Opcodes that terminate basic blocks. 17 | Term(TermOp), 18 | /// Opcodes that take a single operand. 19 | Unary(UnaryOp), 20 | /// Opcodes that take two operands. 21 | Binary(BinaryOp), 22 | /// Opcodes that interact with memory. 23 | Mem(MemOp), 24 | /// Opcodes that cast between types and representations. 25 | Cast(CastOp), 26 | /// Funclet "landing pad" opcodes. 27 | FuncletPad(FuncletPadOp), 28 | /// "Other" opcodes of all sorts. 29 | Other(OtherOp), 30 | } 31 | 32 | /// Opcodes that terminate basic blocks. Every well-formed basic block ends 33 | /// with an instruction with one of these opcodes. 34 | #[derive(Clone, Copy, Debug)] 35 | pub enum TermOp { 36 | /// `ret` 37 | Ret, 38 | /// `br` 39 | Br, 40 | /// `switch` 41 | Switch, 42 | /// `indirectbr` 43 | IndirectBr, 44 | /// `invoke` 45 | Invoke, 46 | /// `resume` 47 | Resume, 48 | /// `unreachable` 49 | Unreachable, 50 | /// `cleanupret` 51 | CleanupRet, 52 | /// `catchret` 53 | CatchRet, 54 | /// `catchswitch` 55 | /// NOTE: Not documented? 56 | CatchSwitch, 57 | /// `callbr` 58 | CallBr, 59 | } 60 | 61 | /// Unary opcodes. 62 | #[derive(Clone, Copy, Debug)] 63 | pub enum UnaryOp { 64 | /// `fneg` 65 | FNeg, 66 | } 67 | 68 | /// Errors that can occur when constructing a `UnaryOp`. 
69 | #[derive(Debug, Error)] 70 | pub enum UnaryOpError { 71 | /// The opcode given doesn't correspond to a known operation. 72 | #[error("unknown opcode")] 73 | Opcode(#[from] TryFromPrimitiveError), 74 | } 75 | 76 | impl TryFrom for UnaryOp { 77 | type Error = UnaryOpError; 78 | 79 | fn try_from(value: u64) -> Result { 80 | Ok(UnaryOpcode::try_from(value)?.into()) 81 | } 82 | } 83 | 84 | impl From for UnaryOp { 85 | fn from(value: UnaryOpcode) -> Self { 86 | match value { 87 | UnaryOpcode::FNeg => UnaryOp::FNeg, 88 | } 89 | } 90 | } 91 | 92 | /// Binary opcodes. 93 | #[derive(Clone, Copy, Debug)] 94 | pub enum BinaryOp { 95 | /// `add` 96 | Add, 97 | /// `fadd` 98 | FAdd, 99 | /// `sub` 100 | Sub, 101 | /// `fsub` 102 | FSub, 103 | /// `mul` 104 | Mul, 105 | /// `fmul` 106 | FMul, 107 | /// `udiv` 108 | UDiv, 109 | /// `sdiv` 110 | SDiv, 111 | /// `fdiv` 112 | FDiv, 113 | /// `urem` 114 | URem, 115 | /// `srem` 116 | SRem, 117 | /// `frem` 118 | FRem, 119 | /// `shl` 120 | Shl, 121 | /// `lshl` 122 | LShr, 123 | /// `ashr` 124 | AShr, 125 | /// `and` 126 | And, 127 | /// `or` 128 | Or, 129 | /// `xor` 130 | Xor, 131 | } 132 | 133 | /// Errors that can occur when constructing a `BinaryOp`. 134 | #[derive(Debug, Error)] 135 | pub enum BinaryOpError { 136 | /// The specified type isn't valid for binary operations. 137 | #[error("invalid type for binary op: {0:?}")] 138 | InvalidType(Type), 139 | 140 | /// The specified type is incompatible with the operation. 141 | #[error("incompatible type for op: {0:?}")] 142 | IncompatibleType(Type), 143 | 144 | /// The opcode given doesn't correspond to a known operation. 
145 | #[error("unknown opcode")] 146 | Opcode(#[from] TryFromPrimitiveError), 147 | } 148 | 149 | impl TryFrom<(u64, &Type)> for BinaryOp { 150 | type Error = BinaryOpError; 151 | 152 | fn try_from((opc, ty): (u64, &Type)) -> Result { 153 | let opc = BinaryOpcode::try_from(opc)?; 154 | 155 | let is_fp = ty.is_floating_or_floating_vector(); 156 | 157 | // Binary operations are only valid on integer/fp types or vectors thereof. 158 | if !is_fp || !ty.is_integer_or_integer_vector() { 159 | return Err(BinaryOpError::InvalidType(ty.clone())); 160 | } 161 | 162 | Ok(match (opc, is_fp) { 163 | (BinaryOpcode::Add, false) => BinaryOp::Add, 164 | (BinaryOpcode::Add, true) => BinaryOp::FAdd, 165 | (BinaryOpcode::Sub, false) => BinaryOp::Sub, 166 | (BinaryOpcode::Sub, true) => BinaryOp::FSub, 167 | (BinaryOpcode::Mul, false) => BinaryOp::Mul, 168 | (BinaryOpcode::Mul, true) => BinaryOp::FMul, 169 | (BinaryOpcode::UDiv, false) => BinaryOp::UDiv, 170 | // `udiv` can't be used with floating-point types. 171 | (BinaryOpcode::UDiv, true) => return Err(BinaryOpError::IncompatibleType(ty.clone())), 172 | (BinaryOpcode::SDiv, false) => BinaryOp::SDiv, 173 | (BinaryOpcode::SDiv, true) => BinaryOp::FDiv, 174 | (BinaryOpcode::URem, false) => BinaryOp::URem, 175 | // `urem` can't be used with floating-point types. 176 | (BinaryOpcode::URem, true) => return Err(BinaryOpError::IncompatibleType(ty.clone())), 177 | (BinaryOpcode::SRem, false) => BinaryOp::SRem, 178 | (BinaryOpcode::SRem, true) => BinaryOp::FRem, 179 | // The rest are all integer-type only. 180 | (BinaryOpcode::Shl, true) => BinaryOp::Shl, 181 | (BinaryOpcode::LShr, true) => BinaryOp::LShr, 182 | (BinaryOpcode::AShr, true) => BinaryOp::AShr, 183 | (BinaryOpcode::And, true) => BinaryOp::And, 184 | (BinaryOpcode::Or, true) => BinaryOp::Or, 185 | (BinaryOpcode::Xor, true) => BinaryOp::Xor, 186 | (_, false) => return Err(BinaryOpError::IncompatibleType(ty.clone())), 187 | }) 188 | } 189 | } 190 | 191 | /// Memory opcodes. 
192 | #[derive(Clone, Copy, Debug)] 193 | pub enum MemOp { 194 | /// `alloca` 195 | Alloca, 196 | /// `load` 197 | Load, 198 | /// `store` 199 | Store, 200 | /// `getelementptr` 201 | GetElementPtr, 202 | /// `fence` 203 | Fence, 204 | /// `cmpxchg` 205 | AtomicCmpXchg, 206 | /// `atomicrmw` 207 | AtomicRMW, 208 | } 209 | 210 | /// Cast opcodes. 211 | #[derive(Clone, Copy, Debug)] 212 | pub enum CastOp { 213 | /// `trunc` 214 | Trunc, 215 | /// `zext` 216 | ZExt, 217 | /// `sext` 218 | SExt, 219 | /// `fptoui` 220 | FPToUI, 221 | /// `fptosi` 222 | FPToSI, 223 | /// `uitofp` 224 | UIToFP, 225 | /// `sitofp` 226 | SIToFP, 227 | /// `fptrunc` 228 | FPTrunc, 229 | /// `fpext` 230 | FPExt, 231 | /// `ptrtoint` 232 | PtrToInt, 233 | /// `inttoptr` 234 | IntToPtr, 235 | /// `bitcast` 236 | BitCast, 237 | /// `addrspacecast` 238 | AddrSpaceCast, 239 | } 240 | 241 | /// Funclet pad opcodes. 242 | #[derive(Clone, Copy, Debug)] 243 | pub enum FuncletPadOp { 244 | /// `cleanuppad` 245 | CleanupPad, 246 | /// `catchpad` 247 | CatchPad, 248 | } 249 | 250 | /// Other opcodes, i.e. those that don't belong to any of the other opcode classes. 251 | #[derive(Clone, Copy, Debug)] 252 | pub enum OtherOp { 253 | /// `icmp` 254 | ICmp, 255 | /// `fcmp` 256 | FCmp, 257 | /// `phi` 258 | Phi, 259 | /// `call` 260 | Call, 261 | /// `select` 262 | Select, 263 | /// Internal pass opcode. 264 | UserOp1, 265 | /// Internal pass opcode. 266 | UserOp2, 267 | /// `va_arg` 268 | VAArg, 269 | /// `extractelement` 270 | ExtractElement, 271 | /// `insertelement` 272 | InsertElement, 273 | /// `shufflevector` 274 | ShuffleVector, 275 | /// `extractvalue` 276 | ExtractValue, 277 | /// `insertvalue` 278 | InsertValue, 279 | /// `landingpad` 280 | LandingPad, 281 | /// `freeze` 282 | Freeze, 283 | } 284 | -------------------------------------------------------------------------------- /llvm-support/src/ty.rs: -------------------------------------------------------------------------------- 1 | //! Structures for managing LLVM types. 
2 | 3 | use std::convert::TryFrom; 4 | 5 | use thiserror::Error; 6 | 7 | use crate::AddressSpace; 8 | 9 | /// The IDs of types known to LLVM. 10 | /// 11 | /// These are not fully unique: all integer types share the `Integer` type ID, 12 | /// and similarly for pointers, arrays, etc. 13 | // TODO(ww): Perhaps use arbitrary enum discriminants here when they're stabilized. 14 | // See: https://github.com/rust-lang/rfcs/pull/2363 15 | #[repr(u64)] 16 | pub enum TypeId { 17 | /// 16-bit floating-points. 18 | Half = 0, 19 | /// 16-bit floating-points (7-bit significand). 20 | BFloat, 21 | /// 32-bit floating-points. 22 | Float, 23 | /// 64-bit floating-points. 24 | Double, 25 | /// 80-bit floating-points (x87). 26 | X86Fp80, 27 | /// 128-bit floating-points (112-bit significand). 28 | Fp128, 29 | /// 128-bit floating-points (two 64-bits, PowerPC). 30 | PpcFp128, 31 | /// The void type (a type with no size). 32 | Void, 33 | /// Labels. 34 | Label, 35 | /// Metadata. 36 | Metadata, 37 | /// MMX vectors (64 bits, x86). 38 | X86Mmx, 39 | /// AMX vectors (8192 bits, x86). 40 | X86Amx, 41 | /// Tokens. 42 | Token, 43 | /// Arbitrary bit-width integers. 44 | Integer, 45 | /// Functions. 46 | Function, 47 | /// Pointers. 48 | Pointer, 49 | /// Structures. 50 | Struct, 51 | /// Arrays. 52 | Array, 53 | /// Fixed-width SIMD vectors. 54 | FixedVector, 55 | /// Scalable SIMD vectors. 56 | ScalableVector, 57 | } 58 | 59 | /// A representation of LLVM's types. 60 | /// 61 | /// See [`TypeId`](TypeId) for documentation of each variant. Note that `OpaquePointer` has no corresponding `TypeId` variant. 
62 | #[allow(missing_docs)] 63 | #[derive(Clone, Debug, PartialEq)] 64 | pub enum Type { 65 | Half, 66 | BFloat, 67 | Float, 68 | Double, 69 | Metadata, 70 | X86Fp80, 71 | Fp128, 72 | PpcFp128, 73 | Void, 74 | Label, 75 | X86Mmx, 76 | X86Amx, 77 | Token, 78 | Integer(IntegerType), 79 | Function(FunctionType), 80 | Pointer(PointerType), 81 | OpaquePointer(AddressSpace), 82 | Struct(StructType), 83 | Array(ArrayType), 84 | FixedVector(VectorType), 85 | ScalableVector(VectorType), 86 | } 87 | 88 | impl Type { 89 | /// Returns whether this type is one of the floating point types. 90 | /// 91 | /// ```rust 92 | /// use llvm_support::Type; 93 | /// 94 | /// assert!(Type::BFloat.is_floating()); 95 | /// assert!(Type::Float.is_floating()); 96 | /// assert!(Type::Double.is_floating()); 97 | /// assert!(Type::X86Fp80.is_floating()); 98 | /// assert!(Type::Fp128.is_floating()); 99 | /// assert!(Type::PpcFp128.is_floating()); 100 | /// assert!(!Type::Metadata.is_floating()); 101 | /// ``` 102 | pub fn is_floating(&self) -> bool { 103 | matches!( 104 | self, 105 | Type::Half 106 | | Type::BFloat 107 | | Type::Float 108 | | Type::Double 109 | | Type::X86Fp80 110 | | Type::Fp128 111 | | Type::PpcFp128 112 | ) 113 | } 114 | 115 | /// Returns whether this type is an integer type. 116 | pub fn is_integer(&self) -> bool { 117 | matches!(self, Type::Integer(_)) 118 | } 119 | 120 | /// Returns whether this type is a valid "pointee" type, i.e. suitable as the inner type 121 | /// for a pointer type. 122 | pub fn is_pointee(&self) -> bool { 123 | !matches!( 124 | self, 125 | Type::Void | Type::Label | Type::Metadata | Type::Token | Type::X86Amx 126 | ) 127 | } 128 | 129 | /// Returns whether this type is a valid array element type, i.e. is suitable as the inner type 130 | /// for an array type. 
131 | pub fn is_array_element(&self) -> bool { 132 | !matches!( 133 | self, 134 | Type::Void 135 | | Type::Label 136 | | Type::Metadata 137 | | Type::Function(_) 138 | | Type::Token 139 | | Type::X86Amx 140 | | Type::ScalableVector(_) 141 | ) 142 | } 143 | 144 | /// Returns whether this type is a valid structure element type, i.e. is suitable as a field 145 | /// type within a structure type. 146 | pub fn is_struct_element(&self) -> bool { 147 | !matches!( 148 | self, 149 | Type::Void | Type::Label | Type::Metadata | Type::Function(_) | Type::Token 150 | ) 151 | } 152 | 153 | /// Returns whether this type is a valid vector element type, i.e. is suitable as the inner 154 | /// type for a vector type. 155 | /// 156 | /// ```rust 157 | /// use llvm_support::{AddressSpace, Type}; 158 | /// 159 | /// assert!(Type::Float.is_vector_element()); 160 | /// assert!(Type::new_integer(32).unwrap().is_vector_element()); 161 | /// assert!( 162 | /// Type::new_pointer(Type::new_integer(8).unwrap(), AddressSpace::default()) 163 | /// .unwrap() 164 | /// .is_vector_element() 165 | /// ); 166 | /// assert!(!Type::Metadata.is_vector_element()); 167 | /// ``` 168 | pub fn is_vector_element(&self) -> bool { 169 | self.is_floating() || matches!(self, Type::Integer(_) | Type::Pointer(_)) 170 | } 171 | 172 | /// Returns whether this type is "first class", i.e. is a valid type for an LLVM value. 173 | fn is_first_class(&self) -> bool { 174 | !matches!(self, Type::Function(_) | Type::Void) 175 | } 176 | 177 | /// Returns whether this type is a valid argument type, i.e. is suitable as an argument 178 | /// within a function type. 179 | /// 180 | /// ```rust 181 | /// use llvm_support::Type; 182 | /// 183 | /// assert!(Type::Float.is_argument()); 184 | /// assert!(!Type::Void.is_argument()); 185 | /// ``` 186 | pub fn is_argument(&self) -> bool { 187 | self.is_first_class() 188 | } 189 | 190 | /// Returns whether this type is a valid return type, i.e. 
is suitable as the return type 191 | /// within a function type. 192 | pub fn is_return(&self) -> bool { 193 | !matches!(self, Type::Function(_) | Type::Label | Type::Metadata) 194 | } 195 | 196 | /// Return the scalar type for this type. 197 | /// 198 | /// This is always the identity type for non-vector types, and the element type for vector types. 199 | pub fn scalar_type(&self) -> &Self { 200 | match &self { 201 | Type::ScalableVector(VectorType { 202 | num_elements: _, 203 | element_type, 204 | .. 205 | }) => element_type, 206 | Type::FixedVector(VectorType { 207 | num_elements: _, 208 | element_type, 209 | .. 210 | }) => element_type, 211 | _ => self, 212 | } 213 | } 214 | 215 | /// Returns whether this type is a floating-point type or a vector type 216 | /// of floating points. 217 | pub fn is_floating_or_floating_vector(&self) -> bool { 218 | self.scalar_type().is_floating() 219 | } 220 | 221 | /// Returns whether this type is a integer type or a vector type 222 | /// of integers. 223 | pub fn is_integer_or_integer_vector(&self) -> bool { 224 | self.scalar_type().is_integer() 225 | } 226 | 227 | /// Create a new struct type with the given fields. 228 | pub fn new_struct( 229 | name: Option, 230 | fields: Vec, 231 | is_packed: bool, 232 | ) -> Result { 233 | let inner = StructType::new(name, fields, is_packed)?; 234 | 235 | Ok(Type::Struct(inner)) 236 | } 237 | 238 | /// Create a new integral type from the given bit width. 239 | pub fn new_integer(bit_width: u32) -> Result { 240 | let inner = IntegerType::try_from(bit_width)?; 241 | 242 | Ok(Type::Integer(inner)) 243 | } 244 | 245 | /// Create a new pointer type from the given pointee type and address space. 246 | pub fn new_pointer( 247 | pointee: Type, 248 | address_space: AddressSpace, 249 | ) -> Result { 250 | let inner = PointerType::new(pointee, address_space)?; 251 | 252 | Ok(Type::Pointer(inner)) 253 | } 254 | 255 | /// Create a new array type of the given size and element type. 
256 | pub fn new_array(num_elements: u64, element_type: Type) -> Result { 257 | let inner = ArrayType::new(num_elements, element_type)?; 258 | 259 | Ok(Type::Array(inner)) 260 | } 261 | 262 | /// Create a new scalable vector type of the given size and element type. 263 | pub fn new_scalable_vector( 264 | num_elements: u64, 265 | element_type: Type, 266 | ) -> Result { 267 | let inner = VectorType::new(num_elements, element_type)?; 268 | 269 | Ok(Type::ScalableVector(inner)) 270 | } 271 | 272 | /// Create a new (fixed) vector type of the given size and element type. 273 | pub fn new_vector(num_elements: u64, element_type: Type) -> Result { 274 | let inner = VectorType::new(num_elements, element_type)?; 275 | 276 | Ok(Type::FixedVector(inner)) 277 | } 278 | 279 | /// Create a new function type of the given return type, parameter types, and variadic disposition. 280 | pub fn new_function( 281 | return_type: Type, 282 | param_types: Vec, 283 | is_vararg: bool, 284 | ) -> Result { 285 | let inner = FunctionType::new(return_type, param_types, is_vararg)?; 286 | 287 | Ok(Type::Function(inner)) 288 | } 289 | } 290 | 291 | /// Errors that can occur when constructing an [`StructType`](StructType). 292 | #[derive(Debug, Error)] 293 | pub enum StructTypeError { 294 | /// The requested element type is invalid. 295 | #[error("invalid structure element type: {0:?}")] 296 | BadElement(Type), 297 | } 298 | 299 | /// Represents a "struct" type. 300 | #[non_exhaustive] 301 | #[derive(Clone, Debug, PartialEq)] 302 | pub struct StructType { 303 | /// This structure's name, if is has one. 304 | pub name: Option, 305 | /// The individual fields of this structure. 306 | pub fields: Vec, 307 | /// Whether the fields of this structure are packed. 308 | is_packed: bool, 309 | } 310 | 311 | impl StructType { 312 | /// Create a new `StructType`. 
313 | pub fn new( 314 | name: Option, 315 | fields: Vec, 316 | is_packed: bool, 317 | ) -> Result { 318 | if let Some(bad) = fields.iter().find(|t| !t.is_struct_element()) { 319 | Err(StructTypeError::BadElement(bad.clone())) 320 | } else { 321 | Ok(Self { 322 | name, 323 | fields, 324 | is_packed, 325 | }) 326 | } 327 | } 328 | } 329 | 330 | /// Errors that can occur when constructing an [`IntegerType`](IntegerType). 331 | #[derive(Debug, Error)] 332 | pub enum IntegerTypeError { 333 | /// The requested bit width for this integer type is invalid. 334 | #[error( 335 | "specified bit width is invalid (not in [{}, {}])", 336 | IntegerType::MIN_INT_BITS, 337 | IntegerType::MAX_INT_BITS 338 | )] 339 | BadWidth, 340 | } 341 | 342 | /// Represents a fixed-width integral type. 343 | /// 344 | /// The validity of the internal width is correct by construction. 345 | #[non_exhaustive] 346 | #[derive(Clone, Debug, PartialEq, Eq)] 347 | pub struct IntegerType { 348 | /// The width of this integral type, in bits. 349 | bit_width: u32, 350 | } 351 | 352 | impl IntegerType { 353 | /// The minimum number of bits in a valid integer type. 354 | pub const MIN_INT_BITS: u32 = 1; 355 | /// The maximum number of bits in a valid integer type. 356 | pub const MAX_INT_BITS: u32 = (1 << 24) - 1; 357 | 358 | /// Returns the width of this integral type in bits. 359 | pub fn bit_width(&self) -> u32 { 360 | self.bit_width 361 | } 362 | 363 | /// Returns the width of this integral type in bytes. 364 | /// 365 | /// The byte width of this type may be larger than the number of bits needed. 
366 | pub fn byte_width(&self) -> u32 { 367 | (self.bit_width + 7) / 8 368 | } 369 | } 370 | 371 | impl TryFrom for IntegerType { 372 | type Error = IntegerTypeError; 373 | 374 | fn try_from(value: u32) -> Result { 375 | if (IntegerType::MIN_INT_BITS..=IntegerType::MAX_INT_BITS).contains(&value) { 376 | Ok(Self { bit_width: value }) 377 | } else { 378 | Err(Self::Error::BadWidth) 379 | } 380 | } 381 | } 382 | 383 | /// Errors that can occur when constructing a [`PointerType`](PointerType). 384 | #[derive(Debug, Error)] 385 | pub enum PointerTypeError { 386 | /// The requested pointee type is invalid. 387 | #[error("invalid pointee type: {0:?}")] 388 | BadPointee(Type), 389 | } 390 | 391 | /// Represents a pointer type in some address space. 392 | /// 393 | /// The internal pointee type is guaranteed to be valid by construction. 394 | #[non_exhaustive] 395 | #[derive(Clone, Debug, PartialEq)] 396 | pub struct PointerType { 397 | pointee: Box, 398 | address_space: AddressSpace, 399 | } 400 | 401 | impl PointerType { 402 | /// Create a new `PointerType`, failing with `BadPointee` if `pointee` is not a valid pointee type. 403 | pub fn new(pointee: Type, address_space: AddressSpace) -> Result { 404 | if pointee.is_pointee() { 405 | Ok(Self { 406 | pointee: Box::new(pointee), 407 | address_space, 408 | }) 409 | } else { 410 | Err(PointerTypeError::BadPointee(pointee)) 411 | } 412 | } 413 | 414 | /// Return a reference to the pointed-to type. 415 | pub fn pointee(&self) -> &Type { 416 | self.pointee.as_ref() 417 | } 418 | } 419 | 420 | /// Errors that can occur when constructing an [`ArrayType`](ArrayType). 421 | #[derive(Debug, Error)] 422 | pub enum ArrayTypeError { 423 | /// The requested element type is invalid. 424 | #[error("invalid array element type: {0:?}")] 425 | BadElement(Type), 426 | } 427 | 428 | /// Represents an array type. 
429 | #[non_exhaustive] 430 | #[derive(Clone, Debug, PartialEq)] 431 | pub struct ArrayType { 432 | num_elements: u64, 433 | element_type: Box, 434 | } 435 | 436 | impl ArrayType { 437 | /// Create a new `ArrayType`, failing with `BadElement` if `element_type` is not a valid array element type; `num_elements` is stored without validation. 438 | pub fn new(num_elements: u64, element_type: Type) -> Result { 439 | if element_type.is_array_element() { 440 | Ok(Self { 441 | num_elements, 442 | element_type: Box::new(element_type), 443 | }) 444 | } else { 445 | Err(ArrayTypeError::BadElement(element_type)) 446 | } 447 | } 448 | 449 | /// Return a reference to the inner element type. 450 | pub fn element(&self) -> &Type { 451 | self.element_type.as_ref() 452 | } 453 | } 454 | 455 | /// Errors that can occur when constructing a [`VectorType`](VectorType). 456 | #[derive(Debug, Error)] 457 | pub enum VectorTypeError { 458 | /// The requested element type is invalid. 459 | #[error("invalid vector element type: {0:?}")] 460 | BadElement(Type), 461 | } 462 | 463 | /// Represents a vector type. 464 | /// 465 | /// This vector may be fixed or scalable; which one is determined by its surrounding 466 | /// [`Type`](Type) variant. 467 | #[non_exhaustive] 468 | #[derive(Clone, Debug, PartialEq)] 469 | pub struct VectorType { 470 | num_elements: u64, 471 | element_type: Box, 472 | } 473 | 474 | impl VectorType { 475 | /// Create a new `VectorType`, failing with `BadElement` if `element_type` is not a valid vector element type; `num_elements` is stored without validation. 476 | pub fn new(num_elements: u64, element_type: Type) -> Result { 477 | if element_type.is_vector_element() { 478 | Ok(Self { 479 | num_elements, 480 | element_type: Box::new(element_type), 481 | }) 482 | } else { 483 | Err(VectorTypeError::BadElement(element_type)) 484 | } 485 | } 486 | 487 | /// Return a reference to the inner element type. 488 | pub fn element(&self) -> &Type { 489 | self.element_type.as_ref() 490 | } 491 | } 492 | 493 | /// Errors that can occur when constructing a [`FunctionType`](FunctionType). 494 | #[derive(Debug, Error)] 495 | pub enum FunctionTypeError { 496 | /// The requested return type is invalid. 
497 | #[error("invalid function return type: {0:?}")] 498 | BadReturn(Type), 499 | /// The requested parameter type is invalid. 500 | #[error("invalid function parameter type: {0:?}")] 501 | BadParameter(Type), 502 | } 503 | 504 | /// Represents a function type. 505 | #[non_exhaustive] 506 | #[derive(Clone, Debug, PartialEq)] 507 | pub struct FunctionType { 508 | return_type: Box, 509 | param_types: Vec, 510 | is_vararg: bool, 511 | } 512 | 513 | impl FunctionType { 514 | /// Create a new `FunctionType`, validating the return type first and then each parameter type in order; the first invalid type is reported. 515 | pub fn new( 516 | return_type: Type, 517 | param_types: Vec, 518 | is_vararg: bool, 519 | ) -> Result { 520 | if !return_type.is_return() { 521 | Err(FunctionTypeError::BadReturn(return_type)) 522 | } else if let Some(bad) = param_types.iter().find(|ty| !ty.is_argument()) { 523 | Err(FunctionTypeError::BadParameter(bad.clone())) 524 | } else { 525 | Ok(FunctionType { 526 | return_type: Box::new(return_type), 527 | param_types, 528 | is_vararg, 529 | }) 530 | } 531 | } 532 | } 533 | 534 | #[cfg(test)] 535 | mod tests { 536 | use super::*; 537 | 538 | #[test] 539 | fn test_integer_type() { 540 | { 541 | // Error cases: widths just outside [MIN_INT_BITS, MAX_INT_BITS]. 542 | assert!(IntegerType::try_from(0).is_err()); 543 | assert!(IntegerType::try_from(IntegerType::MAX_INT_BITS + 1).is_err()); 544 | } 545 | 546 | { 547 | // Normal cases. 
548 | let ty = IntegerType::try_from(IntegerType::MIN_INT_BITS).unwrap(); 549 | assert_eq!(ty.bit_width(), 1); 550 | assert_eq!(ty.byte_width(), 1); 551 | 552 | let ty = IntegerType::try_from(IntegerType::MAX_INT_BITS).unwrap(); 553 | assert_eq!(ty.bit_width(), IntegerType::MAX_INT_BITS); 554 | assert_eq!(ty.byte_width(), 2097152); 555 | 556 | let ty = IntegerType::try_from(31).unwrap(); 557 | assert_eq!(ty.bit_width(), 31); 558 | assert_eq!(ty.byte_width(), 4); 559 | 560 | let ty = IntegerType::try_from(32).unwrap(); 561 | assert_eq!(ty.bit_width(), 32); 562 | assert_eq!(ty.byte_width(), 4); 563 | 564 | for i in 1..=8 { 565 | let ty = IntegerType::try_from(i).unwrap(); 566 | assert_eq!(ty.bit_width(), i); 567 | assert_eq!(ty.byte_width(), 1); 568 | } 569 | } 570 | } 571 | 572 | #[test] 573 | fn test_pointer_type() { 574 | { 575 | // Error cases: none of these types is accepted as a pointee. 576 | assert!(PointerType::new(Type::Void, AddressSpace::default()).is_err()); 577 | assert!(PointerType::new(Type::Label, AddressSpace::default()).is_err()); 578 | assert!(PointerType::new(Type::Metadata, AddressSpace::default()).is_err()); 579 | assert!(PointerType::new(Type::Token, AddressSpace::default()).is_err()); 580 | assert!(PointerType::new(Type::X86Amx, AddressSpace::default()).is_err()); 581 | } 582 | 583 | { 584 | // Normal cases. 
585 | let ty = PointerType::new(Type::Double, AddressSpace::default()).unwrap(); 586 | assert_eq!(ty.pointee(), &Type::Double); 587 | 588 | let ty = 589 | PointerType::new(Type::new_integer(32).unwrap(), AddressSpace::default()).unwrap(); 590 | assert_eq!(ty.pointee(), &Type::new_integer(32).unwrap()); 591 | } 592 | } 593 | } 594 | -------------------------------------------------------------------------------- /release.toml: -------------------------------------------------------------------------------- 1 | pre-release-commit-message = "{{crate_name}}: {{version}}" 2 | dev-version = false 3 | publish = false # handled by GitHub Actions 4 | push = true 5 | --------------------------------------------------------------------------------