├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── ci.yml
    │   └── release.yml
├── .gitignore
├── Cargo.toml
├── LICENSE
├── Makefile
├── README.md
├── llvm-bitcursor
    ├── Cargo.toml
    ├── README.md
    └── src
    │   ├── error.rs
    │   └── lib.rs
├── llvm-bitstream
    ├── Cargo.toml
    ├── README.md
    ├── examples
    │   └── dump-bitstream.rs
    └── src
    │   ├── abbrev.rs
    │   ├── error.rs
    │   ├── lib.rs
    │   ├── parser.rs
    │   └── record.rs
├── llvm-constants
    ├── Cargo.toml
    ├── README.md
    ├── build.rs
    └── src
    │   ├── constants.rs
    │   ├── enums.rs
    │   └── lib.rs
├── llvm-mapper
    ├── Cargo.toml
    ├── README.md
    ├── examples
    │   └── unroll-bitstream.rs
    └── src
    │   ├── block
    │       ├── attributes.rs
    │       ├── function
    │       │   ├── basic_block.rs
    │       │   ├── instruction.rs
    │       │   └── mod.rs
    │       ├── identification.rs
    │       ├── mod.rs
    │       ├── module.rs
    │       ├── strtab.rs
    │       ├── symtab.rs
    │       ├── type_table.rs
    │       └── vst.rs
    │   ├── error.rs
    │   ├── lib.rs
    │   ├── map.rs
    │   ├── record
    │       ├── alias.rs
    │       ├── comdat.rs
    │       ├── datalayout.rs
    │       ├── function.rs
    │       └── mod.rs
    │   └── unroll.rs
├── llvm-support
    ├── Cargo.toml
    ├── README.md
    ├── build.rs
    └── src
    │   ├── align.rs
    │   ├── attribute.rs
    │   ├── bitcodes.rs
    │   ├── lib.rs
    │   ├── opcode.rs
    │   └── ty.rs
└── release.toml


/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: "cargo"
4 |     directory: "/"
5 |     schedule:
6 |       interval: "daily"
7 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   lint:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/checkout@v2
14 |       - name: Format
15 |         run: cargo fmt && git diff --exit-code
16 |       - name: Lint
17 |         run: |
18 |           rustup update
19 |           rustup component add clippy
20 |           cargo clippy -- \
21 |             -D warnings -D clippy::expect_used -D clippy::unwrap_used -D clippy::panic
22 |   test:
23 |     strategy:
24 |       matrix:
25 |         platform: ["ubuntu-latest", "macos-latest"]
26 |     runs-on: ${{ matrix.platform }}
27 |     steps:
28 |     - uses: actions/checkout@v2
29 | 
30 |     - name: Build
31 |       run: cargo build
32 | 
33 |     - name: Test
34 |       run: |
35 |         cargo test
36 |         cargo test --no-default-features
37 | 
38 |     - name: Doc
39 |       run: cargo doc
40 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     tags:
 4 |       # This is slightly annoying. Is there a better way to do this?
 5 |       - 'llvm-bitcursor-v*'
 6 |       - 'llvm-bitstream-v*'
 7 |       - 'llvm-constants-v*'
 8 |       - 'llvm-support-v*'
 9 |       - 'llvm-mapper-v*'
10 | 
11 | name: release
12 | 
13 | jobs:
14 |   release:
15 |     runs-on: ubuntu-latest
16 |     steps:
17 |       - name: create release
18 |         id: create_release
19 |         uses: actions/create-release@v1
20 |         env:
21 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22 |         with:
23 |           tag_name: ${{ github.ref }}
24 |           release_name: Release ${{ github.ref }}
25 |           draft: false
26 |           prerelease: ${{ contains(github.ref, 'pre') || contains(github.ref, 'rc') }}
27 | 
28 |   publish:
29 |     runs-on: ubuntu-latest
30 |     steps:
31 |       - uses: actions/checkout@v2
32 | 
33 |       - name: publish release
34 |         run: |
35 |           echo ${{ secrets.CRATES_IO_TOKEN }} | cargo login
36 |           tag="${GITHUB_REF#refs/tags/}"
37 |           package_being_published="${tag%-v*}"
38 |           cd "${package_being_published}" && cargo publish
39 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | Cargo.lock
3 | *.bc
4 | *.ll
5 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [workspace]
 2 | 
 3 | members = [
 4 |   "llvm-bitcursor",
 5 |   "llvm-bitstream",
 6 |   # Subsumed within llvm-support.
 7 |   # "llvm-constants",
 8 |   "llvm-support",
 9 |   "llvm-mapper",
10 | ]
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2021 William Woodruff <william @ yossarian.net>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | HAS_NIGHTLY := $(shell rustup toolchain list | grep nightly)
 2 | ifeq ($(HAS_NIGHTLY),)
 3 | 	FMT_FLAG :=
 4 | else
 5 | 	FMT_FLAG := +nightly
 6 | endif
 7 | 
 8 | .PHONY: all
 9 | all:
10 | 	@echo "This is not a real build system."
11 | 
12 | .PHONY: fmt
13 | fmt:
14 | 	cargo $(FMT_FLAG) fmt
15 | 
16 | .PHONY: lint
17 | lint:
18 | 	cargo clippy -- \
19 | 		-D warnings \
20 | 		-D clippy::expect_used \
21 | 		-D clippy::unwrap_used \
22 | 		-D clippy::panic
23 | 	@# NOTE(ww): run docs here too, since they can fail the CI when links are broken
24 | 	cargo doc
25 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | mollusc
 2 | =======
 3 | 
 4 | [![CI](https://github.com/woodruffw/mollusc/actions/workflows/ci.yml/badge.svg)](https://github.com/woodruffw/mollusc/actions/workflows/ci.yml)
 5 | 
 6 | ⚠️This is a work in progress! Many parts are incomplete or only partially functional!⚠️
 7 | 
 8 | *mollusc* is a collection of pure-Rust libraries for parsing, interpreting, and analyzing LLVM.
 9 | 
10 | | Crate   | Version | Description |
11 | | ------- | ------- | ----------- |
12 | | [`llvm-bitcursor`](./llvm-bitcursor) | [![Crates.io](https://img.shields.io/crates/v/llvm-bitcursor)](https://crates.io/crates/llvm-bitcursor) | A no-frills cursor library for reading fields from a bitstream. |
13 | | [`llvm-bitstream`](./llvm-bitstream) | [![Crates.io](https://img.shields.io/crates/v/llvm-bitstream)](https://crates.io/crates/llvm-bitstream) | A content-agnostic parser for LLVM's bitstream container format. |
14 | | [`llvm-constants`](./llvm-constants) | [![Crates.io](https://img.shields.io/crates/v/llvm-constants)](https://crates.io/crates/llvm-constants) | **Unused**. A collection of numeric and enum constants useful across multiple crates in the *mollusc* ecosystem. |
15 | | [`llvm-support`](./llvm-support) | [![Crates.io](https://img.shields.io/crates/v/llvm-support)](https://crates.io/crates/llvm-support) | Support types and routines for parsing LLVM's bitcode. |
16 | | [`llvm-mapper`](./llvm-mapper) | [![Crates.io](https://img.shields.io/crates/v/llvm-mapper)](https://crates.io/crates/llvm-mapper) | A library for mapping the contents of bitstreams into LLVM IR models. |
17 | | **Not implemented.** | N/A | A high level interface for interacting with LLVM IR. |
18 | 


--------------------------------------------------------------------------------
/llvm-bitcursor/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "llvm-bitcursor"
 3 | description = "A no-frills bitstream cursor library for Rust"
 4 | license = "MIT"
 5 | homepage = "https://github.com/woodruffw/mollusc/tree/main/llvm-bitcursor"
 6 | repository = "https://github.com/woodruffw/mollusc"
 7 | authors = ["William Woodruff <william@yossarian.net>"]
 8 | readme = "README.md"
 9 | keywords = ["llvm", "parsing", "binary", "encoding"]
10 | categories = ["compilers", "encoding", "parsing"]
11 | edition = "2018"
12 | version = "0.0.3"
13 | 
14 | [features]
15 | default = ["vbr"]
16 | vbr = []
17 | 
18 | [dependencies]
19 | log = "0.4"
20 | num = "0.4"
21 | thiserror = "1.0"
22 | 


--------------------------------------------------------------------------------
/llvm-bitcursor/README.md:
--------------------------------------------------------------------------------
 1 | llvm-bitcursor
 2 | ==============
 3 | 
 4 | [![Crates.io](https://img.shields.io/crates/v/llvm-bitcursor)](https://crates.io/crates/llvm-bitcursor)
 5 | [![Documentation](https://docs.rs/llvm-bitcursor/badge.svg)](https://docs.rs/llvm-bitcursor)
 6 | 
 7 | A no-frills cursor library that supports reading unaligned fields from
 8 | a bitstream.
 9 | 
10 | This library primarily exists to provide low-level support for the task
11 | of parsing LLVM's [bitstream format](https://llvm.org/docs/BitCodeFormat.html).
12 | If you're looking for a general purpose bitvector handling library, try
13 | [bitvec](https://crates.io/crates/bitvec) or [bit-vec](https://crates.io/crates/bit-vec).
14 | 
15 | Features:
16 | 
17 | * No-copy, all cursor state is internal
18 | * Support for LLVM's [VBR](https://llvm.org/docs/BitCodeFormat.html#variable-width-value) encoding
19 |   (requires the `vbr` feature)
20 | * 100% safe Rust, with `#![forbid(unsafe_code)]`
21 | * No use of `unwrap`, `expect`, or `panic`
22 | 
23 | Anti-features:
24 | 
25 | * Not a general purpose bitvector/bitstring handling library
26 | * Probably not very fast
27 | * Doesn't care about bit order (always LSB-first)
28 | 


--------------------------------------------------------------------------------
/llvm-bitcursor/src/error.rs:
--------------------------------------------------------------------------------
 1 | //! Error management for `llvm-bitcursor`.
 2 | 
 3 | use thiserror::Error as ThisError;
 4 | 
 5 | /// All errors potentially produced by `llvm-bitcursor` APIs.
 6 | /// Consumers should *not* attempt to match specific variants of this error type.
 7 | #[non_exhaustive]
 8 | #[derive(Debug, ThisError)]
 9 | pub enum Error {
10 |     /// A read or other I/O operation encountered the end of the inner buffer.
11 |     #[error("EOF while reading")]
12 |     Eof,
13 |     /// A user attempted to call [`BitCursor::new_with_len`](crate::BitCursor::new_with_len) with
14 |     /// an impossible length (larger than the supplied buffer).
15 |     #[error("invalid length for buffer supplied to cursor")]
16 |     InvalidLength,
17 |     /// A generic API (e.g. [`BitCursor::read_as`](crate::BitCursor::read_as)) was asked to
18 |     /// read a value larger than the requested type could represent.
19 |     #[error("loss of data with cast")]
20 |     BadCast,
21 |     /// A read API was called with an invalid bitsize (zero or too large).
22 |     #[error("invalid read size (zero or too large)")]
23 |     InvalidReadSize,
24 |     /// A VBR read API was called with an invalid VBR width.
25 |     #[cfg(any(feature = "vbr", doc))]
26 |     #[error("invalid VBR width (must be > 1 but <= system word width)")]
27 |     InvalidVbrWidth,
28 |     /// An I/O operation completed partially, but the inner buffer ended before its full completion.
29 |     #[error("too little data to service request")]
30 |     Short,
31 | }
32 | 


--------------------------------------------------------------------------------
/llvm-bitstream/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "llvm-bitstream"
 3 | description = "A content-agnostic parser for LLVM's bitstream container format"
 4 | license = "MIT"
 5 | homepage = "https://github.com/woodruffw/mollusc/tree/main/llvm-bitstream"
 6 | repository = "https://github.com/woodruffw/mollusc"
 7 | authors = ["William Woodruff <william@yossarian.net>"]
 8 | readme = "README.md"
 9 | keywords = ["llvm", "parsing", "binary", "encoding"]
10 | categories = ["compilers", "encoding", "parsing"]
11 | edition = "2018"
12 | version = "0.0.3"
13 | 
14 | [[example]]
15 | name = "dump-bitstream"
16 | 
17 | [dependencies]
18 | llvm-bitcursor = { version = "0.0.3", path = "../llvm-bitcursor" }
19 | llvm-support = { version = "0.0.3", path = "../llvm-support" }
20 | log = "0.4"
21 | num = "0.4"
22 | num_enum = "0.6"
23 | thiserror = "1.0"
24 | 
25 | [dev-dependencies]
26 | anyhow = "1.0"
27 | clap = "4.0"
28 | env_logger = "0.10"
29 | 


--------------------------------------------------------------------------------
/llvm-bitstream/README.md:
--------------------------------------------------------------------------------
 1 | llvm-bitstream
 2 | ==============
 3 | 
 4 | [![Crates.io](https://img.shields.io/crates/v/llvm-bitstream)](https://crates.io/crates/llvm-bitstream)
 5 | [![Documentation](https://docs.rs/llvm-bitstream/badge.svg)](https://docs.rs/llvm-bitstream)
 6 | 
 7 | A content-agnostic parser for LLVM's [bitstream container format](https://llvm.org/docs/BitCodeFormat.html).
 8 | 
 9 | Conceptually, this library is one step below a full LLVM bitcode parser:
10 | it can interpret the entries in a bitstream, but isn't aware of their semantics
11 | and isn't responsible for composing them into an LLVM IR
12 | program (or any other concrete structure that's been serialized as a bitstream).
13 | 
14 | This library uses [`llvm-bitcursor`](https://crates.io/crates/llvm-bitcursor) under the hood.
15 | 


--------------------------------------------------------------------------------
/llvm-bitstream/examples/dump-bitstream.rs:
--------------------------------------------------------------------------------
 1 | use std::fs;
 2 | 
 3 | use anyhow::Result;
 4 | use clap::{Arg, Command};
 5 | use llvm_bitstream::parser::StreamEntry;
 6 | use llvm_bitstream::Bitstream;
 7 | 
 8 | /// Builds the command-line interface: a single required positional `input` argument.
 9 | fn app() -> Command {
10 |     let input = Arg::new("input")
11 |         .help("the bitstream input to dump")
12 |         .index(1)
13 |         .required(true);
14 |     Command::new(env!("CARGO_PKG_NAME"))
15 |         .version(env!("CARGO_PKG_VERSION"))
16 |         .about(env!("CARGO_PKG_DESCRIPTION"))
17 |         .arg(input)
18 | }
19 | 
20 | /// Dumps the blocks and records of the bitstream named on the command line, one tab per nesting level.
21 | fn main() -> Result<()> {
22 |     env_logger::init();
23 |     let matches = app().get_matches();
24 | 
25 |     // `input` is a required argument; fail with an error instead of panicking if absent.
26 |     let path = matches
27 |         .get_one::<String>("input")
28 |         .ok_or_else(|| anyhow::anyhow!("missing required argument: input"))?;
29 |     let input = fs::read(path)?;
30 |     let (wrapper, bitstream) = Bitstream::from(&input)?;
31 | 
32 |     if let Some(wrapper) = wrapper {
33 |         println!("Wrapper: {:#?}", wrapper);
34 |     }
35 |     println!("Entered bitstream; magic: {:#X}", bitstream.magic);
36 | 
37 |     // Nesting depth of the currently open blocks; used only to indent the output.
38 |     let mut scope: usize = 0;
39 |     for entry in bitstream {
40 |         match entry? {
41 |             StreamEntry::SubBlock(block) => {
42 |                 println!("{}BLOCK {} {{", "\t".repeat(scope), block.block_id);
43 |                 scope += 1;
44 |             }
45 |             StreamEntry::EndBlock => {
46 |                 // Saturate so that an unbalanced end-of-block entry cannot underflow the depth.
47 |                 scope = scope.saturating_sub(1);
48 |                 println!("{}}}", "\t".repeat(scope));
49 |             }
50 |             StreamEntry::Record(record) => println!(
51 |                 "{}RECORD {{ code: {}, fields: {:?} }}",
52 |                 "\t".repeat(scope),
53 |                 record.code,
54 |                 record.fields
55 |             ),
56 |         }
57 |     }
58 | 
59 |     Ok(())
60 | }
61 | 


--------------------------------------------------------------------------------
/llvm-bitstream/src/abbrev.rs:
--------------------------------------------------------------------------------
  1 | //! Abbreviation definition and abbreviated record parsing and handling for `llvm-bitstream`.
  2 | 
  3 | use std::convert::{From, TryFrom, TryInto};
  4 | 
  5 | use llvm_bitcursor::BitCursor;
  6 | use llvm_support::bitcodes::{AbbrevOpEnc, ReservedAbbrevId};
  7 | use llvm_support::CHAR6_ALPHABET;
  8 | 
  9 | use crate::error::Error;
 10 | use crate::record::Fields;
 11 | 
 12 | /// An abbreviation ID, whether reserved or defined by the stream itself.
 13 | #[derive(Clone, Copy, Debug)]
 14 | pub enum AbbrevId {
 15 |     /// A reserved abbreviation ID, i.e. one that converts to a [`ReservedAbbrevId`].
 16 |     Reserved(ReservedAbbrevId),
 17 |     /// Any other abbreviation ID: one that's been defined within the stream (raw value kept).
 18 |     Defined(u64),
 19 | }
 20 | 
 21 | impl From<u64> for AbbrevId {
 22 |     /// Reserved IDs map to `Reserved`; every other value is kept as `Defined`.
 23 |     fn from(value: u64) -> Self {
 24 |         ReservedAbbrevId::try_from(value).map_or(Self::Defined(value), Self::Reserved)
 25 |     }
 26 | }
 27 | 
 28 | /// The valid abbreviation operand forms.
 29 | #[derive(Clone, Debug, PartialEq)]
 30 | pub enum AbbrevOp {
 31 |     /// A literal, constant operand.
 32 |     Literal(u64),
 33 |     /// A VBR whose width is associated as extra data.
 34 |     Vbr(u64),
 35 |     /// A fixed-width field whose width is associated as extra data.
 36 |     Fixed(u64),
 37 |     /// A length-prefixed array whose element operand is specified.
 38 |     Array(Box<AbbrevOp>),
 39 |     /// A single Char6.
 40 |     Char6,
 41 |     /// A length-prefixed blob of bytes.
 42 |     Blob,
 43 | }
 44 | 
 45 | impl AbbrevOp {
 46 |     /// Preallocation cap for array/blob fields; lengths read from the stream are untrusted.
 47 |     const MAX_PREALLOC: usize = 4096;
 48 | 
 49 |     /// Given a Char6 value, map it back to its ASCII printable equivalent.
 50 |     ///
 51 |     /// This function is private because it requires caller-upheld invariants
 52 |     /// for panic safety.
 53 |     fn decode_char6(char6: u8) -> u8 {
 54 |         // Panic safety: the caller is expected to constrain char6 to a valid
 55 |         // index within CHAR6_ALPHABET.
 56 |         CHAR6_ALPHABET[char6 as usize]
 57 |     }
 58 | 
 59 |     /// Parse a single abbreviation operand from the stream, returning the field(s)
 60 |     /// it yields (possibly none, for an empty array or blob).
 61 |     fn parse<T: AsRef<[u8]>>(&self, cur: &mut BitCursor<T>) -> Result<Fields, Error> {
 62 |         // A sad thing happens in this function: we parse by iterating over
 63 |         // each operand, collecting the field(s) in the bitstream that correspond to it.
 64 |         // Operands are typed and carry detailed information about their semantics:
 65 |         // for example, an `AbbrevOp::Char6` is exactly 6 bits and maps directly
 66 |         // to a printable character. It would be really nice if we could expose this structure
 67 |         // at a higher level, i.e. by returning a `Value` enum with different variants
 68 |         // for each operand, and higher levels could take advantage of it.
 69 |         // Unfortunately, LLVM does not let us do this: bitstream consumers **must**
 70 |         // be agnostic to how the bitstream is emitted, which means that an emitter's
 71 |         // decision to use a Char6 vs. a VBR6 cannot affect later, higher-level interpretation.
 72 |         // As a result, we have to discard all of our nice structure here in favor of
 73 |         // sequences of "fields," which are really just individual `u64`s.
 74 |         Ok(match self {
 75 |             AbbrevOp::Literal(val) => vec![*val],
 76 |             AbbrevOp::Vbr(width) => vec![cur.read_vbr(*width as usize)?],
 77 |             AbbrevOp::Fixed(width) => vec![cur.read_as::<u64>(*width as usize)?],
 78 |             AbbrevOp::Array(elem) => {
 79 |                 // An array operand is encoded as a length (VBR6), followed by
 80 |                 // each encoded element of the array.
 81 |                 // TODO(ww): Sanity check array_len here; for now it only bounds the preallocation.
 82 |                 let array_len = cur.read_vbr(6)? as usize;
 83 |                 let mut fields: Fields = Vec::with_capacity(array_len.min(Self::MAX_PREALLOC));
 84 |                 for _ in 0..array_len {
 85 |                     fields.extend(elem.parse(cur)?);
 86 |                 }
 87 |                 fields
 88 |             }
 89 |             AbbrevOp::Char6 => vec![Self::decode_char6(cur.read_as::<u8>(6)?).into()],
 90 |             AbbrevOp::Blob => {
 91 |                 // A blob operand is encoded as a length (VBR6), followed by a 32-bit aligned
 92 |                 // sequence of bytes, followed by another alignment back to 32 bits.
 93 |                 // TODO(ww): Sanity check blob_len here: it probably shouldn't be 0,
 94 |                 // and it definitely can't be longer than the stream.
 95 |                 let blob_len = cur.read_vbr(6)? as usize;
 96 |                 cur.align32();
 97 | 
 98 |                 // TODO(ww): This read loop is probably slower than it needs to be;
 99 |                 // `BitCursor` could probably learn a `read_bytes` API that's
100 |                 // only allowed when the stream is byte-aligned.
101 |                 let mut fields: Fields = Vec::with_capacity(blob_len.min(Self::MAX_PREALLOC));
102 |                 for _ in 0..blob_len {
103 |                     fields.push(cur.read_exact::<u8>()?.into());
104 |                 }
105 |                 cur.align32();
106 | 
107 |                 fields
108 |             }
109 |         })
110 |     }
111 | }
112 | 
113 | /// Represents a defined abbreviation, as specified by a `DEFINE_ABBREV` record.
114 | #[derive(Clone, Debug)]
115 | pub struct Abbrev {
116 |     /// The operands of this definition, in order; an array's element is nested in its `Array`.
117 |     pub operands: Vec<AbbrevOp>,
118 | }
119 | 
120 | impl Abbrev {
121 |     /// Parse a new `Abbrev` from the stream.
122 |     ///
123 |     /// Assumes that the `DEFINE_ABBREV` ID has already been consumed.
124 |     ///
125 |     /// # Errors
126 |     ///
127 |     /// Returns [`Error::AbbrevParse`] when the definition has no operands, when an
128 |     /// array or blob operand sits at an invalid index, or when an array's element is
129 |     /// not a fixed, VBR, or Char6 encoding. Cursor errors and unknown operand
130 |     /// encodings are propagated as their own variants.
131 |     pub fn new<T: AsRef<[u8]>>(cur: &mut BitCursor<T>) -> Result<Self, Error> {
132 |         // TODO(ww): This and other structures should probably implement a `FromStream`
133 |         // trait instead, for construction.
134 | 
135 |         // Per the LLVM docs: abbreviation records look like this:
136 |         // [DEFINE_ABBREV, VBR5:numabbrevops, abbrevop0, abbrevop1, ...]
137 |         // Our surrounding parse context should have consumed the DEFINE_ABBREV
138 |         // already, so we start with numabbrevops.
139 |         let num_abbrev_opnds = cur.read_vbr(5)?;
140 | 
141 |         // Abbreviated records must have at least one operand.
142 |         if num_abbrev_opnds < 1 {
143 |             return Err(Error::AbbrevParse(
144 |                 "expected at least one abbrev operand".into(),
145 |             ));
146 |         }
147 | 
148 |         log::debug!("expecting {} operands", num_abbrev_opnds);
149 | 
150 |         // Decode each abbreviation operand.
151 |         let mut operands = vec![];
152 |         for idx in 0..num_abbrev_opnds {
153 |             // Each operand starts with a single bit that indicates whether
154 |             // the operand is "literal" (i.e., a VBR8) or an "encoded" operand.
155 |             let operand_kind = cur.read(1)?;
156 | 
157 |             // If this operand is a literal, then we read it as a VBR8.
158 |             if operand_kind == 1 {
159 |                 operands.push(AbbrevOp::Literal(cur.read_vbr(8)?));
160 |                 continue;
161 |             }
162 | 
163 |             // Otherwise, we need to suss the encoding representation out of it.
164 |             // This is always a 3-bit field (**not** a VBR3), which in turn tells us whether the
165 |             // operand encoding includes extra data.
166 |             let enc: AbbrevOpEnc = cur.read(3)?.try_into()?;
167 |             let opnd = match enc {
168 |                 AbbrevOpEnc::Fixed => AbbrevOp::Fixed(cur.read_vbr(5)?),
169 |                 AbbrevOpEnc::Vbr => AbbrevOp::Vbr(cur.read_vbr(5)?),
170 |                 AbbrevOpEnc::Array => {
171 |                     // There is only ever one array operand in an abbreviation definition,
172 |                     // and it is always the second-to-last operand. Anything else is an error.
173 |                     // `checked_sub` keeps a one-operand definition from underflowing here.
174 |                     if num_abbrev_opnds.checked_sub(2) != Some(idx) {
175 |                         return Err(Error::AbbrevParse("array operand at invalid index".into()));
176 |                     }
177 | 
178 |                     // NOTE(ww): We get a little clever here: instead of parsing
179 |                     // the inner array operand on its own, we steal it here and stop
180 |                     // parsing operands as soon as the array has been pushed.
181 |                     // This works since array operands are guaranteed to be second-to-last,
182 |                     // followed only by their element operand encoding.
183 |                     //
184 |                     // The element must be an encoded operand: a literal cannot be an
185 |                     // array element, and its bits must not be misread as an encoding.
186 |                     if cur.read(1)? == 1 {
187 |                         return Err(Error::AbbrevParse(
188 |                             "array element operand cannot be a literal".into(),
189 |                         ));
190 |                     }
191 | 
192 |                     let elem_enc: AbbrevOpEnc = cur.read(3)?.try_into()?;
193 |                     let elem = match elem_enc {
194 |                         AbbrevOpEnc::Fixed => AbbrevOp::Fixed(cur.read_vbr(5)?),
195 |                         AbbrevOpEnc::Vbr => AbbrevOp::Vbr(cur.read_vbr(5)?),
196 |                         AbbrevOpEnc::Char6 => AbbrevOp::Char6,
197 |                         _ => {
198 |                             // Blobs and arrays cannot themselves be member types.
199 |                             return Err(Error::AbbrevParse(format!(
200 |                                 "invalid element type for an array: {:?}",
201 |                                 elem_enc
202 |                             )));
203 |                         }
204 |                     };
205 | 
206 |                     operands.push(AbbrevOp::Array(Box::new(elem)));
207 |                     break;
208 |                 }
209 |                 AbbrevOpEnc::Char6 => AbbrevOp::Char6,
210 |                 AbbrevOpEnc::Blob => {
211 |                     // Similarly to arrays: there is only ever one blob operand.
212 |                     // Blobs don't have an element type, so they're always the last operand.
213 |                     if idx != num_abbrev_opnds - 1 {
214 |                         return Err(Error::AbbrevParse("blob operand at invalid index".into()));
215 |                     }
216 | 
217 |                     AbbrevOp::Blob
218 |                 }
219 |             };
220 | 
221 |             operands.push(opnd);
222 |         }
223 | 
224 |         Ok(Self { operands })
225 |     }
226 | 
227 |     /// Parse an abbreviated record from this stream, returning its fields.
228 |     ///
229 |     /// Operands are parsed in definition order, and the fields each one yields are
230 |     /// appended to a single flat list.
231 |     pub fn parse<T: AsRef<[u8]>>(&self, cur: &mut BitCursor<T>) -> Result<Fields, Error> {
232 |         let mut fields = Fields::new();
233 |         for opnd in &self.operands {
234 |             // Bail out on the first operand that fails to parse.
235 |             fields.extend(opnd.parse(cur)?);
236 |         }
237 | 
238 |         Ok(fields)
239 |     }
240 | }
241 | 


--------------------------------------------------------------------------------
/llvm-bitstream/src/error.rs:
--------------------------------------------------------------------------------
 1 | //! Errors for `llvm-bitstream`.
 2 | 
 3 | use llvm_bitcursor::error::Error as CursorError;
 4 | use llvm_support::bitcodes::{AbbrevOpEnc, BlockInfoCode};
 5 | use num_enum::TryFromPrimitiveError;
 6 | use thiserror::Error as ThisError;
 7 | 
 8 | /// All possible errors that can occur while parsing a bitstream.
 9 | #[derive(Debug, ThisError)]
10 | pub enum Error {
11 |     /// The underlying bitstream has no more data to parse.
12 |     #[error("bitstream has been exhausted")]
13 |     Exhausted,
14 |     /// The underlying [`BitCursor`](llvm_bitcursor::BitCursor) returned an error
15 |     /// that we couldn't specialize.
16 |     #[error("underlying bitcursor error")]
17 |     Cursor(#[from] CursorError),
18 |     /// We couldn't parse the wrapper structure or other data that precedes the actual bitstream.
19 |     #[error("couldn't parse bitstream container: {0}")]
20 |     BadContainer(String),
21 |     /// A record in the `BLOCKINFO` block has a code that we don't know.
22 |     /// `BLOCKINFO` must be fully interpreted in order to correctly parse the remainder of
23 |     /// the bitstream, so this is a hard error.
24 |     #[error("bad record code for BLOCKINFO block")]
25 |     BadBlockInfoCode(#[from] TryFromPrimitiveError<BlockInfoCode>),
26 |     /// An operand in a `DEFINE_ABBREV` definition has a code that we don't know.
27 |     /// This indicates either a malformed bitstream or a new operand format that
28 |     /// we don't yet support, so it's a hard error.
29 |     #[error("bad operand code for DEFINE_ABBREV operand")]
30 |     BadAbbrevOpEnc(#[from] TryFromPrimitiveError<AbbrevOpEnc>),
31 |     /// A generic error occurred while parsing the bitstream; the payload describes it.
32 |     #[error("error while parsing stream: {0}")]
33 |     StreamParse(String),
34 |     /// An error occurred while interpreting a `DEFINE_ABBREV` record; the payload describes it.
35 |     #[error("error while parsing abbrev record: {0}")]
36 |     AbbrevParse(String),
37 |     /// An abbreviated record could not be mapped back to a definition; the payload is the unknown ID.
38 |     #[error("unknown abbreviation for ID: {0}")]
39 |     BadAbbrev(u64),
40 |     /// An error occurred during block scope entrance or exit; the payload describes it.
41 |     #[error("error while parsing block scope: {0}")]
42 |     BadScope(String),
43 | }
44 | 


--------------------------------------------------------------------------------
/llvm-bitstream/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! `llvm-bitstream` is a library for interpreting files in LLVM's
  2 | //! [bitstream format](https://llvm.org/docs/BitCodeFormat.html).
  3 | 
  4 | #![deny(rustdoc::broken_intra_doc_links)]
  5 | #![deny(missing_docs)]
  6 | #![allow(clippy::redundant_field_names)]
  7 | #![forbid(unsafe_code)]
  8 | 
  9 | pub mod abbrev;
 10 | pub mod error;
 11 | pub mod parser;
 12 | pub mod record;
 13 | 
 14 | use std::io::{Seek, SeekFrom};
 15 | 
 16 | use llvm_bitcursor::BitCursor;
 17 | use llvm_support::BITCODE_WRAPPER_MAGIC;
 18 | 
 19 | use crate::error::Error;
 20 | use crate::parser::StreamEntry;
 21 | 
 22 | /// The header of a bitcode wrapper, as read field-by-field by `Bitstream::from_wrapped`.
 23 | #[derive(Debug)]
 24 | pub struct BitcodeWrapper {
 25 |     /// The magic for this wrapper (`Bitstream::from` checks it against `BITCODE_WRAPPER_MAGIC`).
 26 |     pub magic: u32,
 27 |     /// The version for this wrapper.
 28 |     pub version: u32,
 29 |     /// The offset to the actual bitstream; `from_wrapped` seeks here before parsing.
 30 |     pub offset: u32,
 31 |     /// The size of the wrapped bitstream; `from_wrapped` uses it to bound the cursor's length.
 32 |     pub size: u32,
 33 |     /// A target-specific value that encodes the CPU type.
 34 |     pub cpu_type: u32,
 35 | }
 36 | 
 37 | /// Represents an overarching bitstream container.
 38 | ///
 39 | /// This struct is responsible for managing two pieces of state:
 40 | /// 1. The application-specific magic that identifies the input.
 41 | /// 2. An underlying [`StreamParser`](crate::parser::StreamParser) that can
 42 | ///    be advanced to produce individual blocks and records within the bitstream.
 43 | #[derive(Debug)]
 44 | pub struct Bitstream<T: AsRef<[u8]>> {
 45 |     /// The application-specific magic, read as the first 32-bit field of the bitstream.
 46 |     pub magic: u32,
 47 |     parser: parser::StreamParser<T>,
 48 | }
 49 | 
 50 | impl<T: AsRef<[u8]>> Bitstream<T> {
 51 |     fn from_cursor(mut cur: BitCursor<T>) -> Result<Self, Error> {
 52 |         // This isn't documented anywhere, but LLVM's BitcodeReader requires
 53 |         // the length of every input to be a multiple of 4 bytes.
 54 |         // See: `llvm::initStream` in `Bitcode/Reader/BitcodeReader.cpp`.
 55 |         if cur.byte_len() % 4 != 0 {
 56 |             return Err(Error::BadContainer("input is not 4-byte aligned".into()));
 57 |         }
 58 | 
 59 |         // Every bitstream starts with an aligned, 32-bit magic field.
 60 |         // If we can't even read that, report `BadContainer` instead of continuing the parse.
 61 |         Ok(Self {
 62 |             magic: cur.read_exact::<u32>().map_err(|e| {
 63 |                 Error::BadContainer(format!(
 64 |                     "bitstream should have begun with magic, but errored: {:?}",
 65 |                     e
 66 |                 ))
 67 |             })?,
 68 |             parser: parser::StreamParser::new(cur),
 69 |         })
 70 |     }
 71 | 
 72 |     /// Create a new `Bitstream` from the given source, parsing the bitcode wrapper if the
 73 |     /// input starts with one. The returned wrapper is `Some` only for wrapped inputs.
 74 |     pub fn from(inner: T) -> Result<(Option<BitcodeWrapper>, Self), Error> {
 75 |         log::debug!("beginning intelligent parse");
 76 |         let mut cur = BitCursor::new(&inner);
 77 | 
 78 |         // Read the magic through a borrowing cursor to determine which parse strategy to use.
 79 |         let magic = cur.read_exact::<u32>()?;
 80 | 
 81 |         // The only wrapper we currently know is the bitcode wrapper.
 82 |         // If our magic doesn't match that, then we try the raw parser.
 83 |         if magic == BITCODE_WRAPPER_MAGIC {
 84 |             log::debug!("input looks like a bitcode wrapper!");
 85 |             let (wrapper, parser) = Self::from_wrapped(inner)?;
 86 |             Ok((Some(wrapper), parser))
 87 |         } else {
 88 |             log::debug!("input is probably a raw bitstream!");
 89 |             Ok((None, Self::from_raw(inner)?))
 90 |         }
 91 |     }
 92 | 
 93 |     /// Create a new `Bitstream` from the given raw (unwrapped) source.
 94 |     ///
 95 |     /// **NOTE**: This function assumes that it's being given a "raw" bitstream,
 96 |     /// i.e. not one that's been wrapped with another container (such as the
 97 |     /// bitcode wrapper format). To parse a wrapped bitstream, use the
 98 |     /// [`from_wrapped`](Bitstream::from_wrapped) API.
 99 |     pub fn from_raw(inner: T) -> Result<Self, Error> {
100 |         let cur = BitCursor::new(inner);
101 |         Self::from_cursor(cur)
102 |     }
103 | 
104 |     /// Create a new `Bitstream` from the given wrapped source.
105 |     ///
106 |     /// The source is parsed as if it begins with a
107 |     /// [bitcode wrapper](https://llvm.org/docs/BitCodeFormat.html#bitcode-wrapper-format).
108 |     /// "Raw" inputs should be parsed with [`from_raw`](Bitstream::from_raw) instead.
109 |     pub fn from_wrapped(inner: T) -> Result<(BitcodeWrapper, Self), Error> {
110 |         let mut cur = BitCursor::new(&inner);
111 | 
112 |         let wrapper = BitcodeWrapper {
113 |             magic: cur.read_exact::<u32>()?,
114 |             version: cur.read_exact::<u32>()?,
115 |             offset: cur.read_exact::<u32>()?,
116 |             size: cur.read_exact::<u32>()?,
117 |             cpu_type: cur.read_exact::<u32>()?,
118 |         };
119 | 
120 |         // NOTE(ww): The `new_with_len` API is a little bit silly -- ideally we'd just
121 |         // take a slice of `inner` and create a new `BitCursor` with it, but we can't do
122 |         // that while preserving the generic `T` bound.
123 |         // We keep the wrapper header in the new cursor to make the offsets more intelligible,
124 |         // so the cursor has to end where the wrapped stream ends: at `offset + size`, rather
125 |         // than `size` plus an assumed 20-byte header. Saturate so hostile fields can't overflow.
126 |         let actual_length = (wrapper.offset as usize).saturating_add(wrapper.size as usize);
127 |         let mut cur = BitCursor::new_with_len(inner, actual_length)?;
128 | 
129 |         cur.seek(SeekFrom::Start(wrapper.offset.into()))
130 |             .map_err(|e| {
131 |                 Error::StreamParse(format!("couldn't seek past bitcode wrapper: {:?}", e))
132 |             })?;
133 |         Ok((wrapper, Self::from_cursor(cur)?))
134 |     }
135 | 
136 |     /// Advance the underlying bitstream parser by one entry (delegates to the parser's `advance`).
137 |     ///
138 |     /// NOTE: Most users should prefer the iterator implementation.
139 |     pub fn advance(&mut self) -> Result<StreamEntry, Error> {
140 |         self.parser.advance()
141 |     }
142 | }
143 | 
144 | impl<T: AsRef<[u8]>> Iterator for Bitstream<T> {
145 |     type Item = Result<StreamEntry, Error>;
146 | 
147 |     fn next(&mut self) -> Option<Self::Item> {
148 |         match self.advance() {
149 |             Ok(entry) => Some(Ok(entry)),
150 |             Err(Error::Exhausted) => None, // exhaustion ends the iteration instead of being yielded
151 |             Err(e) => Some(Err(e)), // every other error is handed to the caller
152 |         }
153 |     }
154 | }
155 | 
156 | #[cfg(test)]
157 | mod tests {}
158 | 


--------------------------------------------------------------------------------
/llvm-bitstream/src/parser.rs:
--------------------------------------------------------------------------------
  1 | //! Core parsing functionality for `llvm-bitstream`.
  2 | 
  3 | use std::collections::HashMap;
  4 | use std::convert::TryInto;
  5 | use std::iter;
  6 | 
  7 | use llvm_bitcursor::BitCursor;
  8 | use llvm_support::bitcodes::{BlockInfoCode, ReservedAbbrevId, ReservedBlockId};
  9 | use llvm_support::{FIRST_APPLICATION_ABBREV_ID, INITIAL_ABBREV_ID_WIDTH};
 10 | 
 11 | use crate::abbrev::{self, AbbrevId};
 12 | use crate::error::Error;
 13 | use crate::record::{Block, Fields, Record};
 14 | 
 15 | /// The kinds of entries we can see while advancing through the bitstream.
 16 | /// Abbreviations are handled transparently by the parser, and thus are
 17 | /// never surfaced as `StreamEntry` values.
 18 | #[derive(Debug)]
 19 | pub enum StreamEntry {
 20 |     /// The end of a block scope.
 21 |     EndBlock,
 22 |     /// The beginning of a new block scope, described by the given `Block` (ID and length).
 23 |     SubBlock(Block),
 24 |     /// A record within the current scope. Its `abbrev_id` is `None` when the record
 25 |     /// was parsed from an `UNABBREV_RECORD` encoding.
 26 |     Record(Record),
 27 | }
 28 | 
 29 | impl StreamEntry {
 30 |     /// Consumes this `StreamEntry` and returns its inner [Block](crate::record::Block), if it is
 31 |     /// in fact a block.
 32 |     ///
 33 |     /// If the entry is not a block, returns `None`.
 34 |     pub fn as_block(self) -> Option<Block> {
 35 |         match self {
 36 |             StreamEntry::SubBlock(block) => Some(block),
 37 |             _ => None,
 38 |         }
 39 |     }
 40 | }
 41 | 
 42 | /// Represents the necessary parse state for a particular scope in the bitstream.
 43 | ///
 44 | /// Note that a scope does not *necessarily* correspond to a block: every
 45 | /// parser begins with an initial non-block scope before the first block is encountered.
 46 | #[derive(Debug)]
 47 | enum Scope {
 48 |     Initial, // the synthetic starting scope; reads IDs with `INITIAL_ABBREV_ID_WIDTH`
 49 |     Block {
 50 |         abbrev_id_width: u64, // width used to read abbreviation IDs inside this block
 51 |         block_id: u64, // the ID this block was entered with
 52 |         blockinfo_block_id: Option<u64>, // last `SETBID` value; only meaningful for `BLOCKINFO`
 53 |         abbrevs: Vec<abbrev::Abbrev>, // definitions, indexed from `FIRST_APPLICATION_ABBREV_ID`
 54 |     },
 55 | }
 56 | 
 57 | impl Default for Scope {
 58 |     fn default() -> Self {
 59 |         Self::Initial // parsing always starts in the non-block scope
 60 |     }
 61 | }
 62 | 
 63 | impl Scope {
 64 |     /// Returns a new block scope with no abbreviation definitions and no `SETBID` state.
 65 |     pub(self) fn new(abbrev_id_width: u64, block_id: u64) -> Self {
 66 |         Self::Block {
 67 |             abbrev_id_width: abbrev_id_width,
 68 |             block_id: block_id,
 69 |             blockinfo_block_id: None,
 70 |             abbrevs: vec![],
 71 |         }
 72 |     }
 73 | 
 74 |     /// Returns the width used for abbreviation IDs (`INITIAL_ABBREV_ID_WIDTH` outside any block).
 75 |     pub(self) fn abbrev_id_width(&self) -> u64 {
 76 |         match self {
 77 |             Scope::Initial => INITIAL_ABBREV_ID_WIDTH,
 78 |             Scope::Block {
 79 |                 abbrev_id_width, ..
 80 |             } => *abbrev_id_width,
 81 |         }
 82 |     }
 83 | 
 84 |     /// Extend the current (block) scope's abbreviation definition list with the given
 85 |     /// iterator, appending the new definitions after any that are already present.
 86 |     ///
 87 |     /// Returns an error if used on a non-block scope.
 88 |     pub(self) fn extend_abbrevs(
 89 |         &mut self,
 90 |         new_abbrevs: impl iter::IntoIterator<Item = abbrev::Abbrev>,
 91 |     ) -> Result<(), Error> {
 92 |         match self {
 93 |             Scope::Initial => Err(Error::BadScope(
 94 |                 "non-block scope cannot reference abbreviations".into(),
 95 |             )),
 96 |             Scope::Block { abbrevs, .. } => {
 97 |                 abbrevs.extend(new_abbrevs);
 98 |                 Ok(())
 99 |             }
100 |         }
101 |     }
102 | 
103 |     /// Return a reference to the abbreviation definition with the given `abbrev_id`.
104 |     ///
105 |     /// Returns an error if the scope cannot contain abbreviation definitions, if `abbrev_id`
106 |     /// is below `FIRST_APPLICATION_ABBREV_ID`, or if no definition exists for the given ID.
107 |     pub(self) fn get_abbrev(&self, abbrev_id: u64) -> Result<&abbrev::Abbrev, Error> {
108 |         match self {
109 |             Scope::Initial => Err(Error::BadScope(
110 |                 "non-block scope cannot contain records".into(),
111 |             )),
112 |             Scope::Block { abbrevs, .. } => {
113 |                 let idx = abbrev_id.checked_sub(FIRST_APPLICATION_ABBREV_ID as u64);
114 |                 idx.and_then(|i| abbrevs.get(i as usize)).ok_or(Error::BadAbbrev(abbrev_id))
115 |             }
116 |         }
117 |     }
118 | 
119 |     /// Returns `true` if this scope is a block whose ID is `ReservedBlockId::BlockInfo`.
120 |     ///
121 |     /// This keeps the [`StreamParser`](StreamParser) honest when determining
122 |     /// which blocks and/or records to emit entries for.
123 |     pub(self) fn is_blockinfo(&self) -> bool {
124 |         match self {
125 |             Scope::Initial => false,
126 |             Scope::Block { block_id, .. } => *block_id == ReservedBlockId::BlockInfo as u64,
127 |         }
128 |     }
129 | 
130 |     /// Returns the last block ID recorded with `SETBID`, or `None` if none has been recorded.
131 |     ///
132 |     /// This function's return is only sensible in the context of a scope corresponding
133 |     /// to `BLOCKINFO`. Use on any other scope constitutes API misuse.
134 |     pub(self) fn blockinfo_block_id(&self) -> Option<u64> {
135 |         match self {
136 |             Scope::Initial => None,
137 |             Scope::Block {
138 |                 blockinfo_block_id, ..
139 |             } => *blockinfo_block_id,
140 |         }
141 |     }
142 | 
143 |     /// Sets the current block ID for the `BLOCKINFO` block's state machine.
144 |     ///
145 |     /// Returns an error on the initial (non-block) scope. NOTE: any block scope is accepted
146 |     /// here, so callers are expected to check `is_blockinfo` themselves before calling this.
147 |     pub(self) fn set_blockinfo_block_id(&mut self, new_bid: u64) -> Result<(), Error> {
148 |         if let Scope::Block {
149 |             blockinfo_block_id, ..
150 |         } = self
151 |         {
152 |             *blockinfo_block_id = Some(new_bid);
153 |             return Ok(());
154 |         }
155 | 
156 |         Err(Error::BadScope(
157 |             "can't set BLOCKINFO block ID for non-BLOCKINFO scope".into(),
158 |         ))
159 |     }
160 | }
161 | 
162 | /// A parser for individual bitstream entries.
163 | ///
164 | /// This structure is **not** a general-purpose parser for bitstream inputs:
165 | /// it expects to be given a prepared [`BitCursor`](BitCursor) whose internal
166 | /// state is correct (i.e., has been advanced past the initial input magic).
167 | ///
168 | /// For a general-purpose parser with the correct state management, see
169 | /// [`Bitstream`](crate::Bitstream).
170 | #[derive(Debug)]
171 | pub struct StreamParser<T: AsRef<[u8]>> {
172 |     cursor: BitCursor<T>, // the input, positioned at the next entry to read
173 |     scopes: Vec<Scope>, // scope stack: the last element is the current scope
174 |     blockinfo: HashMap<u64, Vec<abbrev::Abbrev>>, // `BLOCKINFO` abbrevs, keyed by target block ID
175 | }
176 | 
177 | impl<T: AsRef<[u8]>> StreamParser<T> {
178 |     /// Create a new `StreamParser` from the given `BitCursor`, starting in the initial scope.
179 |     ///
180 |     /// See the struct-level documentation for caveats.
181 |     pub(crate) fn new(cur: BitCursor<T>) -> Self {
182 |         Self {
183 |             cursor: cur,
184 |             scopes: vec![Scope::default()],
185 |             blockinfo: Default::default(),
186 |         }
187 |     }
188 | 
189 |     /// Returns the current scope.
190 |     fn scope(&self) -> &Scope {
191 |         // Unwrap safety: `scopes` is always created with at least one scope, and
192 |         // `exit_block` refuses to pop the last one, so `last()` cannot fail.
193 |         #[allow(clippy::unwrap_used)]
194 |         self.scopes.last().unwrap()
195 |     }
196 | 
197 |     /// Returns the current scope as a mutable reference.
198 |     fn scope_mut(&mut self) -> &mut Scope {
199 |         // Unwrap safety: `scopes` is always created with at least one scope, and
200 |         // `exit_block` refuses to pop the last one, so `last_mut()` cannot fail.
201 |         #[allow(clippy::unwrap_used)]
202 |         self.scopes.last_mut().unwrap()
203 |     }
204 | 
205 |     /// Enter a block, creating the scope state for interpreting records within it.
206 |     /// A block that declares an abbreviation ID width of zero is rejected with `BadScope`.
207 |     ///
208 |     /// If this block is a "metadata" one (e.g., `BLOCKINFO`), returns `None`.
209 |     fn enter_block(&mut self) -> Result<Option<StreamEntry>, Error> {
210 |         let block_id = self.cursor.read_vbr(8)?;
211 |         let new_width = self.cursor.read_vbr(4)?;
212 | 
213 |         self.cursor.align32();
214 | 
215 |         if new_width < 1 {
216 |             return Err(Error::BadScope(format!(
217 |                 "can't enter block: invalid code size: {}",
218 |                 new_width
219 |             )));
220 |         }
221 | 
222 |         // The encoded block length is measured in 32-bit words, so our
223 |         // actual block length in bytes is the word count times the bytesize
224 |         // of each word.
225 |         let block_len = self.cursor.read(32)? * 4;
226 |         log::debug!(
227 |             "entered block: ID={}, new abbrev width={}, block_len={} @ bit position {}",
228 |             block_id,
229 |             new_width,
230 |             block_len,
231 |             self.cursor.tell_bit()
232 |         );
233 | 
234 |         // Create a new scope for the block we've just entered.
235 |         self.scopes.push(Scope::new(new_width, block_id));
236 | 
237 |         // If our blockinfo map contains any abbrevs for the current block ID, add them here.
238 |         if let Some(abbrevs) = self.blockinfo.get(&block_id).map(|a| a.to_vec()) {
239 |             self.scope_mut().extend_abbrevs(abbrevs)?;
240 |         }
241 | 
242 |         // If we've just entered a BLOCKINFO block, return `None` to avoid
243 |         // surfacing parse details to the `advance()` API.
244 |         if self.scope().is_blockinfo() {
245 |             return Ok(None);
246 |         }
247 | 
248 |         // Otherwise, return an appropriate entry.
249 |         Ok(Some(StreamEntry::SubBlock(Block {
250 |             block_id: block_id,
251 |             len: block_len,
252 |         })))
253 |     }
254 | 
255 |     /// Exit a block, restoring the parent's scope; yields `None` when leaving `BLOCKINFO`.
256 |     fn exit_block(&mut self) -> Result<Option<StreamEntry>, Error> {
257 |         // An END_BLOCK record just aligns the stream.
258 |         self.cursor.align32();
259 | 
260 |         // NOTE(ww): We never allow an END_BLOCK to pop the last scope,
261 |         // since the last scope is synthetic and does not correspond to a real block.
262 |         if self.scopes.len() <= 1 {
263 |             return Err(Error::BadScope(
264 |                 "malformed stream: cannot perform END_BLOCK because scope stack is empty".into(),
265 |             ));
266 |         }
267 | 
268 |         // Unwrap safety: we checked above that there are at least two scopes, so this cannot fail.
269 |         #[allow(clippy::unwrap_used)]
270 |         let scope = self.scopes.pop().unwrap();
271 | 
272 |         log::debug!("exit_block: new active scope is {:?}", self.scope());
273 | 
274 |         // If we're exiting a BLOCKINFO, we have nothing to return.
275 |         if scope.is_blockinfo() {
276 |             return Ok(None);
277 |         }
278 | 
279 |         Ok(Some(StreamEntry::EndBlock))
280 |     }
281 | 
282 |     /// Interpret a `DEFINE_ABBREV` record and register the resulting abbreviation.
283 |     fn define_abbrev(&mut self) -> Result<(), Error> {
284 |         let abbrev = abbrev::Abbrev::new(&mut self.cursor)?;
285 |         log::debug!("new abbrev: {:?}", abbrev);
286 | 
287 |         // `DEFINE_ABBREV` occurs in two contexts: either in a `BLOCKINFO`
288 |         // block (where it affects all blocks with block ID defined by the current `SETBID`),
289 |         // or in any other block, where it affects only the current scope.
290 |         // For the latter case we assume that any `BLOCKINFO`-defined abbrevs have
291 |         // already been loaded into the current scope.
292 |         if self.scope().is_blockinfo() {
293 |             let block_id = self.scope().blockinfo_block_id().ok_or_else(|| {
294 |                 Error::StreamParse("DEFINE_ABBREV in BLOCKINFO but no preceding SETBID".into())
295 |             })?;
296 |             self.blockinfo
297 |                 .entry(block_id)
298 |                 .or_insert_with(Vec::new)
299 |                 .push(abbrev);
300 |         } else {
301 |             self.scope_mut().extend_abbrevs(iter::once(abbrev))?;
302 |         }
303 | 
304 |         Ok(())
305 |     }
306 | 
307 |     /// Interpret an `UNABBREV_RECORD` record; records inside `BLOCKINFO` are consumed (`None`).
308 |     fn parse_unabbrev(&mut self) -> Result<Option<StreamEntry>, Error> {
309 |         // Sanity check: `UNABBREV_RECORD` can only occur inside a block,
310 |         // so the current scope must be a block.
311 |         if matches!(self.scope(), Scope::Initial) {
312 |             return Err(Error::StreamParse(
313 |                 "UNABBREV_RECORD outside of any block scope".into(),
314 |             ));
315 |         }
316 | 
317 |         // An unabbrev record looks like this:
318 |         // [code:VBR6, numops:VBR6, op0:VBR6, op1:VBR6, ...]
319 |         // `numops` is untrusted input, so we never preallocate storage based on it.
320 |         let code: u64 = self.cursor.read_vbr(6)?;
321 |         let num_opnds = self.cursor.read_vbr(6)?;
322 | 
323 |         log::debug!("unabbrev record code={}, num_opnds={}", code, num_opnds);
324 |         let fields = (0..num_opnds)
325 |             .map(|_| self.cursor.read_vbr(6))
326 |             .collect::<Result<Fields, _>>()?;
327 | 
328 |         let record = Record::from_unabbrev(code, fields);
329 |         if self.scope().is_blockinfo() {
330 |             let code: BlockInfoCode = record.code.try_into()?;
331 |             match code {
332 |                 BlockInfoCode::SetBid => {
333 |                     let block_id: u64 = record.fields.first().copied().ok_or_else(|| {
334 |                         Error::StreamParse("SETBID record has no block ID operand".into())
335 |                     })?;
336 |                     log::debug!("SETBID: BLOCKINFO block ID is now {}", block_id);
337 |                     self.scope_mut().set_blockinfo_block_id(block_id)?;
338 |                 }
339 |                 BlockInfoCode::BlockName => log::debug!("skipping BLOCKNAME code in BLOCKINFO"),
340 |                 BlockInfoCode::SetRecordName => {
341 |                     log::debug!("skipping SETRECORDNAME code in BLOCKINFO")
342 |                 }
343 |                 o => log::debug!("skipping unsupported record {:?} in BLOCKINFO", o),
344 |             };
345 |             return Ok(None);
346 |         }
347 | 
348 |         Ok(Some(StreamEntry::Record(record)))
349 |     }
350 | 
351 |     /// Interpret a record using its corresponding abbreviation definition.
352 |     fn parse_with_abbrev(&mut self, abbrev_id: u64) -> Result<Option<StreamEntry>, Error> {
353 |         // Fetch the abbreviation for this ID (failing if we don't have one) and parse with it.
354 |         // TODO(ww): The clone is a little annoying, but we need it to avoid mixing mutable
355 |         // and immutable borrows here. There is absolutely a better way to do that.
356 |         let abbrev = self.scope().get_abbrev(abbrev_id)?.clone();
357 | 
358 |         let mut fields = abbrev.parse(&mut self.cursor)?;
359 |         log::debug!("parsed fields: {:?}", fields);
360 | 
361 |         // The first field is the record code. We don't rely on the abbreviation parser to
362 |         // guarantee that one exists: a record without any fields is rejected, not indexed.
363 |         if fields.is_empty() {
364 |             return Err(Error::StreamParse("abbreviated record has no code".into()));
365 |         }
366 |         let code: u64 = fields.remove(0);
367 | 
368 |         if self.scope().is_blockinfo() {
369 |             return Ok(None);
370 |         }
371 | 
372 |         Ok(Some(StreamEntry::Record(Record {
373 |             abbrev_id: Some(abbrev_id),
374 |             code: code,
375 |             fields: fields,
376 |         })))
377 |     }
378 | 
379 |     /// Return the next [`StreamEntry`](StreamEntry) in this bitstream.
380 |     ///
381 |     /// Entries that are pure parse details (abbreviation definitions and the contents
382 |     /// of `BLOCKINFO`) are consumed internally and never returned.
383 |     ///
384 |     /// Returns an error on any parsing error, *or* the special
385 |     /// [`Error::Exhausted`](Error::Exhausted) if the bitstream has
386 |     /// been fully consumed.
387 |     pub fn advance(&mut self) -> Result<StreamEntry, Error> {
388 |         // NOTE: This is a loop rather than a recursive call: entries that are parse details
389 |         // produce no `StreamEntry`, and a long run of them in the input would otherwise
390 |         // grow the call stack by one frame per skipped entry.
391 |         loop {
392 |             if self.cursor.exhausted() {
393 |                 return Err(Error::Exhausted);
394 |             }
395 | 
396 |             log::debug!(
397 |                 "advancing, current scope: {:?} @ bit position {}",
398 |                 self.scope(),
399 |                 self.cursor.tell_bit()
400 |             );
401 | 
402 |             // To return the next stream entry, we read the next abbreviation ID using
403 |             // our current width. The abbreviation ID we read determines our subsequent
404 |             // parse strategy and the kind of entry we return.
405 |             let id: abbrev::AbbrevId = self
406 |                 .cursor
407 |                 .read(self.scope().abbrev_id_width() as usize)?
408 |                 .into();
409 |             log::debug!("next entry ID: {:?}", id);
410 | 
411 |             // Each helper returns `None` for parse details that a stream consumer shouldn't
412 |             // have to be aware of (such as abbrev definitions and the BLOCKINFO block).
413 |             let entry = match id {
414 |                 AbbrevId::Reserved(ReservedAbbrevId::EndBlock) => self.exit_block()?,
415 |                 AbbrevId::Reserved(ReservedAbbrevId::EnterSubBlock) => self.enter_block()?,
416 |                 AbbrevId::Reserved(ReservedAbbrevId::DefineAbbrev) => {
417 |                     // DEFINE_ABBREV is always a parse detail, so we don't even bother
418 |                     // trying to return a StreamEntry for it.
419 |                     self.define_abbrev()?;
420 |                     None
421 |                 }
422 |                 AbbrevId::Reserved(ReservedAbbrevId::UnabbrevRecord) => self.parse_unabbrev()?,
423 |                 AbbrevId::Defined(abbrev_id) => self.parse_with_abbrev(abbrev_id)?,
424 |             };
425 | 
426 |             // Surface the entry if there is one; otherwise move on to the next entry.
427 |             if let Some(entry) = entry {
428 |                 return Ok(entry);
429 |             }
430 |         }
431 |     }
432 | }
433 | 


--------------------------------------------------------------------------------
/llvm-bitstream/src/record.rs:
--------------------------------------------------------------------------------
 1 | //! Record parsing and handling functionality for `llvm-bitstream`.
 2 | 
 3 | /// A convenience alias for the fields of a record.
 4 | pub type Fields = Vec<u64>;
 5 | 
 6 | /// Represents a single bitstream record.
 7 | #[derive(Clone, Debug)]
 8 | pub struct Record {
 9 |     /// The abbreviation ID that was used to parse this record, or `None` if
10 |     /// this record was parsed from an `UNABBREV_RECORD` encoding.
11 |     pub abbrev_id: Option<u64>,
12 | 
13 |     /// The code that identifies the record's kind.
14 |     pub code: u64,
15 | 
16 |     /// The fields (operands) of this record; the code is stored separately in `code`.
17 |     pub fields: Fields,
18 | }
19 | 
20 | impl Record {
21 |     /// Creates a new `Record` from the given code and fields, with no abbreviation ID.
22 |     pub fn from_unabbrev(code: u64, fields: Fields) -> Self {
23 |         Self {
24 |             abbrev_id: None,
25 |             code: code,
26 |             fields: fields,
27 |         }
28 |     }
29 | 
30 |     /// Creates a new `Record` that remembers the given abbreviation ID alongside its code and fields.
31 |     pub fn from_abbrev(abbrev_id: u64, code: u64, fields: Fields) -> Self {
32 |         Self {
33 |             abbrev_id: Some(abbrev_id),
34 |             code: code,
35 |             fields: fields,
36 |         }
37 |     }
38 | }
39 | 
40 | /// Describes a single block scope in the bitstream, as surfaced when the block is entered.
41 | #[derive(Debug)]
42 | pub struct Block {
43 |     /// The ID of the block.
44 |     pub block_id: u64,
45 |     /// The length of the block, in bytes. Blocks are always 32-bit-word-aligned.
46 |     pub len: u64,
47 | }
48 | 


--------------------------------------------------------------------------------
/llvm-constants/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "llvm-constants"
 3 | description = "Numeric and enum constants for interacting with LLVM bitstreams and IR"
 4 | license = "MIT"
 5 | homepage = "https://github.com/woodruffw/mollusc/tree/main/llvm-constants"
 6 | repository = "https://github.com/woodruffw/mollusc"
 7 | authors = ["William Woodruff <william@yossarian.net>"]
 8 | readme = "README.md"
 9 | keywords = ["llvm", "parsing", "binary", "encoding"]
10 | categories = ["compilers", "encoding", "parsing"]
11 | edition = "2018"
12 | version = "0.0.2"
13 | 
14 | [dependencies]
15 | num_enum = "0.5.3"
16 | 


--------------------------------------------------------------------------------
/llvm-constants/README.md:
--------------------------------------------------------------------------------
 1 | llvm-constants
 2 | ==============
 3 | 
 4 | [![Crates.io](https://img.shields.io/crates/v/llvm-constants)](https://crates.io/crates/llvm-constants)
 5 | [![Documentation](https://docs.rs/llvm-constants/badge.svg)](https://docs.rs/llvm-constants)
 6 | 
 7 | Numeric and enum constants for interpreting LLVM bitstreams and IR.
 8 | 
 9 | This library contains only definitions; it is not useful on its own.
10 | 


--------------------------------------------------------------------------------
/llvm-constants/build.rs:
--------------------------------------------------------------------------------
 1 | // This is an ugly little hack to get access to a reasonable "default"
 2 | // target triple when loading bitcode inputs that don't mention their triple.
 3 | // Based on: https://stackoverflow.com/a/51311222
 4 | // Unwrap safety: None. If `TARGET` is unset, the build fails, and that's intended.
 5 | #[allow(clippy::unwrap_used)]
 6 | fn main() {
 7 |     println!(
 8 |         "cargo:rustc-env=TARGET_TRIPLE={}",
 9 |         std::env::var("TARGET").unwrap()
10 |     );
11 | }
12 | 


--------------------------------------------------------------------------------
/llvm-constants/src/constants.rs:
--------------------------------------------------------------------------------
 1 | //! Numeric constants for `llvm-constants`.
 2 | 
 3 | /// The 32-bit magic that indicates a raw LLVM IR bitcode stream.
 4 | pub const LLVM_IR_MAGIC: u32 = 0xdec04342;
 5 | 
 6 | /// The 32-bit magic that indicates a bitcode wrapper, which in
 7 | /// turn points to the start of the actual bitcode stream.
 8 | pub const BITCODE_WRAPPER_MAGIC: u32 = 0x0b17c0de;
 9 | 
10 | /// The initial abbreviation ID width (in bits) in a bitstream.
11 | pub const INITIAL_ABBREV_ID_WIDTH: u64 = 2;
12 | 
13 | /// All abbreviation IDs before this are defined by the bitstream format,
14 | /// rather than the stream itself.
15 | pub const FIRST_APPLICATION_ABBREV_ID: usize = 4;
16 | 
17 | /// All block IDs before this have their semantics defined by the bitstream
18 | /// format, rather than the stream itself.
19 | pub const FIRST_APPLICATION_BLOCK_ID: u64 = 8;
20 | 
21 | /// The lookup alphabet for the Char6 operand encoding.
22 | pub const CHAR6_ALPHABET: &[u8] =
23 |     b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._";
24 | 
25 | /// The target triple this crate was built for, as exported by `build.rs` via `TARGET_TRIPLE`.
26 | pub const TARGET_TRIPLE: &str = env!("TARGET_TRIPLE");
27 | 
28 | #[cfg(test)]
29 | mod tests {
30 |     use super::*;
31 | 
32 |     #[test]
33 |     fn test_target_triple() {
34 |         assert!(!TARGET_TRIPLE.is_empty()); // the build script must have exported a triple
35 |     }
36 | }
37 | 


--------------------------------------------------------------------------------
/llvm-constants/src/enums.rs:
--------------------------------------------------------------------------------
  1 | //! Enum constants for `llvm-constants`.
  2 | 
  3 | use num_enum::{IntoPrimitive, TryFromPrimitive};
  4 | 
  5 | use crate::constants::FIRST_APPLICATION_BLOCK_ID;
  6 | 
  7 | /// Block IDs that are reserved by LLVM (everything below `FIRST_APPLICATION_BLOCK_ID`).
  8 | // NOTE(ww): Block IDs 0 through 7 are reserved, but only 0 (BLOCKINFO)
  9 | // is actually currently used.
 10 | #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, TryFromPrimitive)]
 11 | #[repr(u64)]
 12 | pub enum ReservedBlockId {
 13 |     /// The `BLOCKINFO` block ID.
 14 |     BlockInfo = 0,
 15 |     /// Reserved; no semantics.
 16 |     Reserved1 = 1,
 17 |     /// Reserved; no semantics.
 18 |     Reserved2 = 2,
 19 |     /// Reserved; no semantics.
 20 |     Reserved3 = 3,
 21 |     /// Reserved; no semantics.
 22 |     Reserved4 = 4,
 23 |     /// Reserved; no semantics.
 24 |     Reserved5 = 5,
 25 |     /// Reserved; no semantics.
 26 |     Reserved6 = 6,
 27 |     /// Reserved; no semantics.
 28 |     Reserved7 = 7,
 29 | }
 30 | 
 31 | /// Block IDs that are used by LLVM for bitcode (i.e., IR bitstreams).
 32 | /// See: `enum BlockIDs` in `Bitcode/LLVMBitCodes.h`.
 33 | #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, TryFromPrimitive)]
 34 | #[repr(u64)]
 35 | pub enum IrBlockId {
 36 |     /// `MODULE_BLOCK_ID`.
 37 |     Module = FIRST_APPLICATION_BLOCK_ID,
 38 |     /// `PARAM_ATTR_BLOCK_ID`.
 39 |     ParamAttr,
 40 |     /// `PARAM_ATTR_GROUP_BLOCK_ID`.
 41 |     ParamAttrGroup,
 42 |     /// `CONSTANTS_BLOCK_ID`.
 43 |     Constants,
 44 |     /// `FUNCTION_BLOCK_ID`.
 45 |     Function,
 46 |     /// `IDENTIFICATION_BLOCK_ID`.
 47 |     Identification,
 48 |     /// `VALUE_SYMTAB_BLOCK_ID`.
 49 |     ValueSymtab,
 50 |     /// `METADATA_BLOCK_ID`.
 51 |     Metadata,
 52 |     /// `METADATA_ATTACHMENT_BLOCK_ID`.
 53 |     MetadataAttachment,
 54 |     /// `TYPE_BLOCK_ID_NEW`.
 55 |     Type,
 56 |     /// `USELIST_BLOCK_ID`.
 57 |     Uselist,
 58 |     /// `MODULE_STRTAB_BLOCK_ID`.
 59 |     ModuleStrtab,
 60 |     /// `GLOBAL_VAL_SUMMARY_BLOCK_ID`.
 61 |     GlobalValSummary,
 62 |     /// `OPERAND_BUNDLE_TAGS_BLOCK_ID`.
 63 |     OperandBundleTags,
 64 |     /// `METADATA_KIND_BLOCK_ID`.
 65 |     MetadataKind,
 66 |     /// `STRTAB_BLOCK_ID`.
 67 |     Strtab,
 68 |     /// `FULL_LTO_GLOBAL_VAL_SUMMARY_BLOCK_ID`.
 69 |     FullLtoGlobalValSummary,
 70 |     /// `SYMTAB_BLOCK_ID`.
 71 |     Symtab,
 72 |     /// `SYNC_SCOPE_NAMES_BLOCK_ID`.
 73 |     SyncScopeNames,
 74 | }
 75 | 
 76 | /// Abbreviation IDs that are reserved by LLVM.
 77 | #[derive(Clone, Copy, Debug, PartialEq, TryFromPrimitive)]
 78 | #[repr(u64)]
 79 | pub enum ReservedAbbrevId {
 80 |     /// Identifies an `END_BLOCK` record.
 81 |     EndBlock = 0,
 82 |     /// Identifies an `ENTER_SUBBLOCK` record.
 83 |     EnterSubBlock,
 84 |     /// Identifies a `DEFINE_ABBREV` record.
 85 |     DefineAbbrev,
 86 |     /// Identifies an `UNABBREV_RECORD` record.
 87 |     UnabbrevRecord,
 88 | }
 89 | 
 90 | /// Codes for each operand encoding type supported by `DEFINE_ABBREV`.
 91 | #[derive(Clone, Copy, Debug, PartialEq, TryFromPrimitive)]
 92 | #[repr(u64)]
 93 | pub enum AbbrevOpEnc {
 94 |     /// A fixed-length, unsigned operand.
 95 |     Fixed = 1,
 96 |     /// A variable-length, unsigned operand.
 97 |     Vbr,
 98 |     /// An array of values.
 99 |     Array,
100 |     /// A single 6-bit-encoded character.
101 |     Char6,
102 |     /// A blob of bytes.
103 |     Blob,
104 | }
105 | 
106 | /// Calling conventions supported by LLVM.
107 | #[non_exhaustive]
108 | #[derive(Debug, PartialEq, TryFromPrimitive)]
109 | #[repr(u64)]
110 | #[allow(missing_docs)]
111 | pub enum CallingConvention {
112 |     C = 0,
113 |     Fast = 8,
114 |     Cold = 9,
115 |     GHC = 10,
116 |     HiPE = 11,
117 |     WebKitJS = 12,
118 |     AnyReg = 13,
119 |     PreserveMost = 14,
120 |     PreserveAll = 15,
121 |     Swift = 16,
122 |     CXXFASTTLS = 17,
123 |     X86Stdcall = 64,
124 |     X86Fastcall = 65,
125 |     ARMAPCS = 66,
126 |     ARMAAPCS = 67,
127 |     ARMAAPCSVFP = 68,
128 |     MSP430INTR = 69,
129 |     X86ThisCall = 70,
130 |     PTXKernel = 71,
131 |     PTXDevice = 72,
132 |     SPIRFUNC = 75,
133 |     SPIRKERNEL = 76,
134 |     IntelOCLBI = 77,
135 |     X8664SysV = 78,
136 |     Win64 = 79,
137 |     X86VectorCall = 80,
138 |     HHVM = 81,
139 |     HHVMC = 82,
140 |     X86INTR = 83,
141 |     AVRINTR = 84,
142 |     AVRSIGNAL = 85,
143 |     AVRBUILTIN = 86,
144 |     AMDGPUVS = 87,
145 |     AMDGPUGS = 88,
146 |     AMDGPUPS = 89,
147 |     AMDGPUCS = 90,
148 |     AMDGPUKERNEL = 91,
149 |     X86RegCall = 92,
150 |     AMDGPUHS = 93,
151 |     MSP430BUILTIN = 94,
152 |     AMDGPULS = 95,
153 |     AMDGPUES = 96,
154 | }
155 | 
156 | /// Codes for each `UNABBREV_RECORD` in `BLOCKINFO`.
157 | #[non_exhaustive]
158 | #[derive(Debug, PartialEq, TryFromPrimitive)]
159 | #[repr(u64)]
160 | pub enum BlockInfoCode {
161 |     /// SETBID: `[blockid]`
162 |     SetBid = 1,
163 |     /// BLOCKNAME: `[...name...]`
164 |     BlockName,
165 |     /// SETRECORDNAME: `[recordid, ...name...]`
166 |     SetRecordName,
167 | }
168 | 
169 | /// Codes for each record in `IDENTIFICATION_BLOCK`.
170 | #[non_exhaustive]
171 | #[derive(Debug, PartialEq, TryFromPrimitive)]
172 | #[repr(u64)]
173 | pub enum IdentificationCode {
174 |     /// IDENTIFICATION_CODE_STRING: `[...string...]`
175 |     ProducerString = 1,
176 |     /// IDENTIFICATION_CODE_EPOCH: `[epoch]`
177 |     Epoch,
178 | }
179 | 
180 | /// Codes for each record in `MODULE_BLOCK`.
181 | #[non_exhaustive]
182 | #[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)]
183 | #[repr(u64)]
184 | pub enum ModuleCode {
185 |     /// MODULE_CODE_VERSION: `[version#]`
186 |     Version = 1,
187 |     /// MODULE_CODE_TRIPLE: `[...string...]`
188 |     Triple = 2,
189 |     /// MODULE_CODE_DATALAYOUT: `[...string...]`
190 |     DataLayout = 3,
191 |     /// MODULE_CODE_ASM: `[...string...]`
192 |     Asm = 4,
193 |     /// MODULE_CODE_SECTIONNAME: `[...string...]`
194 |     SectionName = 5,
195 |     /// MODULE_CODE_DEPLIB: `[...string...]`
196 |     DepLib = 6,
197 |     /// MODULE_CODE_GLOBALVAR: `[...fields...]`
198 |     /// See: <https://llvm.org/docs/BitCodeFormat.html#module-code-globalvar-record>
199 |     GlobalVar = 7,
200 |     /// MODULE_CODE_FUNCTION: `[...fields...]`
201 |     /// See: <https://llvm.org/docs/BitCodeFormat.html#module-code-function-record>
202 |     Function = 8,
203 |     /// MODULE_CODE_ALIAS_OLD: `[...fields...]`
204 |     /// See: <https://llvm.org/docs/BitCodeFormat.html#module-code-alias-record>
205 |     AliasOld = 9,
206 |     /// MODULE_CODE_GCNAME: `[...string...]`
207 |     GcName = 11,
208 |     /// MODULE_CODE_COMDAT
209 |     /// v1: `[selection_kind, name]`
210 |     /// v2: `[strtab_offset, strtab_size, selection_kind]`
211 |     /// Only `v2` is currently supported.
212 |     Comdat = 12,
213 |     /// MODULE_CODE_VSTOFFSET: `[offset]`
214 |     VstOffset = 13,
215 |     /// MODULE_CODE_ALIAS: `[...fields...]`
216 |     /// Not well documented; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`.
217 |     Alias = 14,
218 |     /// MODULE_CODE_METADATA_VALUES_UNUSED
219 |     /// Not documented at all; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`.
220 |     MetadataValuesUnused = 15,
221 |     /// MODULE_CODE_SOURCE_FILENAME: `[...string...]`
222 |     SourceFilename = 16,
223 |     /// MODULE_CODE_HASH: `[5*i32]`
224 |     Hash = 17,
225 |     /// MODULE_CODE_IFUNC: `[...fields...]`
226 |     /// Not well documented; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`.
227 |     IFunc = 18,
228 | }
229 | 
230 | /// Codes for each record in `TYPE_BLOCK` (i.e., `TYPE_BLOCK_ID_NEW`).
231 | #[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)]
232 | #[repr(u64)]
233 | pub enum TypeCode {
234 |     /// TYPE_CODE_NUMENTRY: `[numentries]`
235 |     NumEntry = 1,
236 |     /// TYPE_CODE_VOID
237 |     Void,
238 |     /// TYPE_CODE_FLOAT
239 |     Float,
240 |     /// TYPE_CODE_DOUBLE
241 |     Double,
242 |     /// TYPE_CODE_LABEL
243 |     Label,
244 |     /// TYPE_CODE_OPAQUE
245 |     Opaque,
246 |     /// TYPE_CODE_INTEGER: `[width]`
247 |     Integer,
248 |     /// TYPE_CODE_POINTER: `[pointee type]`
249 |     Pointer,
250 |     /// TYPE_CODE_FUNCTION_OLD: `[vararg, attrid, retty, paramty x N]`
251 |     FunctionOld,
252 |     /// TYPE_CODE_HALF
253 |     Half,
254 |     /// TYPE_CODE_ARRAY: `[numelts, eltty]`
255 |     Array,
256 |     /// TYPE_CODE_VECTOR: `[numelts, eltty]`
257 |     Vector,
258 |     /// TYPE_CODE_X86_FP80
259 |     X86Fp80,
260 |     /// TYPE_CODE_FP128
261 |     Fp128,
262 |     /// TYPE_CODE_PPC_FP128
263 |     PpcFp128,
264 |     /// TYPE_CODE_METADATA
265 |     Metadata,
266 |     /// TYPE_CODE_X86_MMX
267 |     X86Mmx,
268 |     /// TYPE_CODE_STRUCT_ANON: `[ispacked, eltty x N]`
269 |     StructAnon,
270 |     /// TYPE_CODE_STRUCT_NAME: `[strchr x N]`
271 |     StructName,
272 |     /// TYPE_CODE_STRUCT_NAMED: `[ispacked, eltty x N]`
273 |     StructNamed,
274 |     /// TYPE_CODE_FUNCTION: `[vararg, retty, paramty x N]`
275 |     Function,
276 |     /// TYPE_CODE_TOKEN
277 |     Token,
278 |     /// TYPE_CODE_BFLOAT
279 |     BFloat,
280 |     /// TYPE_CODE_X86_AMX
281 |     X86Amx,
282 |     /// TYPE_CODE_OPAQUE_POINTER: `[addrspace]`
283 |     OpaquePointer,
284 | }
285 | 
286 | /// Codes for each record in `STRTAB_BLOCK`.
287 | #[non_exhaustive]
288 | #[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)]
289 | #[repr(u64)]
290 | pub enum StrtabCode {
291 |     /// STRTAB_BLOB: `[...string...]`
292 |     Blob = 1,
293 | }
294 | 
295 | /// Codes for each record in `SYMTAB_BLOCK`.
296 | #[non_exhaustive]
297 | #[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)]
298 | #[repr(u64)]
299 | pub enum SymtabCode {
300 |     /// SYMTAB_BLOB: `[...data...]`
301 |     Blob = 1,
302 | }
303 | 
304 | /// Codes for each record in `PARAMATTR_BLOCK` or `PARAMATTR_GROUP_BLOCK`.
305 | // NOTE(ww): For whatever reason, these two blocks share the same enum for
306 | // record codes.
307 | #[derive(Debug, PartialEq, IntoPrimitive, TryFromPrimitive)]
308 | #[repr(u64)]
309 | pub enum AttributeCode {
310 |     /// PARAMATTR_CODE_ENTRY_OLD: `[paramidx0, attr0, paramidx1, attr1...]`
311 |     EntryOld = 1,
312 |     /// PARAMATTR_CODE_ENTRY: `[attrgrp0, attrgrp1, ...]`
313 |     Entry,
314 |     /// PARAMATTR_GRP_CODE_ENTRY: `[grpid, idx, attr0, attr1, ...]`
315 |     GroupCodeEntry,
316 | }
317 | 


--------------------------------------------------------------------------------
/llvm-constants/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! `llvm-constants` contains numeric and enum constants for interacting with LLVM
 2 | //! bitstreams and IR.
 3 | 
 4 | #![deny(rustdoc::broken_intra_doc_links)]
 5 | #![deny(missing_docs)]
 6 | #![allow(clippy::redundant_field_names)]
 7 | #![forbid(unsafe_code)]
 8 | 
 9 | mod constants;
10 | mod enums;
11 | 
12 | pub use crate::constants::*;
13 | pub use crate::enums::*;
14 | 


--------------------------------------------------------------------------------
/llvm-mapper/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "llvm-mapper"
 3 | description = "A library for mapping the contents of bitstreams into LLVM IR models"
 4 | license = "MIT"
 5 | homepage = "https://github.com/woodruffw/mollusc/tree/main/llvm-mapper"
 6 | repository = "https://github.com/woodruffw/mollusc"
 7 | authors = ["William Woodruff <william@yossarian.net>"]
 8 | readme = "README.md"
 9 | keywords = ["llvm", "parsing", "binary", "encoding"]
10 | categories = ["compilers", "encoding", "parsing"]
11 | edition = "2018"
12 | version = "0.0.4"
13 | 
14 | [[example]]
15 | name = "unroll-bitstream"
16 | 
17 | [dependencies]
18 | indexmap = "2.0"
19 | hashbrown = "0.14"
20 | llvm-bitstream = { version = "0.0.3", path = "../llvm-bitstream" }
21 | llvm-support = { version = "0.0.3", path = "../llvm-support" }
22 | log = "0.4"
23 | num_enum = "0.6"
24 | thiserror = "1.0"
25 | 
26 | [dev-dependencies]
27 | anyhow = "1.0"
28 | clap = "4.0"
29 | env_logger = "0.10"
30 | 


--------------------------------------------------------------------------------
/llvm-mapper/README.md:
--------------------------------------------------------------------------------
 1 | llvm-mapper
 2 | ===========
 3 | 
 4 | [![Crates.io](https://img.shields.io/crates/v/llvm-mapper)](https://crates.io/crates/llvm-mapper)
 5 | [![Documentation](https://docs.rs/llvm-mapper/badge.svg)](https://docs.rs/llvm-mapper)
 6 | 
 7 | A library for mapping the contents of bitstreams into LLVM IR models.
 8 | 
 9 | This library produces a "full-featured" view of a particular LLVM IR program by mapping
10 | blocks and records in the underlying bitstream into their appropriate LLVM models.
11 | 
12 | This library uses [`llvm-bitstream`](https://crates.io/crates/llvm-bitstream) under the hood.
13 | 


--------------------------------------------------------------------------------
/llvm-mapper/examples/unroll-bitstream.rs:
--------------------------------------------------------------------------------
 1 | use std::convert::TryFrom;
 2 | use std::fs;
 3 | 
 4 | use anyhow::Result;
 5 | use clap::{Arg, Command};
 6 | use llvm_bitstream::Bitstream;
 7 | use llvm_mapper::unroll::Bitcode;
 8 | 
 9 | fn app() -> Command {
10 |     Command::new(env!("CARGO_PKG_NAME"))
11 |         .version(env!("CARGO_PKG_VERSION"))
12 |         .about(env!("CARGO_PKG_DESCRIPTION"))
13 |         .arg(
14 |             Arg::new("input")
15 |                 .help("the bitstream input to unroll")
16 |                 .index(1)
17 |                 .required(true),
18 |         )
19 | }
20 | 
21 | fn main() -> Result<()> {
22 |     env_logger::init();
23 |     let matches = app().get_matches();
24 | 
25 |     let input = {
26 |         let input = matches.get_one::<String>("input").unwrap();
27 |         fs::read(input)?
28 |     };
29 | 
30 |     let (_, bitstream) = Bitstream::from(&input)?;
31 | 
32 |     let unrolled = Bitcode::try_from(bitstream)?;
33 |     println!("{:#?}", unrolled);
34 | 
35 |     Ok(())
36 | }
37 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/block/function/basic_block.rs:
--------------------------------------------------------------------------------
 1 | //! Models and functionality for basic blocks.
 2 | 
 3 | use super::Instruction;
 4 | 
 5 | /// Represents a basic block.
 6 | #[non_exhaustive]
 7 | #[derive(Debug, Default)]
 8 | pub struct BasicBlock {
 9 |     /// The instructions of this basic block.
10 |     pub instructions: Vec<Instruction>,
11 | }
12 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/block/function/instruction.rs:
--------------------------------------------------------------------------------
 1 | //! Models and functionality for individual LLVM IR instructions.
 2 | 
 3 | use llvm_support::{BinaryOp, CastOp, UnaryOp};
 4 | 
 5 | /// Represents an LLVM instruction.
 6 | #[derive(Debug)]
 7 | pub enum Instruction {
 8 |     /// Unary instructions.
 9 |     Unary {
10 |         /// The opcode.
11 |         op: UnaryOp,
12 |     },
13 |     /// Binary instructions.
14 |     Binary {
15 |         /// The opcode.
16 |         op: BinaryOp,
17 |         // TODO: lhs, rhs
18 |     },
19 |     /// Cast instructions.
20 |     Cast {
21 |         /// The opcode.
22 |         op: CastOp,
23 |         // TODO: srcval, srcty, dstty
24 |     },
25 |     /// `getelementptr`
26 |     GetElementPtr,
27 |     /// `extractvalue`
28 |     ExtractValue,
29 |     /// `insertvalue`
30 |     InsertValue,
31 |     /// `select`
32 |     Select,
33 |     /// `extractelement`
34 |     ExtractElement,
35 |     /// `insertelement`
36 |     InsertElement,
37 |     /// `shufflevector`
38 |     ShuffleVector,
39 |     /// `cmp`
40 |     Cmp,
41 |     /// `ret`
42 |     Ret,
43 |     /// `br`
44 |     Br,
45 |     /// `cleanupret`
46 |     CleanupRet,
47 |     /// `catchret`
48 |     CatchRet,
49 |     /// `catchswitch`
50 |     CatchSwitch,
51 |     /// `catchpad`
52 |     CatchPad,
53 |     /// `switch`
54 |     Switch,
55 |     /// `indirectbr`
56 |     IndirectBr,
57 |     /// `invoke`
58 |     Invoke,
59 |     /// `resume`
60 |     Resume,
61 |     /// `callbr`
62 |     CallBr,
63 |     /// `unreachable`
64 |     Unreachable,
65 |     /// `landingpad`
66 |     LandingPad,
67 |     /// `alloca`
68 |     Alloca,
69 |     /// `load`
70 |     Load,
71 |     /// `store`
72 |     Store,
73 |     /// `cmpxchg`
74 |     CmpXchg,
75 |     /// `atomicrmw`
76 |     AtomicRMW,
77 |     /// `fence`
78 |     Fence,
79 |     /// `call`
80 |     Call,
81 |     /// `va_arg`
82 |     VAArg,
83 |     /// `freeze`
84 |     Freeze,
85 | }
86 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/block/function/mod.rs:
--------------------------------------------------------------------------------
  1 | //! Functionality for mapping `FUNCTION_BLOCK` blocks.
  2 | 
  3 | mod basic_block;
  4 | mod instruction;
  5 | 
  6 | use std::convert::TryFrom;
  7 | 
  8 | pub use basic_block::*;
  9 | pub use instruction::*;
 10 | use llvm_support::bitcodes::FunctionCode;
 11 | use llvm_support::{BinaryOp, BinaryOpError, UnaryOp, UnaryOpError};
 12 | use num_enum::TryFromPrimitiveError;
 13 | use thiserror::Error;
 14 | 
 15 | use crate::map::{MapCtx, MapError};
 16 | use crate::unroll::Block;
 17 | 
 18 | /// Errors that can occur when mapping function blocks.
 19 | #[derive(Debug, Error)]
 20 | pub enum FunctionError {
 21 |     /// `FUNC_CODE_DECLAREBLOCKS` is either missing or zero.
 22 |     #[error("function does not declare block count or has zero blocks")]
 23 |     InvalidBlockCount,
 24 | 
 25 |     /// An unknown record code was seen.
 26 |     #[error("unknown function code")]
 27 |     UnknownFunctionCode(#[from] TryFromPrimitiveError<FunctionCode>),
 28 | 
 29 |     /// An invalid instruction encoding was seen.
 30 |     #[error("invalid instruction encoding: {0}")]
 31 |     BadInst(String),
 32 | 
 33 |     /// An invalid unary opcode was seen.
 34 |     #[error("invalid unary opcode")]
 35 |     BadUnOp(#[from] UnaryOpError),
 36 | 
 37 |     /// An invalid binary opcode was seen.
 38 |     #[error("invalid binary opcode")]
 39 |     BadBinOp(#[from] BinaryOpError),
 40 | 
 41 |     /// A generic mapping error occurred.
 42 |     #[error("generic mapping error")]
 43 |     Map(#[from] MapError),
 44 | }
 45 | 
 46 | /// Models the `FUNCTION_BLOCK` block.
 47 | #[non_exhaustive]
 48 | #[derive(Debug)]
 49 | pub struct Function {
 50 |     /// The basic blocks of this function.
 51 |     pub blocks: Vec<BasicBlock>,
 52 | }
 53 | 
 54 | impl TryFrom<(&'_ Block, &'_ MapCtx<'_>)> for Function {
 55 |     type Error = FunctionError;
 56 | 
 57 |     fn try_from((block, ctx): (&'_ Block, &'_ MapCtx)) -> Result<Self, Self::Error> {
 58 |         // TODO: Handle the sub-blocks nested inside each function block.
 59 | 
 60 |         // A function block should have exactly one DECLAREBLOCKS record.
 61 |         let nblocks = {
 62 |             let declareblocks = block
 63 |                 .records
 64 |                 .exactly_one(FunctionCode::DeclareBlocks)
 65 |                 .map_err(MapError::Inconsistent)?;
 66 | 
 67 |             // Reject a missing or zero count, or one that cannot fit in `usize`.
 68 |             let count = declareblocks.fields().first().copied();
 69 |             let count = count.filter(|&n| n != 0);
 70 |             count.and_then(|n| usize::try_from(n).ok()).ok_or(FunctionError::InvalidBlockCount)?
 71 |         };
 72 | 
 73 |         // Like the type table, we need a little bit of a state machine to
 74 |         // construct each function's basic blocks and constituent instructions.
 75 |         let mut _bbs: Vec<BasicBlock> = Vec::with_capacity(nblocks);
 76 |         let mut _bb = BasicBlock::default();
 77 | 
 78 |         for record in block.records.into_iter() {
 79 |             let code = FunctionCode::try_from(record.code())?;
 80 | 
 81 |             macro_rules! unpack_fields {
 82 |                 ($n:literal) => {
 83 |                     <[u64; $n]>::try_from(record.fields()).map_err(|_| {
 84 |                         FunctionError::BadInst(format!(
 85 |                             "bad {code:?}: expected {} fields, got {}",
 86 |                             $n,
 87 |                             record.fields().len()
 88 |                         ))
 89 |                     })
 90 |                 };
 91 |             }
 92 | 
 93 |             macro_rules! get_type {
 94 |                 ($ty:ident) => {
 95 |                     // TODO: This is wrong; the lookup here needs to be
 96 |                     // aware of forward references.
 97 |                     ctx.type_table.get($ty).ok_or_else(|| {
 98 |                         FunctionError::BadInst(format!(
 99 |                             "bad {code:?}: invalid type table reference: {}",
100 |                             $ty
101 |                         ))
102 |                     })
103 |                 };
104 |             }
105 | 
106 |             // Function codes fall into a few general categories:
107 |             //
108 |             // * State machine management (`DECLAREBLOCKS`)
109 |             // * Instruction declaration (`INST_*`)
110 |             // * Debug state (`DEBUG_LOC`, `DEBUG_LOC_AGAIN`)
111 |             // * Operand bundles (`OPERAND_BUNDLE`)
112 |             //
113 |             // Each category is grouped below, with the smaller ones first.
114 |             match code {
115 |                 // Handled above.
116 |                 FunctionCode::DeclareBlocks => continue,
117 | 
118 |                 // Operand bundles.
119 |                 FunctionCode::OperandBundle => unimplemented!(),
120 | 
121 |                 // Debug state.
122 |                 FunctionCode::DebugLoc => unimplemented!(),
123 |                 FunctionCode::DebugLocAgain => unimplemented!(),
124 | 
125 |                 // The big one: all instructions.
126 |                 FunctionCode::InstBinop => {
127 |                     // [opval, ty, opval, opcode]
128 |                     let [_lhs, ty, _rhs, opcode] = unpack_fields!(4)?;
129 |                     let ty = get_type!(ty)?;
130 |                     let _opcode = BinaryOp::try_from((opcode, ty))?;
131 |                 }
132 |                 FunctionCode::InstCast => {
133 |                     // [opval, opty, destty, castopc]
134 |                     let [_opval, _opty, _destty, _castopc] = unpack_fields!(4)?;
135 |                 }
136 |                 FunctionCode::InstGepOld => todo!(),
137 |                 FunctionCode::InstSelect => todo!(),
138 |                 FunctionCode::InstExtractelt => todo!(),
139 |                 FunctionCode::InstInsertelt => todo!(),
140 |                 FunctionCode::InstShufflevec => todo!(),
141 |                 FunctionCode::InstCmp => todo!(),
142 |                 FunctionCode::InstRet => todo!(),
143 |                 FunctionCode::InstBr => todo!(),
144 |                 FunctionCode::InstSwitch => todo!(),
145 |                 FunctionCode::InstInvoke => todo!(),
146 |                 FunctionCode::InstUnreachable => todo!(),
147 |                 FunctionCode::InstPhi => todo!(),
148 |                 FunctionCode::InstAlloca => {
149 |                     // [instty, opty, op, align]
150 |                     let [_instty, _opty, _op, _align] = unpack_fields!(4)?;
151 |                 }
152 |                 FunctionCode::InstLoad => {
153 |                     // [opty, op, align, vol]
154 |                     let [_opty, _op, _align, _vol] = unpack_fields!(4)?;
155 |                 }
156 |                 FunctionCode::InstVaarg => todo!(),
157 |                 FunctionCode::InstStoreOld => todo!(),
158 |                 FunctionCode::InstExtractval => todo!(),
159 |                 FunctionCode::InstInsertval => todo!(),
160 |                 FunctionCode::InstCmp2 => todo!(),
161 |                 FunctionCode::InstVselect => todo!(),
162 |                 FunctionCode::InstInboundsGepOld => todo!(),
163 |                 FunctionCode::InstIndirectbr => todo!(),
164 |                 FunctionCode::InstCall => todo!(),
165 |                 FunctionCode::InstFence => todo!(),
166 |                 FunctionCode::InstCmpxchgOld => todo!(),
167 |                 FunctionCode::InstAtomicrmwOld => todo!(),
168 |                 FunctionCode::InstResume => todo!(),
169 |                 FunctionCode::InstLandingpadOld => todo!(),
170 |                 FunctionCode::InstLoadatomic => todo!(),
171 |                 FunctionCode::InstStoreatomicOld => todo!(),
172 |                 FunctionCode::InstGep => todo!(),
173 |                 FunctionCode::InstStore => {
174 |                     // [ptrty, ptr, valty, val, align, vol]
175 |                     let [_ptrty, _ptr, _valty, _val] = unpack_fields!(4)?;
176 | 
177 |                     // FIXME: align and vol are optional; records carrying them fail the unpack above.
178 |                 }
179 |                 FunctionCode::InstStoreatomic => todo!(),
180 |                 FunctionCode::InstCmpxchg => todo!(),
181 |                 FunctionCode::InstLandingpad => todo!(),
182 |                 FunctionCode::InstCleanupret => todo!(),
183 |                 FunctionCode::InstCatchret => todo!(),
184 |                 FunctionCode::InstCatchpad => todo!(),
185 |                 FunctionCode::InstCleanuppad => todo!(),
186 |                 FunctionCode::InstCatchswitch => todo!(),
187 |                 FunctionCode::InstUnop => {
188 |                     // [opval, ty, opcode]
189 |                     let [_opval, ty, opcode] = unpack_fields!(3)?;
190 |                     let _ty = get_type!(ty)?;
191 |                     let _opcode = UnaryOp::try_from(opcode)?;
192 |                 }
193 |                 FunctionCode::Instcallbr => todo!(),
194 |                 FunctionCode::InstFreeze => todo!(),
195 |                 FunctionCode::InstAtomicrmw => todo!(),
196 |             }
197 |         }
198 | 
199 |         unimplemented!()
200 |     }
201 | }
202 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/block/identification.rs:
--------------------------------------------------------------------------------
 1 | //! Functionality for mapping the `IDENTIFICATION_BLOCK` block.
 2 | 
 3 | use std::convert::TryFrom;
 4 | 
 5 | use llvm_support::bitcodes::IdentificationCode;
 6 | use thiserror::Error;
 7 | 
 8 | use crate::map::MapError;
 9 | use crate::unroll::Block;
10 | 
11 | /// Errors that can occur while mapping the identification block.
12 | #[derive(Debug, Error)]
13 | pub enum IdentificationError {
14 |     /// The `IDENTIFICATION_CODE_STRING` (producer) record couldn't be found.
15 |     #[error("identification block has no producer")]
16 |     MissingProducer,
17 | 
18 |     /// The producer string is malformed.
19 |     #[error("malformed producer string")]
20 |     BadProducer,
21 | 
22 |     /// The `IDENTIFICATION_CODE_EPOCH` couldn't be found.
23 |     #[error("identification block has no epoch")]
24 |     MissingEpoch,
25 | 
26 |     /// A generic mapping error occurred.
27 |     #[error("mapping error in identification block")]
28 |     Map(#[from] MapError),
29 | }
30 | 
31 | /// Models the `IDENTIFICATION_BLOCK` block.
32 | #[non_exhaustive]
33 | #[derive(Debug)]
34 | pub struct Identification {
35 |     /// The name of the "producer" for this bitcode.
36 |     pub producer: String,
37 |     /// The compatibility epoch.
38 |     pub epoch: u64,
39 | }
40 | 
41 | impl TryFrom<&'_ Block> for Identification {
42 |     type Error = IdentificationError;
43 | 
44 |     fn try_from(block: &'_ Block) -> Result<Self, Self::Error> {
45 |         // The producer record must exist and hold a well-formed string.
46 |         let producer = block
47 |             .records
48 |             .one(IdentificationCode::ProducerString as u64)
49 |             .ok_or(IdentificationError::MissingProducer)?
50 |             .try_string(0)
51 |             .map_err(|_| IdentificationError::BadProducer)?;
52 | 
53 |         // The epoch is the first field of the epoch record; a missing record
54 |         // and a missing field are both reported as a missing epoch.
55 |         let epoch_record = block.records.one(IdentificationCode::Epoch as u64);
56 |         let epoch = epoch_record
57 |             .and_then(|r| r.fields().first().copied())
58 |             .ok_or(IdentificationError::MissingEpoch)?;
59 | 
60 |         Ok(Self { producer, epoch })
61 |     }
62 | }
63 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/block/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Functionality for mapping individual blocks.
 2 | 
 3 | pub mod attributes;
 4 | pub mod function;
 5 | pub mod identification;
 6 | pub mod module;
 7 | pub mod strtab;
 8 | pub mod symtab;
 9 | pub mod type_table;
10 | pub mod vst;
11 | 
12 | use std::convert::TryFrom;
13 | 
14 | use llvm_support::bitcodes::{IrBlockId, ReservedBlockId};
15 | use thiserror::Error;
16 | 
17 | pub use self::attributes::*;
18 | pub use self::identification::*;
19 | pub use self::module::*;
20 | pub use self::strtab::*;
21 | pub use self::symtab::*;
22 | pub use self::type_table::*;
23 | 
24 | /// Potential errors when mapping a single bitstream block.
25 | #[non_exhaustive]
26 | #[derive(Debug, Error)]
27 | pub enum BlockMapError {
28 |     /// We couldn't map the identification block.
29 |     #[error("error while mapping identification block")]
30 |     Identification(#[from] IdentificationError),
31 | 
32 |     /// We couldn't map the module block.
33 |     #[error("error while mapping module")]
34 |     Module(#[from] ModuleError),
35 | 
36 |     /// We couldn't map the string table.
37 |     #[error("error while mapping string table")]
38 |     Strtab(#[from] StrtabError),
39 | 
40 |     /// We couldn't map the symbol table.
41 |     #[error("error while mapping symbol table")]
42 |     Symtab(#[from] SymtabError),
43 | }
44 | 
45 | /// A holistic model of all possible block IDs, spanning reserved, IR, and unknown IDs.
46 | #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
47 | pub enum BlockId {
48 |     /// A block ID that's been reserved by LLVM. Reserved IDs are internal, and cannot be mapped here.
49 |     Reserved(ReservedBlockId),
50 |     /// A block ID used by LLVM IR.
51 |     Ir(IrBlockId),
52 |     /// An unknown block ID. Unknown IDs cannot be mapped.
53 |     Unknown(u64),
54 | }
55 | 
56 | impl From<ReservedBlockId> for BlockId {
57 |     fn from(v: ReservedBlockId) -> Self {
58 |         Self::Reserved(v)
59 |     }
60 | }
61 | 
62 | impl From<IrBlockId> for BlockId {
63 |     fn from(v: IrBlockId) -> Self {
64 |         Self::Ir(v)
65 |     }
66 | }
67 | 
68 | impl From<u64> for BlockId {
69 |     fn from(value: u64) -> Self {
70 |         // Reserved IDs take precedence over IR IDs; anything else is unknown.
71 |         match (ReservedBlockId::try_from(value), IrBlockId::try_from(value)) {
72 |             (Ok(reserved), _) => BlockId::Reserved(reserved),
73 |             (_, Ok(ir)) => BlockId::Ir(ir),
74 |             _ => BlockId::Unknown(value),
75 |         }
76 |     }
77 | }
78 | 
79 | #[cfg(test)]
80 | mod tests {
81 |     use super::*;
82 | 
83 |     #[test]
84 |     fn test_blockid_from_u64() {
85 |         assert_eq!(
86 |             BlockId::from(0),
87 |             BlockId::Reserved(ReservedBlockId::BlockInfo)
88 |         );
89 |         assert_eq!(
90 |             BlockId::from(7),
91 |             BlockId::Reserved(ReservedBlockId::Reserved7)
92 |         );
93 |         assert_eq!(BlockId::from(8), BlockId::Ir(IrBlockId::Module));
94 |         assert_eq!(BlockId::from(2384629342), BlockId::Unknown(2384629342));
95 |     }
96 | }
97 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/block/module.rs:
--------------------------------------------------------------------------------
  1 | //! Functionality for mapping the `MODULE_BLOCK` block.
  2 | 
  3 | use std::convert::TryFrom;
  4 | 
  5 | use llvm_support::bitcodes::{IrBlockId, ModuleCode};
  6 | use llvm_support::TARGET_TRIPLE;
  7 | use thiserror::Error;
  8 | 
  9 | use crate::block::attributes::{AttributeError, AttributeGroups, Attributes};
 10 | use crate::block::function::{Function as FunctionBlock, FunctionError as FunctionBlockError};
 11 | use crate::block::type_table::{TypeTable, TypeTableError};
 12 | use crate::block::vst::{ModuleStyleVst, Vst, VstError};
 13 | use crate::map::{CtxMappable, MapError, PartialCtxMappable, PartialMapCtx};
 14 | use crate::record::{
 15 |     Alias, AliasError, Comdat, ComdatError, DataLayout, DataLayoutError,
 16 |     Function as FunctionRecord, FunctionError as FunctionRecordError,
 17 | };
 18 | use crate::unroll::Block;
 19 | 
 20 | /// Errors that can occur while mapping a module.
 21 | #[derive(Debug, Error)]
 22 | pub enum ModuleError {
 23 |     /// The `MODULE_CODE_VERSION` record has no fields, so no version could be read.
 24 |     #[error("bitcode module has no version")]
 25 |     MissingVersion,
 26 | 
 27 |     /// An error occurred while mapping the datalayout record.
 28 |     #[error("invalid datalayout record")]
 29 |     DataLayoutRecord(#[from] DataLayoutError),
 30 | 
 31 |     /// An error occurred while mapping the type table block.
 32 |     #[error("invalid type table block")]
 33 |     TypeTableBlock(#[from] TypeTableError),
 34 | 
 35 |     /// An error occurred while mapping a value symbol table.
 36 |     #[error("invalid value symbol table")]
 37 |     VstBlock(#[from] VstError),
 38 | 
 39 |     /// An error occurred while mapping one of the attribute blocks.
 40 |     #[error("invalid attribute block")]
 41 |     AttributeBlock(#[from] AttributeError),
 42 | 
 43 |     /// An error occurred while mapping a COMDAT record.
 44 |     #[error("invalid COMDAT record")]
 45 |     ComdatRecord(#[from] ComdatError),
 46 | 
 47 |     /// An error occurred while mapping a function record.
 48 |     #[error("invalid function record")]
 49 |     FunctionRecord(#[from] FunctionRecordError),
 50 | 
 51 |     /// An error occurred while mapping a function block.
 52 |     #[error("invalid function block")]
 53 |     FunctionBlock(#[from] FunctionBlockError),
 54 | 
 55 |     /// An error occurred while mapping an alias record.
 56 |     #[error("invalid alias record")]
 57 |     Alias(#[from] AliasError),
 58 | 
 59 |     /// A generic mapping error occurred.
 60 |     #[error("generic mapping error")]
 61 |     Map(#[from] MapError),
 62 | }
 63 | 
 64 | /// Models the `MODULE_BLOCK` block.
 65 | #[non_exhaustive]
 66 | #[derive(Debug)]
 67 | pub struct Module {
 68 |     /// The target triple; `TARGET_TRIPLE` is used when the module has no triple record.
 69 |     pub triple: String,
 70 |     /// The contents of the module's ASM record split on `\n`; empty if there is no record.
 71 |     pub asm: Vec<String>,
 72 |     /// Any dependent libraries listed in the module (read from the deprecated deplib records).
 73 |     pub deplibs: Vec<String>,
 74 | }
 75 | 
 76 | impl TryFrom<(&'_ Block, &'_ mut PartialMapCtx)> for Module {
 77 |     type Error = ModuleError;
 78 | 
 79 |     fn try_from((block, ctx): (&'_ Block, &'_ mut PartialMapCtx)) -> Result<Self, Self::Error> {
 80 |         // Mapping the module requires us to fill in the `PartialMapCtx` first,
 81 |         // so we can reify it into a `MapCtx` for subsequent steps.
 82 |         ctx.version = Some({
 83 |             let version = block
 84 |                 .records
 85 |                 .exactly_one(ModuleCode::Version)
 86 |                 .map_err(MapError::Inconsistent)?;
 87 | 
 88 |             *version
 89 |                 .fields()
 90 |                 .first()
 91 |                 .ok_or(ModuleError::MissingVersion)?
 92 |         });
 93 | 
 94 |         // Each module *should* have a datalayout record, but doesn't necessarily.
 95 |         if let Some(record) = block
 96 |             .records
 97 |             .one_or_none(ModuleCode::DataLayout)
 98 |             .map_err(MapError::Inconsistent)?
 99 |         {
100 |             ctx.datalayout = DataLayout::try_map(record, ctx)?;
101 |         }
102 | 
103 |         // Build the section table. We'll reference this later.
104 |         ctx.section_table = block
105 |             .records
106 |             .by_code(ModuleCode::SectionName)
107 |             .map(|rec| rec.try_string(0))
108 |             .collect::<Result<Vec<_>, _>>()
109 |             .map_err(MapError::RecordString)?;
110 | 
111 |         // Build the GC table. We'll reference this later.
112 |         ctx.gc_table = block
113 |             .records
114 |             .by_code(ModuleCode::GcName)
115 |             .map(|rec| rec.try_string(0))
116 |             .collect::<Result<Vec<_>, _>>()
117 |             .map_err(MapError::RecordString)?;
118 | 
119 |         // Build the type table.
120 |         ctx.type_table = Some(TypeTable::try_from(
121 |             block
122 |                 .blocks
123 |                 .exactly_one(IrBlockId::Type)
124 |                 .map_err(MapError::Inconsistent)?,
125 |         )?);
126 | 
127 |         // Map the module-level VST. NOTE: the mapped result is currently discarded, not stored.
128 |         Vst::try_from((
129 |             block
130 |                 .blocks
131 |                 .exactly_one(IrBlockId::ValueSymtab)
132 |                 .map_err(MapError::Inconsistent)?,
133 |             ModuleStyleVst {},
134 |         ))?;
135 | 
136 |         // Collect all attribute groups and individual attribute references.
137 |         // The order here is important: attribute groups must be mapped
138 |         // and stored in the `PartialMapCtx` before the attribute block itself can be mapped.
139 |         // Neither block is mandatory.
140 |         if let Some(attribute_groups) = block
141 |             .blocks
142 |             .one_or_none(IrBlockId::ParamAttrGroup)
143 |             .map_err(MapError::Inconsistent)?
144 |             .map(AttributeGroups::try_from)
145 |             .transpose()?
146 |         {
147 |             ctx.attribute_groups = attribute_groups;
148 |         }
149 | 
150 |         if let Some(attributes) = block
151 |             .blocks
152 |             .one_or_none(IrBlockId::ParamAttr)
153 |             .map_err(MapError::Inconsistent)?
154 |             .map(|b| Attributes::try_from((b, &*ctx)))
155 |             .transpose()?
156 |         {
157 |             ctx.attributes = attributes;
158 |         }
159 | 
160 |         // Build the list of COMDATs. We'll reference this later.
161 |         ctx.comdats = block
162 |             .records
163 |             .by_code(ModuleCode::Comdat)
164 |             .map(|rec| Comdat::try_map(rec, ctx))
165 |             .collect::<Result<Vec<_>, _>>()?;
166 | 
167 |         // After this point, `ctx` refers to a fully reified `MapCtx`.
168 |         let ctx = ctx.reify().map_err(MapError::Context)?;
169 | 
170 |         // Each module *should* have a target triple, but doesn't necessarily.
171 |         let triple = match block
172 |             .records
173 |             .one_or_none(ModuleCode::Triple)
174 |             .map_err(MapError::Inconsistent)?
175 |         {
176 |             Some(record) => record.try_string(0).map_err(MapError::RecordString)?,
177 |             None => TARGET_TRIPLE.into(),
178 |         };
179 | 
180 |         // Each module has either zero or exactly one MODULE_CODE_ASM record.
181 |         let asm = match block
182 |             .records
183 |             .one_or_none(ModuleCode::Asm)
184 |             .map_err(MapError::Inconsistent)?
185 |         {
186 |             None => Vec::new(),
187 |             Some(record) => record
188 |                 .try_string(0)
189 |                 .map_err(MapError::RecordString)?
190 |                 .split('\n')
191 |                 .map(String::from)
192 |                 .collect::<Vec<_>>(),
193 |         };
194 | 
195 |         // Deplib records are deprecated, but we might be parsing an older bitstream.
196 |         let deplibs = block
197 |             .records
198 |             .by_code(ModuleCode::DepLib)
199 |             .map(|rec| rec.try_string(0))
200 |             .collect::<Result<Vec<_>, _>>()
201 |             .map_err(MapError::RecordString)?;
202 | 
203 |         // Collect the function records and blocks in this module.
204 |         let function_records = block
205 |             .records
206 |             .by_code(ModuleCode::Function)
207 |             .map(|rec| FunctionRecord::try_map(rec, &ctx))
208 |             .collect::<Result<Vec<_>, _>>()?;
209 | 
210 |         let _function_blocks = block
211 |             .blocks
212 |             .by_id(IrBlockId::Function)
213 |             .map(|block| FunctionBlock::try_from((block, &ctx)))
214 |             .collect::<Result<Vec<_>, _>>()?;
215 | 
216 |         // TODO: Handle function blocks as well; for now they are mapped and then dropped.
217 |         log::debug!("functions: {:?}", function_records);
218 | 
219 |         let aliases = block
220 |             .records
221 |             .by_code(ModuleCode::Alias)
222 |             .map(|rec| Alias::try_map(rec, &ctx))
223 |             .collect::<Result<Vec<_>, _>>()?;
224 | 
225 |         log::debug!("aliases: {:?}", aliases);
226 | 
227 |         Ok(Self {
228 |             triple,
229 |             asm,
230 |             deplibs,
231 |         })
232 |     }
233 | }
234 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/block/strtab.rs:
--------------------------------------------------------------------------------
  1 | //! Functionality for mapping the `STRTAB_BLOCK` block.
  2 | 
  3 | use std::convert::TryFrom;
  4 | use std::str::Utf8Error;
  5 | 
  6 | use llvm_support::bitcodes::StrtabCode;
  7 | use llvm_support::StrtabRef;
  8 | use thiserror::Error;
  9 | 
 10 | use crate::map::MapError;
 11 | use crate::record::RecordBlobError;
 12 | use crate::unroll::{Block, Record};
 13 | 
 14 | /// Errors that can occur when accessing a string table.
 15 | #[derive(Debug, Error)]
 16 | pub enum StrtabError {
 17 |     /// The string table is missing its blob.
 18 |     #[error("malformed string table: missing blob")]
 19 |     MissingBlob,
 20 | 
 21 |     /// The blob containing the string table is invalid.
 22 |     #[error("invalid string table: {0}")]
 23 |     BadBlob(#[from] RecordBlobError),
 24 | 
 25 |     /// The requested range is invalid.
 26 |     #[error("requested range in string table is invalid")]
 27 |     BadRange,
 28 | 
 29 |     /// The requested string is not UTF-8.
 30 |     #[error("could not decode range into a UTF-8 string: {0}")]
 31 |     BadString(#[from] Utf8Error),
 32 | 
 33 |     /// A generic mapping error occurred.
 34 |     #[error("mapping error in string table")]
 35 |     Map(#[from] MapError),
 36 | }
 37 | 
 38 | /// Models the `STRTAB_BLOCK` block, held as the raw bytes of its blob record.
 39 | #[derive(Clone, Debug, Default)]
 40 | pub struct Strtab(Vec<u8>);
 41 | 
 42 | impl AsRef<[u8]> for Strtab {
 43 |     fn as_ref(&self) -> &[u8] {
 44 |         self.0.as_slice()
 45 |     }
 46 | }
 47 | 
 48 | impl TryFrom<&'_ Block> for Strtab {
 49 |     type Error = StrtabError;
 50 | 
 51 |     fn try_from(block: &'_ Block) -> Result<Self, Self::Error> {
 52 |         // TODO(ww): The docs also claim that there's only one STRTAB_BLOB per STRTAB_BLOCK,
 53 |         // but at least one person has reported otherwise here:
 54 |         // https://lists.llvm.org/pipermail/llvm-dev/2020-August/144327.html
 55 |         // Needs investigation.
 56 |         let record = block
 57 |             .records
 58 |             .one(StrtabCode::Blob as u64)
 59 |             .ok_or(StrtabError::MissingBlob)?;
 60 | 
 61 |         // A failed blob read is converted into a `StrtabError` by `?`.
 62 |         Ok(Self(record.try_blob(0)?))
 63 |     }
 64 | }
 65 | 
 66 | impl Strtab {
 67 |     /// Get a string in the string table by its index and length.
 68 |     ///
 69 |     /// Returns `None` on all of the error conditions associated with
 70 |     /// [`try_get`](Strtab::try_get).
 71 |     pub fn get(&self, sref: &StrtabRef) -> Option<&str> {
 72 |         self.try_get(sref).ok()
 73 |     }
 74 | 
 75 |     /// Get a string in the string table by its index and length.
 76 |     ///
 77 |     /// Returns an error if the requested span is empty, overflows `usize`, or runs
 78 |     /// past the end of the table, or if the extracted slice isn't a valid string.
 79 |     pub fn try_get(&self, sref: &StrtabRef) -> Result<&str, StrtabError> {
 80 |         // `checked_add`: a huge offset/size pair must fail cleanly, not overflow.
 81 |         let end = sref.offset.checked_add(sref.size).ok_or(StrtabError::BadRange)?;
 82 |         if sref.size == 0 {
 83 |             return Err(StrtabError::BadRange);
 84 |         }
 85 | 
 86 |         // `get` returns `None` for any span that isn't fully inside the table.
 87 |         let bytes = self.as_ref().get(sref.offset..end).ok_or(StrtabError::BadRange)?;
 88 |         Ok(std::str::from_utf8(bytes)?)
 89 |     }
 90 | 
 91 |     /// Attempts to read a record's name from the string table.
 92 |     ///
 93 |     /// Adheres to the convention that the first two fields in the record are
 94 |     /// the string's offset and length into the string table.
 95 |     ///
 96 |     /// Panic safety: precondition: `record.fields().len() >= 2`
 97 |     pub(crate) fn read_name(&self, record: &Record) -> Result<&str, StrtabError> {
 98 |         let fields = record.fields();
 99 | 
100 |         self.try_get(&(fields[0], fields[1]).into())
101 |     }
102 | }
103 | 
104 | #[cfg(test)]
105 | mod tests {
106 |     use super::*;
107 | 
108 |     fn sref(tup: (usize, usize)) -> StrtabRef {
109 |         StrtabRef::from(tup)
110 |     }
111 | 
112 |     #[test]
113 |     fn test_strtab() {
114 |         let text = "this is a string table";
115 |         let strtab = Strtab(text.as_bytes().to_vec());
116 |         // Spans that lie inside the table yield the matching substring.
117 |         let valid = [((0, 4), "this"), ((0, 7), "this is"), ((8, 14), "a string table")];
118 |         for (span, expected) in valid.iter() {
119 |             assert_eq!(strtab.get(&sref(*span)), Some(*expected));
120 |         }
121 |         assert_eq!(strtab.get(&sref((0, text.len()))), Some(text));
122 | 
123 |         // Empty spans and spans that leave the table are rejected.
124 |         for span in [(text.len(), 0), (0, text.len() + 1), (0, 0)].iter() {
125 |             assert!(strtab.get(&sref(*span)).is_none());
126 |         }
127 |     }
128 | }
129 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/block/symtab.rs:
--------------------------------------------------------------------------------
 1 | //! Functionality for mapping the `SYMTAB_BLOCK` block.
 2 | 
 3 | use std::convert::TryFrom;
 4 | 
 5 | use llvm_support::bitcodes::SymtabCode;
 6 | use thiserror::Error;
 7 | 
 8 | use crate::map::MapError;
 9 | use crate::record::RecordBlobError;
10 | use crate::unroll::Block;
11 | 
12 | /// Errors that can occur when accessing a symbol table.
13 | #[derive(Debug, Error)]
14 | pub enum SymtabError {
15 |     /// The symbol table is missing its blob.
16 |     #[error("malformed symbol table: missing blob")]
17 |     MissingBlob,
18 | 
19 |     /// The blob containing the symbol table is invalid.
20 |     #[error("invalid symbol table: {0}")]
21 |     InvalidBlob(#[from] RecordBlobError),
22 | 
23 |     /// A generic mapping error occurred.
24 |     #[error("mapping error in symbol table")]
25 |     Map(#[from] MapError),
26 | }
27 | 
28 | /// Models the `SYMTAB_BLOCK` block, held as the raw bytes of its blob record.
29 | ///
30 | /// For now, this is an opaque block: it's really only used to accelerate LTO,
31 | /// so we don't attempt to expand its fields here.
32 | #[derive(Debug)]
33 | pub struct Symtab(Vec<u8>);
34 | 
35 | impl AsRef<[u8]> for Symtab {
36 |     fn as_ref(&self) -> &[u8] {
37 |         self.0.as_slice()
38 |     }
39 | }
40 | 
41 | impl TryFrom<&'_ Block> for Symtab {
42 |     type Error = SymtabError;
43 | 
44 |     fn try_from(block: &'_ Block) -> Result<Self, Self::Error> {
45 |         // The table's bytes are read from the block's blob record.
46 |         let record = block
47 |             .records
48 |             .one(SymtabCode::Blob as u64)
49 |             .ok_or(SymtabError::MissingBlob)?;
50 | 
51 |         Ok(Self(record.try_blob(0)?))
52 |     }
53 | }
54 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/block/type_table.rs:
--------------------------------------------------------------------------------
  1 | //! Functionality for mapping the `TYPE_BLOCK_ID_NEW` block.
  2 | 
  3 | use std::convert::TryFrom;
  4 | 
  5 | use llvm_support::bitcodes::TypeCode;
  6 | use llvm_support::{
  7 |     AddressSpace, ArrayTypeError, FunctionTypeError, IntegerTypeError, PointerTypeError,
  8 |     StructTypeError, Type, VectorTypeError,
  9 | };
 10 | use num_enum::TryFromPrimitiveError;
 11 | use thiserror::Error;
 12 | 
 13 | use crate::map::MapError;
 14 | use crate::unroll::Block;
 15 | 
 16 | /// Errors that can occur when mapping the type table.
 17 | #[derive(Debug, Error)]
 18 | pub enum TypeTableError {
 19 |     /// The size of the type table is invalid.
 20 |     #[error("invalid type table size (expected {0} elements, got {1})")]
 21 |     BadSize(usize, usize),
 22 | 
 23 |     /// An invalid type index was requested.
 24 |     #[error("invalid type table index: {0}")]
 25 |     BadIndex(usize),
 26 | 
 27 |     /// An unknown record code was seen.
 28 |     #[error("unknown type code")]
 29 |     UnknownTypeCode(#[from] TryFromPrimitiveError<TypeCode>),
 30 | 
 31 |     /// The layout of the table itself (i.e., the record structures) is invalid.
 32 |     #[error("invalid type table structure (broken records)")]
 33 |     BadTable,
 34 | 
 35 |     /// An invalid integer type was seen.
 36 |     #[error("invalid integer type")]
 37 |     InvalidIntegerType(#[from] IntegerTypeError),
 38 | 
 39 |     /// An invalid pointer type was seen.
 40 |     #[error("invalid pointer type")]
 41 |     InvalidPointerType(#[from] PointerTypeError),
 42 | 
 43 |     /// An invalid array type was seen.
 44 |     #[error("invalid array type")]
 45 |     InvalidArrayType(#[from] ArrayTypeError),
 46 | 
 47 |     /// An invalid vector type was seen.
 48 |     #[error("invalid vector type")]
 49 |     InvalidVectorType(#[from] VectorTypeError),
 50 | 
 51 |     /// An invalid structure type was seen.
 52 |     #[error("invalid structure type")]
 53 |     InvalidStructType(#[from] StructTypeError),
 54 | 
 55 |     /// An invalid function type was seen.
 56 |     #[error("invalid function type")]
 57 |     InvalidFunctionType(#[from] FunctionTypeError),
 58 | 
 59 |     /// A generic mapping error occurred.
 60 |     #[error("mapping error in type table")]
 61 |     Map(#[from] MapError),
 62 | }
 63 | 
 64 | /// A symbolic type reference, which is really just an index into some
 65 | /// unspecified type table.
 66 | #[derive(Debug)]
 67 | pub(crate) struct TypeRef(pub(crate) usize);
 68 | 
 69 | impl From<usize> for TypeRef {
 70 |     fn from(value: usize) -> Self {
 71 |         Self(value)
 72 |     }
 73 | }
 74 | 
 75 | impl From<u64> for TypeRef {
 76 |     fn from(value: u64) -> TypeRef {
 77 |         TypeRef(usize::try_from(value).unwrap_or(usize::MAX)) // saturate; `as` would truncate
 78 |     }
 79 | }
 80 | 
 81 | /// Represents a "partial type," i.e. a type whose subtypes may be symbolic
 82 | /// and not fully resolved against a type table.
 83 | #[derive(Debug)]
 84 | enum PartialType {
 85 |     Half,
 86 |     BFloat,
 87 |     Float,
 88 |     Double,
 89 |     Metadata,
 90 |     X86Fp80,
 91 |     Fp128,
 92 |     PpcFp128,
 93 |     Void,
 94 |     Label,
 95 |     X86Mmx,
 96 |     X86Amx,
 97 |     Token,
 98 |     Integer(PartialIntegerType),
 99 |     Function(PartialFunctionType),
100 |     Pointer(PartialPointerType),
101 |     OpaquePointer(AddressSpace),
102 |     Struct(PartialStructType),
103 |     Array(PartialArrayType),
104 |     FixedVector(PartialVectorType),
105 |     ScalableVector(PartialVectorType),
106 | }
107 | 
108 | impl PartialType {
109 |     /// Resolves this `PartialType` into a fully owned `Type`.
110 |     ///
111 |     /// Every subtype reference is looked up in `partials`; a failed lookup or a
112 |     /// rejected type construction is returned as a `TypeTableError`.
113 |     fn resolve(&self, partials: &PartialTypeTable) -> Result<Type, TypeTableError> {
114 |         let resolved = match self {
115 |             // Types without subtypes map one-to-one.
116 |             PartialType::Half => Type::Half,
117 |             PartialType::BFloat => Type::BFloat,
118 |             PartialType::Float => Type::Float,
119 |             PartialType::Double => Type::Double,
120 |             PartialType::Metadata => Type::Metadata,
121 |             PartialType::X86Fp80 => Type::X86Fp80,
122 |             PartialType::Fp128 => Type::Fp128,
123 |             PartialType::PpcFp128 => Type::PpcFp128,
124 |             PartialType::Void => Type::Void,
125 |             PartialType::Label => Type::Label,
126 |             PartialType::X86Mmx => Type::X86Mmx,
127 |             PartialType::X86Amx => Type::X86Amx,
128 |             PartialType::Token => Type::Token,
129 |             PartialType::OpaquePointer(space) => Type::OpaquePointer(*space),
130 |             PartialType::Integer(int) => Type::new_integer(int.bit_width)?,
131 |             // Types with subtypes: resolve every referenced type first, then build.
132 |             PartialType::Function(func) => {
133 |                 let ret = partials.resolve(&func.return_type)?;
134 |                 let mut params = Vec::with_capacity(func.param_types.len());
135 |                 for param in &func.param_types {
136 |                     params.push(partials.resolve(param)?);
137 |                 }
138 | 
139 |                 Type::new_function(ret, params, func.is_vararg)?
140 |             }
141 |             PartialType::Pointer(ptr) => {
142 |                 let pointee = partials.resolve(&ptr.pointee)?;
143 | 
144 |                 Type::new_pointer(pointee, ptr.address_space)?
145 |             }
146 |             PartialType::Struct(st) => {
147 |                 let mut fields = Vec::with_capacity(st.field_types.len());
148 |                 for field in &st.field_types {
149 |                     fields.push(partials.resolve(field)?);
150 |                 }
151 | 
152 |                 Type::new_struct(st.name.clone(), fields, st.is_packed)?
153 |             }
154 |             PartialType::Array(arr) => {
155 |                 let elem = partials.resolve(&arr.element_type)?;
156 | 
157 |                 Type::new_array(arr.num_elements, elem)?
158 |             }
159 |             PartialType::FixedVector(fixed) => {
160 |                 log::debug!("vty: {:?}", fixed);
161 | 
162 |                 let elem = partials.resolve(&fixed.element_type)?;
163 |                 log::debug!("element_type: {:?}", partials.get(&fixed.element_type));
164 | 
165 |                 Type::new_vector(fixed.num_elements, elem)?
166 |             }
167 |             PartialType::ScalableVector(scalable) => {
168 |                 let elem = partials.resolve(&scalable.element_type)?;
169 | 
170 |                 Type::new_scalable_vector(scalable.num_elements, elem)?
171 |             }
172 |         };
173 | 
174 |         Ok(resolved)
175 |     }
176 | }
177 | 
178 | #[derive(Debug)]
179 | struct PartialIntegerType {
180 |     bit_width: u32,
181 | }
182 | 
183 | /// Represents an (unresolved) function type.
184 | #[derive(Debug)]
185 | struct PartialFunctionType {
186 |     return_type: TypeRef,
187 |     param_types: Vec<TypeRef>,
188 |     is_vararg: bool,
189 | }
190 | 
191 | /// Represents an (unresolved) pointer type.
192 | #[derive(Debug)]
193 | struct PartialPointerType {
194 |     pointee: TypeRef,
195 |     address_space: AddressSpace,
196 | }
197 | 
198 | #[derive(Debug)]
199 | struct PartialStructType {
200 |     name: Option<String>,
201 |     field_types: Vec<TypeRef>,
202 |     is_packed: bool,
203 | }
204 | 
205 | #[derive(Debug)]
206 | struct PartialArrayType {
207 |     num_elements: u64,
208 |     element_type: TypeRef,
209 | }
210 | 
211 | #[derive(Debug)]
212 | struct PartialVectorType {
213 |     num_elements: u64,
214 |     element_type: TypeRef,
215 | }
216 | 
217 | /// Represents a partial type table.
218 | ///
219 | /// Every partial type table starts out empty (but with an expected ultimate size),
220 | /// and is incrementally updated as records within the type block are visited.
221 | #[derive(Debug)]
222 | struct PartialTypeTable {
223 |     numentries: usize,
224 |     inner: Vec<PartialType>,
225 | }
226 | 
227 | impl PartialTypeTable {
228 |     /// Creates an empty table expecting `numentries` types. The count is read from
229 |     /// the bitstream, so the up-front reservation is capped: an absurdly large
230 |     /// value must not panic or exhaust memory before anything is parsed.
231 |     fn new(numentries: usize) -> Self {
232 |         let inner = Vec::with_capacity(numentries.min(1024));
233 |         Self { numentries, inner }
234 |     }
235 | 
236 |     fn add(&mut self, ty: PartialType) {
237 |         self.inner.push(ty)
238 |     }
239 | 
240 |     fn last_mut(&mut self) -> Option<&mut PartialType> {
241 |         self.inner.last_mut()
242 |     }
243 | 
244 |     /// Fallibly convert a `TypeRef` into its `PartialType` in this partial type table.
245 |     fn get(&self, ty_ref: &TypeRef) -> Result<&PartialType, TypeTableError> {
246 |         self.inner
247 |             .get(ty_ref.0)
248 |             .ok_or(TypeTableError::BadIndex(ty_ref.0))
249 |     }
250 | 
251 |     /// Fallibly converts the given `TypeRef` into a fully owned `Type`.
252 |     fn resolve(&self, ty_ref: &TypeRef) -> Result<Type, TypeTableError> {
253 |         // Look up the partial type, then recursively resolve its subtypes.
254 |         // NOTE(review): nothing here detects cycles, so a self-referential type
255 |         // presumably recurses without bound -- confirm and guard if so.
256 |         let pty = self.get(ty_ref)?;
257 |         log::debug!("type ref {} resolves to {:?}", ty_ref.0, pty);
258 | 
259 |         pty.resolve(self)
260 |     }
261 | 
262 |     /// Fallibly converts this `PartialTypeTable` into a `TypeTable`, failing with
263 |     /// `BadSize` unless exactly `numentries` types were added.
264 |     fn reify(self) -> Result<TypeTable, TypeTableError> {
265 |         if self.inner.len() != self.numentries {
266 |             return Err(TypeTableError::BadSize(self.numentries, self.inner.len()));
267 |         }
268 | 
269 |         // Resolve each partial type into a fully owned `Type`.
270 |         let types = self
271 |             .inner
272 |             .iter()
273 |             .map(|pty| pty.resolve(&self))
274 |             .collect::<Result<Vec<_>, _>>()?;
275 |         Ok(TypeTable(types))
276 |     }
277 | }
278 | 
279 | /// Models the `TYPE_BLOCK_ID_NEW` block.
280 | #[derive(Clone, Debug)]
281 | pub struct TypeTable(Vec<Type>);
282 | 
283 | impl TypeTable {
284 |     pub(crate) fn get(&self, ty_ref: impl Into<TypeRef>) -> Option<&Type> {
285 |         let TypeRef(index) = ty_ref.into();
286 |         self.0.get(index)
287 |     }
288 | }
289 | 
290 | impl TryFrom<&'_ Block> for TypeTable {
291 |     type Error = TypeTableError;
292 | 
293 |     fn try_from(block: &Block) -> Result<Self, Self::Error> {
294 |         // Figure out how many type entries we have, and reserve the space for them up-front.
295 |         let numentries = *block
296 |             .records
297 |             .one(TypeCode::NumEntry)
298 |             .ok_or(TypeTableError::BadTable)
299 |             .and_then(|r| r.fields().first().ok_or(TypeTableError::BadTable))?
300 |             as usize;
301 | 
302 |         // To map the type table, we perform two passes:
303 |         // 1. We iterate over all type records, building an initial table of "partial"
304 |         //    types that contain only symbolic references to other types.
305 |         //    This pass allows us to fully resolve e.g. forward-declared types
306 |         //    without having to perform a more expensive visiting pass later.
307 |         // 2. We iterate over all of the partial types, resolving them into
308 |         //    fully owned and expanded `Type`s.
309 |         let mut partial_types = PartialTypeTable::new(numentries);
310 |         let mut last_type_name = String::new();
311 |         for record in &block.records {
312 |             // A convenience macro for turning a type record field access into an error on failure.
313 |             macro_rules! type_field {
314 |                 ($n:literal) => {
315 |                     record
316 |                         .fields()
317 |                         .get($n)
318 |                         .copied()
319 |                         .ok_or(TypeTableError::BadTable)?
320 |                 };
321 |             }
322 | 
323 |             let code = TypeCode::try_from(record.code()).map_err(TypeTableError::from)?;
324 | 
325 |             match code {
326 |                 // Already visited; nothing to do.
327 |                 TypeCode::NumEntry => continue,
328 |                 TypeCode::Void => partial_types.add(PartialType::Void),
329 |                 TypeCode::Half => partial_types.add(PartialType::Half),
330 |                 TypeCode::BFloat => partial_types.add(PartialType::BFloat),
331 |                 TypeCode::Float => partial_types.add(PartialType::Float),
332 |                 TypeCode::Double => partial_types.add(PartialType::Double),
333 |                 TypeCode::Label => partial_types.add(PartialType::Label),
334 |                 TypeCode::Opaque => {
335 |                     // NOTE(ww): LLVM's BitcodeReader checks that the
336 |                     // TYPE_CODE_OPAQUE record has exactly one field, but
337 |                     // doesn't seem to use that field for anything.
338 |                     // Not sure what's up with that.
339 | 
340 |                     if last_type_name.is_empty() {
341 |                         return Err(MapError::Invalid(
342 |                             "opaque type but no preceding type name".into(),
343 |                         )
344 |                         .into());
345 |                     }
346 | 
347 |                     // Our opaque type might be forward-referenced. If so, we
348 |                     // fill in the previous type's name. Otherwise, we create
349 |                     // a new structure type with no body.
350 |                     if let Some(PartialType::Struct(s)) = partial_types.last_mut() {
351 |                         if s.name.is_some() {
352 |                             return Err(MapError::Invalid(
353 |                                 "forward-declared opaque type already has name".into(),
354 |                             )
355 |                             .into());
356 |                         }
357 | 
358 |                         s.name = Some(last_type_name.clone());
359 |                     } else {
360 |                         partial_types.add(PartialType::Struct(PartialStructType {
361 |                             name: Some(last_type_name.clone()),
362 |                             field_types: vec![],
363 |                             is_packed: false,
364 |                         }));
365 |                     }
366 | 
367 |                     last_type_name.clear();
368 |                 }
369 |                 TypeCode::Integer => {
370 |                     let bit_width = type_field!(0) as u32;
371 |                     partial_types.add(PartialType::Integer(PartialIntegerType { bit_width }));
372 |                 }
373 |                 TypeCode::Pointer => {
374 |                     let pointee = TypeRef(type_field!(0) as usize);
375 | 
376 |                     let address_space = AddressSpace::try_from(type_field!(1)).map_err(|e| {
377 |                         MapError::Invalid(format!("bad address space for pointer type: {:?}", e))
378 |                     })?;
379 | 
380 |                     partial_types.add(PartialType::Pointer(PartialPointerType {
381 |                         pointee,
382 |                         address_space,
383 |                     }));
384 |                 }
385 |                 TypeCode::FunctionOld => {
386 |                     // TODO(ww): These only show up in older bitcode, so don't bother with them for now.
387 |                     return Err(MapError::Unsupported(
388 |                         "unsupported: old function type codes; please implement!".into(),
389 |                     )
390 |                     .into());
391 |                 }
392 |                 TypeCode::Array => {
393 |                     let num_elements = type_field!(0);
394 | 
395 |                     let element_type = TypeRef(type_field!(1) as usize);
396 | 
397 |                     partial_types.add(PartialType::Array(PartialArrayType {
398 |                         num_elements,
399 |                         element_type,
400 |                     }));
401 |                 }
402 |                 TypeCode::Vector => {
403 |                     let num_elements = type_field!(0);
404 | 
405 |                     let element_type = TypeRef(type_field!(1) as usize);
406 | 
407 |                     // A vector type is either fixed or scalable, depending on the
408 |                     // third field (which can also be absent, indicating fixed).
409 |                     let scalable = record.fields().get(2).map_or_else(|| false, |f| *f > 0);
410 |                     let new_type = match scalable {
411 |                         true => PartialType::ScalableVector(PartialVectorType {
412 |                             num_elements,
413 |                             element_type,
414 |                         }),
415 |                         false => PartialType::FixedVector(PartialVectorType {
416 |                             num_elements,
417 |                             element_type,
418 |                         }),
419 |                     };
420 | 
421 |                     partial_types.add(new_type);
422 |                 }
423 |                 TypeCode::X86Fp80 => partial_types.add(PartialType::X86Fp80),
424 |                 TypeCode::Fp128 => partial_types.add(PartialType::Fp128),
425 |                 TypeCode::PpcFp128 => partial_types.add(PartialType::PpcFp128),
426 |                 TypeCode::Metadata => partial_types.add(PartialType::Metadata),
427 |                 TypeCode::X86Mmx => partial_types.add(PartialType::X86Mmx),
428 |                 TypeCode::StructAnon => {
429 |                     let is_packed = type_field!(0) > 0;
430 | 
431 |                     let field_types = record.fields()[1..]
432 |                         .iter()
433 |                         .map(|f| TypeRef(*f as usize))
434 |                         .collect::<Vec<_>>();
435 | 
436 |                     partial_types.add(PartialType::Struct(PartialStructType {
437 |                         name: None,
438 |                         field_types,
439 |                         is_packed,
440 |                     }));
441 |                 }
442 |                 TypeCode::StructName => {
443 |                     // A `TYPE_CODE_STRUCT_NAME` is not a type in its own right; it merely
444 |                     // supplies the name for a future type record.
445 |                     last_type_name.push_str(&record.try_string(0).map_err(MapError::RecordString)?);
446 |                     continue;
447 |                 }
448 |                 TypeCode::StructNamed => {
449 |                     // TODO(ww): Should probably be deduped with StructAnon above,
450 |                     // since they're 90% identical.
451 | 
452 |                     let is_packed = type_field!(0) > 0;
453 | 
454 |                     let field_types = record.fields()[1..]
455 |                         .iter()
456 |                         .map(|f| TypeRef(*f as usize))
457 |                         .collect::<Vec<_>>();
458 | 
459 |                     // Like with opaque types, we might be forward-referenced here.
460 |                     // If so, we update our pre-existing structure type with its
461 |                     // correct name and fields.
462 |                     if let Some(PartialType::Struct(s)) = partial_types.last_mut() {
463 |                         if s.name.is_some() || !s.field_types.is_empty() {
464 |                             return Err(MapError::Invalid(
465 |                                 "forward-declared struct type already has name and/or type fields"
466 |                                     .into(),
467 |                             )
468 |                             .into());
469 |                         }
470 | 
471 |                         s.name = Some(last_type_name.clone());
472 |                         s.field_types = field_types;
473 |                     } else {
474 |                         partial_types.add(PartialType::Struct(PartialStructType {
475 |                             name: Some(last_type_name.clone()),
476 |                             field_types,
477 |                             is_packed,
478 |                         }));
479 |                     }
480 | 
481 |                     last_type_name.clear();
482 |                 }
483 |                 TypeCode::Function => {
484 |                     let is_vararg = type_field!(0) > 0;
485 |                     let return_type = TypeRef(type_field!(1) as usize);
486 | 
487 |                     let param_types = record.fields()[2..]
488 |                         .iter()
489 |                         .map(|f| TypeRef(*f as usize))
490 |                         .collect::<Vec<_>>();
491 | 
492 |                     partial_types.add(PartialType::Function(PartialFunctionType {
493 |                         return_type,
494 |                         param_types,
495 |                         is_vararg,
496 |                     }));
497 |                 }
498 |                 TypeCode::Token => partial_types.add(PartialType::Token),
499 |                 TypeCode::X86Amx => partial_types.add(PartialType::X86Amx),
500 |                 TypeCode::OpaquePointer => {
501 |                     let address_space = AddressSpace::try_from(type_field!(0)).map_err(|e| {
502 |                         MapError::Invalid(format!("bad address space in type: {:?}", e))
503 |                     })?;
504 | 
505 |                     partial_types.add(PartialType::OpaquePointer(address_space))
506 |                 }
507 |             }
508 |         }
509 | 
510 |         partial_types.reify()
511 |     }
512 | }
513 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/block/vst.rs:
--------------------------------------------------------------------------------
 1 | //! Functionality for mapping `VALUE_SYMTAB_BLOCK_ID` blocks.
 2 | //!
 3 | //! These blocks contain "value symbol tables," which are effectively
 4 | //! mappings between strings and value models (in LLVM, `llvm::Value`s).
 5 | 
 6 | use std::convert::TryFrom;
 7 | 
 8 | use thiserror::Error;
 9 | 
10 | use crate::unroll::Block;
11 | 
12 | /// Errors that can occur when mapping or accessing a VST (currently has no variants).
13 | #[derive(Debug, Error)]
14 | pub enum VstError {}
15 | 
16 | /// A zero-sized type (ZST) representing a "module-style" VST.
17 | ///
18 | /// This is a ZST instead of an enum variant to make dispatch on the "style" of VST
19 | /// being parsed slightly more static and readable.
20 | pub struct ModuleStyleVst;
21 | 
22 | /// A zero-sized type (ZST) representing a "function-style" VST.
23 | ///
24 | /// See [`ModuleStyleVst`] for the design justification here.
25 | pub struct FunctionStyleVst;
26 | 
27 | /// Models a single value symbol table ("VST") in a bitcode module; currently a fieldless placeholder.
28 | pub struct Vst {}
29 | 
30 | impl TryFrom<(&'_ Block, ModuleStyleVst)> for Vst {
31 |     type Error = VstError;
32 | 
33 |     fn try_from(_unmapped: (&'_ Block, ModuleStyleVst)) -> Result<Self, Self::Error> {
34 |         // Placeholder: nothing is read from the module-style block yet, so this cannot fail.
35 |         Ok(Self {})
36 |     }
37 | }
38 | 
39 | impl TryFrom<(&'_ Block, FunctionStyleVst)> for Vst {
40 |     type Error = VstError;
41 | 
42 |     fn try_from(_unmapped: (&'_ Block, FunctionStyleVst)) -> Result<Self, Self::Error> {
43 |         // Placeholder: nothing is read from the function-style block yet, so this cannot fail.
44 |         Ok(Self {})
45 |     }
46 | }
47 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/error.rs:
--------------------------------------------------------------------------------
 1 | //! Errors for `llvm-mapper`.
 2 | 
 3 | use llvm_bitstream::error::Error as BitstreamError;
 4 | use thiserror::Error as ThisError;
 5 | 
 6 | use crate::block::BlockMapError;
 7 | 
 8 | /// All possible errors that can occur while mapping a bitstream.
 9 | ///
10 | /// The error variants here are deeply nested: `Parse` and `Map` wrap lower-level errors.
11 | #[non_exhaustive]
12 | #[derive(Debug, ThisError)]
13 | pub enum Error {
14 |     /// We encountered an error while performing the underlying bitstream parse.
15 |     #[error("error while parsing the bitstream")]
16 |     Parse(#[from] BitstreamError),
17 | 
18 |     /// We couldn't unroll the stream because of a structural error (described by the message).
19 |     #[error("error while unrolling the bitstream: {0}")]
20 |     Unroll(String),
21 | 
22 |     /// We couldn't perform the bitstream map.
23 |     #[error("error while mapping the bitstream")]
24 |     Map(#[from] BlockMapError),
25 | }
26 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! `llvm-mapper` is a library for mapping entities in LLVM's bitstream
 2 | //! format into higher-level IR and bitcode metadata models.
 3 | 
 4 | #![deny(rustdoc::broken_intra_doc_links)]
 5 | #![deny(missing_docs)]
 6 | #![allow(clippy::redundant_field_names)]
 7 | #![forbid(unsafe_code)]
 8 | 
 9 | pub mod block;
10 | pub mod error;
11 | pub mod map;
12 | pub mod record;
13 | pub mod unroll;
14 | 
15 | pub use unroll::Bitcode;
16 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/map.rs:
--------------------------------------------------------------------------------
  1 | //! Traits for mapping bitstream types to models.
  2 | 
  3 | use thiserror::Error;
  4 | 
  5 | use crate::block::Strtab;
  6 | use crate::block::{AttributeGroups, Attributes, TypeTable};
  7 | use crate::record::{Comdat, DataLayout, RecordStringError};
  8 | use crate::unroll::ConsistencyError;
  9 | 
 10 | /// Generic errors that can occur when mapping blocks and records.
 11 | #[derive(Debug, Error)]
 12 | pub enum MapError {
 13 |     /// We couldn't map a block; the message describes the specific reason.
 14 |     #[error("error while mapping block: {0}")]
 15 |     BadBlockMap(String),
 16 | 
 17 |     /// We encountered an inconsistent block or record state.
 18 |     #[error("inconsistent block or record state")]
 19 |     Inconsistent(#[from] ConsistencyError),
 20 | 
 21 |     /// We encountered an unsupported feature or layout (named in the message).
 22 |     #[error("unsupported: {0}")]
 23 |     Unsupported(String),
 24 | 
 25 |     /// We encountered an invalid state or combination of states.
 26 |     ///
 27 |     /// This variant should be used extremely sparingly.
 28 |     #[error("invalid: {0}")]
 29 |     Invalid(String),
 30 | 
 31 |     /// We couldn't extract a string from a record.
 32 |     #[error("error while extracting string: {0}")]
 33 |     RecordString(#[from] RecordStringError),
 34 | 
 35 |     /// We don't have the appropriate context for a mapping operation.
 36 |     #[error("missing context for mapping")]
 37 |     Context(#[from] MapCtxError),
 38 | }
 39 | 
 40 | /// Errors that can occur when building or querying a mapping context (see [`MapCtx`](MapCtx)).
 41 | #[derive(Debug, Error)]
 42 | pub enum MapCtxError {
 43 |     /// The `MODULE_CODE_VERSION` field is needed, but unavailable.
 44 |     #[error("mapping context requires a version for disambiguation, but none is available")]
 45 |     NoVersion,
 46 | 
 47 |     /// The type table is needed, but unavailable.
 48 |     #[error("mapping context requires types, but none are available")]
 49 |     NoTypeTable,
 50 | }
 51 | 
 52 | /// A mutable, partially-populated container for the bits of state that are necessary for
 53 | /// correct block and record mapping in the context of a particular IR module.
 54 | ///
 55 | /// This is the "partial" counterpart to the [`MapCtx`](MapCtx) structure,
 56 | /// which is produced from this structure with a call to [`reify`](PartialMapCtx::reify).
 57 | #[non_exhaustive]
 58 | #[derive(Debug, Default)]
 59 | pub(crate) struct PartialMapCtx {
 60 |     pub(crate) version: Option<u64>,
 61 |     pub(crate) datalayout: DataLayout,
 62 |     pub(crate) section_table: Vec<String>,
 63 |     pub(crate) gc_table: Vec<String>,
 64 |     pub(crate) strtab: Strtab,
 65 |     pub(crate) attribute_groups: AttributeGroups,
 66 |     pub(crate) attributes: Attributes,
 67 |     pub(crate) type_table: Option<TypeTable>,
 68 |     pub(crate) comdats: Vec<Comdat>,
 69 | }
 70 | 
 71 | impl PartialMapCtx {
 72 |     /// Freezes this partial context into a borrowing [`MapCtx`](MapCtx).
 73 |     ///
 74 |     /// Fails with a [`MapCtxError`] if the version or the type table is still missing.
 75 |     pub(crate) fn reify(&self) -> Result<MapCtx, MapCtxError> {
 76 |         log::debug!("reifying {self:?}");
 77 | 
 78 |         // Resolve the two optional pieces up front (version first), so that the
 79 |         // struct literal below has no fallible parts.
 80 |         let version = self.version.ok_or(MapCtxError::NoVersion)?;
 81 |         let type_table = self.type_table.as_ref().ok_or(MapCtxError::NoTypeTable)?;
 82 | 
 83 |         Ok(MapCtx {
 84 |             version,
 85 |             datalayout: &self.datalayout,
 86 |             section_table: &self.section_table,
 87 |             gc_table: &self.gc_table,
 88 |             strtab: &self.strtab,
 89 |             attribute_groups: &self.attribute_groups,
 90 |             attributes: &self.attributes,
 91 |             type_table,
 92 |             comdats: &self.comdats,
 93 |         })
 94 |     }
 95 | 
 96 |     /// Reports whether string lookups should go through the associated string table.
 97 |     ///
 98 |     /// This is the case for `MODULE_CODE_VERSION`s of 2 and higher; an error is
 99 |     /// returned if no version has been recorded yet.
100 |     pub fn use_strtab(&self) -> Result<bool, MapCtxError> {
101 |         match self.version {
102 |             Some(version) => Ok(version >= 2),
103 |             None => Err(MapCtxError::NoVersion),
104 |         }
105 |     }
106 | 
107 |     /// Returns the attribute groups stored in this context.
108 |     pub fn attribute_groups(&self) -> &AttributeGroups {
109 |         &self.attribute_groups
110 |     }
111 | }
 99 | 
100 | /// A handle for various bits of state that are necessary for correct block
101 | /// and record mapping in the context of a particular IR module.
102 | ///
103 | /// This handle only borrows its state: mapping operations that need to update the
104 | /// context do so on the partial context, before it is reified into this structure.
105 | #[non_exhaustive]
106 | #[derive(Debug)]
107 | pub struct MapCtx<'ctx> {
108 |     /// The `MODULE_CODE_VERSION` for the IR module being mapped.
109 |     pub version: u64,
110 | 
111 |     /// The datalayout specification.
112 |     pub datalayout: &'ctx DataLayout,
113 | 
114 |     /// The section table.
115 |     pub section_table: &'ctx [String],
116 | 
117 |     /// The GC table.
118 |     pub gc_table: &'ctx [String],
119 | 
120 |     /// The string table.
121 |     pub strtab: &'ctx Strtab,
122 | 
123 |     /// Any attribute groups.
124 |     pub attribute_groups: &'ctx AttributeGroups,
125 | 
126 |     /// Any raw attributes.
127 |     pub attributes: &'ctx Attributes,
128 | 
129 |     /// The type table.
130 |     pub type_table: &'ctx TypeTable,
131 | 
132 |     /// The COMDAT list.
133 |     pub comdats: &'ctx [Comdat],
134 |     // TODO(ww): Maybe symtab and identification in here?
135 | }
136 | 
137 | impl MapCtx<'_> {
138 |     /// Reports whether string lookups should go through the associated string table.
139 |     ///
140 |     /// True for `MODULE_CODE_VERSION`s of 2 and higher.
141 |     pub fn use_strtab(&self) -> bool {
142 |         const FIRST_STRTAB_VERSION: u64 = 2;
143 |         self.version >= FIRST_STRTAB_VERSION
144 |     }
145 | 
146 |     /// Reports whether operands are encoded using relative IDs.
147 |     ///
148 |     /// True for `MODULE_CODE_VERSION`s of 1 and higher.
149 |     pub fn use_relative_ids(&self) -> bool {
150 |         const FIRST_RELATIVE_ID_VERSION: u64 = 1;
151 |         self.version >= FIRST_RELATIVE_ID_VERSION
152 |     }
153 | }
152 | 
153 | /// A trait for mapping some raw `T` into a model type, with mutable access to the partial context.
154 | ///
155 | /// This trait allows an implementer to modify the given [`PartialMapCtx`](PartialMapCtx),
156 | /// filling it in with state before it's reified into a "real" [`MapCtx`](MapCtx).
157 | ///
158 | /// This two-stage process is designed to limit the number of invalid
159 | /// states that a `MapCtx` can be in, and to enable better lifetimes
160 | /// later in the IR module mapping process.
161 | pub(crate) trait PartialCtxMappable<T>: Sized {
162 |     type Error;
163 | 
164 |     /// Attempt to map `T` into `Self`, reading and/or updating the given [`PartialMapCtx`](PartialMapCtx).
165 |     fn try_map(raw: &T, ctx: &mut PartialMapCtx) -> Result<Self, Self::Error>;
166 | }
167 | 
168 | /// A trait for mapping some raw `T` into a model type, using a read-only [`MapCtx`](MapCtx).
169 | ///
170 | /// Implementing this trait is *almost* always preferable over
171 | /// [`PartialCtxMappable`](PartialCtxMappable) -- the latter should really only
172 | /// be used when a mapping implementation **absolutely** must modify its
173 | /// mapping context, which should only happen early in IR module parsing.
174 | pub(crate) trait CtxMappable<'ctx, T>: Sized {
175 |     type Error;
176 | 
177 |     /// Attempt to map `T` into `Self` using the given [`MapCtx`](MapCtx).
178 |     fn try_map(raw: &T, ctx: &'ctx MapCtx) -> Result<Self, Self::Error>;
179 | }
180 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/record/alias.rs:
--------------------------------------------------------------------------------
  1 | //! Functionality for mapping the `MODULE_CODE_ALIAS` record.
  2 | 
  3 | use std::convert::TryFrom;
  4 | 
  5 | use llvm_support::{
  6 |     DllStorageClass, Linkage, RuntimePreemption, ThreadLocalMode, Type, UnnamedAddr, Visibility,
  7 | };
  8 | use num_enum::TryFromPrimitiveError;
  9 | use thiserror::Error;
 10 | 
 11 | use crate::map::{CtxMappable, MapCtx};
 12 | use crate::record::StrtabError;
 13 | use crate::unroll::Record;
 14 | 
 15 | /// Errors that can occur while mapping an alias record.
 16 | #[derive(Debug, Error)]
 17 | pub enum AliasError {
 18 |     /// The alias record is too short to be well-formed; carries the actual field count.
 19 |     #[error("alias record too short: {0} < 5 fields")]
 20 |     TooShort(usize),
 21 | 
 22 |     /// The alias record is in an old unsupported format (the module doesn't use a string table).
 23 |     #[error("unsupported alias record format (v1)")]
 24 |     V1Unsupported,
 25 | 
 26 |     /// Retrieving a string from a string table failed.
 27 |     #[error("error while accessing string table")]
 28 |     Strtab(#[from] StrtabError),
 29 | 
 30 |     /// The alias's type index has no entry in the type table; carries the offending index.
 31 |     #[error("invalid type table index: {0}")]
 32 |     Type(u64),
 33 | 
 34 |     /// The alias has an invalid visibility.
 35 |     #[error("invalid visibility")]
 36 |     Visibility(#[from] TryFromPrimitiveError<Visibility>),
 37 | 
 38 |     /// The alias has an invalid DLL storage class.
 39 |     #[error("invalid storage class")]
 40 |     DllStorageClass(#[from] TryFromPrimitiveError<DllStorageClass>),
 41 | }
 42 | 
 43 | /// Models the `MODULE_CODE_ALIAS` record.
 44 | #[derive(Debug)]
 45 | pub struct Alias<'ctx> {
 46 |     /// The alias's name, borrowed from the mapping context's string table.
 47 |     pub name: &'ctx str,
 48 | 
 49 |     /// The alias's type, borrowed from the mapping context's type table.
 50 |     pub ty: &'ctx Type,
 51 | 
 52 |     /// The value index of the aliasee.
 53 |     pub value_index: u64,
 54 | 
 55 |     /// The alias's linkage.
 56 |     pub linkage: Linkage,
 57 | 
 58 |     /// The alias's visibility.
 59 |     pub visibility: Visibility,
 60 | 
 61 |     /// The alias's DLL storage class.
 62 |     pub storage_class: DllStorageClass,
 63 | 
 64 |     /// The alias's thread local storage mode.
 65 |     pub tls_mode: ThreadLocalMode,
 66 | 
 67 |     /// The alias's `unnamed_addr` specifier.
 68 |     pub unnamed_addr: UnnamedAddr,
 69 | 
 70 |     /// The alias's runtime preemption specifier.
 71 |     pub preemption_specifier: RuntimePreemption,
 72 | }
 73 | 
 74 | impl<'ctx> CtxMappable<'ctx, Record> for Alias<'ctx> {
 75 |     type Error = AliasError;
 76 | 
 77 |     /// Maps a `MODULE_CODE_ALIAS` record into an [`Alias`].
 78 |     ///
 79 |     /// Only the string-table-based record layout is supported; optional trailing
 80 |     /// fields that are absent take their default values.
 81 |     fn try_map(record: &Record, ctx: &'ctx MapCtx) -> Result<Self, Self::Error> {
 82 |         let fields = record.fields();
 83 | 
 84 |         // Modules that don't use a string table carry the old (v1) layout, which isn't handled.
 85 |         if !ctx.use_strtab() {
 86 |             return Err(AliasError::V1Unsupported);
 87 |         }
 88 | 
 89 |         // Layout: [strtab_offset, strtab_size, *v1], where v1 starts with three
 90 |         // mandatory fields: [alias type, aliasee value#, linkage, ...].
 91 |         let field_count = fields.len();
 92 |         if field_count < 5 {
 93 |             return Err(AliasError::TooShort(field_count));
 94 |         }
 95 | 
 96 |         let name = ctx.strtab.read_name(record)?;
 97 |         let type_id = fields[2];
 98 |         let ty = ctx.type_table.get(type_id).ok_or(AliasError::Type(type_id))?;
 99 |         let value_index = fields[3];
100 |         let linkage = Linkage::from(fields[4]);
101 | 
102 |         // Everything past the linkage is optional.
103 |         let visibility = match fields.get(5) {
104 |             Some(raw) => Visibility::try_from(*raw)?,
105 |             None => Visibility::Default,
106 |         };
107 |         let storage_class = match fields.get(6) {
108 |             Some(raw) => DllStorageClass::try_from(*raw)?,
109 |             None => DllStorageClass::Default,
110 |         };
111 | 
112 |         let tls_mode = fields
113 |             .get(7)
114 |             .map_or(ThreadLocalMode::NotThreadLocal, |raw| ThreadLocalMode::from(*raw));
115 |         let unnamed_addr = fields
116 |             .get(8)
117 |             .map_or(UnnamedAddr::None, |raw| UnnamedAddr::from(*raw));
118 |         let preemption_specifier = fields
119 |             .get(9)
120 |             .map_or(RuntimePreemption::DsoPreemptable, |raw| RuntimePreemption::from(*raw));
121 | 
122 |         Ok(Self {
123 |             name,
124 |             ty,
125 |             value_index,
126 |             linkage,
127 |             visibility,
128 |             storage_class,
129 |             tls_mode,
130 |             unnamed_addr,
131 |             preemption_specifier,
132 |         })
133 |     }
134 | }
139 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/record/comdat.rs:
--------------------------------------------------------------------------------
 1 | //! Functionality for mapping the `MODULE_CODE_COMDAT` record.
 2 | 
 3 | use std::convert::TryInto;
 4 | 
 5 | use llvm_support::StrtabRef;
 6 | use num_enum::{TryFromPrimitive, TryFromPrimitiveError};
 7 | use thiserror::Error;
 8 | 
 9 | use crate::block::strtab::StrtabError;
10 | use crate::map::{MapError, PartialCtxMappable, PartialMapCtx};
11 | use crate::unroll::Record;
12 | 
13 | /// Errors that can occur when mapping a COMDAT record.
14 | #[non_exhaustive]
15 | #[derive(Debug, Error)]
16 | pub enum ComdatError {
17 |     /// The COMDAT record is in an old unsupported format (the module doesn't use a string table).
18 |     #[error("unsupported COMDAT record format (v1)")]
19 |     V1Unsupported,
20 | 
21 |     /// The COMDAT record is too short; carries the actual field count.
22 |     #[error("COMDAT record doesn't have enough fields ({0} < 3)")]
23 |     TooShort(usize),
24 | 
25 |     /// We couldn't get the COMDAT's name from the string table.
26 |     #[error("error while accessing COMDAT name: {0}")]
27 |     Name(#[from] StrtabError),
28 | 
29 |     /// The COMDAT's selection kind is invalid or unknown.
30 |     #[error("unknown or invalid COMDAT selection kind: {0}")]
31 |     SelectionKind(#[from] TryFromPrimitiveError<SelectionKind>),
32 | 
33 |     /// A generic mapping error occurred.
34 |     #[error("mapping error in comdat list")]
35 |     Map(#[from] MapError),
36 | }
37 | 
38 | /// The different kinds of COMDAT selections, numbered from 0 in declaration order.
39 | ///
40 | /// This is a nearly direct copy of LLVM's `SelectionKind`; see `IR/Comdat.h`.
41 | #[non_exhaustive]
42 | #[derive(Debug, TryFromPrimitive)]
43 | #[repr(u64)]
44 | pub enum SelectionKind {
45 |     /// The linker may choose any COMDAT.
46 |     Any,
47 |     /// The data referenced by the COMDAT must be the same.
48 |     ExactMatch,
49 |     /// The linker will choose the largest COMDAT.
50 |     Largest,
51 |     /// No deduplication is performed.
52 |     NoDeduplicate,
53 |     /// The data referenced by the COMDAT must be the same size.
54 |     SameSize,
55 | }
56 | 
57 | /// Models the `MODULE_CODE_COMDAT` record.
58 | #[non_exhaustive]
59 | #[derive(Debug)]
60 | pub struct Comdat {
61 |     /// The selection kind for this COMDAT.
62 |     pub selection_kind: SelectionKind,
63 |     /// The COMDAT key (its name, as read from the string table).
64 |     pub name: String,
65 | }
66 | 
67 | impl PartialCtxMappable<Record> for Comdat {
68 |     type Error = ComdatError;
69 | 
70 |     /// Maps a `MODULE_CODE_COMDAT` record into a [`Comdat`].
71 |     ///
72 |     /// Only the v2 layout (`[strtab offset, strtab size, selection kind]`) is
73 |     /// supported. Fields past the third are ignored rather than rejected, so that
74 |     /// [`ComdatError::TooShort`] is only ever reported for records that really are short.
75 |     fn try_map(record: &Record, ctx: &mut PartialMapCtx) -> Result<Self, Self::Error> {
76 |         if !ctx.use_strtab().map_err(MapError::Context)? {
77 |             return Err(ComdatError::V1Unsupported);
78 |         }
79 | 
80 |         let fields = record.fields();
81 | 
82 |         // v2: [strtab offset, strtab size, selection kind]
83 |         if fields.len() < 3 {
84 |             return Err(ComdatError::TooShort(fields.len()));
85 |         }
86 | 
87 |         // Index safety: we check for at least 3 fields above.
88 |         let name = {
89 |             let sref: StrtabRef = (fields[0], fields[1]).into();
90 |             ctx.strtab.try_get(&sref)?.into()
91 |         };
92 |         let selection_kind: SelectionKind = fields[2].try_into()?;
93 | 
94 |         Ok(Self {
95 |             selection_kind,
96 |             name,
97 |         })
98 |     }
99 | }
93 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/record/datalayout.rs:
--------------------------------------------------------------------------------
  1 | //! Functionality for mapping the `MODULE_CODE_DATALAYOUT` record.
  2 | 
  3 | use std::convert::{TryFrom, TryInto};
  4 | use std::num::ParseIntError;
  5 | use std::str::FromStr;
  6 | 
  7 | use llvm_support::{
  8 |     AddressSpace, AddressSpaceError, Align, AlignError, AlignSpecError, Endian,
  9 |     FunctionPointerAlign, Mangling, PointerAlignSpec, PointerAlignSpecs, TypeAlignSpec,
 10 |     TypeAlignSpecs,
 11 | };
 12 | use thiserror::Error;
 13 | 
 14 | use crate::map::{PartialCtxMappable, PartialMapCtx};
 15 | use crate::record::RecordStringError;
 16 | use crate::unroll::Record;
 17 | 
 18 | /// Potential errors when parsing an LLVM datalayout string.
 19 | #[derive(Debug, Error)]
 20 | pub enum DataLayoutError {
 21 |     /// The datalayout string can't be extracted from the record.
 22 |     #[error("malformed datalayout record: {0}")]
 23 |     BadString(#[from] RecordStringError),
 24 |     /// The specified alignment is invalid.
 25 |     #[error("bad alignment value: {0}")]
 26 |     BadAlign(#[from] AlignError),
 27 |     /// The specified address space is invalid.
 28 |     #[error("bad address space")]
 29 |     BadAddressSpace(#[from] AddressSpaceError),
 30 |     /// An unknown specification was encountered; carries the spec's identifying character.
 31 |     #[error("unknown datalayout specification: {0}")]
 32 |     UnknownSpec(char),
 33 |     /// An empty specification was encountered.
 34 |     #[error("empty specification in datalayout")]
 35 |     EmptySpec,
 36 |     /// The datalayout string isn't in ASCII.
 37 |     #[error("non-ASCII characters in datalayout string")]
 38 |     BadEncoding,
 39 |     /// We couldn't parse a field as an integer.
 40 |     #[error("couldn't parse spec field: {0}")]
 41 |     BadInt(#[from] ParseIntError),
 42 |     /// We couldn't parse an individual spec, for some reason.
 43 |     #[error("couldn't parse spec: {0}")]
 44 |     BadSpecParse(String),
 45 |     /// We couldn't parse an alignment spec.
 46 |     #[error("couldn't parse alignment spec: {0}")]
 47 |     BadAlignSpec(#[from] AlignSpecError),
 48 | }
 49 | 
 50 | /// Models the parsed contents of the `MODULE_CODE_DATALAYOUT` record.
 51 | #[non_exhaustive]
 52 | #[derive(Debug)]
 53 | pub struct DataLayout {
 54 |     /// The endianness of the target.
 55 |     pub endianness: Endian,
 56 |     /// The target's natural stack alignment, if present.
 57 |     pub natural_stack_alignment: Option<Align>,
 58 |     /// The address space for program memory.
 59 |     pub program_address_space: AddressSpace,
 60 |     /// The address space for global variables.
 61 |     pub global_variable_address_space: AddressSpace,
 62 |     /// The address space for objects created by `alloca`.
 63 |     pub alloca_address_space: AddressSpace,
 64 |     /// Non-pointer type alignment specifications for the target.
 65 |     pub type_alignments: TypeAlignSpecs,
 66 |     /// Pointer alignment specifications for the target.
 67 |     pub pointer_alignments: PointerAlignSpecs,
 68 |     /// The aggregate alignment for the target.
 69 |     pub aggregate_alignment: Align,
 70 |     /// Function pointer alignment for the target, if present.
 71 |     pub function_pointer_alignment: Option<FunctionPointerAlign>,
 72 |     /// The target's symbol mangling discipline, if present.
 73 |     pub mangling: Option<Mangling>,
 74 |     /// A list of integer widths (in bits) that are efficiently supported by the target.
 75 |     pub native_integer_widths: Vec<u32>,
 76 |     /// A list of address spaces that use non-integral pointers.
 77 |     pub non_integral_address_spaces: Vec<AddressSpace>,
 78 | }
 79 | 
 80 | impl Default for DataLayout {
 81 |     /// Returns the baseline layout that datalayout string parsing starts from
 82 |     /// before applying any explicit specifications.
 83 |     fn default() -> Self {
 84 |         Self {
 85 |             endianness: Endian::Big,
 86 |             natural_stack_alignment: None,
 87 |             program_address_space: AddressSpace::default(),
 88 |             global_variable_address_space: AddressSpace::default(),
 89 |             alloca_address_space: AddressSpace::default(),
 90 |             type_alignments: Default::default(),
 91 |             pointer_alignments: Default::default(),
 92 |             aggregate_alignment: Align::ALIGN8,
 93 |             function_pointer_alignment: None,
 94 |             mangling: None,
 95 |             native_integer_widths: Vec::new(),
 96 |             non_integral_address_spaces: Vec::new(),
 97 |         }
 98 |     }
 99 | }
 98 | 
 99 | impl FromStr for DataLayout {
100 |     type Err = DataLayoutError;
101 | 
102 |     fn from_str(value: &str) -> Result<Self, Self::Err> {
103 |         if !value.is_ascii() {
104 |             return Err(DataLayoutError::BadEncoding);
105 |         }
106 | 
107 |         let mut datalayout = Self::default();
108 |         for spec in value.split('-') {
109 |             if spec.is_empty() {
110 |                 return Err(DataLayoutError::EmptySpec);
111 |             }
112 | 
113 |             let body = &spec[1..];
114 | 
115 |             // Unwrap safety: we check for a nonempty spec above.
116 |             #[allow(clippy::unwrap_used)]
117 |             match spec.chars().next().unwrap() {
118 |                 'e' => datalayout.endianness = Endian::Little,
119 |                 'E' => datalayout.endianness = Endian::Big,
120 |                 'S' => {
121 |                     datalayout.natural_stack_alignment =
122 |                         Some(Align::from_bit_align(body.parse::<u64>()?)?);
123 |                 }
124 |                 'P' => {
125 |                     datalayout.program_address_space = body.parse::<u32>()?.try_into()?;
126 |                 }
127 |                 'G' => {
128 |                     datalayout.global_variable_address_space = body.parse::<u32>()?.try_into()?;
129 |                 }
130 |                 'A' => {
131 |                     datalayout.alloca_address_space = body.parse::<u32>()?.try_into()?;
132 |                 }
133 |                 'p' => {
134 |                     // Pass the entire spec in here, since we need the spec identifier as well.
135 |                     let align_spec = spec.parse::<PointerAlignSpec>()?;
136 |                     datalayout.pointer_alignments.update(align_spec);
137 |                 }
138 |                 'i' | 'v' | 'f' | 'a' => {
139 |                     // Pass the entire spec in here, since we need the spec identifier as well.
140 |                     let align_spec = spec.parse::<TypeAlignSpec>()?;
141 |                     datalayout.type_alignments.update(align_spec);
142 |                 }
143 |                 'F' => match body.chars().next() {
144 |                     Some(id) => {
145 |                         let align = Align::from_bit_align(body[1..].parse::<u64>()?)?;
146 |                         let align = match id {
147 |                             'i' => FunctionPointerAlign::Independent {
148 |                                 abi_alignment: align,
149 |                             },
150 |                             'n' => FunctionPointerAlign::MultipleOfFunctionAlign {
151 |                                 abi_alignment: align,
152 |                             },
153 |                             o => {
154 |                                 return Err(DataLayoutError::BadSpecParse(format!(
155 |                                     "unknown function pointer alignment specifier: {}",
156 |                                     o
157 |                                 )))
158 |                             }
159 |                         };
160 |                         datalayout.function_pointer_alignment = Some(align);
161 |                     }
162 |                     None => {
163 |                         return Err(DataLayoutError::BadSpecParse(
164 |                             "function pointer alignment spec is empty".into(),
165 |                         ))
166 |                     }
167 |                 },
168 |                 'm' => {
169 |                     // The mangling spec is `m:X`, where `X` is the mangling kind.
170 |                     // We've already parsed `m`, so we expect exactly two characters.
171 |                     let mut mangling = body.chars().take(2);
172 |                     match mangling.next() {
173 |                         Some(':') => {}
174 |                         Some(u) => {
175 |                             return Err(DataLayoutError::BadSpecParse(format!(
176 |                                 "bad separator for mangling spec: {}",
177 |                                 u
178 |                             )))
179 |                         }
180 |                         None => {
181 |                             return Err(DataLayoutError::BadSpecParse(
182 |                                 "mangling spec is empty".into(),
183 |                             ))
184 |                         }
185 |                     }
186 | 
187 |                     // TODO(ww): This could be FromStr on Mangling.
188 |                     let kind = match mangling.next() {
189 |                         None => {
190 |                             return Err(DataLayoutError::BadSpecParse(
191 |                                 "mangling spec has no mangling kind".into(),
192 |                             ))
193 |                         }
194 |                         Some('e') => Mangling::Elf,
195 |                         Some('m') => Mangling::Mips,
196 |                         Some('o') => Mangling::Macho,
197 |                         Some('x') => Mangling::WindowsX86Coff,
198 |                         Some('w') => Mangling::WindowsCoff,
199 |                         Some('a') => Mangling::XCoff,
200 |                         Some(u) => {
201 |                             return Err(DataLayoutError::BadSpecParse(format!(
202 |                                 "unknown mangling kind in spec: {}",
203 |                                 u
204 |                             )))
205 |                         }
206 |                     };
207 | 
208 |                     datalayout.mangling = Some(kind);
209 |                 }
210 |                 'n' => {
211 |                     // 'n' marks the start of either an 'n' or an 'ni' block.
212 |                     match body.chars().next() {
213 |                         Some('i') => {
214 |                             if body.len() <= 1 {
215 |                                 return Err(DataLayoutError::BadSpecParse(
216 |                                     "cannot find address space 0".into(),
217 |                                 ));
218 |                             }
219 |                             datalayout.non_integral_address_spaces = body[2..]
220 |                                 .split(':')
221 |                                 .map(|s| {
222 |                                     s.parse::<u32>()
223 |                                         .map_err(DataLayoutError::from)
224 |                                         .and_then(|a| AddressSpace::try_from(a).map_err(Into::into))
225 |                                         .and_then(|a| {
226 |                                             if a == AddressSpace::default() {
227 |                                                 Err(DataLayoutError::BadSpecParse(
228 |                                                     "address space 0 cannot be non-integral".into(),
229 |                                                 ))
230 |                                             } else {
231 |                                                 Ok(a)
232 |                                             }
233 |                                         })
234 |                                 })
235 |                                 .collect::<Result<_, _>>()?
236 |                         }
237 |                         Some(_) => {
238 |                             datalayout.native_integer_widths = body
239 |                                 .split(':')
240 |                                 .map(|s| s.parse::<u32>())
241 |                                 .collect::<Result<_, _>>()?;
242 |                         }
243 |                         None => {
244 |                             return Err(DataLayoutError::BadSpecParse(
245 |                                 "integer width spec is empty".into(),
246 |                             ))
247 |                         }
248 |                     }
249 |                 }
250 |                 u => return Err(DataLayoutError::UnknownSpec(u)),
251 |             }
252 |         }
253 | 
254 |         Ok(datalayout)
255 |     }
256 | }
257 | 
258 | impl PartialCtxMappable<Record> for DataLayout {
259 |     type Error = DataLayoutError;
260 | 
261 |     fn try_map(record: &Record, _ctx: &mut PartialMapCtx) -> Result<Self, Self::Error> {
262 |         let datalayout = record.try_string(0)?;
263 |         datalayout.parse::<Self>()
264 |     }
265 | }
266 | 
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `spec`, panicking if the parser rejects it.
    fn parsed(spec: &str) -> DataLayout {
        spec.parse::<DataLayout>().unwrap()
    }

    /// Parses `spec`, panicking if the parser accepts it, and returns the rendered error.
    fn rejection(spec: &str) -> String {
        spec.parse::<DataLayout>().unwrap_err().to_string()
    }

    /// Checks, in order, that every `(spec, message)` pair is rejected with exactly `message`.
    fn assert_rejections(cases: &[(&str, &str)]) {
        for (spec, message) in cases {
            assert_eq!(rejection(spec), *message);
        }
    }

    /// Builds an address space from a raw number that is expected to be valid.
    fn address_space(id: u32) -> AddressSpace {
        AddressSpace::try_from(id).unwrap()
    }

    #[test]
    fn test_datalayout_has_defaults() {
        let defaults = DataLayout::default();

        assert_eq!(defaults.type_alignments, TypeAlignSpecs::default());
        assert_eq!(defaults.pointer_alignments, PointerAlignSpecs::default());
    }

    #[test]
    fn test_datalayout_parses() {
        // Input that is rejected before any individual spec is interpreted.
        assert_rejections(&[
            (
                "not ascii ¬∫˙˚√∂∆˙√ß",
                "non-ASCII characters in datalayout string",
            ),
            ("z", "unknown datalayout specification: z"),
        ]);

        // Endianness and natural stack alignment.
        let big_endian = parsed("E-S64");
        assert_eq!(big_endian.endianness, Endian::Big);
        assert_eq!(big_endian.natural_stack_alignment.unwrap().byte_align(), 8);
        assert!(big_endian.mangling.is_none());

        let little_endian = parsed("e-S32");
        assert_eq!(little_endian.endianness, Endian::Little);
        assert_eq!(
            little_endian.natural_stack_alignment.unwrap().byte_align(),
            4
        );

        // Mangling.
        assert_eq!(parsed("m:e").mangling, Some(Mangling::Elf));

        assert_rejections(&[
            ("m", "couldn't parse spec: mangling spec is empty"),
            (
                "m:",
                "couldn't parse spec: mangling spec has no mangling kind",
            ),
            (
                "m:?",
                "couldn't parse spec: unknown mangling kind in spec: ?",
            ),
        ]);

        // Function pointer alignment.
        assert_eq!(
            parsed("Fi64").function_pointer_alignment,
            Some(FunctionPointerAlign::Independent {
                abi_alignment: Align::ALIGN64
            })
        );

        assert_eq!(
            parsed("Fn8").function_pointer_alignment,
            Some(FunctionPointerAlign::MultipleOfFunctionAlign {
                abi_alignment: Align::ALIGN8
            })
        );

        assert_rejections(&[
            (
                "F",
                "couldn't parse spec: function pointer alignment spec is empty",
            ),
            (
                "Fn",
                "couldn't parse spec field: cannot parse integer from empty string",
            ),
            (
                "Fn123",
                "bad alignment value: supplied value is not a multiple of 8: 123",
            ),
            (
                "F?64",
                "couldn't parse spec: unknown function pointer alignment specifier: ?",
            ),
        ]);

        // Native integer widths.
        assert_eq!(
            parsed("n8:16:32:64").native_integer_widths,
            vec![8, 16, 32, 64]
        );
        assert_eq!(parsed("n64").native_integer_widths, vec![64]);

        assert_rejections(&[
            ("n", "couldn't parse spec: integer width spec is empty"),
            (
                "nx",
                "couldn't parse spec field: invalid digit found in string",
            ),
            (
                "n:",
                "couldn't parse spec field: cannot parse integer from empty string",
            ),
            (
                "n8:",
                "couldn't parse spec field: cannot parse integer from empty string",
            ),
        ]);

        // Non-integral address spaces.
        assert_eq!(
            parsed("ni:1:10:20").non_integral_address_spaces,
            vec![address_space(1), address_space(10), address_space(20)]
        );
        assert_eq!(
            parsed("ni:1").non_integral_address_spaces,
            vec![address_space(1)]
        );

        assert_rejections(&[
            ("ni", "couldn't parse spec: cannot find address space 0"),
            (
                "ni0",
                "couldn't parse spec field: cannot parse integer from empty string",
            ),
        ]);
    }
}
447 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/record/function.rs:
--------------------------------------------------------------------------------
  1 | //! Functionality for mapping the `MODULE_CODE_FUNCTION` record.
  2 | 
  3 | use std::convert::TryFrom;
  4 | 
  5 | use llvm_support::{
  6 |     AlignError, CallingConvention, DllStorageClass, FunctionType, Linkage, MaybeAlign, Type,
  7 |     UnnamedAddr, Visibility,
  8 | };
  9 | use num_enum::TryFromPrimitiveError;
 10 | use thiserror::Error;
 11 | 
 12 | use crate::block::attributes::AttributeEntry;
 13 | use crate::map::{CtxMappable, MapCtx};
 14 | use crate::record::StrtabError;
 15 | use crate::unroll::Record;
 16 | 
/// Errors that can occur when mapping a function record.
///
/// Variants marked `#[from]` wrap the underlying conversion or lookup error, so that
/// `?` can be used directly on the corresponding fallible operation.
#[derive(Debug, Error)]
pub enum FunctionError {
    /// The function record is too short to be well-formed.
    ///
    /// The payload is the number of fields that the record actually has.
    #[error("function record too short: {0} < 10 fields")]
    TooShort(usize),

    /// The function record is in an old unsupported format.
    ///
    /// Produced when the mapping context reports that no string table is in use.
    #[error("unsupported function record format (v1)")]
    V1Unsupported,

    /// Retrieving a string from a string table failed.
    #[error("error while accessing string table")]
    Strtab(#[from] StrtabError),

    /// This function has an unknown calling convention.
    #[error("unknown calling convention")]
    CallingConvention(#[from] TryFromPrimitiveError<CallingConvention>),

    /// The function has a bad or unknown type ID.
    ///
    /// The payload is the raw type ID taken from the record.
    #[error("invalid type table index: {0}")]
    TypeId(u64),

    /// The function has a non-function type.
    #[error("non-function type for function")]
    Type,

    /// The function has an invalid attribute entry ID.
    ///
    /// The payload is the raw (1-based) ID taken from the record.
    #[error("invalid attribute entry ID: {0}")]
    Attribute(u64),

    /// The function has an invalid alignment.
    #[error("invalid alignment")]
    Alignment(#[from] AlignError),

    /// The function has an invalid section table index.
    ///
    /// The payload is the 0-based table index, i.e. the record's field value minus one.
    #[error("invalid section table index: {0}")]
    Section(usize),

    /// The function has an invalid visibility.
    #[error("invalid visibility")]
    Visibility(#[from] TryFromPrimitiveError<Visibility>),

    /// The function has an invalid GC table index.
    ///
    /// The payload is the 0-based table index, i.e. the record's field value minus one.
    #[error("invalid GC table index: {0}")]
    Gc(usize),

    /// The function has an invalid DLL storage class.
    #[error("invalid storage class")]
    DllStorageClass(#[from] TryFromPrimitiveError<DllStorageClass>),
}
 68 | 
/// Models the `MODULE_CODE_FUNCTION` record.
///
/// All borrowed members (`name`, `ty`, `attributes`, `section`, `gc_name`) live as long
/// as the mapping context (`'ctx`) that the record was mapped against.
#[non_exhaustive]
#[derive(Debug)]
pub struct Function<'ctx> {
    /// The function's name.
    pub name: &'ctx str,

    /// A reference to the function's type in the type table.
    pub ty: &'ctx FunctionType,

    /// The function's calling convention.
    pub calling_convention: CallingConvention,

    /// Whether the function is a declaration, or a full definition (with body).
    ///
    /// `true` when the record's `isproto` field is nonzero.
    pub is_declaration: bool,

    /// The function's linkage.
    pub linkage: Linkage,

    /// The function's attributes, if it has any.
    ///
    /// `None` when the record's attribute ID is the sentinel value 0.
    pub attributes: Option<&'ctx AttributeEntry>,

    /// The function's alignment.
    pub alignment: MaybeAlign,

    /// The function's custom section, if it has one.
    ///
    /// `None` when the record's section index is 0.
    pub section: Option<&'ctx str>,

    /// The function's visibility.
    pub visibility: Visibility,

    /// The function's garbage collector, if it has one.
    ///
    /// `None` when the record has no GC field, or when that field is 0.
    pub gc_name: Option<&'ctx str>,

    /// The function's `unnamed_addr` specifier.
    ///
    /// Falls back to `UnnamedAddr::None` when the record has no such field.
    pub unnamed_addr: UnnamedAddr,

    /// The function's DLL storage class.
    ///
    /// Falls back to `DllStorageClass::Default` when the record has no such field.
    pub storage_class: DllStorageClass,
}
109 | 
110 | impl<'ctx> CtxMappable<'ctx, Record> for Function<'ctx> {
111 |     type Error = FunctionError;
112 | 
113 |     fn try_map(record: &Record, ctx: &'ctx MapCtx) -> Result<Self, Self::Error> {
114 |         let fields = record.fields();
115 | 
116 |         if !ctx.use_strtab() {
117 |             return Err(FunctionError::V1Unsupported);
118 |         }
119 | 
120 |         // Every function record has at least 10 fields, corresponding to
121 |         // [strtab_offset, strtab_size, *v1], where v1 has 8 mandatory fields:
122 |         // [type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, ...]
123 |         if fields.len() < 10 {
124 |             return Err(FunctionError::TooShort(fields.len()));
125 |         }
126 | 
127 |         let name = ctx.strtab.read_name(record)?;
128 |         let Type::Function(ty) = ctx
129 |             .type_table
130 |             .get(fields[2])
131 |             .ok_or(FunctionError::TypeId(fields[2]))? else {
132 |                 return Err(FunctionError::Type);
133 |             };
134 |         let calling_convention = CallingConvention::try_from(fields[3])?;
135 |         let is_declaration = fields[4] != 0;
136 |         let linkage = Linkage::from(fields[5]);
137 | 
138 |         let attributes = {
139 |             let paramattr = fields[6];
140 |             // An ID of 0 is a special sentinel for no attributes,
141 |             // so any nonzero ID is a 1-based index.
142 |             if paramattr == 0 {
143 |                 None
144 |             } else {
145 |                 // NOTE(ww): This is more conservative than LLVM: LLVM treats an
146 |                 // unknown attribute ID as an empty set of attributes,
147 |                 // rather than a hard failure.
148 |                 Some(
149 |                     ctx.attributes
150 |                         .get(paramattr - 1)
151 |                         .ok_or(FunctionError::Attribute(paramattr))?,
152 |                 )
153 |             }
154 |         };
155 | 
156 |         // TODO: Upgrade attributes here? It's what LLVM does.
157 | 
158 |         let alignment = MaybeAlign::try_from(fields[7] as u8)?;
159 | 
160 |         let section = match fields[8] as usize {
161 |             0 => None,
162 |             idx => Some(
163 |                 ctx.section_table
164 |                     .get(idx - 1)
165 |                     .map(AsRef::as_ref)
166 |                     .ok_or(FunctionError::Section(idx - 1))?,
167 |             ),
168 |         };
169 | 
170 |         let visibility = Visibility::try_from(fields[9])?;
171 | 
172 |         // From here, all fields are optional and need to be guarded as such.
173 | 
174 |         let gc_name = fields
175 |             .get(10)
176 |             .and_then(|idx| match *idx as usize {
177 |                 0 => None,
178 |                 idx => Some(
179 |                     ctx.gc_table
180 |                         .get(idx - 1)
181 |                         .map(AsRef::as_ref)
182 |                         .ok_or(FunctionError::Gc(idx - 1)),
183 |                 ),
184 |             })
185 |             .transpose()?;
186 | 
187 |         let unnamed_addr = fields
188 |             .get(11)
189 |             .copied()
190 |             .map(UnnamedAddr::from)
191 |             .unwrap_or(UnnamedAddr::None);
192 | 
193 |         // fields[12]: prologuedata
194 | 
195 |         let storage_class = fields.get(13).map_or_else(
196 |             || Ok(DllStorageClass::Default),
197 |             |v| DllStorageClass::try_from(*v),
198 |         )?;
199 | 
200 |         // fields[14]: comdat
201 |         // fields[15]: prefixdata
202 |         // fields[16]: personalityfn
203 |         // fields[16]: preemptionspecifier
204 | 
205 |         Ok(Self {
206 |             name,
207 |             ty,
208 |             calling_convention,
209 |             is_declaration,
210 |             linkage,
211 |             attributes,
212 |             alignment,
213 |             section,
214 |             visibility,
215 |             gc_name,
216 |             unnamed_addr,
217 |             storage_class,
218 |         })
219 |     }
220 | }
221 | 


--------------------------------------------------------------------------------
/llvm-mapper/src/record/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Structures for mapping from bitstream records to LLVM models.
 2 | //!
 3 | //! Depending on their importance or complexity, not every record is given a dedicated
 4 | //! structure or mapping implementation. Simpler records are mapped inline within their
 5 | //! blocks.
 6 | 
 7 | pub mod alias;
 8 | pub mod comdat;
 9 | pub mod datalayout;
10 | pub mod function;
11 | 
12 | use std::num::TryFromIntError;
13 | use std::string::FromUtf8Error;
14 | 
15 | use thiserror::Error;
16 | 
17 | pub use self::alias::*;
18 | pub use self::comdat::*;
19 | pub use self::datalayout::*;
20 | pub use self::function::*;
21 | use crate::block::StrtabError;
22 | 
/// Potential errors when trying to extract a string from a record.
#[non_exhaustive]
#[derive(Debug, Error)]
pub enum RecordStringError {
    /// The start index for the string is invalid.
    ///
    /// The payload is `(requested index, field count)`, as rendered in the message.
    #[error("impossible string index: {0} >= {1} (field count)")]
    BadIndex(usize, usize),
    /// A field in the record is too large to fit in a byte.
    #[error("impossible character value in string: {0}")]
    BadCharacter(#[from] TryFromIntError),
    /// The string doesn't look like valid UTF-8.
    #[error("invalid string encoding: {0}")]
    BadEncoding(#[from] FromUtf8Error),
}
37 | 
/// Potential errors when trying to extract a blob from a record.
#[non_exhaustive]
#[derive(Debug, Error)]
pub enum RecordBlobError {
    /// The start index for the blob is invalid.
    ///
    /// The payload is `(requested index, field count)`, as rendered in the message.
    #[error("impossible blob index: {0} >= {1} (field count)")]
    BadIndex(usize, usize),
    /// A field in the record is too large to fit in a byte.
    #[error("impossible byte value in blob: {0}")]
    BadByte(#[from] TryFromIntError),
}
49 | 


--------------------------------------------------------------------------------
/llvm-support/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "llvm-support"
 3 | description = "Support types and routines for parsing LLVM's bitcode"
 4 | license = "MIT"
 5 | homepage = "https://github.com/woodruffw/mollusc/tree/main/llvm-support"
 6 | repository = "https://github.com/woodruffw/mollusc"
 7 | authors = ["William Woodruff <william@yossarian.net>"]
 8 | readme = "README.md"
 9 | keywords = ["llvm", "parsing", "binary", "encoding"]
10 | categories = ["compilers", "encoding", "parsing"]
11 | edition = "2018"
12 | version = "0.0.3"
13 | 
14 | [dependencies]
15 | paste = "1.0"
16 | thiserror = "1.0"
17 | num_enum = "0.6.0"
18 | 


--------------------------------------------------------------------------------
/llvm-support/README.md:
--------------------------------------------------------------------------------
 1 | llvm-support
 2 | ============
 3 | 
 4 | [![Crates.io](https://img.shields.io/crates/v/llvm-support)](https://crates.io/crates/llvm-support)
 5 | [![Documentation](https://docs.rs/llvm-support/badge.svg)](https://docs.rs/llvm-support)
 6 | 
 7 | Support types and routines for parsing LLVM's bitcode.
 8 | 
 9 | This crate is roughly analogous to LLVM's `libSupport` in scope: it provides
10 | some essential invariant-preserving types for parsing, mapping, and validating
11 | LLVM bitcode.
12 | 


--------------------------------------------------------------------------------
/llvm-support/build.rs:
--------------------------------------------------------------------------------
 1 | // This is an ugly little hack to get access to a reasonable "default"
 2 | // target triple when loading bitcode inputs that don't mention their triple.
 3 | // Based on: https://stackoverflow.com/a/51311222
 4 | // Unwrap safety: None. If this fails, the build fails, and that's intended.
 5 | #[allow(clippy::unwrap_used)]
 6 | fn main() {
 7 |     println!(
 8 |         "cargo:rustc-env=TARGET_TRIPLE={}",
 9 |         std::env::var("TARGET").unwrap()
10 |     );
11 | }
12 | 


--------------------------------------------------------------------------------
/llvm-support/src/attribute.rs:
--------------------------------------------------------------------------------
  1 | //! Support code for LLVM attributes.
  2 | 
  3 | use num_enum::TryFromPrimitive;
  4 | 
/// Represents the different kinds of attributes.
///
/// The derived `TryFromPrimitive` conversion accepts only the `u64` values listed
/// here; note that 2 is not assigned to any variant.
#[derive(Debug, PartialEq, Eq, TryFromPrimitive)]
#[repr(u64)]
pub enum AttributeKind {
    /// A well-known enum attribute.
    Enum = 0,
    /// A well-known integral attribute with an integer value.
    IntKeyValue = 1,
    /// A string attribute.
    StrKey = 3,
    /// A string attribute with a string value.
    StrKeyValue = 4,
    // TODO(ww): 5 and 6 are attribute kinds in the LLVM codebase, but aren't documented.
}
 19 | 
/// Represents the IDs of different specific attributes.
///
/// Each variant's documentation gives the textual spelling of the attribute; the
/// discriminant is the numeric ID accepted by the derived `TryFromPrimitive` conversion.
#[non_exhaustive]
#[derive(Copy, Clone, Debug, PartialEq, Eq, TryFromPrimitive)]
#[repr(u64)]
pub enum AttributeId {
    /// `align(<n>)`
    Alignment = 1,
    /// `alwaysinline`
    AlwaysInline = 2,
    /// `byval`
    ByVal = 3,
    /// `inlinehint`
    InlineHint = 4,
    /// `inreg`
    InReg = 5,
    /// `minsize`
    MinSize = 6,
    /// `naked`
    Naked = 7,
    /// `nest`
    Nest = 8,
    /// `noalias`
    NoAlias = 9,
    /// `nobuiltin`
    NoBuiltin = 10,
    /// `nocapture`
    NoCapture = 11,
    /// `noduplicate`
    NoDuplicate = 12,
    /// `noimplicitfloat`
    NoImplicitFloat = 13,
    /// `noinline`
    NoInline = 14,
    /// `nonlazybind`
    NonLazyBind = 15,
    /// `noredzone`
    NoRedZone = 16,
    /// `noreturn`
    NoReturn = 17,
    /// `nounwind`
    NoUnwind = 18,
    /// `optsize`
    OptimizeForSize = 19,
    /// `readnone`
    ReadNone = 20,
    /// `readonly`
    ReadOnly = 21,
    /// `returned`
    Returned = 22,
    /// `returns_twice`
    ReturnsTwice = 23,
    /// `signext`
    SExt = 24,
    /// `alignstack(<n>)`
    StackAlignment = 25,
    /// `ssp`
    StackProtect = 26,
    /// `sspreq`
    StackProtectReq = 27,
    /// `sspstrong`
    StackProtectStrong = 28,
    /// `sret`
    StructRet = 29,
    /// `sanitize_address`
    SanitizeAddress = 30,
    /// `sanitize_thread`
    SanitizeThread = 31,
    /// `sanitize_memory`
    SanitizeMemory = 32,
    /// `uwtable ([variant])`
    UwTable = 33,
    /// `zeroext`
    ZExt = 34,
    /// `builtin`
    Builtin = 35,
    /// `cold`
    Cold = 36,
    /// `optnone`
    OptimizeNone = 37,
    /// `inalloca`
    InAlloca = 38,
    /// `nonnull`
    NonNull = 39,
    /// `jumptable`
    JumpTable = 40,
    /// `dereferenceable(<n>)`
    Dereferenceable = 41,
    /// `dereferenceable_or_null(<n>)`
    DereferenceableOrNull = 42,
    /// `convergent`
    Convergent = 43,
    /// `safestack`
    SafeStack = 44,
    /// `argmemonly`
    ArgMemOnly = 45,
    /// `swiftself`
    SwiftSelf = 46,
    /// `swifterror`
    SwiftError = 47,
    /// `norecurse`
    NoRecurse = 48,
    /// `inaccessiblememonly`
    InaccessiblememOnly = 49,
    /// `inaccessiblemem_or_argmemonly`
    InaccessiblememOrArgmemonly = 50,
    /// `allocsize(<EltSizeParam>[, <NumEltsParam>])`
    AllocSize = 51,
    /// `writeonly`
    WriteOnly = 52,
    /// `speculatable`
    Speculatable = 53,
    /// `strictfp`
    StrictFp = 54,
    /// `sanitize_hwaddress`
    SanitizeHwAddress = 55,
    /// `nocf_check`
    NoCfCheck = 56,
    /// `optforfuzzing`
    OptForFuzzing = 57,
    /// `shadowcallstack`
    Shadowcallstack = 58,
    /// `speculative_load_hardening`
    SpeculativeLoadHardening = 59,
    /// `immarg`
    ImmArg = 60,
    /// `willreturn`
    WillReturn = 61,
    /// `nofree`
    NoFree = 62,
    /// `nosync`
    NoSync = 63,
    /// `sanitize_memtag`
    SanitizeMemtag = 64,
    /// `preallocated`
    Preallocated = 65,
    /// `nomerge`
    NoMerge = 66,
    /// `null_pointer_is_valid`
    NullPointerIsValid = 67,
    /// `noundef`
    NoUndef = 68,
    /// `byref`
    ByRef = 69,
    /// `mustprogress`
    MustProgress = 70,
    /// `nocallback`
    NoCallback = 71,
    /// `hot`
    Hot = 72,
    /// `noprofile`
    NoProfile = 73,
    /// `vscale_range(<Min>[, <Max>])`
    VScaleRange = 74,
    /// `swiftasync`
    SwiftAsync = 75,
    /// `nosanitize_coverage`
    NoSanitizeCoverage = 76,
    /// `elementtype`
    ElementType = 77,
    /// `disable_sanitizer_instrumentation`
    DisableSanitizerInstrumentation = 78,
    /// `nosanitize_bounds`
    NoSanitizeBounds = 79,
    /// `allocalign`
    AllocAlign = 80,
    /// `allocptr`
    AllocatedPointer = 81,
    /// `allockind (<KindBitset>)`
    AllocKind = 82,
    /// `presplitcoroutine`
    PresplitCoroutine = 83,
    /// `fn_ret_thunk_extern`
    FnretthunkExtern = 84,
    /// `skipprofile`
    SkipProfile = 85,
    /// `memory (<LayoutBitset>)`
    Memory = 86,
}
198 | 


--------------------------------------------------------------------------------
/llvm-support/src/bitcodes.rs:
--------------------------------------------------------------------------------
  1 | //! Core bitcode constants.
  2 | //!
  3 | //! These correspond directly to many of the block IDs, record codes, and
  4 | //! other special constants in LLVM bitcode streams.
  5 | 
  6 | use num_enum::{IntoPrimitive, TryFromPrimitive};
  7 | 
  8 | use crate::FIRST_APPLICATION_BLOCK_ID;
  9 | 
/// Block IDs that are reserved by LLVM.
///
/// The derived `TryFromPrimitive` conversion accepts exactly the values 0 through 7.
// NOTE(ww): Block IDs 0 through 7 are reserved, but only 0 (BLOCKINFO)
// is actually currently used.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, TryFromPrimitive)]
#[repr(u64)]
pub enum ReservedBlockId {
    /// The `BLOCKINFO` block ID.
    BlockInfo = 0,
    /// Reserved; no semantics.
    Reserved1 = 1,
    /// Reserved; no semantics.
    Reserved2 = 2,
    /// Reserved; no semantics.
    Reserved3 = 3,
    /// Reserved; no semantics.
    Reserved4 = 4,
    /// Reserved; no semantics.
    Reserved5 = 5,
    /// Reserved; no semantics.
    Reserved6 = 6,
    /// Reserved; no semantics.
    Reserved7 = 7,
}
 33 | 
 34 | /// Block IDs that are used by LLVM for bitcode (i.e., IR bitstreams).
 35 | /// See: `enum BlockIDs` in `Bitcode/LLVMBitCodes.h`.
 36 | #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, TryFromPrimitive)]
 37 | #[repr(u64)]
 38 | pub enum IrBlockId {
 39 |     /// `MODULE_BLOCK_ID`; every later variant is numbered consecutively after it.
 40 |     Module = FIRST_APPLICATION_BLOCK_ID,
 41 |     /// `PARAM_ATTR_BLOCK_ID`
 42 |     ParamAttr,
 43 |     /// `PARAM_ATTR_GROUP_BLOCK_ID`
 44 |     ParamAttrGroup,
 45 |     /// `CONSTANTS_BLOCK_ID`
 46 |     Constants,
 47 |     /// `FUNCTION_BLOCK_ID`
 48 |     Function,
 49 |     /// `IDENTIFICATION_BLOCK_ID`
 50 |     Identification,
 51 |     /// `VALUE_SYMTAB_BLOCK_ID`
 52 |     ValueSymtab,
 53 |     /// `METADATA_BLOCK_ID`
 54 |     Metadata,
 55 |     /// `METADATA_ATTACHMENT_BLOCK_ID`
 56 |     MetadataAttachment,
 57 |     /// `TYPE_BLOCK_ID_NEW`
 58 |     Type,
 59 |     /// `USELIST_BLOCK_ID`
 60 |     Uselist,
 61 |     /// `MODULE_STRTAB_BLOCK_ID`
 62 |     ModuleStrtab,
 63 |     /// `GLOBAL_VAL_SUMMARY_BLOCK_ID`
 64 |     GlobalValSummary,
 65 |     /// `OPERAND_BUNDLE_TAGS_BLOCK_ID`
 66 |     OperandBundleTags,
 67 |     /// `METADATA_KIND_BLOCK_ID`
 68 |     MetadataKind,
 69 |     /// `STRTAB_BLOCK_ID`
 70 |     Strtab,
 71 |     /// `FULL_LTO_GLOBAL_VAL_SUMMARY_BLOCK_ID`
 72 |     FullLtoGlobalValSummary,
 73 |     /// `SYMTAB_BLOCK_ID`
 74 |     Symtab,
 75 |     /// `SYNC_SCOPE_NAMES_BLOCK_ID`
 76 |     SyncScopeNames,
 77 | }
 78 | 
 79 | /// Abbreviation IDs whose meaning is fixed by LLVM rather than by the stream.
 80 | #[derive(Clone, Copy, Debug, PartialEq, Eq, TryFromPrimitive)]
 81 | #[repr(u64)]
 82 | pub enum ReservedAbbrevId {
 83 |     /// Marks an `END_BLOCK` record.
 84 |     EndBlock = 0,
 85 |     /// Marks an `ENTER_SUBBLOCK` record.
 86 |     EnterSubBlock = 1,
 87 |     /// Marks a `DEFINE_ABBREV` record.
 88 |     DefineAbbrev = 2,
 89 |     /// Marks an `UNABBREV_RECORD` record.
 90 |     UnabbrevRecord = 3,
 91 | }
 92 | 
 93 | /// Numeric codes for the operand encodings that `DEFINE_ABBREV` can declare.
 94 | #[derive(Clone, Copy, Debug, PartialEq, Eq, TryFromPrimitive)]
 95 | #[repr(u64)]
 96 | pub enum AbbrevOpEnc {
 97 |     /// An unsigned operand of fixed length.
 98 |     Fixed = 1,
 99 |     /// An unsigned operand of variable length.
100 |     Vbr = 2,
101 |     /// An array of values.
102 |     Array = 3,
103 |     /// One character in the 6-bit encoding.
104 |     Char6 = 4,
105 |     /// A blob of bytes.
106 |     Blob = 5,
107 | }
108 | 
109 | /// Record codes used by each `UNABBREV_RECORD` inside `BLOCKINFO`.
110 | #[non_exhaustive]
111 | #[derive(Debug, PartialEq, Eq, TryFromPrimitive)]
112 | #[repr(u64)]
113 | pub enum BlockInfoCode {
114 |     /// SETBID: `[blockid]`
115 |     SetBid = 1,
116 |     /// BLOCKNAME: `[...name...]`
117 |     BlockName = 2,
118 |     /// SETRECORDNAME: `[recordid, ...name...]`
119 |     SetRecordName = 3,
120 | }
121 | 
122 | /// Record codes that can appear inside `IDENTIFICATION_BLOCK`.
123 | #[non_exhaustive]
124 | #[derive(Debug, PartialEq, Eq, TryFromPrimitive)]
125 | #[repr(u64)]
126 | pub enum IdentificationCode {
127 |     /// IDENTIFICATION_CODE_STRING: `[...string...]`
128 |     ProducerString = 1,
129 |     /// IDENTIFICATION_CODE_EPOCH: `[epoch]`
130 |     Epoch = 2,
131 | }
132 | 
133 | /// Codes for each record in `MODULE_BLOCK`, convertible to and from their raw `u64` values.
134 | #[non_exhaustive]
135 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
136 | #[repr(u64)]
137 | pub enum ModuleCode {
138 |     /// MODULE_CODE_VERSION: `[version#]`
139 |     Version = 1,
140 |     /// MODULE_CODE_TRIPLE: `[...string...]`
141 |     Triple = 2,
142 |     /// MODULE_CODE_DATALAYOUT: `[...string...]`
143 |     DataLayout = 3,
144 |     /// MODULE_CODE_ASM: `[...string...]`
145 |     Asm = 4,
146 |     /// MODULE_CODE_SECTIONNAME: `[...string...]`
147 |     SectionName = 5,
148 |     /// MODULE_CODE_DEPLIB: `[...string...]`
149 |     DepLib = 6,
150 |     /// MODULE_CODE_GLOBALVAR: `[...fields...]`
151 |     /// See: <https://llvm.org/docs/BitCodeFormat.html#module-code-globalvar-record>
152 |     GlobalVar = 7,
153 |     /// MODULE_CODE_FUNCTION: `[...fields...]`
154 |     /// See: <https://llvm.org/docs/BitCodeFormat.html#module-code-function-record>
155 |     Function = 8,
156 |     /// MODULE_CODE_ALIAS_OLD: `[...fields...]`
157 |     /// See: <https://llvm.org/docs/BitCodeFormat.html#module-code-alias-record>
158 |     AliasOld = 9,
159 |     /// MODULE_CODE_GCNAME: `[...string...]` (note: no variant is defined here for code 10)
160 |     GcName = 11,
161 |     /// MODULE_CODE_COMDAT
162 |     /// v1: `[selection_kind, name]`
163 |     /// v2: `[strtab_offset, strtab_size, selection_kind]`
164 |     /// Only `v2` is currently supported.
165 |     Comdat = 12,
166 |     /// MODULE_CODE_VSTOFFSET: `[offset]`
167 |     VstOffset = 13,
168 |     /// MODULE_CODE_ALIAS: `[...fields...]`
169 |     /// Not well documented; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`.
170 |     Alias = 14,
171 |     /// MODULE_CODE_METADATA_VALUES_UNUSED
172 |     /// Not documented at all; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`.
173 |     MetadataValuesUnused = 15,
174 |     /// MODULE_CODE_SOURCE_FILENAME: `[...string...]`
175 |     SourceFilename = 16,
176 |     /// MODULE_CODE_HASH: `[5*i32]`
177 |     Hash = 17,
178 |     /// MODULE_CODE_IFUNC: `[...fields...]`
179 |     /// Not well documented; see `ModuleCodes` in `Bitcode/LLVMBitCodes.h`.
180 |     IFunc = 18,
181 | }
182 | 
183 | /// Record codes that can appear inside `TYPE_BLOCK` (i.e., `TYPE_BLOCK_ID_NEW`).
184 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
185 | #[repr(u64)]
186 | pub enum TypeCode {
187 |     /// TYPE_CODE_NUMENTRY: `[numentries]`
188 |     NumEntry = 1,
189 |     /// TYPE_CODE_VOID
190 |     Void = 2,
191 |     /// TYPE_CODE_FLOAT
192 |     Float = 3,
193 |     /// TYPE_CODE_DOUBLE
194 |     Double = 4,
195 |     /// TYPE_CODE_LABEL
196 |     Label = 5,
197 |     /// TYPE_CODE_OPAQUE
198 |     Opaque = 6,
199 |     /// TYPE_CODE_INTEGER: `[width]`
200 |     Integer = 7,
201 |     /// TYPE_CODE_POINTER: `[pointee type]`
202 |     Pointer = 8,
203 |     /// TYPE_CODE_FUNCTION_OLD: `[vararg, attrid, retty, paramty x N]`
204 |     FunctionOld = 9,
205 |     /// TYPE_CODE_HALF
206 |     Half = 10,
207 |     /// TYPE_CODE_ARRAY: `[numelts, eltty]`
208 |     Array = 11,
209 |     /// TYPE_CODE_VECTOR: `[numelts, eltty]`
210 |     Vector = 12,
211 |     /// TYPE_CODE_X86_FP80
212 |     X86Fp80 = 13,
213 |     /// TYPE_CODE_FP128
214 |     Fp128 = 14,
215 |     /// TYPE_CODE_PPC_FP128
216 |     PpcFp128 = 15,
217 |     /// TYPE_CODE_METADATA
218 |     Metadata = 16,
219 |     /// TYPE_CODE_X86_MMX
220 |     X86Mmx = 17,
221 |     /// TYPE_CODE_STRUCT_ANON: `[ispacked, eltty x N]`
222 |     StructAnon = 18,
223 |     /// TYPE_CODE_STRUCT_NAME: `[strchr x N]`
224 |     StructName = 19,
225 |     /// TYPE_CODE_STRUCT_NAMED: `[ispacked, eltty x N]`
226 |     StructNamed = 20,
227 |     /// TYPE_CODE_FUNCTION: `[vararg, retty, paramty x N]`
228 |     Function = 21,
229 |     /// TYPE_CODE_TOKEN
230 |     Token = 22,
231 |     /// TYPE_CODE_BFLOAT
232 |     BFloat = 23,
233 |     /// TYPE_CODE_X86_AMX
234 |     X86Amx = 24,
235 |     /// TYPE_CODE_OPAQUE_POINTER: `[addrspace]`
236 |     OpaquePointer = 25,
237 | }
238 | 
239 | /// Codes for each record in `STRTAB_BLOCK`; only the blob record is defined here.
240 | #[non_exhaustive]
241 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
242 | #[repr(u64)]
243 | pub enum StrtabCode {
244 |     /// STRTAB_BLOB: `[...string...]`
245 |     Blob = 1,
246 | }
247 | 
248 | /// Codes for each record in `SYMTAB_BLOCK`; only the blob record is defined here.
249 | #[non_exhaustive]
250 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
251 | #[repr(u64)]
252 | pub enum SymtabCode {
253 |     /// SYMTAB_BLOB: `[...data...]`
254 |     Blob = 1,
255 | }
256 | 
257 | /// Codes for each record in `PARAMATTR_BLOCK` or `PARAMATTR_GROUP_BLOCK`.
258 | // NOTE(ww): For whatever reason, these two blocks share the same enum for
259 | // record codes.
260 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
261 | #[repr(u64)]
262 | pub enum AttributeCode {
263 |     /// PARAMATTR_CODE_ENTRY_OLD: `[paramidx0, attr0, paramidx1, attr1...]`
264 |     EntryOld = 1,
265 |     /// PARAMATTR_CODE_ENTRY: `[attrgrp0, attrgrp1, ...]`
266 |     Entry,
267 |     /// PARAMATTR_GRP_CODE_ENTRY: `[grpid, idx, attr0, attr1, ...]`
268 |     GroupCodeEntry,
269 | }
270 | 
271 | /// Codes for each record in `FUNCTION_BLOCK`; the numbering is sparse, not contiguous.
272 | ///
273 | /// See: `FunctionCodes` in `Bitcode/LLVMBitCodes.h`.
274 | #[allow(missing_docs)]
275 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
276 | #[repr(u64)]
277 | pub enum FunctionCode {
278 |     DeclareBlocks = 1,
279 |     InstBinop = 2,
280 |     InstCast = 3,
281 |     InstGepOld = 4,
282 |     InstSelect = 5,
283 |     InstExtractelt = 6,
284 |     InstInsertelt = 7,
285 |     InstShufflevec = 8,
286 |     InstCmp = 9,
287 |     InstRet = 10,
288 |     InstBr = 11,
289 |     InstSwitch = 12,
290 |     InstInvoke = 13,
291 |     InstUnreachable = 15,
292 |     InstPhi = 16,
293 |     InstAlloca = 19,
294 |     InstLoad = 20,
295 |     InstVaarg = 23,
296 |     InstStoreOld = 24,
297 |     InstExtractval = 26,
298 |     InstInsertval = 27,
299 |     InstCmp2 = 28,
300 |     InstVselect = 29,
301 |     InstInboundsGepOld = 30,
302 |     InstIndirectbr = 31,
303 |     DebugLocAgain = 33,
304 |     InstCall = 34,
305 |     DebugLoc = 35,
306 |     InstFence = 36,
307 |     InstCmpxchgOld = 37,
308 |     InstAtomicrmwOld = 38,
309 |     InstResume = 39,
310 |     InstLandingpadOld = 40,
311 |     InstLoadatomic = 41,
312 |     InstStoreatomicOld = 42,
313 |     InstGep = 43,
314 |     InstStore = 44,
315 |     InstStoreatomic = 45,
316 |     InstCmpxchg = 46,
317 |     InstLandingpad = 47,
318 |     InstCleanupret = 48,
319 |     InstCatchret = 49,
320 |     InstCatchpad = 50,
321 |     InstCleanuppad = 51,
322 |     InstCatchswitch = 52,
323 |     OperandBundle = 55,
324 |     InstUnop = 56,
325 |     Instcallbr = 57, // NOTE(review): casing differs from the other `Inst*` variants; renaming breaks callers.
326 |     InstFreeze = 58,
327 |     InstAtomicrmw = 59,
328 | }
329 | 
330 | /// Codes for each unary operation in unary instructions (currently only `fneg`).
331 | ///
332 | /// See: `UnaryOpcodes` in `Bitcode/LLVMBitCodes.h`.
333 | #[allow(missing_docs)]
334 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
335 | #[repr(u64)]
336 | pub enum UnaryOpcode {
337 |     FNeg = 0,
338 | }
339 | 
340 | /// Raw codes identifying the operation performed by a binary instruction.
341 | ///
342 | /// See: `BinaryOpcodes` in `LLVMBitCodes.h`.
343 | #[allow(missing_docs)]
344 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
345 | #[repr(u64)]
346 | pub enum BinaryOpcode {
347 |     Add = 0,
348 |     Sub = 1,
349 |     Mul = 2,
350 |     UDiv = 3,
351 |     SDiv = 4,
352 |     URem = 5,
353 |     SRem = 6,
354 |     Shl = 7,
355 |     LShr = 8,
356 |     AShr = 9,
357 |     And = 10,
358 |     Or = 11,
359 |     Xor = 12,
360 | }
361 | 
362 | /// Raw codes identifying the operation performed by an `atomicrmw`.
363 | /// See: `RMWOperations` in `LLVMBitCodes.h`.
364 | #[allow(missing_docs)]
365 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
366 | #[repr(u64)]
367 | pub enum RMWOperation {
368 |     Xchg = 0,
369 |     Add = 1,
370 |     Sub = 2,
371 |     And = 3,
372 |     Nand = 4,
373 |     Or = 5,
374 |     Xor = 6,
375 |     Max = 7,
376 |     Min = 8,
377 |     UMax = 9,
378 |     UMin = 10,
379 |     FAdd = 11,
380 |     FSub = 12,
381 | }
382 | 


--------------------------------------------------------------------------------
/llvm-support/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! `llvm-support` provides support types to the other *mollusc* crates,
  2 | //! in furtherance of the general task of parsing LLVM's bitcode.
  3 | 
  4 | #![deny(rustdoc::broken_intra_doc_links)]
  5 | #![deny(missing_docs)]
  6 | #![allow(clippy::redundant_field_names)]
  7 | #![forbid(unsafe_code)]
  8 | 
  9 | pub mod align;
 10 | pub mod attribute;
 11 | pub mod bitcodes;
 12 | pub mod opcode;
 13 | pub mod ty;
 14 | 
 15 | use num_enum::{IntoPrimitive, TryFromPrimitive};
 16 | 
 17 | pub use self::align::*;
 18 | pub use self::attribute::*;
 19 | pub use self::opcode::*;
 20 | pub use self::ty::*;
 21 | 
 22 | /// The 32-bit magic that indicates a raw LLVM IR bitcode stream (little-endian bytes `B`, `C`, `0xC0`, `0xDE`).
 23 | pub const LLVM_IR_MAGIC: u32 = 0xdec04342;
 24 | 
 25 | /// The 32-bit magic that indicates a bitcode wrapper, which in
 26 | /// turn points to the start of the actual bitcode stream.
 27 | pub const BITCODE_WRAPPER_MAGIC: u32 = 0x0b17c0de;
 28 | 
 29 | /// The abbreviation ID width that a bitstream starts out with.
 30 | pub const INITIAL_ABBREV_ID_WIDTH: u64 = 2;
 31 | 
 32 | /// All abbreviation IDs before this (`0` through `3`) are defined by the bitstream format,
 33 | /// rather than the stream itself.
 34 | pub const FIRST_APPLICATION_ABBREV_ID: usize = 4;
 35 | 
 36 | /// All block IDs before this (`0` through `7`) have their semantics defined by the bitstream
 37 | /// format, rather than the stream itself.
 38 | pub const FIRST_APPLICATION_BLOCK_ID: u64 = 8;
 39 | 
 40 | /// The 64-entry lookup alphabet for the Char6 operand encoding.
 41 | pub const CHAR6_ALPHABET: &[u8] =
 42 |     b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._";
 43 | 
 44 | /// The current toolchain's target triple, read at compile time from the `TARGET_TRIPLE` environment variable.
 45 | pub const TARGET_TRIPLE: &str = env!("TARGET_TRIPLE");
 46 | 
 47 | /// An exact endianness.
 48 | ///
 49 | /// For an inexact endianness model (i.e., one that supports a notion of "system" endianness),
 50 | /// see [`InexactEndian`].
 51 | #[derive(Debug, PartialEq, Eq)]
 52 | pub enum Endian {
 53 |     /// Little-endian.
 54 |     Little,
 55 |     /// Big-endian.
 56 |     Big,
 57 | }
 58 | 
 59 | /// An "inexact" endianness, i.e. one that also allows an unspecified "system" endianness.
 60 | #[derive(Debug)]
 61 | pub enum InexactEndian {
 62 |     /// A known endianness: either big-endian or little-endian.
 63 |     Exact(Endian),
 64 |     /// The host system's endianness, which may not be known immediately.
 65 |     System,
 66 | }
 67 | 
 68 | /// Symbol mangling styles supported by LLVM.
 69 | #[derive(Debug, PartialEq, Eq)]
 70 | pub enum Mangling {
 71 |     /// ELF-style mangling.
 72 |     Elf,
 73 |     /// MIPS-style mangling.
 74 |     Mips,
 75 |     /// Mach-O-style mangling.
 76 |     Macho,
 77 |     /// Windows x86 COFF-style mangling.
 78 |     WindowsX86Coff,
 79 |     /// Windows COFF-style mangling.
 80 |     WindowsCoff,
 81 |     /// XCOFF-style mangling.
 82 |     XCoff,
 83 | }
 84 | 
 85 | /// Global value linkage types; raw bitcode values are decoded via `From<u64>`, not by discriminant.
 86 | ///
 87 | /// See: <https://llvm.org/docs/LangRef.html#linkage-types>
 88 | #[non_exhaustive]
 89 | #[derive(Debug, PartialEq, Eq)]
 90 | #[repr(u64)]
 91 | #[allow(missing_docs)]
 92 | pub enum Linkage {
 93 |     External,
 94 |     AvailableExternally,
 95 |     LinkOnceAny,
 96 |     LinkOnceOdr,
 97 |     WeakAny,
 98 |     WeakOdr,
 99 |     Appending,
100 |     Internal,
101 |     Private,
102 |     ExternalWeak,
103 |     Common,
104 | }
105 | 
106 | impl From<u64> for Linkage {
107 |     /// Decodes a raw linkage value the way `getDecodedLinkage` in `BitcodeReader.cpp` does.
108 |     fn from(value: u64) -> Self {
109 |         match value {
110 |             2 => Linkage::Appending,
111 |             3 => Linkage::Internal,
112 |             7 => Linkage::ExternalWeak,
113 |             8 => Linkage::Common,
114 |             12 => Linkage::AvailableExternally,
115 |             1 | 16 => Linkage::WeakAny,
116 |             10 | 17 => Linkage::WeakOdr,
117 |             4 | 18 => Linkage::LinkOnceAny,
118 |             11 | 19 => Linkage::LinkOnceOdr,
119 |             9 | 13 | 14 => Linkage::Private,
120 |             // `0`, `5`, `6`, `15` and every unlisted value decode to external linkage.
121 |             _ => Linkage::External,
122 |         }
123 |     }
124 | }
125 | 
126 | /// An `(offset, size)` reference to a string within some string table; convertible from such tuples.
127 | pub struct StrtabRef {
128 |     /// The string's offset within its string table.
129 |     pub offset: usize,
130 |     /// The string's size, in bytes.
131 |     pub size: usize,
132 | }
133 | 
134 | impl From<(usize, usize)> for StrtabRef {
135 |     /// Builds a reference from a positional `(offset, size)` pair.
136 |     fn from(value: (usize, usize)) -> Self {
137 |         let (offset, size) = value;
138 | 
139 |         Self { offset, size }
140 |     }
141 | }
142 | 
143 | impl From<(u64, u64)> for StrtabRef {
144 |     fn from((offset, size): (u64, u64)) -> Self {
145 |         (offset as usize, size as usize).into() // `as` truncates where `usize` is narrower than 64 bits
146 |     }
147 | }
148 | 
149 | /// The visibility styles a global value may carry.
150 | ///
151 | /// See: <https://llvm.org/docs/LangRef.html#visibility-styles>
152 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
153 | #[repr(u64)]
154 | pub enum Visibility {
155 |     /// `default` visibility.
156 |     Default = 0,
157 | 
158 |     /// `hidden` visibility.
159 |     Hidden = 1,
160 | 
161 |     /// `protected` visibility.
162 |     Protected = 2,
163 | }
164 | 
165 | /// The DLL storage classes a global value may carry.
166 | ///
167 | /// See: <https://llvm.org/docs/LangRef.html#dllstorageclass>
168 | #[derive(Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
169 | #[repr(u64)]
170 | pub enum DllStorageClass {
171 |     /// No explicit storage class (the default).
172 |     Default = 0,
173 | 
174 |     /// `dllimport`.
175 |     Import = 1,
176 | 
177 |     /// `dllexport`.
178 |     Export = 2,
179 | }
180 | 
181 | /// The thread-local storage (TLS) models a global may use.
182 | ///
183 | /// See: <https://llvm.org/docs/LangRef.html#thread-local-storage-models>
184 | /// See also: <https://www.akkadia.org/drepper/tls.pdf>
185 | #[derive(Debug, PartialEq, Eq, IntoPrimitive)]
186 | #[repr(u64)]
187 | pub enum ThreadLocalMode {
188 |     /// The value is not thread local.
189 |     NotThreadLocal = 0,
190 | 
191 |     /// General dynamic TLS.
192 |     GeneralDynamicTls = 1,
193 | 
194 |     /// Local dynamic TLS.
195 |     LocalDynamicTls = 2,
196 | 
197 |     /// Initial exec TLS.
198 |     InitialExecTls = 3,
199 | 
200 |     /// Local exec TLS.
201 |     LocalExecTls = 4,
202 | }
203 | 
204 | impl From<u64> for ThreadLocalMode {
205 |     /// Decodes a raw TLS-mode value; anything unrecognized becomes general dynamic.
206 |     fn from(value: u64) -> ThreadLocalMode {
207 |         match value {
208 |             0 => Self::NotThreadLocal,
209 |             2 => Self::LocalDynamicTls,
210 |             3 => Self::InitialExecTls,
211 |             4 => Self::LocalExecTls,
212 |             // `1` is the proper encoding; unknown values share its result.
213 |             _ => Self::GeneralDynamicTls,
214 |         }
215 |     }
216 | }
217 | 
218 | /// The possible states of the `unnamed_addr` specifier.
219 | #[derive(Debug, PartialEq, Eq, IntoPrimitive)]
220 | #[repr(u64)]
221 | pub enum UnnamedAddr {
222 |     /// The specifier is absent.
223 |     None = 0,
224 | 
225 |     /// The variable's address is not significant.
226 |     Global = 1,
227 | 
228 |     /// The variable's address is not significant, but only within the module.
229 |     Local = 2,
230 | }
231 | 
232 | impl From<u64> for UnnamedAddr {
233 |     /// Decodes a raw `unnamed_addr` value, defaulting to [`UnnamedAddr::None`].
234 |     fn from(value: u64) -> UnnamedAddr {
235 |         match value {
236 |             1 => Self::Global,
237 |             2 => Self::Local,
238 |             // `0` and all unknown values mean "no `unnamed_addr` specifier".
239 |             _ => Self::None,
240 |         }
241 |     }
242 | }
243 | 
244 | /// The runtime preemption specifier of a function or variable.
245 | ///
246 | /// See: <https://llvm.org/docs/LangRef.html#runtime-preemption-model>
247 | #[derive(Debug, PartialEq, Eq, IntoPrimitive)]
248 | #[repr(u64)]
249 | pub enum RuntimePreemption {
250 |     /// At runtime, a symbol from outside the linkage unit may replace the
251 |     /// function or variable.
252 |     DsoPreemptable = 0,
253 | 
254 |     /// The compiler may assume the function or variable resolves to a symbol
255 |     /// within the same linkage unit.
256 |     DsoLocal = 1,
257 | }
258 | 
259 | impl From<u64> for RuntimePreemption {
260 |     fn from(value: u64) -> RuntimePreemption {
261 |         // Only `1` selects `dso_local`; `0` and unknown values are `dso_preemptable`.
262 |         if value == 1 {
263 |             Self::DsoLocal
264 |         } else {
265 |             Self::DsoPreemptable
266 |         }
267 |     }
268 | }
269 | 
270 | /// Calling conventions supported by LLVM; each variant's discriminant is its numeric ID.
271 | #[non_exhaustive]
272 | #[derive(Debug, PartialEq, Eq, TryFromPrimitive)]
273 | #[repr(u64)]
274 | #[allow(missing_docs)]
275 | pub enum CallingConvention {
276 |     C = 0,
277 |     Fast = 8,
278 |     Cold = 9,
279 |     GHC = 10,
280 |     HiPE = 11,
281 |     WebKitJS = 12,
282 |     AnyReg = 13,
283 |     PreserveMost = 14,
284 |     PreserveAll = 15,
285 |     Swift = 16,
286 |     CXXFASTTLS = 17,
287 |     X86Stdcall = 64,
288 |     X86Fastcall = 65,
289 |     ARMAPCS = 66,
290 |     ARMAAPCS = 67,
291 |     ARMAAPCSVFP = 68,
292 |     MSP430INTR = 69,
293 |     X86ThisCall = 70,
294 |     PTXKernel = 71,
295 |     PTXDevice = 72,
296 |     SPIRFUNC = 75,
297 |     SPIRKERNEL = 76,
298 |     IntelOCLBI = 77,
299 |     X8664SysV = 78,
300 |     Win64 = 79,
301 |     X86VectorCall = 80,
302 |     HHVM = 81,
303 |     HHVMC = 82,
304 |     X86INTR = 83,
305 |     AVRINTR = 84,
306 |     AVRSIGNAL = 85,
307 |     AVRBUILTIN = 86,
308 |     AMDGPUVS = 87,
309 |     AMDGPUGS = 88,
310 |     AMDGPUPS = 89,
311 |     AMDGPUCS = 90,
312 |     AMDGPUKERNEL = 91,
313 |     X86RegCall = 92,
314 |     AMDGPUHS = 93,
315 |     MSP430BUILTIN = 94,
316 |     AMDGPULS = 95,
317 |     AMDGPUES = 96,
318 | }
319 | 
320 | #[cfg(test)]
321 | mod tests {
322 |     use super::TARGET_TRIPLE;
323 |     // The `env!` lookup behind `TARGET_TRIPLE` must have produced a non-empty triple.
324 |     #[test]
325 |     fn test_target_triple() {
326 |         assert_ne!(TARGET_TRIPLE, "");
327 |     }
328 | }
329 | 


--------------------------------------------------------------------------------
/llvm-support/src/opcode.rs:
--------------------------------------------------------------------------------
  1 | //! Support code for instruction opcodes.
  2 | 
  3 | use std::convert::TryFrom;
  4 | 
  5 | use num_enum::TryFromPrimitiveError;
  6 | use thiserror::Error;
  7 | 
  8 | use crate::{
  9 |     bitcodes::{BinaryOpcode, UnaryOpcode},
 10 |     Type,
 11 | };
 12 | 
 13 | /// Represents the different classes of LLVM opcodes.
 14 | #[derive(Clone, Copy, Debug)]
 15 | pub enum Opcode {
 16 |     /// Opcodes that terminate basic blocks.
 17 |     Term(TermOp),
 18 |     /// Opcodes that take a single operand.
 19 |     Unary(UnaryOp),
 20 |     /// Opcodes that take two operands.
 21 |     Binary(BinaryOp),
 22 |     /// Opcodes that interact with memory.
 23 |     Mem(MemOp),
 24 |     /// Opcodes that cast between types and representations.
 25 |     Cast(CastOp),
 26 |     /// Funclet pad opcodes.
 27 |     FuncletPad(FuncletPadOp),
 28 |     /// "Other" opcodes of all sorts.
 29 |     Other(OtherOp),
 30 | }
 31 | 
 32 | /// Opcodes that terminate basic blocks. Every well-formed basic block ends
 33 | /// with an instruction with one of these opcodes.
 34 | #[derive(Clone, Copy, Debug)]
 35 | pub enum TermOp {
 36 |     /// `ret`
 37 |     Ret,
 38 |     /// `br`
 39 |     Br,
 40 |     /// `switch`
 41 |     Switch,
 42 |     /// `indirectbr`
 43 |     IndirectBr,
 44 |     /// `invoke`
 45 |     Invoke,
 46 |     /// `resume`
 47 |     Resume,
 48 |     /// `unreachable`
 49 |     Unreachable,
 50 |     /// `cleanupret`
 51 |     CleanupRet,
 52 |     /// `catchret`
 53 |     CatchRet,
 54 |     /// `catchswitch`
 55 |     /// NOTE: Not documented?
 56 |     CatchSwitch,
 57 |     /// `callbr`
 58 |     CallBr,
 59 | }
 60 | 
 61 | /// Unary opcodes, i.e. opcodes that take a single operand.
 62 | #[derive(Clone, Copy, Debug)]
 63 | pub enum UnaryOp {
 64 |     /// `fneg`
 65 |     FNeg,
 66 | }
 67 | 
 68 | /// Errors that can occur when constructing a `UnaryOp`.
 69 | #[derive(Debug, Error)]
 70 | pub enum UnaryOpError {
 71 |     /// The opcode given doesn't correspond to a known `UnaryOpcode`.
 72 |     #[error("unknown opcode")]
 73 |     Opcode(#[from] TryFromPrimitiveError<UnaryOpcode>),
 74 | }
 75 | 
 76 | impl TryFrom<u64> for UnaryOp {
 77 |     type Error = UnaryOpError;
 78 |     /// Decodes a raw unary opcode; an unknown value becomes `UnaryOpError::Opcode`.
 79 |     fn try_from(value: u64) -> Result<Self, Self::Error> {
 80 |         UnaryOpcode::try_from(value).map(UnaryOp::from).map_err(UnaryOpError::from)
 81 |     }
 82 | }
 83 | 
 84 | impl From<UnaryOpcode> for UnaryOp {
 85 |     fn from(value: UnaryOpcode) -> Self {
 86 |         // `FNeg` is the sole variant, so this pattern is irrefutable (and stays compiler-checked).
 87 |         let UnaryOpcode::FNeg = value;
 88 |         UnaryOp::FNeg
 89 |     }
 90 | }
 91 | 
 92 | /// Binary opcodes, with separate variants for the integer and floating-point forms.
 93 | #[derive(Clone, Copy, Debug)]
 94 | pub enum BinaryOp {
 95 |     /// `add`
 96 |     Add,
 97 |     /// `fadd`
 98 |     FAdd,
 99 |     /// `sub`
100 |     Sub,
101 |     /// `fsub`
102 |     FSub,
103 |     /// `mul`
104 |     Mul,
105 |     /// `fmul`
106 |     FMul,
107 |     /// `udiv`
108 |     UDiv,
109 |     /// `sdiv`
110 |     SDiv,
111 |     /// `fdiv`
112 |     FDiv,
113 |     /// `urem`
114 |     URem,
115 |     /// `srem`
116 |     SRem,
117 |     /// `frem`
118 |     FRem,
119 |     /// `shl`
120 |     Shl,
121 |     /// `lshr`
122 |     LShr,
123 |     /// `ashr`
124 |     AShr,
125 |     /// `and`
126 |     And,
127 |     /// `or`
128 |     Or,
129 |     /// `xor`
130 |     Xor,
131 | }
132 | 
133 | /// Errors that can occur when constructing a `BinaryOp`.
134 | #[derive(Debug, Error)]
135 | pub enum BinaryOpError {
136 |     /// The specified type isn't valid for any binary operation; carries the offending type.
137 |     #[error("invalid type for binary op: {0:?}")]
138 |     InvalidType(Type),
139 | 
140 |     /// The specified type is incompatible with the requested operation; carries the offending type.
141 |     #[error("incompatible type for op: {0:?}")]
142 |     IncompatibleType(Type),
143 | 
144 |     /// The opcode given doesn't correspond to a known `BinaryOpcode`.
145 |     #[error("unknown opcode")]
146 |     Opcode(#[from] TryFromPrimitiveError<BinaryOpcode>),
147 | }
148 | 
149 | impl TryFrom<(u64, &Type)> for BinaryOp {
150 |     type Error = BinaryOpError;
151 | 
152 |     /// Resolves a raw opcode and its operand type into a concrete `BinaryOp`.
153 |     /// Fails on unknown opcodes and on types the operation cannot be applied to.
154 |     fn try_from((opc, ty): (u64, &Type)) -> Result<Self, Self::Error> {
155 |         let opc = BinaryOpcode::try_from(opc)?;
156 |         let is_fp = ty.is_floating_or_floating_vector();
157 |         // Binary operations are only valid on integer/fp types or vectors thereof.
158 |         if !is_fp && !ty.is_integer_or_integer_vector() {
159 |             return Err(BinaryOpError::InvalidType(ty.clone()));
160 |         }
161 | 
162 |         Ok(match (opc, is_fp) {
163 |             (BinaryOpcode::Add, false) => BinaryOp::Add,
164 |             (BinaryOpcode::Add, true) => BinaryOp::FAdd,
165 |             (BinaryOpcode::Sub, false) => BinaryOp::Sub,
166 |             (BinaryOpcode::Sub, true) => BinaryOp::FSub,
167 |             (BinaryOpcode::Mul, false) => BinaryOp::Mul,
168 |             (BinaryOpcode::Mul, true) => BinaryOp::FMul,
169 |             (BinaryOpcode::UDiv, false) => BinaryOp::UDiv,
170 |             // `udiv` can't be used with floating-point types.
171 |             (BinaryOpcode::UDiv, true) => return Err(BinaryOpError::IncompatibleType(ty.clone())),
172 |             (BinaryOpcode::SDiv, false) => BinaryOp::SDiv,
173 |             (BinaryOpcode::SDiv, true) => BinaryOp::FDiv,
174 |             (BinaryOpcode::URem, false) => BinaryOp::URem,
175 |             // `urem` can't be used with floating-point types.
176 |             (BinaryOpcode::URem, true) => return Err(BinaryOpError::IncompatibleType(ty.clone())),
177 |             (BinaryOpcode::SRem, false) => BinaryOp::SRem,
178 |             (BinaryOpcode::SRem, true) => BinaryOp::FRem,
179 |             // The rest are all integer-type only.
180 |             (BinaryOpcode::Shl, false) => BinaryOp::Shl,
181 |             (BinaryOpcode::LShr, false) => BinaryOp::LShr,
182 |             (BinaryOpcode::AShr, false) => BinaryOp::AShr,
183 |             (BinaryOpcode::And, false) => BinaryOp::And,
184 |             (BinaryOpcode::Or, false) => BinaryOp::Or,
185 |             (BinaryOpcode::Xor, false) => BinaryOp::Xor,
186 |             (_, true) => return Err(BinaryOpError::IncompatibleType(ty.clone())),
187 |         })
188 |     }
189 | }
190 | 
191 | /// Memory opcodes, i.e. opcodes that interact with memory.
192 | #[derive(Clone, Copy, Debug)]
193 | pub enum MemOp {
194 |     /// `alloca`
195 |     Alloca,
196 |     /// `load`
197 |     Load,
198 |     /// `store`
199 |     Store,
200 |     /// `getelementptr`
201 |     GetElementPtr,
202 |     /// `fence`
203 |     Fence,
204 |     /// `cmpxchg`
205 |     AtomicCmpXchg,
206 |     /// `atomicrmw`
207 |     AtomicRMW,
208 | }
209 | 
210 | /// Cast opcodes, i.e. opcodes that cast between types and representations.
211 | #[derive(Clone, Copy, Debug)]
212 | pub enum CastOp {
213 |     /// `trunc`
214 |     Trunc,
215 |     /// `zext`
216 |     ZExt,
217 |     /// `sext`
218 |     SExt,
219 |     /// `fptoui`
220 |     FPToUI,
221 |     /// `fptosi`
222 |     FPToSI,
223 |     /// `uitofp`
224 |     UIToFP,
225 |     /// `sitofp`
226 |     SIToFP,
227 |     /// `fptrunc`
228 |     FPTrunc,
229 |     /// `fpext`
230 |     FPExt,
231 |     /// `ptrtoint`
232 |     PtrToInt,
233 |     /// `inttoptr`
234 |     IntToPtr,
235 |     /// `bitcast`
236 |     BitCast,
237 |     /// `addrspacecast`
238 |     AddrSpaceCast,
239 | }
240 | 
241 | /// Funclet pad opcodes (`cleanuppad` and `catchpad`).
242 | #[derive(Clone, Copy, Debug)]
243 | pub enum FuncletPadOp {
244 |     /// `cleanuppad`
245 |     CleanupPad,
246 |     /// `catchpad`
247 |     CatchPad,
248 | }
249 | 
250 | /// Other opcodes, i.e. those not covered by the more specific opcode enums in this module.
251 | #[derive(Clone, Copy, Debug)]
252 | pub enum OtherOp {
253 |     /// `icmp`
254 |     ICmp,
255 |     /// `fcmp`
256 |     FCmp,
257 |     /// `phi`
258 |     Phi,
259 |     /// `call`
260 |     Call,
261 |     /// `select`
262 |     Select,
263 |     /// Internal pass opcode (first of two).
264 |     UserOp1,
265 |     /// Internal pass opcode (second of two).
266 |     UserOp2,
267 |     /// `va_arg`
268 |     VAArg,
269 |     /// `extractelement`
270 |     ExtractElement,
271 |     /// `insertelement`
272 |     InsertElement,
273 |     /// `shufflevector`
274 |     ShuffleVector,
275 |     /// `extractvalue`
276 |     ExtractValue,
277 |     /// `insertvalue`
278 |     InsertValue,
279 |     /// `landingpad`
280 |     LandingPad,
281 |     /// `freeze`
282 |     Freeze,
283 | }
284 | 


--------------------------------------------------------------------------------
/llvm-support/src/ty.rs:
--------------------------------------------------------------------------------
  1 | //! Structures for managing LLVM types.
  2 | 
  3 | use std::convert::TryFrom;
  4 | 
  5 | use thiserror::Error;
  6 | 
  7 | use crate::AddressSpace;
  8 | 
  9 | /// The IDs of types known to LLVM, numbered from `0` in declaration order.
 10 | ///
 11 | /// These are not fully unique: all integer types share the `Integer` type ID,
 12 | /// and similarly for pointers, arrays, etc.
 13 | // TODO(ww): Perhaps use arbitrary enum discriminants here when they're stabilized.
 14 | // See: https://github.com/rust-lang/rfcs/pull/2363
 15 | #[repr(u64)]
 16 | pub enum TypeId {
 17 |     /// 16-bit floating-points.
 18 |     Half = 0,
 19 |     /// 16-bit floating-points (7-bit significand).
 20 |     BFloat,
 21 |     /// 32-bit floating-points.
 22 |     Float,
 23 |     /// 64-bit floating-points.
 24 |     Double,
 25 |     /// 80-bit floating-points (x87).
 26 |     X86Fp80,
 27 |     /// 128-bit floating-points (112-bit significand).
 28 |     Fp128,
 29 |     /// 128-bit floating-points (two 64-bits, PowerPC).
 30 |     PpcFp128,
 31 |     /// The void type (a type with no size).
 32 |     Void,
 33 |     /// Labels.
 34 |     Label,
 35 |     /// Metadata.
 36 |     Metadata,
 37 |     /// MMX vectors (64 bits, x86).
 38 |     X86Mmx,
 39 |     /// AMX vectors (8192 bits, x86).
 40 |     X86Amx,
 41 |     /// Tokens.
 42 |     Token,
 43 |     /// Arbitrary bit-width integers.
 44 |     Integer,
 45 |     /// Functions.
 46 |     Function,
 47 |     /// Pointers.
 48 |     Pointer,
 49 |     /// Structures.
 50 |     Struct,
 51 |     /// Arrays.
 52 |     Array,
 53 |     /// Fixed-width SIMD vectors.
 54 |     FixedVector,
 55 |     /// Scalable SIMD vectors.
 56 |     ScalableVector,
 57 | }
 58 | 
 59 | /// A representation of LLVM's types.
 60 | ///
 61 | /// See [`TypeId`](TypeId) for documentation of each variant.
 62 | #[allow(missing_docs)]
 63 | #[derive(Clone, Debug, PartialEq)]
 64 | pub enum Type {
 65 |     Half,
 66 |     BFloat,
 67 |     Float,
 68 |     Double,
 69 |     Metadata,
 70 |     X86Fp80,
 71 |     Fp128,
 72 |     PpcFp128,
 73 |     Void,
 74 |     Label,
 75 |     X86Mmx,
 76 |     X86Amx,
 77 |     Token,
 78 |     Integer(IntegerType),
 79 |     Function(FunctionType),
 80 |     Pointer(PointerType),
 81 |     OpaquePointer(AddressSpace),
 82 |     Struct(StructType),
 83 |     Array(ArrayType),
 84 |     FixedVector(VectorType),
 85 |     ScalableVector(VectorType),
 86 | }
 87 | 
 88 | impl Type {
 89 |     /// Returns whether this type is one of the floating point types.
 90 |     ///
 91 |     /// ```rust
 92 |     /// use llvm_support::Type;
 93 |     ///
 94 |     /// assert!(Type::BFloat.is_floating());
 95 |     /// assert!(Type::Float.is_floating());
 96 |     /// assert!(Type::Double.is_floating());
 97 |     /// assert!(Type::X86Fp80.is_floating());
 98 |     /// assert!(Type::Fp128.is_floating());
 99 |     /// assert!(Type::PpcFp128.is_floating());
100 |     /// assert!(!Type::Metadata.is_floating());
101 |     /// ```
102 |     pub fn is_floating(&self) -> bool {
103 |         matches!(
104 |             self,
105 |             Type::Half
106 |                 | Type::BFloat
107 |                 | Type::Float
108 |                 | Type::Double
109 |                 | Type::X86Fp80
110 |                 | Type::Fp128
111 |                 | Type::PpcFp128
112 |         )
113 |     }
114 | 
115 |     /// Returns whether this type is an integer type.
116 |     pub fn is_integer(&self) -> bool {
117 |         matches!(self, Type::Integer(_))
118 |     }
119 | 
120 |     /// Returns whether this type is a valid "pointee" type, i.e. suitable as the inner type
121 |     /// for a pointer type.
122 |     pub fn is_pointee(&self) -> bool {
123 |         !matches!(
124 |             self,
125 |             Type::Void | Type::Label | Type::Metadata | Type::Token | Type::X86Amx
126 |         )
127 |     }
128 | 
129 |     /// Returns whether this type is a valid array element type, i.e. is suitable as the inner type
130 |     /// for an array type.
131 |     pub fn is_array_element(&self) -> bool {
132 |         !matches!(
133 |             self,
134 |             Type::Void
135 |                 | Type::Label
136 |                 | Type::Metadata
137 |                 | Type::Function(_)
138 |                 | Type::Token
139 |                 | Type::X86Amx
140 |                 | Type::ScalableVector(_)
141 |         )
142 |     }
143 | 
144 |     /// Returns whether this type is a valid structure element type, i.e. is suitable as a field
145 |     /// type within a structure type.
146 |     pub fn is_struct_element(&self) -> bool {
147 |         !matches!(
148 |             self,
149 |             Type::Void | Type::Label | Type::Metadata | Type::Function(_) | Type::Token
150 |         )
151 |     }
152 | 
153 |     /// Returns whether this type is a valid vector element type, i.e. is suitable as the inner
154 |     /// type for a vector type.
155 |     ///
156 |     /// ```rust
157 |     /// use llvm_support::{AddressSpace, Type};
158 |     ///
159 |     /// assert!(Type::Float.is_vector_element());
160 |     /// assert!(Type::new_integer(32).unwrap().is_vector_element());
161 |     /// assert!(
162 |     ///     Type::new_pointer(Type::new_integer(8).unwrap(), AddressSpace::default())
163 |     ///     .unwrap()
164 |     ///     .is_vector_element()
165 |     /// );
166 |     /// assert!(!Type::Metadata.is_vector_element());
167 |     /// ```
168 |     pub fn is_vector_element(&self) -> bool {
169 |         self.is_floating() || matches!(self, Type::Integer(_) | Type::Pointer(_))
170 |     }
171 | 
172 |     /// Returns whether this type is "first class", i.e. is a valid type for an LLVM value.
173 |     fn is_first_class(&self) -> bool {
174 |         !matches!(self, Type::Function(_) | Type::Void)
175 |     }
176 | 
177 |     /// Returns whether this type is a valid argument type, i.e. is suitable as an argument
178 |     /// within a function type.
179 |     ///
180 |     /// ```rust
181 |     /// use llvm_support::Type;
182 |     ///
183 |     /// assert!(Type::Float.is_argument());
184 |     /// assert!(!Type::Void.is_argument());
185 |     /// ```
186 |     pub fn is_argument(&self) -> bool {
187 |         self.is_first_class()
188 |     }
189 | 
190 |     /// Returns whether this type is a valid return type, i.e. is suitable as the return type
191 |     /// within a function type.
192 |     pub fn is_return(&self) -> bool {
193 |         !matches!(self, Type::Function(_) | Type::Label | Type::Metadata)
194 |     }
195 | 
196 |     /// Return the scalar type for this type.
197 |     ///
198 |     /// This is always the identity type for non-vector types, and the element type for vector types.
199 |     pub fn scalar_type(&self) -> &Self {
200 |         match &self {
201 |             Type::ScalableVector(VectorType {
202 |                 num_elements: _,
203 |                 element_type,
204 |                 ..
205 |             }) => element_type,
206 |             Type::FixedVector(VectorType {
207 |                 num_elements: _,
208 |                 element_type,
209 |                 ..
210 |             }) => element_type,
211 |             _ => self,
212 |         }
213 |     }
214 | 
215 |     /// Returns whether this type is a floating-point type or a vector type
216 |     /// of floating points.
217 |     pub fn is_floating_or_floating_vector(&self) -> bool {
218 |         self.scalar_type().is_floating()
219 |     }
220 | 
221 |     /// Returns whether this type is a integer type or a vector type
222 |     /// of integers.
223 |     pub fn is_integer_or_integer_vector(&self) -> bool {
224 |         self.scalar_type().is_integer()
225 |     }
226 | 
227 |     /// Create a new struct type with the given fields.
228 |     pub fn new_struct(
229 |         name: Option<String>,
230 |         fields: Vec<Type>,
231 |         is_packed: bool,
232 |     ) -> Result<Self, StructTypeError> {
233 |         let inner = StructType::new(name, fields, is_packed)?;
234 | 
235 |         Ok(Type::Struct(inner))
236 |     }
237 | 
238 |     /// Create a new integral type from the given bit width.
239 |     pub fn new_integer(bit_width: u32) -> Result<Self, IntegerTypeError> {
240 |         let inner = IntegerType::try_from(bit_width)?;
241 | 
242 |         Ok(Type::Integer(inner))
243 |     }
244 | 
245 |     /// Create a new pointer type from the given pointee type and address space.
246 |     pub fn new_pointer(
247 |         pointee: Type,
248 |         address_space: AddressSpace,
249 |     ) -> Result<Self, PointerTypeError> {
250 |         let inner = PointerType::new(pointee, address_space)?;
251 | 
252 |         Ok(Type::Pointer(inner))
253 |     }
254 | 
255 |     /// Create a new array type of the given size and element type.
256 |     pub fn new_array(num_elements: u64, element_type: Type) -> Result<Self, ArrayTypeError> {
257 |         let inner = ArrayType::new(num_elements, element_type)?;
258 | 
259 |         Ok(Type::Array(inner))
260 |     }
261 | 
262 |     /// Create a new scalable vector type of the given size and element type.
263 |     pub fn new_scalable_vector(
264 |         num_elements: u64,
265 |         element_type: Type,
266 |     ) -> Result<Self, VectorTypeError> {
267 |         let inner = VectorType::new(num_elements, element_type)?;
268 | 
269 |         Ok(Type::ScalableVector(inner))
270 |     }
271 | 
272 |     /// Create a new (fixed) vector type of the given size and element type.
273 |     pub fn new_vector(num_elements: u64, element_type: Type) -> Result<Self, VectorTypeError> {
274 |         let inner = VectorType::new(num_elements, element_type)?;
275 | 
276 |         Ok(Type::FixedVector(inner))
277 |     }
278 | 
279 |     /// Create a new function type of the given return type, parameter types, and variadic disposition.
280 |     pub fn new_function(
281 |         return_type: Type,
282 |         param_types: Vec<Type>,
283 |         is_vararg: bool,
284 |     ) -> Result<Self, FunctionTypeError> {
285 |         let inner = FunctionType::new(return_type, param_types, is_vararg)?;
286 | 
287 |         Ok(Type::Function(inner))
288 |     }
289 | }
290 | 
291 | /// Errors that can occur when constructing an [`StructType`](StructType).
292 | #[derive(Debug, Error)]
293 | pub enum StructTypeError {
294 |     /// The requested element type is invalid.
295 |     #[error("invalid structure element type: {0:?}")]
296 |     BadElement(Type),
297 | }
298 | 
299 | /// Represents a "struct" type.
300 | #[non_exhaustive]
301 | #[derive(Clone, Debug, PartialEq)]
302 | pub struct StructType {
303 |     /// This structure's name, if is has one.
304 |     pub name: Option<String>,
305 |     /// The individual fields of this structure.
306 |     pub fields: Vec<Type>,
307 |     /// Whether the fields of this structure are packed.
308 |     is_packed: bool,
309 | }
310 | 
311 | impl StructType {
312 |     /// Create a new `StructType`.
313 |     pub fn new(
314 |         name: Option<String>,
315 |         fields: Vec<Type>,
316 |         is_packed: bool,
317 |     ) -> Result<Self, StructTypeError> {
318 |         if let Some(bad) = fields.iter().find(|t| !t.is_struct_element()) {
319 |             Err(StructTypeError::BadElement(bad.clone()))
320 |         } else {
321 |             Ok(Self {
322 |                 name,
323 |                 fields,
324 |                 is_packed,
325 |             })
326 |         }
327 |     }
328 | }
329 | 
330 | /// Errors that can occur when constructing an [`IntegerType`](IntegerType).
331 | #[derive(Debug, Error)]
332 | pub enum IntegerTypeError {
333 |     /// The requested bit width for this integer type is invalid.
334 |     #[error(
335 |         "specified bit width is invalid (not in [{}, {}])",
336 |         IntegerType::MIN_INT_BITS,
337 |         IntegerType::MAX_INT_BITS
338 |     )]
339 |     BadWidth,
340 | }
341 | 
/// An integral type of a fixed bit width.
///
/// Instances are only handed out after the width has been range-checked, so
/// the stored width is valid by construction.
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct IntegerType {
    /// How many bits wide this integral type is.
    bit_width: u32,
}

impl IntegerType {
    /// The smallest bit width a valid integer type may have.
    pub const MIN_INT_BITS: u32 = 1;
    /// The largest bit width a valid integer type may have.
    pub const MAX_INT_BITS: u32 = (1 << 24) - 1;

    /// The number of bits in this integral type.
    pub fn bit_width(&self) -> u32 {
        self.bit_width
    }

    /// The number of whole bytes needed to hold this integral type.
    ///
    /// Widths that are not a multiple of eight are rounded up, so the result
    /// may cover more bits than the type actually uses.
    pub fn byte_width(&self) -> u32 {
        let whole_bytes = self.bit_width / 8;
        let has_partial_byte = self.bit_width % 8 != 0;

        whole_bytes + u32::from(has_partial_byte)
    }
}
370 | 
371 | impl TryFrom<u32> for IntegerType {
372 |     type Error = IntegerTypeError;
373 | 
374 |     fn try_from(value: u32) -> Result<Self, Self::Error> {
375 |         if (IntegerType::MIN_INT_BITS..=IntegerType::MAX_INT_BITS).contains(&value) {
376 |             Ok(Self { bit_width: value })
377 |         } else {
378 |             Err(Self::Error::BadWidth)
379 |         }
380 |     }
381 | }
382 | 
383 | /// Errors that can occur when constructing an [`PointerType`](PointerType).
384 | #[derive(Debug, Error)]
385 | pub enum PointerTypeError {
386 |     /// The requested pointee type is invalid.
387 |     #[error("invalid pointee type: {0:?}")]
388 |     BadPointee(Type),
389 | }
390 | 
391 | /// Represents a pointer type in some address space.
392 | ///
393 | /// The validity of the internal pointee type is correct by construction.
394 | #[non_exhaustive]
395 | #[derive(Clone, Debug, PartialEq)]
396 | pub struct PointerType {
397 |     pointee: Box<Type>,
398 |     address_space: AddressSpace,
399 | }
400 | 
401 | impl PointerType {
402 |     /// Create a new `PointerType`.
403 |     pub fn new(pointee: Type, address_space: AddressSpace) -> Result<Self, PointerTypeError> {
404 |         if pointee.is_pointee() {
405 |             Ok(Self {
406 |                 pointee: Box::new(pointee),
407 |                 address_space,
408 |             })
409 |         } else {
410 |             Err(PointerTypeError::BadPointee(pointee))
411 |         }
412 |     }
413 | 
414 |     /// Return a reference to the pointed-to type.
415 |     pub fn pointee(&self) -> &Type {
416 |         self.pointee.as_ref()
417 |     }
418 | }
419 | 
420 | /// Errors that can occur when constructing an [`ArrayType`](ArrayType).
421 | #[derive(Debug, Error)]
422 | pub enum ArrayTypeError {
423 |     /// The requested element type is invalid.
424 |     #[error("invalid array element type: {0:?}")]
425 |     BadElement(Type),
426 | }
427 | 
428 | /// Represents an array type.
429 | #[non_exhaustive]
430 | #[derive(Clone, Debug, PartialEq)]
431 | pub struct ArrayType {
432 |     num_elements: u64,
433 |     element_type: Box<Type>,
434 | }
435 | 
436 | impl ArrayType {
437 |     /// Create a new `ArrayType`.
438 |     pub fn new(num_elements: u64, element_type: Type) -> Result<Self, ArrayTypeError> {
439 |         if element_type.is_array_element() {
440 |             Ok(Self {
441 |                 num_elements,
442 |                 element_type: Box::new(element_type),
443 |             })
444 |         } else {
445 |             Err(ArrayTypeError::BadElement(element_type))
446 |         }
447 |     }
448 | 
449 |     /// Return a reference to the inner element type.
450 |     pub fn element(&self) -> &Type {
451 |         self.element_type.as_ref()
452 |     }
453 | }
454 | 
455 | /// Errors that can occur when constructing a [`VectorType`](VectorType).
456 | #[derive(Debug, Error)]
457 | pub enum VectorTypeError {
458 |     /// The requested element type is invalid.
459 |     #[error("invalid vector element type: {0:?}")]
460 |     BadElement(Type),
461 | }
462 | 
463 | /// Represents an vector type.
464 | ///
465 | /// This vector may be fixed or scaled; which one is determined by its surrounding
466 | /// [`Type`](Type) variant.
467 | #[non_exhaustive]
468 | #[derive(Clone, Debug, PartialEq)]
469 | pub struct VectorType {
470 |     num_elements: u64,
471 |     element_type: Box<Type>,
472 | }
473 | 
474 | impl VectorType {
475 |     /// Create a new `VectorType`.
476 |     pub fn new(num_elements: u64, element_type: Type) -> Result<Self, VectorTypeError> {
477 |         if element_type.is_vector_element() {
478 |             Ok(Self {
479 |                 num_elements,
480 |                 element_type: Box::new(element_type),
481 |             })
482 |         } else {
483 |             Err(VectorTypeError::BadElement(element_type))
484 |         }
485 |     }
486 | 
487 |     /// Return a reference to the inner element type.
488 |     pub fn element(&self) -> &Type {
489 |         self.element_type.as_ref()
490 |     }
491 | }
492 | 
493 | /// Errors that can occur when constructing a [`FunctionType`](FunctionType).
494 | #[derive(Debug, Error)]
495 | pub enum FunctionTypeError {
496 |     /// The requested return type is invalid.
497 |     #[error("invalid function return type: {0:?}")]
498 |     BadReturn(Type),
499 |     /// The requested parameter type is invalid.
500 |     #[error("invalid function parameter type: {0:?}")]
501 |     BadParameter(Type),
502 | }
503 | 
504 | /// Represents an function type.
505 | #[non_exhaustive]
506 | #[derive(Clone, Debug, PartialEq)]
507 | pub struct FunctionType {
508 |     return_type: Box<Type>,
509 |     param_types: Vec<Type>,
510 |     is_vararg: bool,
511 | }
512 | 
513 | impl FunctionType {
514 |     /// Create a new `FunctionType`.
515 |     pub fn new(
516 |         return_type: Type,
517 |         param_types: Vec<Type>,
518 |         is_vararg: bool,
519 |     ) -> Result<Self, FunctionTypeError> {
520 |         if !return_type.is_return() {
521 |             Err(FunctionTypeError::BadReturn(return_type))
522 |         } else if let Some(bad) = param_types.iter().find(|ty| !ty.is_argument()) {
523 |             Err(FunctionTypeError::BadParameter(bad.clone()))
524 |         } else {
525 |             Ok(FunctionType {
526 |                 return_type: Box::new(return_type),
527 |                 param_types,
528 |                 is_vararg,
529 |             })
530 |         }
531 |     }
532 | }
533 | 
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks range validation and bit/byte width reporting for `IntegerType`.
    #[test]
    fn test_integer_type() {
        // Widths just outside either end of the valid range are rejected.
        let rejected_widths = [0, IntegerType::MAX_INT_BITS + 1];
        for &width in rejected_widths.iter() {
            assert!(IntegerType::try_from(width).is_err());
        }

        // (bit width, expected byte width) pairs for accepted widths.
        let accepted_widths = [
            (IntegerType::MIN_INT_BITS, 1),
            (IntegerType::MAX_INT_BITS, 2097152),
            (31, 4),
            (32, 4),
        ];
        for &(bits, bytes) in accepted_widths.iter() {
            let ty = IntegerType::try_from(bits).unwrap();
            assert_eq!(ty.bit_width(), bits);
            assert_eq!(ty.byte_width(), bytes);
        }

        // Everything up to eight bits fits in a single byte.
        for bits in 1..=8 {
            let ty = IntegerType::try_from(bits).unwrap();
            assert_eq!(ty.bit_width(), bits);
            assert_eq!(ty.byte_width(), 1);
        }
    }

    /// Checks pointee validation and pointee retrieval for `PointerType`.
    #[test]
    fn test_pointer_type() {
        // None of these may be pointed to.
        let invalid_pointees = vec![
            Type::Void,
            Type::Label,
            Type::Metadata,
            Type::Token,
            Type::X86Amx,
        ];
        for pointee in invalid_pointees {
            assert!(PointerType::new(pointee, AddressSpace::default()).is_err());
        }

        // Valid pointees are stored and handed back unchanged.
        let double_ptr = PointerType::new(Type::Double, AddressSpace::default()).unwrap();
        assert_eq!(double_ptr.pointee(), &Type::Double);

        let i32_ty = Type::new_integer(32).unwrap();
        let i32_ptr = PointerType::new(i32_ty, AddressSpace::default()).unwrap();
        assert_eq!(i32_ptr.pointee(), &Type::new_integer(32).unwrap());
    }
}
594 | 


--------------------------------------------------------------------------------
/release.toml:
--------------------------------------------------------------------------------
1 | pre-release-commit-message = "{{crate_name}}: {{version}}"
2 | dev-version = false
3 | publish = false # handled by GitHub Actions
4 | push = true
5 | 


--------------------------------------------------------------------------------