├── rust-toolchain
├── .gitignore
├── fuzz
    ├── .gitignore
    ├── Cargo.toml
    └── fuzz_targets
    │   └── write_helpers_are_correct.rs
├── src
    ├── testkit.rs
    ├── display
    │   ├── display_sink
    │   │   ├── imp_generic.rs
    │   │   └── imp_x86.rs
    │   └── display_sink.rs
    ├── safer_unchecked.rs
    ├── display.rs
    ├── testkit
    │   └── display.rs
    ├── annotation
    │   └── mod.rs
    ├── color_new.rs
    ├── lib.rs
    ├── reader.rs
    ├── address
    │   └── mod.rs
    └── color.rs
├── LICENSE
├── Makefile
├── tests
    ├── reader.rs
    ├── lib.rs
    └── display.rs
├── Cargo.toml
├── goodfile
├── README.md
├── docs
    └── 0001-AnnotatingDecoder.md
└── CHANGELOG


/rust-toolchain:
--------------------------------------------------------------------------------
1 | 1.71.0
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | Cargo.lock
3 | 


--------------------------------------------------------------------------------
/fuzz/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | corpus
3 | artifacts
4 | 


--------------------------------------------------------------------------------
/src/testkit.rs:
--------------------------------------------------------------------------------
 1 | //! utilities to validate that implementations of traits in `yaxpeax-arch` uphold requirements
 2 | //! described in this crate.
 3 | //!
 4 | //! currently, this only includes tools to validate correct use of
 5 | //! [`crate::display::DisplaySink`], but may grow in the future.
 6 | 
 7 | #[cfg(feature="alloc")]
 8 | mod display;
 9 | #[cfg(feature="alloc")]
10 | pub use display::{DisplaySinkValidator, DisplaySinkWriteComparator};
11 | 


--------------------------------------------------------------------------------
/fuzz/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "yaxpeax-arch-fuzz"
 3 | version = "0.0.0"
 4 | authors = ["Automatically generated"]
 5 | publish = false
 6 | edition = "2018"
 7 | 
 8 | [package.metadata]
 9 | cargo-fuzz = true
10 | 
11 | [dependencies]
12 | libfuzzer-sys = "0.4"
13 | 
14 | [dependencies.yaxpeax-arch]
15 | path = ".."
16 | 
17 | # Prevent this from interfering with workspaces
18 | [workspace]
19 | members = ["."]
20 | 
21 | [[bin]]
22 | name = "write_helpers_are_correct"
23 | path = "fuzz_targets/write_helpers_are_correct.rs"
24 | test = false
25 | doc = false
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2020 iximeow
 2 | 
 3 | Permission to use, copy, modify, and/or distribute this software for any
 4 | purpose with or without fee is hereby granted.
 5 | 
 6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
 7 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 8 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
 9 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
10 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
11 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
12 | PERFORMANCE OF THIS SOFTWARE.
13 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | test: build-smoketest test-std test-no-std test-serde-no-std test-colors-no-std test-color-new-no-std test-alloc-no-std
 2 | 
 3 | build-smoketest:
 4 | 	cargo build
 5 | 	cargo build --no-default-features
 6 | 	cargo build --no-default-features --target wasm32-wasi
 7 | 
 8 | test-std:
 9 | 	cargo test
10 | test-no-std:
11 | 	cargo test --no-default-features
12 | test-serde-no-std: # the serde feature declared in Cargo.toml is `use-serde` (it also pulls in serde_derive)
13 | 	cargo test --no-default-features --features "use-serde"
14 | test-colors-no-std:
15 | 	cargo test --no-default-features --features "colors"
16 | test-color-new-no-std:
17 | 	cargo test --no-default-features --features "color-new"
18 | test-alloc-no-std:
19 | 	cargo test --no-default-features --features "alloc"
20 | 


--------------------------------------------------------------------------------
/tests/reader.rs:
--------------------------------------------------------------------------------
 1 | use yaxpeax_arch::{Reader, U8Reader, U16le, U32le};
 2 | 
 3 | #[test]
 4 | fn reader_offset_is_words_not_bytes() {
 5 |     fn test_u16<T: Reader<u64, U16le>>(reader: &mut T) { // reader yielding `U16le` words
 6 |         reader.mark();
 7 |         assert_eq!(reader.offset(), 0);
 8 |         reader.next().unwrap();
 9 |         assert_eq!(reader.offset(), 1); // one word read since the mark, so the offset is 1
10 |         reader.mark();
11 |         reader.next().unwrap();
12 |         assert_eq!(reader.offset(), 1); // `offset()` is relative to the most recent `mark()`
13 |         assert_eq!(reader.total_offset(), 2); // `total_offset()` keeps counting across marks
14 |     }
15 |     fn test_u32<T: Reader<u64, U32le>>(reader: &mut T) { // reader yielding `U32le` words
16 |         reader.mark();
17 |         assert_eq!(reader.offset(), 0);
18 |         reader.next().unwrap();
19 |         assert_eq!(reader.offset(), 1); // the whole four-byte input is a single word
20 |     }
21 | 
22 |     test_u16(&mut U8Reader::new(&[0x01, 0x02, 0x03, 0x04]));
23 |     test_u32(&mut U8Reader::new(&[0x01, 0x02, 0x03, 0x04]));
24 | }
25 | 


--------------------------------------------------------------------------------
/src/display/display_sink/imp_generic.rs:
--------------------------------------------------------------------------------
 1 | /// append `data` to `buf`. callers are expected to pass fewer than 8 bytes of `data` and a `buf`
 2 | /// with room for all of it; this implementation simply forwards to `String::push_str`.
 3 | ///
 4 | /// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`.
 5 | #[inline(always)]
 6 | pub unsafe fn append_string_lt_8_unchecked(buf: &mut alloc::string::String, data: &str) {
 7 |     buf.push_str(data);
 8 | }
 9 | 
10 | /// append `data` to `buf`. callers are expected to pass fewer than 16 bytes of `data` and a `buf`
11 | /// with room for all of it; this implementation simply forwards to `String::push_str`.
12 | ///
13 | /// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`.
14 | #[inline(always)]
15 | pub unsafe fn append_string_lt_16_unchecked(buf: &mut alloc::string::String, data: &str) {
16 |     buf.push_str(data);
17 | }
18 | 
19 | /// append `data` to `buf`. callers are expected to pass fewer than 32 bytes of `data` and a `buf`
20 | /// with room for all of it; this implementation simply forwards to `String::push_str`.
21 | ///
22 | /// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`.
23 | #[inline(always)]
24 | pub unsafe fn append_string_lt_32_unchecked(buf: &mut alloc::string::String, data: &str) {
25 |     buf.push_str(data);
26 | }
27 | 


--------------------------------------------------------------------------------
/src/safer_unchecked.rs:
--------------------------------------------------------------------------------
 1 | //! tools to help validate correct use of `unchecked` functions.
 2 | //!
 3 | //! these `kinda_unchecked` functions use equivalent implementations that panic on violated
 4 | //! invariants when the `debug_assertions` config is present, and the corresponding
 5 | //! `*_unchecked` functions otherwise.
 6 | //!
 7 | //! for example, `GetSaferUnchecked` uses a normal index when debug assertions are enabled, but
 8 | //! `.get_unchecked()` otherwise. this means that tests and even fuzzing can be made to exercise
 9 | //! panic-on-error cases as desired.
10 | 
11 | use core::slice::SliceIndex;
12 | 
13 | pub trait GetSaferUnchecked<T> { // element access meant to be bounds-checked only under `debug_assertions`
14 |     unsafe fn get_kinda_unchecked<I>(&self, index: I) -> &<I as SliceIndex<[T]>>::Output
15 |     where
16 |         I: SliceIndex<[T]>; // anything usable to index a `[T]`; the return type follows from it
17 | }
18 | 
19 | impl<T> GetSaferUnchecked<T> for [T] {
20 |     #[inline(always)]
21 |     unsafe fn get_kinda_unchecked<I>(&self, index: I) -> &<I as SliceIndex<[T]>>::Output
22 |     where
23 |         I: SliceIndex<[T]>,
24 |     {
25 |         match cfg!(debug_assertions) {
26 |             // ordinary indexing: panics if `index` is out of bounds.
27 |             true => &self[index],
28 |             false => self.get_unchecked(index), // no bounds check; the caller vouches for `index`.
29 |         }
30 |     }
31 | }
32 | 
33 | #[inline(always)]
34 | pub unsafe fn unreachable_kinda_unchecked() -> ! {
35 |     // with debug assertions enabled, arriving here is reported as a panic rather than being UB.
36 |     if cfg!(debug_assertions) {
37 |         panic!("UB: Unreachable unchecked was executed");
38 |     }
39 |     core::hint::unreachable_unchecked()
40 | }
41 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | 
 3 | authors = [ "iximeow <me@iximeow.net>" ]
 4 | description = "fundamental traits to describe an architecture in the yaxpeax project"
 5 | edition = "2021"
 6 | keywords = ["disassembly", "disassembler"]
 7 | license = "0BSD"
 8 | name = "yaxpeax-arch"
 9 | repository = "https://git.iximeow.net/yaxpeax-arch/"
10 | version = "0.3.2"
11 | 
12 | [dependencies]
13 | "num-traits" = { version = "0.2", default-features = false }
14 | "crossterm" = { version = "0.27.0", optional = true }
15 | "serde" = { version = "1.0", optional = true }
16 | "serde_derive" = { version = "1.0", optional = true }
17 | 
18 | [dev-dependencies]
19 | anyhow = "1.0.41"
20 | thiserror = "1.0.26"
21 | 
22 | [profile.release]
23 | lto = true
24 | 
25 | [features]
26 | default = ["std", "alloc", "use-serde", "color-new", "address-parse"]
27 | 
28 | std = ["alloc"]
29 | 
30 | alloc = []
31 | 
32 | # enables the (optional) use of Serde for bounds on
33 | # Arch and Arch::Address
34 | use-serde = ["serde", "serde_derive"]
35 | 
36 | # feature flag for the existing but misfeature'd initial support for output
37 | # coloring.  the module this gates will be removed in 0.4.0, which includes
38 | # removing `trait Colorize`, and requires a major version bump for any
39 | # dependency that moves forward.
40 | colors = ["crossterm"]
41 | 
42 | # feature flag for revised output colorizing support, which will replace the
43 | # existing `colors` feature in 0.4.0.
44 | color-new = []
45 | 
46 | address-parse = []
47 | 


--------------------------------------------------------------------------------
/goodfile:
--------------------------------------------------------------------------------
 1 | Build.dependencies({"git", "make", "rustc", "cargo", "rustup"})
 2 | 
 3 | Step.start("crate")
 4 | Step.push("build")
 5 | Build.run({"cargo", "build"})
 6 | -- and now that some code is conditional on target arch, at least try to build
 7 | -- for other architectures even if we might not be able to run on them.
 8 | Build.run({"rustup", "target", "add", "wasm32-wasi"})
 9 | Build.run({"cargo", "build", "--no-default-features", "--target", "wasm32-wasi"})
10 | 
11 | Step.advance("test")
12 | -- TODO: set `-D warnings` here and below...
13 | Build.run({"cargo", "test"}, {name="test default features"})
14 | 
15 | -- `cargo test` ends up running doc tests. great! but yaxpeax-arch's docs reference items in std only.
16 | -- so for other feature combinations, skip doc tests. do this by passing `--tests` explicitly,
17 | -- which disables the automagic "run everything" settings.
18 | Build.run({"cargo", "test", "--no-default-features", "--tests"}, {name="test no features"})
19 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std"}, {name="test std only"})
20 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "colors"}, {name="test feature combinations"})
21 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "use-serde"}, {name="test feature combinations"})
22 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "address-parse"}, {name="test feature combinations"})
23 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "alloc"}, {name="test feature combinations"})
24 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "color-new"}, {name="test feature combinations"})
25 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors"}, {name="test feature combinations"})
26 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde"}, {name="test feature combinations"})
27 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,address-parse"}, {name="test feature combinations"})
28 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,address-parse,alloc"}, {name="test feature combinations"})
29 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "use-serde,colors,address-parse"}, {name="test feature combinations"})
30 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "use-serde,colors,address-parse,alloc"}, {name="test feature combinations"})
31 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors,address-parse"}, {name="test feature combinations"})
32 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors,address-parse,alloc"}, {name="test feature combinations"})
33 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde,colors"}, {name="test feature combinations"})
34 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde,colors,alloc"}, {name="test feature combinations"})
35 | Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "color-new,alloc"}, {name="test feature combinations"})
36 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/write_helpers_are_correct.rs:
--------------------------------------------------------------------------------
 1 | #![no_main]
 2 | use libfuzzer_sys::fuzz_target;
 3 | use yaxpeax_arch::display::DisplaySink;
 4 | 
 5 | use std::convert::TryInto;
 6 | 
 7 | fuzz_target!(|data: &[u8]| {
 8 |     let mut buf = String::new();
 9 |     match data.len() {
10 |         1 => {
11 |             let i = data[0];
12 | 
13 |             buf.clear();
14 |             buf.write_u8(i).expect("write succeeds");
15 |             assert_eq!(buf, format!("{:x}", i));
16 | 
17 |             buf.clear();
18 |             buf.write_prefixed_u8(i).expect("write succeeds");
19 |             assert_eq!(buf, format!("0x{:x}", i));
20 | 
21 |             let expected = if (i as i8) < 0 {
22 |                 format!("-0x{:x}", (i as i8).unsigned_abs())
23 |             } else {
24 |                 format!("0x{:x}", i)
25 |             };
26 | 
27 |             buf.clear();
28 |             buf.write_prefixed_i8(i as i8).expect("write succeeds");
29 |             assert_eq!(buf, expected);
30 |         },
31 |         2 => {
32 |             let i: u16 = u16::from_le_bytes(data.try_into().expect("checked the size is right"));
33 | 
34 |             buf.clear();
35 |             buf.write_u16(i).expect("write succeeds");
36 |             assert_eq!(buf, format!("{:x}", i));
37 | 
38 |             buf.clear();
39 |             buf.write_prefixed_u16(i).expect("write succeeds");
40 |             assert_eq!(buf, format!("0x{:x}", i));
41 | 
42 |             let expected = if (i as i16) < 0 {
43 |                 format!("-0x{:x}", (i as i16).unsigned_abs())
44 |             } else {
45 |                 format!("0x{:x}", i)
46 |             };
47 | 
48 |             buf.clear();
49 |             buf.write_prefixed_i16(i as i16).expect("write succeeds");
50 |             assert_eq!(buf, expected);
51 |         }
52 |         4 => {
53 |             let i: u32 = u32::from_le_bytes(data.try_into().expect("checked the size is right"));
54 | 
55 |             buf.clear();
56 |             buf.write_u32(i).expect("write succeeds");
57 |             assert_eq!(buf, format!("{:x}", i));
58 | 
59 |             buf.clear();
60 |             buf.write_prefixed_u32(i).expect("write succeeds");
61 |             assert_eq!(buf, format!("0x{:x}", i));
62 | 
63 |             let expected = if (i as i32) < 0 {
64 |                 format!("-0x{:x}", (i as i32).unsigned_abs())
65 |             } else {
66 |                 format!("0x{:x}", i)
67 |             };
68 | 
69 |             buf.clear();
70 |             buf.write_prefixed_i32(i as i32).expect("write succeeds");
71 |             assert_eq!(buf, expected);
72 |         },
73 |         8 => {
74 |             let i: u64 = u64::from_le_bytes(data.try_into().expect("checked the size is right"));
75 | 
76 |             buf.clear();
77 |             buf.write_u64(i).expect("write succeeds");
78 |             assert_eq!(buf, format!("{:x}", i));
79 | 
80 |             buf.clear();
81 |             buf.write_prefixed_u64(i).expect("write succeeds");
82 |             assert_eq!(buf, format!("0x{:x}", i));
83 | 
84 |             let expected = if (i as i64) < 0 {
85 |                 format!("-0x{:x}", (i as i64).unsigned_abs())
86 |             } else {
87 |                 format!("0x{:x}", i)
88 |             };
89 | 
90 |             buf.clear();
91 |             buf.write_prefixed_i64(i as i64).expect("write succeeds");
92 |             assert_eq!(buf, expected);
93 |         },
94 |         _ => {}
95 |     }
96 | });
97 | 


--------------------------------------------------------------------------------
/tests/lib.rs:
--------------------------------------------------------------------------------
  1 | use yaxpeax_arch::AddressBase;
  2 | 
  3 | mod reader;
  4 | 
  5 | #[test]
  6 | fn test_u16() {
  7 |     for l in 0..100 {
  8 |         for r in 0..=core::u16::MAX {
  9 |             assert_eq!(r.wrapping_offset(l.diff(&r).expect("u16 addresses always have valid diffs")), l);
 10 |         }
 11 |     }
 12 | }
 13 | 
 14 | #[test]
 15 | #[cfg(std)] // NOTE(review): bare `std` is not what the crate's `std` feature sets; probably meant `feature = "std"` — confirm
 16 | fn generic_error_can_bail() {
 17 |     use yaxpeax_arch::{Arch, Decoder, Reader};
 18 | 
 19 |     #[allow(dead_code)] // `decode` is never called; it only needs to type-check
 20 |     fn decode<A: Arch, U: Into<impl Reader<A::Address, A::Word>>>(data: U, decoder: &A::Decoder) -> anyhow::Result<()> {
 21 |         let mut reader = data.into();
 22 |         decoder.decode(&mut reader)?; // `?` has to turn the decode error into an `anyhow` error
 23 |         Ok(())
 24 |     }
 25 | }
 26 | #[test]
 27 | #[cfg(std)] // NOTE(review): bare `std` is not what the crate's `std` feature sets; probably meant `feature = "std"` — confirm
 28 | fn error_can_bail() {
 29 |     use yaxpeax_arch::{Arch, AddressDiff, Decoder, Reader, LengthedInstruction, Instruction, StandardDecodeError, U8Reader};
 30 |     struct TestIsa {}
 31 |     #[derive(Debug, Default)]
 32 |     struct TestInst {}
 33 |     impl Arch for TestIsa {
 34 |         type Word = u8;
 35 |         type Address = u64;
 36 |         type Instruction = TestInst;
 37 |         type Decoder = TestIsaDecoder;
 38 |         type DecodeError = StandardDecodeError;
 39 |         type Operand = ();
 40 |     }
 41 | 
 42 |     impl Instruction for TestInst {
 43 |         fn well_defined(&self) -> bool { true }
 44 |     }
 45 | 
 46 |     impl LengthedInstruction for TestInst {
 47 |         type Unit = AddressDiff<u64>;
 48 |         fn len(&self) -> Self::Unit { AddressDiff::from_const(1) }
 49 |         fn min_size() -> Self::Unit { AddressDiff::from_const(1) }
 50 |     }
 51 | 
 52 |     struct TestIsaDecoder {}
 53 | 
 54 |     impl Default for TestIsaDecoder {
 55 |         fn default() -> Self {
 56 |             TestIsaDecoder {}
 57 |         }
 58 |     }
 59 | 
 60 |     impl Decoder<TestIsa> for TestIsaDecoder {
 61 |         fn decode_into<T: Reader<u64, u8>>(&self, _inst: &mut TestInst, _words: &mut T) -> Result<(), StandardDecodeError> {
 62 |             // this decoder never succeeds: every call reports exhausted input.
 63 |             Err(StandardDecodeError::ExhaustedInput)
 64 |         }
 65 |     }
 66 | 
 67 |     #[derive(Debug, PartialEq, thiserror::Error)]
 68 |     pub enum Error {
 69 |         #[error("decode error")]
 70 |         TestDecode(#[from] StandardDecodeError),
 71 |     }
 72 |     // `?` below must convert `StandardDecodeError` into `Error` through the `#[from]` variant.
 73 |     fn exercise_eq() -> Result<(), Error> {
 74 |         let mut reader = U8Reader::new(&[]);
 75 |         TestIsaDecoder::default().decode(&mut reader)?;
 76 |         Ok(())
 77 |     }
 78 | 
 79 |     assert_eq!(exercise_eq(), Err(Error::TestDecode(StandardDecodeError::ExhaustedInput)));
 80 | }
 81 | 
 82 | #[test]
 83 | fn example_arch_impl() {
 84 |     use yaxpeax_arch::{Arch, AddressDiff, Decoder, Reader, LengthedInstruction, Instruction, StandardDecodeError, U8Reader};
 85 |     struct TestIsa {}
 86 |     #[derive(Debug, Default)]
 87 |     struct TestInst {}
 88 |     impl Arch for TestIsa {
 89 |         type Word = u8;
 90 |         type Address = u64;
 91 |         type Instruction = TestInst;
 92 |         type Decoder = TestIsaDecoder;
 93 |         type DecodeError = StandardDecodeError;
 94 |         type Operand = ();
 95 |     }
 96 | 
 97 |     impl Instruction for TestInst {
 98 |         fn well_defined(&self) -> bool { true }
 99 |     }
100 | 
101 |     impl LengthedInstruction for TestInst {
102 |         type Unit = AddressDiff<u64>;
103 |         fn len(&self) -> Self::Unit { AddressDiff::from_const(1) }
104 |         fn min_size() -> Self::Unit { AddressDiff::from_const(1) }
105 |     }
106 | 
107 |     struct TestIsaDecoder {}
108 | 
109 |     impl Default for TestIsaDecoder {
110 |         fn default() -> Self {
111 |             TestIsaDecoder {}
112 |         }
113 |     }
114 | 
115 |     impl Decoder<TestIsa> for TestIsaDecoder {
116 |         fn decode_into<T: Reader<u64, u8>>(&self, _inst: &mut TestInst, _words: &mut T) -> Result<(), StandardDecodeError> {
117 |             Err(StandardDecodeError::ExhaustedInput)
118 |         }
119 |     }
120 | 
121 |     fn exercise_eq() -> Result<(), StandardDecodeError> {
122 |         let mut reader = U8Reader::new(&[]);
123 |         TestIsaDecoder::default().decode(&mut reader)?;
124 |         Ok(())
125 |     }
126 | 
127 |     assert_eq!(exercise_eq(), Err(StandardDecodeError::ExhaustedInput));
128 | }
129 | 


--------------------------------------------------------------------------------
/tests/display.rs:
--------------------------------------------------------------------------------
  1 | 
  2 | // this was something of a misfeature for these formatters..
  3 | #[test]
  4 | #[allow(deprecated)]
  5 | fn formatters_are_not_feature_gated() {
  6 |     use yaxpeax_arch::display::{
  7 |         u8_hex, u16_hex, u32_hex, u64_hex,
  8 |         signed_i8_hex, signed_i16_hex, signed_i32_hex, signed_i64_hex
  9 |     };
 10 |     let _ = u8_hex(10);
 11 |     let _ = u16_hex(10);
 12 |     let _ = u32_hex(10);
 13 |     let _ = u64_hex(10);
 14 |     let _ = signed_i8_hex(10);
 15 |     let _ = signed_i16_hex(10);
 16 |     let _ = signed_i32_hex(10);
 17 |     let _ = signed_i64_hex(10);
 18 | }
 19 | 
 20 | #[cfg(feature="alloc")]
 21 | #[test]
 22 | fn instruction_text_sink_write_char_requires_ascii() {
 23 |     use core::fmt::Write;
 24 | 
 25 |     let mut text = String::with_capacity(512);
 26 |     let mut sink = unsafe {
 27 |         yaxpeax_arch::display::InstructionTextSink::new(&mut text)
 28 |     };
 29 |     let expected = "`1234567890-=+_)(*&^%$#@!~\\][poiuytrewq	|}{POIUYTREWQ';lkjhgfdsa\":LKJHGFDSA/.,mnbvcxz?><MNBVCXZ \r\n";
 30 |     for c in expected.as_bytes().iter() {
 31 |         sink.write_char(*c as char).expect("write works");
 32 |     }
 33 |     assert_eq!(text, expected);
 34 | }
 35 | 
 36 | #[cfg(feature="alloc")]
 37 | #[test]
 38 | #[should_panic]
 39 | fn instruction_text_sink_write_char_rejects_not_ascii() {
 40 |     use core::fmt::Write;
 41 | 
 42 |     let mut text = String::with_capacity(512);
 43 |     let mut sink = unsafe {
 44 |         yaxpeax_arch::display::InstructionTextSink::new(&mut text)
 45 |     };
 46 |     sink.write_char('\u{80}').expect("write works");
 47 | }
 48 | 
 49 | #[cfg(feature="alloc")]
 50 | #[test]
 51 | fn display_sink_write_hex_helpers() {
 52 |     use yaxpeax_arch::display::{DisplaySink};
 53 | 
 54 |     // for u8/i8/u16/i16 we can exhaustively test. we'll leave the rest for fuzzers.
 55 |     let mut buf = String::new();
 56 |     for i in 0..=u8::MAX {
 57 |         buf.clear();
 58 |         buf.write_u8(i).expect("write succeeds");
 59 |         assert_eq!(buf, format!("{:x}", i));
 60 | 
 61 |         buf.clear();
 62 |         buf.write_prefixed_u8(i).expect("write succeeds");
 63 |         assert_eq!(buf, format!("0x{:x}", i));
 64 | 
 65 |         let expected = if (i as i8) < 0 {
 66 |             format!("-0x{:x}", (i as i8).unsigned_abs())
 67 |         } else {
 68 |             format!("0x{:x}", i)
 69 |         };
 70 | 
 71 |         buf.clear();
 72 |         buf.write_prefixed_i8(i as i8).expect("write succeeds");
 73 |         assert_eq!(buf, expected);
 74 |     }
 75 | 
 76 |     for i in 0..=u16::MAX {
 77 |         buf.clear();
 78 |         buf.write_u16(i).expect("write succeeds");
 79 |         assert_eq!(buf, format!("{:x}", i));
 80 | 
 81 |         buf.clear();
 82 |         buf.write_prefixed_u16(i).expect("write succeeds");
 83 |         assert_eq!(buf, format!("0x{:x}", i));
 84 | 
 85 |         let expected = if (i as i16) < 0 {
 86 |             format!("-0x{:x}", (i as i16).unsigned_abs())
 87 |         } else {
 88 |             format!("0x{:x}", i)
 89 |         };
 90 | 
 91 |         buf.clear();
 92 |         buf.write_prefixed_i16(i as i16).expect("write succeeds");
 93 |         assert_eq!(buf, expected);
 94 |     }
 95 | }
 96 | 
 97 | #[cfg(feature="alloc")]
 98 | #[test]
 99 | fn sinks_are_equivalent() {
100 |     use yaxpeax_arch::display::{DisplaySink, FmtSink};
101 |     use yaxpeax_arch::testkit::DisplaySinkWriteComparator;
102 | 
103 |     let mut bare = String::new();
104 |     let mut through_sink = String::new();
105 |     for i in 0..=u16::MAX { // inclusive: 0xffff (and so -1 as i16) must be exercised too
106 |         bare.clear();
107 |         through_sink.clear();
108 |         let mut out = FmtSink::new(&mut through_sink);
109 |         let mut comparator = DisplaySinkWriteComparator::new( // presumably checks both sinks agree; see testkit
110 |             &mut out,
111 |             |sink| { sink.inner_ref().as_str() },
112 |             &mut bare,
113 |             |sink| { sink.as_str() },
114 |         );
115 |         comparator.write_u16(i).expect("write succeeds");
116 |         comparator.write_prefixed_u16(i).expect("write succeeds");
117 |         comparator.write_prefixed_i16(i as i16).expect("write succeeds");
118 |     }
119 | }
120 | 
121 | #[cfg(all(feature="alloc", feature="color-new"))]
122 | #[test]
123 | fn ansi_sink_works() {
124 |     use yaxpeax_arch::color_new::ansi::AnsiDisplaySink;
125 |     use yaxpeax_arch::display::DisplaySink;
126 | 
127 |     let mut buf = String::new();
128 | 
129 |     let mut ansi_sink = AnsiDisplaySink::new(&mut buf, yaxpeax_arch::color_new::DefaultColors);
130 | 
131 |     ansi_sink.span_start_immediate();
132 |     ansi_sink.write_prefixed_u8(0x80).expect("write succeeds");
133 |     ansi_sink.span_end_immediate();
134 |     ansi_sink.write_fixed_size("(").expect("write succeeds");
135 |     ansi_sink.span_start_register();
136 |     ansi_sink.write_fixed_size("rbp").expect("write succeeds");
137 |     ansi_sink.span_end_register();
138 |     ansi_sink.write_fixed_size(")").expect("write succeeds");
139 | 
140 |     drop(ansi_sink);
141 | 
142 |     assert_eq!(buf, "\x1b[37m0x80\x1b[39m(\x1b[38;5;6mrbp\x1b[39m)");
143 | }
144 | 


--------------------------------------------------------------------------------
/src/display.rs:
--------------------------------------------------------------------------------
  1 | // allow use of deprecated items in this module since some functions using `SignedHexDisplay` still
  2 | // exist here
  3 | #![allow(deprecated)]
  4 | 
  5 | use crate::YaxColors;
  6 | 
  7 | use core::fmt;
  8 | use core::num::Wrapping;
  9 | use core::ops::Neg;
 10 | 
 11 | mod display_sink;
 12 | 
 13 | pub use display_sink::{DisplaySink, FmtSink};
 14 | #[cfg(feature = "alloc")]
 15 | pub use display_sink::InstructionTextSink;
 16 | 
 17 | /// translate a byte in range `[0, 15]` to a lowercase base-16 digit.
 18 | ///
 19 | /// if `c` is in range, the output is always valid as the sole byte in a utf-8 string. if `c` is out
 20 | /// of range, the returned character might not be a valid single-byte utf-8 codepoint.
 21 | #[cfg(feature = "alloc")] // this function is of course not directly related to alloc, but it's only needed by impls that themselves are only present with alloc.
 22 | fn u8_to_hex(c: u8) -> u8 {
 23 |     // a branch rather than a table lookup: on most architectures (x86 with cmov especially)
 24 |     // this is the faster option.
 25 |     match c {
 26 |         0..=9 => b'0' + c,
 27 |         // evaluated left to right: add `c` first, then subtract 10.
 28 |         _ => b'a' + c - 10,
 29 |     }
 30 | }
 31 | 
 32 | #[deprecated(since="0.3.0", note="format_number_i32 does not optimize as expected and will be removed in the future. see DisplaySink instead.")]
 33 | pub enum NumberStyleHint {
 34 |     Signed,
 35 |     HexSigned,
 36 |     SignedWithSign,
 37 |     HexSignedWithSign,
 38 |     SignedWithSignSplit,
 39 |     HexSignedWithSignSplit,
 40 |     Unsigned,
 41 |     HexUnsigned,
 42 |     UnsignedWithSign,
 43 |     HexUnsignedWithSign
 44 | }
 45 | 
 46 | #[deprecated(since="0.3.0", note="format_number_i32 is both slow and incorrect: YaxColors may not result in correct styling when writing anywhere other than a terminal, and both stylin and formatting does not inline as well as initially expected. see DisplaySink instead.")]
 47 | pub fn format_number_i32<W: fmt::Write, Y: YaxColors>(_colors: &Y, f: &mut W, i: i32, hint: NumberStyleHint) -> fmt::Result {
 48 |     match hint { // `_colors` is unused: every arm writes plain, unstyled text for `i` into `f`
 49 |         NumberStyleHint::Signed => {
 50 |             write!(f, "{}", (i))
 51 |         },
 52 |         NumberStyleHint::HexSigned => {
 53 |             write!(f, "{}", signed_i32_hex(i))
 54 |         },
 55 |         NumberStyleHint::Unsigned => {
 56 |             write!(f, "{}", i as u32) // reinterpret the bits of `i` as unsigned
 57 |         },
 58 |         NumberStyleHint::HexUnsigned => {
 59 |             write!(f, "{}", u32_hex(i as u32))
 60 |         },
 61 |         NumberStyleHint::SignedWithSignSplit => {
 62 |             // `unsigned_abs` is exact for every input, including `i32::MIN`, whose magnitude
 63 |             // (2147483648) cannot be represented as an `i32`.
 64 |             if i < 0 {
 65 |                 write!(f, "- {}", i.unsigned_abs())
 66 |             } else {
 67 |                 write!(f, "+ {}", i)
 68 |             }
 69 |         }
 70 |         NumberStyleHint::HexSignedWithSignSplit => {
 71 |             // likewise for hex: the magnitude of `i32::MIN` is 0x80000000, and `u32_hex`
 72 |             // accepts that magnitude as a `u32` directly.
 73 |             if i < 0 {
 74 |                 write!(f, "- {}", u32_hex(i.unsigned_abs()))
 75 |             } else {
 76 |                 write!(f, "+ {}", u32_hex(i as u32))
 77 |             }
 78 |         },
 79 |         NumberStyleHint::HexSignedWithSign => {
 80 |             write!(f, "{}", signed_i32_hex(i))
 81 |         },
 82 |         NumberStyleHint::SignedWithSign => {
 83 |             write!(f, "{:+}", i) // `+` forces an explicit sign on non-negative values
 84 |         }
 85 |         NumberStyleHint::HexUnsignedWithSign => {
 86 |             write!(f, "{:+#x}", i as u32)
 87 |         },
 88 |         NumberStyleHint::UnsignedWithSign => {
 89 |             write!(f, "{:+}", i as u32)
 90 |         }
 91 |     }
 92 | }
 93 | 
 94 | #[deprecated(since="0.3.0", note="SignedHexDisplay does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
 95 | pub struct SignedHexDisplay<T: core::fmt::LowerHex + Neg> {
 96 |     value: T,
 97 |     negative: bool
 98 | }
 99 | 
100 | impl<T: fmt::LowerHex + Neg + Copy> fmt::Display for SignedHexDisplay<T> where Wrapping<T>: Neg, <Wrapping<T> as Neg>::Output: fmt::LowerHex {
101 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
102 |         if self.negative {
103 |             write!(f, "-{:#x}", -Wrapping(self.value))
104 |         } else {
105 |             write!(f, "{:#x}", self.value)
106 |         }
107 |     }
108 | }
109 | 
110 | #[deprecated(since="0.3.0", note="u8_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
111 | pub fn u8_hex(value: u8) -> SignedHexDisplay<i8> {
112 |     SignedHexDisplay {
113 |         value: value as i8,
114 |         negative: false,
115 |     }
116 | }
117 | 
118 | #[deprecated(since="0.3.0", note="signed_i8_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
119 | pub fn signed_i8_hex(imm: i8) -> SignedHexDisplay<i8> {
120 |     SignedHexDisplay {
121 |         value: imm,
122 |         negative: imm < 0,
123 |     }
124 | }
125 | 
126 | #[deprecated(since="0.3.0", note="u16_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
127 | pub fn u16_hex(value: u16) -> SignedHexDisplay<i16> {
128 |     SignedHexDisplay {
129 |         value: value as i16,
130 |         negative: false,
131 |     }
132 | }
133 | 
134 | #[deprecated(since="0.3.0", note="signed_i16_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
135 | pub fn signed_i16_hex(imm: i16) -> SignedHexDisplay<i16> {
136 |     SignedHexDisplay {
137 |         value: imm,
138 |         negative: imm < 0,
139 |     }
140 | }
141 | 
142 | #[deprecated(since="0.3.0", note="u32_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
143 | pub fn u32_hex(value: u32) -> SignedHexDisplay<i32> {
144 |     SignedHexDisplay {
145 |         value: value as i32,
146 |         negative: false,
147 |     }
148 | }
149 | 
150 | #[deprecated(since="0.3.0", note="signed_i32_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
151 | pub fn signed_i32_hex(imm: i32) -> SignedHexDisplay<i32> {
152 |     SignedHexDisplay {
153 |         value: imm,
154 |         negative: imm < 0,
155 |     }
156 | }
157 | 
158 | #[deprecated(since="0.3.0", note="u64_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
159 | pub fn u64_hex(value: u64) -> SignedHexDisplay<i64> {
160 |     SignedHexDisplay {
161 |         value: value as i64,
162 |         negative: false,
163 |     }
164 | }
165 | 
166 | #[deprecated(since="0.3.0", note="signed_i64_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
167 | pub fn signed_i64_hex(imm: i64) -> SignedHexDisplay<i64> {
168 |     SignedHexDisplay {
169 |         value: imm,
170 |         negative: imm < 0,
171 |     }
172 | }
173 | 


--------------------------------------------------------------------------------
/src/testkit/display.rs:
--------------------------------------------------------------------------------
  1 | //! tools to test the correctness of `yaxpeax-arch` trait implementations.
  2 | 
  3 | use core::fmt;
  4 | use core::fmt::Write;
  5 | 
  6 | use crate::display::DisplaySink;
  7 | 
  8 | /// `DisplaySinkValidator` is a `DisplaySink` that panics if invariants required of
  9 | /// `DisplaySink`-writing functions are not upheld.
 10 | ///
 11 | /// there are two categories of invariants that `DisplaySinkValidator` validates.
 12 | ///
 13 | /// first, this panics if spans are not `span_end_*`-ed in first-in-last-out order with
 14 | /// corresponding `span_start_*`. second, this panics if `write_lt_*` functions are ever provided
 15 | /// inputs longer than the corresponding maximum length.
 16 | ///
 17 | /// functions that write to a `DisplaySink` are strongly encouraged to come with fuzzing that for
 18 | /// all inputs `DisplaySinkValidator` does not panic.
 19 | pub struct DisplaySinkValidator {
 20 |     spans: alloc::vec::Vec<&'static str>,
 21 | }
 22 | 
 23 | impl DisplaySinkValidator {
 24 |     pub fn new() -> Self {
 25 |         Self { spans: alloc::vec::Vec::new() }
 26 |     }
 27 | }
 28 | 
 29 | impl core::ops::Drop for DisplaySinkValidator {
 30 |     fn drop(&mut self) {
 31 |         if self.spans.len() != 0 {
 32 |             panic!("DisplaySinkValidator dropped with open spans");
 33 |         }
 34 |     }
 35 | }
 36 | 
 37 | impl fmt::Write for DisplaySinkValidator {
 38 |     fn write_str(&mut self, _s: &str) -> Result<(), fmt::Error> {
 39 |         Ok(())
 40 |     }
 41 |     fn write_char(&mut self, _c: char) -> Result<(), fmt::Error> {
 42 |         Ok(())
 43 |     }
 44 | }
 45 | 
 46 | impl DisplaySink for DisplaySinkValidator {
 47 |     unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> {
 48 |         if s.len() >= 32 {
 49 |             panic!("DisplaySinkValidator::write_lt_32 was given a string longer than the maximum permitted length");
 50 |         }
 51 | 
 52 |         self.write_str(s)
 53 |     }
 54 |     unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> {
 55 |         if s.len() >= 16 {
 56 |             panic!("DisplaySinkValidator::write_lt_16 was given a string longer than the maximum permitted length");
 57 |         }
 58 | 
 59 |         self.write_str(s)
 60 |     }
 61 |     unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> {
 62 |         if s.len() >= 8 {
 63 |             panic!("DisplaySinkValidator::write_lt_8 was given a string longer than the maximum permitted length");
 64 |         }
 65 | 
 66 |         self.write_str(s)
 67 |     }
 68 | 
 69 |     fn span_start_immediate(&mut self) {
 70 |         self.spans.push("immediate");
 71 |     }
 72 | 
 73 |     fn span_end_immediate(&mut self) {
 74 |         let last = self.spans.pop().expect("item to pop");
 75 |         assert_eq!(last, "immediate");
 76 |     }
 77 | 
 78 |     fn span_start_register(&mut self) {
 79 |         self.spans.push("register");
 80 |     }
 81 | 
 82 |     fn span_end_register(&mut self) {
 83 |         let last = self.spans.pop().expect("item to pop");
 84 |         assert_eq!(last, "register");
 85 |     }
 86 | 
 87 |     fn span_start_opcode(&mut self) {
 88 |         self.spans.push("opcode");
 89 |     }
 90 | 
 91 |     fn span_end_opcode(&mut self) {
 92 |         let last = self.spans.pop().expect("item to pop");
 93 |         assert_eq!(last, "opcode");
 94 |     }
 95 | 
 96 |     fn span_start_program_counter(&mut self) {
 97 |         self.spans.push("program counter");
 98 |     }
 99 | 
100 |     fn span_end_program_counter(&mut self) {
101 |         let last = self.spans.pop().expect("item to pop");
102 |         assert_eq!(last, "program counter");
103 |     }
104 | 
105 |     fn span_start_number(&mut self) {
106 |         self.spans.push("number");
107 |     }
108 | 
109 |     fn span_end_number(&mut self) {
110 |         let last = self.spans.pop().expect("item to pop");
111 |         assert_eq!(last, "number");
112 |     }
113 | 
114 |     fn span_start_address(&mut self) {
115 |         self.spans.push("address");
116 |     }
117 | 
118 |     fn span_end_address(&mut self) {
119 |         let last = self.spans.pop().expect("item to pop");
120 |         assert_eq!(last, "address");
121 |     }
122 | 
123 |     fn span_start_function_expr(&mut self) {
124 |         self.spans.push("function expr");
125 |     }
126 | 
127 |     fn span_end_function_expr(&mut self) {
128 |         let last = self.spans.pop().expect("item to pop");
129 |         assert_eq!(last, "function expr");
130 |     }
131 | }
132 | 
133 | /// `DisplaySinkWriteComparator` helps test that two `DisplaySink` implementations which should
134 | /// produce the same output actually do.
135 | ///
136 | /// this is most useful for cases like testing specialized `write_lt_*` functions, which ought to
137 | /// behave the same as if `write_str()` were called instead and so can be used as a very simple
138 | /// oracle.
139 | ///
140 | /// this is somewhat less useful when the sinks are expected to produce unequal text, such as when
141 | /// one sink writes ANSI color sequences and the other does not.
142 | pub struct DisplaySinkWriteComparator<'sinks, T: DisplaySink, U: DisplaySink> {
143 |     sink1: &'sinks mut T,
144 |     sink1_check: fn(&T) -> &str,
145 |     sink2: &'sinks mut U,
146 |     sink2_check: fn(&U) -> &str,
147 | }
148 | 
149 | impl<'sinks, T: DisplaySink, U: DisplaySink> DisplaySinkWriteComparator<'sinks, T, U> {
150 |     pub fn new(
151 |         t: &'sinks mut T, t_check: fn(&T) -> &str,
152 |         u: &'sinks mut U, u_check: fn(&U) -> &str
153 |     ) -> Self {
154 |         Self {
155 |             sink1: t,
156 |             sink1_check: t_check,
157 |             sink2: u,
158 |             sink2_check: u_check,
159 |         }
160 |     }
161 | 
162 |     fn compare_sinks(&self) {
163 |         let sink1_text = (self.sink1_check)(self.sink1);
164 |         let sink2_text = (self.sink2_check)(self.sink2);
165 | 
166 |         if sink1_text != sink2_text {
167 |             panic!("sinks produced different output: {} != {}", sink1_text, sink2_text);
168 |         }
169 |     }
170 | }
171 | 
172 | impl<'sinks, T: DisplaySink, U: DisplaySink> DisplaySink for DisplaySinkWriteComparator<'sinks, T, U> {
173 |     fn write_u8(&mut self, v: u8) -> Result<(), fmt::Error> {
174 |         self.sink1.write_u8(v).expect("write to sink1 succeeds");
175 |         self.sink2.write_u8(v).expect("write to sink2 succeeds");
176 |         self.compare_sinks();
177 |         Ok(())
178 |     }
179 | }
180 | 
181 | impl<'sinks, T: DisplaySink, U: DisplaySink> fmt::Write for DisplaySinkWriteComparator<'sinks, T, U> {
182 |     fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> {
183 |         self.sink1.write_str(s).expect("write to sink1 succeeds");
184 |         self.sink2.write_str(s).expect("write to sink2 succeeds");
185 |         Ok(())
186 |     }
187 |     fn write_char(&mut self, c: char) -> Result<(), fmt::Error> {
188 |         self.sink1.write_char(c).expect("write to sink1 succeeds");
189 |         self.sink2.write_char(c).expect("write to sink2 succeeds");
190 |         Ok(())
191 |     }
192 | }
193 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## yaxpeax-arch
 2 | 
 3 | [![crate](https://img.shields.io/crates/v/yaxpeax-arch.svg?logo=rust)](https://crates.io/crates/yaxpeax-arch)
 4 | [![documentation](https://docs.rs/yaxpeax-arch/badge.svg)](https://docs.rs/yaxpeax-arch)
 5 | 
 6 | shared traits for architecture definitions, instruction decoders, and related interfaces for instruction decoders from the yaxpeax project.
 7 | 
 8 | typically this crate is only interesting if you're writing code to operate on multiple architectures that all implement `yaxpeax-arch` traits. for example, [yaxpeax-dis](https://crates.io/crates/yaxpeax-dis) implements disassembly and display logic generic over the traits defined here, so adding a new decoder is usually only a one or two line addition.
 9 | 
10 | `yaxpeax-arch` has several crate features, which implementers are encouraged to also support:
11 | * `std`: opt-in for `std`-specific support - in this crate, `std` enables a [`std::error::Error`](https://doc.rust-lang.org/std/error/trait.Error.html) requirement on `DecodeError`, allowing users to `?`-unwrap decode results.
12 | * `color_new`: enables traits and structs to stylize formatted instructions, including ANSI colorization.
13 | * ~`colors`~: DEPRECATED. enables (optional) [`crossterm`](https://docs.rs/crossterm/latest/crossterm/)-based ANSI colorization. default coloring rules are defined by [`ColorSettings`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/struct.ColorSettings.html), when enabled.
14 | * `address-parse`: enable a requirement that `yaxpeax_arch::Address` be parsable from `&str`. this is useful for use cases that, for example, read addresses from humans.
15 | * `use-serde`: enable [`serde`](https://docs.rs/serde/latest/serde/) serialization and deserialization bounds for types like `Address`.
16 | 
17 | with all features disabled, `yaxpeax-arch`'s only direct dependency is `num-traits`, and is suitable for `#![no_std]` usage.
18 | 
19 | ### design
20 | 
21 | `yaxpeax-arch` has backwards-incompatible changes from time to time, but there's not much to make incompatible. the main benefit of this crate is the [`Arch`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/trait.Arch.html) trait, for other libraries to build architecture-agnostic functionality.
22 | 
23 | nontrivial additions to `yaxpeax-arch` should include some discussion summarized by an addition to the crate [`docs/`](https://github.com/iximeow/yaxpeax-arch/tree/no-gods-no-/docs). you may ask, "where does discussion happen?", and the answer currently is in my (iximeow's) head, or various discord/irc/discord/email conversations. if there's need in the future, `yaxpeax` may develop a more consistent process.
24 | 
25 | `yaxpeax-arch` intends to support ad-hoc development of architecture support. maintainers of various architectures' crates may not want to implement all available interfaces to a complete level of detail, and must not be required to. incomplete implementations may be an issue for downstream users, but library quality is mediated by human conversation, not `yaxpeax-arch` interfaces. extensions to these fundamental definitions should be considerate of partial and incomplete implementations.
26 | 
27 | ### implementations
28 | 
29 | there are numerous architectures for which decoders are implemented, at varying levels of completion. now and in the future, they will be enumerated here:
30 | 
31 | | symbol | meaning |
32 | | ------ | ------- |
33 | | 🥳 | complete, reliable |
34 | | ⚠️| "complete", likely has gaps |
35 | | 🚧 | incomplete |
36 | | ❓ | unimplemented |
37 | 
38 | 
39 | | architecture | library | decode | tests | benchmarks | note |
40 | | ------------ | ------- | ------ | ----- | ---------- | ---- |
41 | | `x86_64` | [yaxpeax-x86](https://www.github.com/iximeow/yaxpeax-x86) | 🥳 | 🥳 | 🥳 | |
42 | | `x86:32` | [yaxpeax-x86](https://www.github.com/iximeow/yaxpeax-x86) | 🥳 | 🥳 | ❓ | sse and sse2 support cannot be disabled |
43 | | `x86:16` | [yaxpeax-x86](https://www.github.com/iximeow/yaxpeax-x86) | 🥳 | 🥳 | ❓ | instructions above the 8086 or 286 cannot be disabled |
44 | | `ia64` | [yaxpeax-ia64](https://www.github.com/iximeow/yaxpeax-ia64) | 🥳 | ⚠️ | ❓ | lack of a good oracle has complicated testing |
45 | | `armv7` | [yaxpeax-arm](https://www.github.com/iximeow/yaxpeax-arm) | 🚧 | 🚧 | ❓ | NEON is not yet supported |
46 | | `armv8` | [yaxpeax-arm](https://www.github.com/iximeow/yaxpeax-arm) | 🚧 | 🚧 | ❓ | a32 decoding is not yet supported, NEON is not supported |
47 | | `m16c` | [yaxpeax-m16c](https://www.github.com/iximeow/yaxpeax-m16c) | ⚠️ | 🚧 | ❓ | |
48 | | `mips` | [yaxpeax-mips](https://www.github.com/iximeow/yaxpeax-mips) | 🚧 | 🚧 | ❓ | |
49 | | `msp430` | [yaxpeax-msp430](https://www.github.com/iximeow/yaxpeax-msp430) | 🚧 | 🚧 | ❓ | |
50 | | `pic17` | [yaxpeax-pic17](https://www.github.com/iximeow/yaxpeax-pic17) | 🚧 | 🚧 | ❓ | |
51 | | `pic18` | [yaxpeax-pic18](https://www.github.com/iximeow/yaxpeax-pic18) | 🚧 | 🚧 | ❓ | |
52 | | `pic24` | [yaxpeax-pic24](https://www.github.com/iximeow/yaxpeax-pic24) | ❓ | ❓ | ❓ | exists, but only decodes `NOP` |
53 | | `sm83` | [yaxpeax-sm83](https://www.github.com/iximeow/yaxpeax-sm83) | 🥳 | 🚧 | ❓ | |
54 | | `avr` | [yaxpeax-avr](https://github.com/The6P4C/yaxpeax-avr) | 🥳 | 🚧 | ❓ | contributed by [@the6p4c](https://twitter.com/The6P4C)! |
55 | | `sh`/`sh2`/`j2`/`sh3`/`sh4` | [yaxpeax-superh](https://git.sr.ht/~nabijaczleweli/yaxpeax-superh) | 🥳 | 🚧 | ❓ | contributed by [наб](https://nabijaczleweli.xyz) |
56 | | `MOS 6502` | [yaxpeax-6502](https://github.com/cr1901/yaxpeax-6502) | ⚠️ | ❓ | ❓ | contributed by [@cr1901](https://www.twitter.com/cr1901) |
57 | | `lc87` | [yaxpeax-lc87](https://www.github.com/iximeow/yaxpeax-lc87) | 🥳 | ⚠️ | ❓ | |
58 | | `rx` | [yaxpeax-rx](https://www.github.com/iximeow/yaxpeax-rx) | 🥳 | ⚠️ | ❓ | |
59 | | `"avnera"` | [yaxpeax-avnera](https://www.github.com/iximeow/yaxpeax-avnera) | ⚠️ | ⚠️ | ❓ | undocumented architecture in some Avnera (now Skyworks) Bluetooth modules |
60 | 
61 | #### feature support
62 | 
63 | `yaxpeax-arch` defines a few typically-optional features that decoders can also implement, in addition to simple `(bytes) -> instruction` decoding. these are `yaxpeax-arch` traits (or collections thereof) which architectures implement, not crate features.
64 | 
65 | `description_spans`: implementation of [`AnnotatingDecoder`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/trait.AnnotatingDecoder.html), to decode instructions with bit-level details of what incoming bitstreams mean.
66 | 
67 | `contextualize`: implementation of [`ShowContextual`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/trait.ShowContextual.html), to display instructions with user-defined information in place of default instruction data. typically expected to show label names instead of relative branch addresses. **i do not recommend implementing this trait**, it needs significant reconsideration.
68 | 
69 | | architecture | `description_spans` | `contextualize` |
70 | | ------------ | ------------------- | --------------- |
71 | | `x86_64` | 🥳 | ❓ |
72 | | `ia64` | ⚠️ | ❓ |
73 | | `msp430` | 🥳 | ❓ |
74 | 
75 | ### mirrors
76 | 
77 | the canonical copy of `yaxpeax-arch` is at [https://git.iximeow.net/yaxpeax-arch](https://git.iximeow.net/yaxpeax-arch).
78 | 
79 | `yaxpeax-arch` is also mirrored on GitHub at [https://www.github.com/iximeow/yaxpeax-arch](https://www.github.com/iximeow/yaxpeax-arch).
80 | 


--------------------------------------------------------------------------------
/docs/0001-AnnotatingDecoder.md:
--------------------------------------------------------------------------------
 1 | ## `DescriptionSink`
 2 | 
 3 | most architectures' machine code packs interesting meanings into specific bit fields, and one of the more important tasks of the yaxpeax decoders is to unpack these into opcodes, operands, and other instruction data for later use. in the worst case, some architectures - typically interpreted bytecodes - do less bit-packing and simply map bytes to instructions.
 4 | 
 5 | the yaxpeax decoders' primary role is to handle this unpacking into user code-friendly structs. i want decoders to be able to report the meaning of bitfields too, so user code can mark up bit streams.
 6 | 
 7 | implementing this capability should (borderline-"must") not regress performance for decoders that do not use it. as a constraint, this is surprisingly restrictive!
 8 | 
 9 | a. it rules out a parameter to [`Decoder::decode_into`](https://docs.rs/yaxpeax-arch/0.2.5/yaxpeax_arch/trait.Decoder.html#tymethod.decode_into): an ignored or unused parameter can still change how `decode_into` inlines.  
10 | b. it rules out extra state on `Decoder` impls: writing to an unread `Vec` is still extra work at decode time.  
11 | 
12 | decoders other than x86 are less performance-sensitive, so **light** regressions in performance may be tolerable.
13 | 
14 | i would also like to:
15 | 
16 | c. not require decoders implement this to participate in code analysis [`yaxpeax-core`](https://github.com/iximeow/yaxpeax-core/) provides.  
17 | d. re-use existing decode logic -- requiring myself and other decoder authors to write everything twice would be miserable.  
18 | 
19 | the point `c` suggests not adding this capability to existing traits. taken together, these constraints point towards a _new_ trait that _could_ be implemented as an independent copy of decode logic, like:
20 | 
21 | ```rust
22 | trait AnnotatingDecoder<A: Arch + ?Sized> {
23 |     fn decode_with_annotation<
24 |         T: Reader<A::Address, A::Word>,
25 |     >(&mut self, inst: &mut A::Instruction, words: &mut T) -> Result<(), A::DecodeError>;
26 | }
27 | ```
28 | 
29 | but for implementations, it's easiest to tack this onto an existing `Arch`'s `InstDecoder`. point `b` means no new state, so wherever details about a span of bits are recorded, it should be an additional `&mut` parameter. then, if that parameter is an impl of some `Sink` trait, `yaxpeax_arch` can provide a no-op implementation of the `Sink` and let call sites be eliminated for non-annotating decodes.
30 | 
31 | taken together, this ends up adding three traits:
32 | 
33 | ```rust
34 | pub trait DescriptionSink<Descriptor> {
35 |     fn record(&mut self, start: u32, end: u32, description: Descriptor);
36 | }
37 | 
38 | pub trait FieldDescription {
39 |     fn id(&self) -> u32;
40 | }
41 | 
42 | pub trait AnnotatingDecoder<A: Arch + ?Sized> {
43 |     type FieldDescription: FieldDescription + Clone + Display + PartialEq;
44 | 
45 |     fn decode_with_annotation<
46 |         T: Reader<A::Address, A::Word>,
47 |         S: DescriptionSink<Self::FieldDescription>
48 |     >(&self, inst: &mut A::Instruction, words: &mut T, sink: &mut S) -> Result<(), A::DecodeError>;
49 | }
50 | ```
51 | 
52 | where `FieldDescription` lets callers that operate generically over spans do *something* with them. implementations can use `id` to tag descriptions that should be ordered together, regardless of the actual order the decoder reports them in. for some architectures, fields parsed later in decoding may influence the understanding of earlier fields, so reporting spans in `id`-order up front is an unreasonable burden. consider an x86 instruction, `660f6ec0` - the leading `66` is an operand-size override, but only after reading `0f6e` is it known that that prefix is changing the operands from `mm`/`dword` registers to `xmm`/`qword` registers. in fact this is only known _after_ reporting the opcode of `0f6e`, too.
53 | 
54 | `start` and `end` are bit offsets where a `description` applies. `description`s can overlap in part, or in full. exact bit order is known only by the architecture being decoded; is the order `0-7,8-15,16-23,24-31`, `7-0,15-8,23-16,31-24`, or something else? i'm not sure trying to encode that in `yaxpeax-arch` traits is useful right now. `start` and `end` are `u32` because in my professional opinion, `u16` is cursed, `u8` isn't large enough, and `u32` is the next smallest size. `id()` returns a `u32` because i never want to think of `id` space constraints; even if `id` encoded a `major.minor`-style pair of ordering components, the most constrained layout would be `u16.u16` for at most 65536 values in major or minor. that's a big instruction.
55 | 
56 | ### implementation
57 | 
58 | i've added WIP support for span reporting to `msp430`, `ia64`, and `x86` decoders. i extended `yaxpeax-dis` to [make pretty lines](https://twitter.com/iximeow/status/1423930207614889984). more could be said about that; `id`-order is expected to be, roughly, the order an instruction is decoded. some instruction sets keep the "first" bits as the low-order bits, some others use the higher bits first. so respecting `id`-order necessarily means some instruction sets will have fields "backwards" and make lines extra confusing.
59 | 
60 | decoders probably ought to indicate boundaries for significant parts of decoding, lest large instructions [like itanium](https://twitter.com/iximeow/status/1424092536071618561) be a nebulous mess. maybe `FieldDescription` could have an `is_separator()` to know when an element (and its bit range) indicates the end of part of an instruction?
61 | 
62 | for the most part, things work great. `yaxpeax-x86` had a minor performance regression. tracking it down wasn't too bad: the first one was because `sink` is a fifth argument for a non-inlined function. at this point most ABIs start spilling to memory. so an unused `sink` caused an extra stack write. this was a measurable overhead. the second regression was again pretty simple looking at `disas-bench` builds:
63 | 
64 | ```sh
65 | diff \
66 |   ` # a typical non-formatting build, from cratesio yaxpeax-x86 1.0.4 ` \
67 |   <(objdump -d bench-yaxpeax-no-fmt | grep -o ' .*long_mode.*>:') \
68 |   ` # a non-formatting build, from the local patch of yaxpeax-x86 with annotation reported to a no-op sink ` \
69 |   <(objdump -d bench-yaxpeax-no-fmt-no-annotation | grep -o ' .*long_mode.*>:')
70 | ```
71 | 
72 | the entire diff output:
73 | ```diff
74 | >  <_ZN11yaxpeax_x869long_mode8read_sib17hdc339ef7a182098aE>:
75 | ```
76 | 
77 | indeed, [`read_sib`](https://github.com/iximeow/yaxpeax-x86/blob/4371ed02ac30cb56ec4ddbf60c87e85c183d860b/src/long_mode/mod.rs#L5769-L5770) is not written as `inline(always)`, so it's possible this might not get inlined sometimes. since the only difference to `read_sib` is an extra parameter, for which all calls are no-ops that ignore arguments, i'm surprised to see the change, anyway. adding `#[inline(always)]` to `read_sib` returned `yaxpeax-x86` to "same-as-before" decode throughput.
78 | 
79 | in the process, i found a slight optimization for `read_sib` that removed a few extra branches from the function. the scrutiny was good after all.
80 | 
81 | ### conclusion
82 | 
83 | in summary, it works. it doesn't slow down callers that don't need spans of information. decoders can implement it optionally and at their leisure, without being ineligible for analysis-oriented libraries.
84 | 
85 | this is almost certainly going to be in `yaxpeax-arch 0.2.6` with implementations trickling into decoders whenever it seems like fun.
86 | 


--------------------------------------------------------------------------------
/src/annotation/mod.rs:
--------------------------------------------------------------------------------
  1 | //! traits (and convenient impls) for decoders that also produce descriptions of parsed bit fields.
  2 | //!
  3 | //! the design of this API is discussed in [`yaxpeax-arch`
  4 | //! documentation](https://github.com/iximeow/yaxpeax-arch/blob/no-gods-no-/docs/0001-AnnotatingDecoder.md#descriptionsink).
  5 | //!
  6 | //! ## usage
  7 | //!
  8 | //! [`AnnotatingDecoder::decode_with_annotation`] decodes an instruction much like
  9 | //! [`crate::Decoder::decode_into`], but also reports descriptions of bit fields to a provided
 10 | //! [`DescriptionSink`]. [`VecSink`] is likely the `DescriptionSink` of interest to retain fields;
 11 | //! decoders are not required to make any guarantees about the order of descriptions, either by the
 12 | //! description's associated [`FieldDescription::id`], or with respect to the bits a
 13 | //! `FieldDescription` is reported against. fields may be described by multiple `FieldDescription`
 14 | //! with matching `id` and `desc` -- this is to describe data in an instruction where
 15 | //! non-contiguous bits are taken together for a single detail. for these cases, the various
 16 | //! `FieldDescription` must compare equal, and users of `yaxpeax-arch` can rely on this equivalence
 17 | //! for grouping bit ranges.
 18 | //!
 19 | //! in a generic setting, there isn't much to do with a `FieldDescription` other than display it. a
 20 | //! typical use might look something like:
 21 | //! ```
 22 | //! #[cfg(feature="std")]
 23 | //! # {
 24 | //! use core::fmt;
 25 | //!
 26 | //! use yaxpeax_arch::annotation::{AnnotatingDecoder, VecSink};
 27 | //! use yaxpeax_arch::{Arch, Reader, U8Reader};
 28 | //!
 29 | //! fn show_field_descriptions<A: Arch>(decoder: A::Decoder, buf: &[u8])
 30 | //! where
 31 | //!     A::Decoder: AnnotatingDecoder<A>,
 32 | //!     A::Instruction: fmt::Display, for<'data> U8Reader<'data>: Reader<A::Address, A::Word>,
 33 | //! {
 34 | //!     let mut inst = A::Instruction::default();
 35 | //!     let mut reader = U8Reader::new(buf);
 36 | //!     let mut sink: VecSink<<A::Decoder as AnnotatingDecoder<A>>::FieldDescription> = VecSink::new();
 37 | //!
 38 | //!     decoder.decode_with_annotation(&mut inst, &mut reader, &mut sink).unwrap();
 39 | //!
 40 | //!     println!("decoded instruction {}", inst);
 41 | //!     for (start, end, desc) in sink.records.iter() {
 42 | //!         println!("  bits [{}, {}]: {}", start, end, desc);
 43 | //!     }
 44 | //! }
 45 | //! # }
 46 | //! ```
 47 | //!
 48 | //! note that the range `[start, end]` for a reported span is _inclusive_. the `end`-th bit of
 49 | //! an instruction's bit stream is described by the description.
 50 | //!
 51 | //! ## implementation guidance
 52 | //!
 53 | //! the typical implementation pattern is that an architecture's `Decoder` implements [`crate::Decoder`]
 54 | //! _and_ [`AnnotatingDecoder`], then callers are free to choose which style of decoding they want.
 55 | //! [`NullSink`] has a blanket impl of [`DescriptionSink`] for all possible descriptions, and
 56 | //! discards reported field descriptions. `decode_with_annotation` with annotations reported to a
 57 | //! `NullSink` must be functionally identical to a call to `Decoder::decode_into`.
 58 | //!
 59 | //! the important points:
 60 | //!
 61 | //! * `AnnotatingDecoder` is an **optional** implementation for decoders.
 62 | //! * `FieldDescription` in general is oriented towards human-directed output, but implementations
 63 | //! can be as precise as they want.
 64 | //! * since bit/byte order varies from architecture to architecture, a field's `start` and `end`
 65 | //! are defined with some ordering from the corresponding decoder crate. crates should describe the
 66 | //! bit ordering they select, and where possible, the bit ordering they describe should match
 67 | //! relevant ISA manuals.
 68 | //! * `FieldDescription` that return true for [`FieldDescription::is_separator`] are an exception
 69 | //! to bit span inclusivity: for these descriptions, the bit range should be `[b, b]` where `b` is
 70 | //! the last bit before the boundary being delimited. unlike other descriptions, `is_separator`
 71 | //! descriptions describe the space between bits `b` and `b+1`.
 72 | //! * if a description is to cover multiple bit fields, the reported `FieldDescription` must
 73 | //! be identical on `id` and `desc` for all involved bit fields.
 74 | 
 75 | use crate::{Arch, Reader};
 76 | 
 77 | use core::fmt::Display;
 78 | 
 79 | /// implementers of `DescriptionSink` receive descriptions of an instruction's disassembly process
 80 | /// and relevant offsets in the bitstream being decoded. descriptions are architecture-specific, and
 81 | /// architectures are expected to be able to turn the bit-level `start` and `end` values into a
 82 | /// meaningful description of bits in the original instruction stream.
 83 | pub trait DescriptionSink<Descriptor> {
 84 |     /// inform this `DescriptionSink` of a `description` that was informed by bits `start` to
 85 |     /// `end` from the start of an instruction's decoding. `start` and `end` are only relative to the
 86 |     /// instruction being decoded when this `DescriptionSink` was provided, so they will have no
 87 |     /// relation to the position in an underlying data stream used for past or future instructions.
 88 |     fn record(&mut self, start: u32, end: u32, description: Descriptor);
 89 | }
 90 | 
 91 | pub struct NullSink;
 92 | 
 93 | impl<T> DescriptionSink<T> for NullSink {
 94 |     fn record(&mut self, _start: u32, _end: u32, _description: T) { }
 95 | }
 96 | 
 97 | #[cfg(feature = "alloc")]
 98 | mod vec_sink {
 99 |     use alloc::vec::Vec;
100 |     use core::fmt::Display;
101 |     use crate::annotation::DescriptionSink;
102 | 
103 |     /// a `DescriptionSink` that keeps every `(start, end, description)` it is given, in order.
104 |     pub struct VecSink<T: Clone + Display> { pub records: Vec<(u32, u32, T)> }
105 | 
106 |     impl<T: Clone + Display> VecSink<T> {
107 |         /// create a sink holding no records.
108 |         pub fn new() -> Self { Self { records: Vec::new() } }
109 | 
110 |         /// consume the sink, yielding its records in the order they were reported.
111 |         pub fn into_inner(self) -> Vec<(u32, u32, T)> {
112 |             let Self { records } = self;
113 |             records
114 |         }
115 |     }
116 | 
117 |     impl<T: Clone + Display> DescriptionSink<T> for VecSink<T> {
118 |         fn record(&mut self, start: u32, end: u32, description: T) {
119 |             self.records.push((start, end, description));
120 |         }
121 |     }
122 | }
123 | #[cfg(feature = "alloc")]
124 | pub use vec_sink::VecSink;
125 | /// describes one annotated bit field; bit fields sharing a description report the same `id`.
126 | pub trait FieldDescription {
127 |     fn id(&self) -> u32;
128 |     fn is_separator(&self) -> bool;
129 | }
130 | 
131 | /// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s, with the
132 | /// decoder able to report descriptions of bits or fields in the instruction to a sink implementing
133 | /// [`DescriptionSink`]. the sink may be [`NullSink`] to discard provided data. decoding with a
134 | /// `NullSink` should behave identically to `Decoder::decode_into`. implementers are recommended to
135 | /// implement `Decoder::decode_into` as a call to `AnnotatingDecoder::decode_with_annotation` if
136 | /// implementing both traits.
137 | pub trait AnnotatingDecoder<A: Arch + ?Sized> {
138 |     type FieldDescription: FieldDescription + Clone + Display + PartialEq;
139 |     /// decode one instruction from `words` into `inst`, reporting field descriptions to `sink`.
140 |     fn decode_with_annotation<
141 |         T: Reader<A::Address, A::Word>,
142 |         S: DescriptionSink<Self::FieldDescription>
143 |     >(&self, inst: &mut A::Instruction, words: &mut T, sink: &mut S) -> Result<(), A::DecodeError>;
144 | }
145 | 


--------------------------------------------------------------------------------
/src/display/display_sink/imp_x86.rs:
--------------------------------------------------------------------------------
  1 | //! `imp_x86` has specialized copies to append short strings to strings. buffer sizing must be
  2 | //! handled by callers, in all cases.
  3 | //!
  4 | //! the structure of all implementations here is, essentially, to take the size of the data to
  5 | //! append and execute a copy for each bit set in that size, from highest to lowest. some bits are
  6 | //! simply never checked if the input is promised to never be that large - if a string to append is
  7 | //! only 0..7 bytes long, it is sufficient to only look at the low three bits to copy all bytes.
  8 | //!
  9 | //! in this way, it is slightly more efficient to right-size which append function is used, if the
 10 | //! maximum size of input strings can be bounded well. if the maximum size of input strings cannot
 11 | //! be bounded, you shouldn't be using these functions.
 12 | 
 13 | /// append `data` to `buf`, assuming `data` is less than 8 bytes and that `buf` has enough space
 14 | /// remaining to hold all bytes in `data`.
 15 | ///
 16 | /// Safety: callers must ensure `data.len() < 8` and `buf.capacity() - buf.len() >= data.len()`.
 17 | #[inline(always)]
 18 | pub unsafe fn append_string_lt_8_unchecked(buf: &mut alloc::string::String, data: &str) {
 19 |     // Safety: the only bytes appended to `buf` are those of `data`, which is a `&str` and so
 20 |     // valid utf8; `buf` is therefore still valid utf8 afterwards
 21 |     let buf = unsafe { buf.as_mut_vec() };
 22 |     let new_bytes = data.as_bytes();
 23 | 
 24 |     unsafe {
 25 |         let dest = buf.as_mut_ptr().add(buf.len());
 26 |         let src = new_bytes.as_ptr();
 27 | 
 28 |         let rem = new_bytes.len() as isize;
 29 | 
 30 |         // set_len early because there is no way to avoid the following asm!() writing that
 31 |         // same number of bytes into buf
 32 |         buf.set_len(buf.len() + new_bytes.len());
 33 |         // labels `12`/`13` (not `10`/`11`): rustc's `binary_asm_labels` lint rejects 0/1-only labels
 34 |         core::arch::asm!(
 35 |             "8:",
 36 |             "cmp {rem:e}, 4",
 37 |             "jb 9f",
 38 |             "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
 39 |             "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
 40 |             "sub {rem:e}, 4",
 41 |             "jz 13f",
 42 |             "9:",
 43 |             "cmp {rem:e}, 2",
 44 |             "jb 12f",
 45 |             "mov {buf:x}, word ptr [{src} + {rem} - 2]",
 46 |             "mov word ptr [{dest} + {rem} - 2], {buf:x}",
 47 |             "sub {rem:e}, 2",
 48 |             "jz 13f",
 49 |             "12:",
 50 |             "cmp {rem:e}, 1",
 51 |             "jb 13f",
 52 |             "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
 53 |             "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
 54 |             "13:",
 55 |             src = in(reg) src,
 56 |             dest = in(reg) dest,
 57 |             rem = inout(reg) rem => _,
 58 |             buf = out(reg) _,
 59 |             options(nostack),
 60 |         );
 61 |     }
 62 | }
 63 | 
 64 | /// append `data` to `buf`, assuming `data` is less than 16 bytes and that `buf` has enough space
 65 | /// remaining to hold all bytes in `data`.
 66 | ///
 67 | /// Safety: callers must ensure `data.len() < 16` and `buf.capacity() - buf.len() >= data.len()`.
 68 | #[inline(always)]
 69 | pub unsafe fn append_string_lt_16_unchecked(buf: &mut alloc::string::String, data: &str) {
 70 |     // Safety: the only bytes appended to `buf` are those of `data`, which is a `&str` and so
 71 |     // valid utf8; `buf` is therefore still valid utf8 afterwards
 72 |     let buf = unsafe { buf.as_mut_vec() };
 73 |     let new_bytes = data.as_bytes();
 74 | 
 75 |     unsafe {
 76 |         let dest = buf.as_mut_ptr().add(buf.len());
 77 |         let src = new_bytes.as_ptr();
 78 | 
 79 |         let rem = new_bytes.len() as isize;
 80 | 
 81 |         // set_len early because there is no way to avoid the following asm!() writing that
 82 |         // same number of bytes into buf
 83 |         buf.set_len(buf.len() + new_bytes.len());
 84 |         // labels `12`/`13` (not `10`/`11`): rustc's `binary_asm_labels` lint rejects 0/1-only labels
 85 |         core::arch::asm!(
 86 |             "7:",
 87 |             "cmp {rem:e}, 8",
 88 |             "jb 8f",
 89 |             "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
 90 |             "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
 91 |             "sub {rem:e}, 8",
 92 |             "jz 13f",
 93 |             "8:",
 94 |             "cmp {rem:e}, 4",
 95 |             "jb 9f",
 96 |             "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
 97 |             "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
 98 |             "sub {rem:e}, 4",
 99 |             "jz 13f",
100 |             "9:",
101 |             "cmp {rem:e}, 2",
102 |             "jb 12f",
103 |             "mov {buf:x}, word ptr [{src} + {rem} - 2]",
104 |             "mov word ptr [{dest} + {rem} - 2], {buf:x}",
105 |             "sub {rem:e}, 2",
106 |             "jz 13f",
107 |             "12:",
108 |             "cmp {rem:e}, 1",
109 |             "jb 13f",
110 |             "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
111 |             "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
112 |             "13:",
113 |             src = in(reg) src,
114 |             dest = in(reg) dest,
115 |             rem = inout(reg) rem => _,
116 |             buf = out(reg) _,
117 |             options(nostack),
118 |         );
119 |     }
120 | }
121 | 
122 | /// append `data` to `buf`, assuming `data` is less than 32 bytes and that `buf` has enough space
123 | /// remaining to hold all bytes in `data`.
124 | ///
125 | /// Safety: callers must ensure `data.len() < 32` and `buf.capacity() - buf.len() >= data.len()`.
126 | #[inline(always)]
127 | pub unsafe fn append_string_lt_32_unchecked(buf: &mut alloc::string::String, data: &str) {
128 |     // Safety: the only bytes appended to `buf` are those of `data`, which is a `&str` and so
129 |     // valid utf8; `buf` is therefore still valid utf8 afterwards
130 |     let buf = unsafe { buf.as_mut_vec() };
131 |     let new_bytes = data.as_bytes();
132 | 
133 |     unsafe {
134 |         let dest = buf.as_mut_ptr().add(buf.len());
135 |         let src = new_bytes.as_ptr();
136 | 
137 |         let rem = new_bytes.len() as isize;
138 | 
139 |         // set_len early because there is no way to avoid the following asm!() writing that
140 |         // same number of bytes into buf
141 |         buf.set_len(buf.len() + new_bytes.len());
142 |         // labels `12`/`13` (not `10`/`11`): rustc's `binary_asm_labels` lint rejects 0/1-only labels
143 |         core::arch::asm!(
144 |             "6:",
145 |             "cmp {rem:e}, 16",
146 |             "jb 7f",
147 |             "mov {buf:r}, qword ptr [{src} + {rem} - 16]",
148 |             "mov qword ptr [{dest} + {rem} - 16], {buf:r}",
149 |             "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
150 |             "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
151 |             "sub {rem:e}, 16",
152 |             "jz 13f",
153 |             "7:",
154 |             "cmp {rem:e}, 8",
155 |             "jb 8f",
156 |             "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
157 |             "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
158 |             "sub {rem:e}, 8",
159 |             "jz 13f",
160 |             "8:",
161 |             "cmp {rem:e}, 4",
162 |             "jb 9f",
163 |             "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
164 |             "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
165 |             "sub {rem:e}, 4",
166 |             "jz 13f",
167 |             "9:",
168 |             "cmp {rem:e}, 2",
169 |             "jb 12f",
170 |             "mov {buf:x}, word ptr [{src} + {rem} - 2]",
171 |             "mov word ptr [{dest} + {rem} - 2], {buf:x}",
172 |             "sub {rem:e}, 2",
173 |             "jz 13f",
174 |             "12:",
175 |             "cmp {rem:e}, 1",
176 |             "jb 13f",
177 |             "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
178 |             "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
179 |             "13:",
180 |             src = in(reg) src,
181 |             dest = in(reg) dest,
182 |             rem = inout(reg) rem => _,
183 |             buf = out(reg) _,
184 |             options(nostack),
185 |         );
186 |     }
187 | }
188 | 


--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
  1 | ## TODO
  2 | 
  3 | ~~TODO: Reader::next_n should return the number of items read as Err(ReadError::Incomplete(n)) if the buffer is exhausted~~
  4 | * a reader's `.offset()` should reflect the amount of items that were consumed, if any. if a reader can quickly determine
  5 |   there is not enough input, should it return Incomplete(0) or ExhaustedInput? Incomplete(0) vs ExhaustedInput may still
  6 |   imply that some state was changed (an access mode, for example). this needs more thought.
  7 | TODO: Reader::offset should return an AddressDiff<Address>, not a bare Address
  8 | * quick look seems reasonable enough, should be changed in concert with
  9 |   yaxpeax-core though and that's more than i'm signing up for today
 10 | TODO: impls of `fn one` and `fn zero` so downstream users don't have to import num_traits directly
 11 | * seems nice at first but this means that there are conflicting functions when Zero or One are in scope
 12 |   ... assuming that the idea at the time was to add `fn one` and `fn zero` to `AddressBase`.
 13 | TODO: 0.4.0 or later:
 14 |   * remove `mod colors`, crossterm dependency, related feature flags
 15 | 
 16 | ## 0.3.2
 17 | 
 18 | fix yaxpeax-arch not building for non-x86 targets when alloc is not enabled
 19 | 
 20 | ## 0.3.1
 21 | 
 22 | fix InstructionTextSink::write_char to not panic in debug builds
 23 | 
 24 | ## 0.3.0
 25 | 
 26 | added a new crate feature flag, `alloc`.
 27 |   this flag is for any features that do not require std, but do require
 28 |   containers from `liballoc`. good examples are `alloc::string::String` or
 29 |   `alloc::vec::Vec`.
 30 | 
 31 | added `yaxpeax_arch::display::DisplaySink` after revisiting output colorization.
 32 |   `DisplaySink` is better suited for general markup, rather than being focused
 33 |   specifically on ANSI/console text coloring. `YaxColors` also simply does not
 34 |   style text in some unfortunate circumstances, such as when the console that
 35 |   needs to be styled is only written to after intermediate buffering.
 36 | 
 37 |   `DisplaySink` also includes specializable functions for writing text to an
 38 |   output, and the implementation for `alloc::string::String` takes advantage of
 39 |   this: writing through `impl DisplaySink for String` will often be substantially
 40 |   more performant than writing through `fmt::Write`.
 41 | 
 42 | added `mod color_new`:
 43 |   this includes an alternate vision for `YaxColors` and better fits with the
 44 |   new `DisplaySink` machinery; ANSI-style text markup can be done through the
 45 |   new `yaxpeax_arch::color_new::ansi::AnsiDisplaySink`.
 46 | 
 47 |   this provides more flexibility than i'd initially expected! yours truly will
 48 |   be using this to render instructions with HTML spans (rather than ANSI
 49 |   sequences) to colorize dis.yaxpeax.net.
 50 | 
 51 |   in the future, `mod colored` will be removed, `mod color_new` will be renamed
 52 |   to `mod color`.
 53 | 
 54 | deprecated `mod colored`:
 55 |   generally, colorization of text is a presentation issue; `trait Colorize`
 56 |   mixed formatting of data to text with how that text is presented, but that is
 57 |   at odds with the same text being presented in different ways for which
 58 |   colorization is not generic. for example, rendering an instruction as marked
 59 |   up HTML involves coloring in an entirely different way than rendering an
 60 |   instruction with ANSI sequences for a VT100-like terminal.
 61 | 
 62 | added `yaxpeax_arch::safer_unchecked` to aid in testing use of unchecked methods
 63 |   these were originally added to improve yaxpeax-x86 testing:
 64 |   https://github.com/iximeow/yaxpeax-x86/pull/17, but are being pulled into
 65 |   yaxpeax-arch as they're generally applicable and overall wonderful tools.
 66 |   thank you again 522!
 67 | 
 68 | added `mod testkit`:
 69 |   this module contains tools to validate the correctness of crates implementing
 70 |   `yaxpeax-arch` traits. these initial tools are focused on validating the
 71 |   correctness of functions that write to `DisplaySink`, especially that span
 72 |   management is correct.
 73 | 
 74 |   `yaxpeax-x86`, for example, will imminently have fuzz targets to use these
 75 |   types for its own validation.
 76 | 
 77 | made VecSink's `records` private. instead of extracting records from the struct
 78 |   by accessing this field directly, call `VecSink::into_inner()`.
 79 | 
 80 | VecSink is now available through the `alloc` feature flag as well as `std`.
 81 | 
 82 | meta: the major omission in this release is an architecture-agnostic way to
 83 | format an instruction into a `DisplaySink`. i haven't been able to figure out
 84 | quite the right shape for that! it is fully expected in the future, and will
 85 | probably end up somehow referenced through `yaxpeax_arch::Arch`.
 86 | 
 87 | ## 0.2.8
 88 | 
 89 | added an impl of `From<ReadError>` for `StandardPartialDecoderError`, matching the existing `StandardDecodeError` impl.
 90 | 
 91 | moved a use of `serde` types to be covered by the relevant cfg flag; using `colors` without `serde` (unlikely) now actually builds.
 92 | 
 93 | fixed up doc comments to build without error.
 94 | 
 95 | (and additional testing permutations to validate cfg flags and doc comments in the future)
 96 | 
 97 | ## 0.2.7
 98 | 
 99 | moved `AnnotatingDecoder` and its associated types to `annotation/`, for module-level documentation about that feature.
100 | 
101 | yanked 0.2.6 because there was not yet a user of it other than myself, and it had this feature in the wrong location in the crate.
102 | 
103 | ## 0.2.6
104 | 
105 | added `AnnotatingDecoder` and associated traits `FieldDescription` and `DescriptionSink` for architectures to report meanings for bit ranges in decoded instructions.
106 | 
107 | added `NullSink`, with an `impl<T> DescriptionSink<T> for NullSink` - `NullSink` can always be used to discard instruction annotations. this is mostly useful for shared annotating and non-annotating decode logic.
108 | 
109 | added a `docs/` directory for `yaxpeax-arch`: trip reports for `yaxpeax-arch` design. if `yaxpeax` eventually grows an RFC process one day, these are the kind of changes that would get RFC'd.
110 | 
111 | added `docs/0001-AnnotatingDecoder.md`, describing motivation and implementation notes of `AnnotatingDecoder`.
112 | 
113 | ## 0.2.5
114 | 
115 | added `yaxpeax-lc87` to the matrix
116 | 
117 | ## 0.2.4
118 | 
119 | fix incorrect `Reader` impls of `offset` and `total_offset` on non-`u8` words
120 | 
121 | ## 0.2.3
122 | 
123 | added `Reader` impls for `U8Reader` on `u16` addresses
124 | 
125 | ## 0.2.2
126 | 
127 | added `ReaderBuilder` trait and impls for `U8Reader` on various address and word types.
128 | 
129 | added documentation for `Reader`, `U8Reader`, and `ReaderBuilder`.
130 | 
131 | avoid an unlikely violation of `core::ptr::offset` safety rules on 32-bit architectures.
132 | 
133 | ## 0.2.1
134 | 
135 | updated architecture matrix
136 | 
137 | ## 0.2.0
138 | 
139 | correct a bug in 0.1.0 that incorrectly bounded `DecodeError` and did not actually require `std::error::Error`. added a test that `std::error::Error` is actually required of `Arch::DecodeError` in non-std builds.
140 | 
141 | ## 0.1.0
142 | 
143 | new trait `Reader` to provide a reader of `Arch`-defined `Word`s. in many cases it is acceptable for `Word` to be `u8`, but `yaxpeax-arch` provides pre-defined words `u8`, `U16le`, `U16be`, `U32le`, `U32be`, `U64le`, and `U64be`.
144 | 
145 | `yaxpeax_arch::U8Reader` is a struct to read from `&[u8]` that implements `Reader` for all predefined words. it is suitable to read larger words if the minimum word size is still one byte.
146 | 
147 | `Decoder` now decodes from a `Reader<A::Address, A::Word>`, to prepare for ISAs where instruction sizes are not multiples of 8 bits.
148 | 
149 | `yaxpeax_arch::DecodeError` now requires a `std::error::Error` impl for `std` builds, to support interop with the Rust `error` ecosystem.
150 | 
151 | committed to `AddressDiff` being convertible to a primitive with `AddressDiff::to_const`
152 | - this addresses the need for hacks to translate an instruction length into a usize
153 | 
154 | ## 0.0.5
155 | 
156 | swap the `termion` dependency for `crossterm`. this is motivated by improved cross-platform support (notably Windows) as well as removing a type parameter from `Colored` and `YaxColors`.
157 | 
158 | ## 0.0.4
159 | 
160 | add `AddressDiff`. `LengthedInstruction::len` now returns `AddressDiff`. the length of an instruction is the difference between two addresses, not itself an address.
161 | 
162 | ## 0.0.3
163 | 
164 | `ColorSettings` gets a default impl
165 | 
166 | ## 0.0.2
167 | 
168 | add `AddressDisplay` to provide a usable interface to display `Address` implementors.
169 | 
170 | at the same time, remove `Address::stringy()`. it was a very bad interface, and will not be missed.
171 | 
172 | ## 0.0.1
173 | 
174 | history starts here
175 | 


--------------------------------------------------------------------------------
/src/color_new.rs:
--------------------------------------------------------------------------------
  1 | /// the palette of foreground colors that `YaxColors` implementations choose from.
  2 | ///
  3 | /// variants are declared in pairs: a color followed by its `Dark*` form, except that `Black` is
  4 | /// paired with `DarkGrey` and `White` with `Grey`.
  5 | ///
  6 | /// `ansi::color2ansi` maps every variant to an ANSI foreground control sequence.
  7 | ///
  8 | /// this enum is `#[non_exhaustive]`: matches outside this crate need a wildcard arm.
  9 | #[non_exhaustive]
 10 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
 11 | pub enum Color {
 12 |     Black, DarkGrey,
 13 |     Red, DarkRed,
 14 |     Green, DarkGreen,
 15 |     Yellow, DarkYellow,
 16 |     Blue, DarkBlue,
 17 |     Magenta, DarkMagenta,
 18 |     Cyan, DarkCyan,
 19 |     White, Grey,
 20 | }
 21 | /// a color scheme: one [`Color`] for each category of text named by the methods below.
 22 | pub trait YaxColors {
 23 |     fn arithmetic_op(&self) -> Color;
 24 |     fn stack_op(&self) -> Color;
 25 |     fn nop_op(&self) -> Color;
 26 |     fn stop_op(&self) -> Color;
 27 |     fn control_flow_op(&self) -> Color;
 28 |     fn data_op(&self) -> Color;
 29 |     fn comparison_op(&self) -> Color;
 30 |     fn invalid_op(&self) -> Color;
 31 |     fn platform_op(&self) -> Color;
 32 |     fn misc_op(&self) -> Color;
 33 |     // `*_op` colors above are for opcodes (`AnsiDisplaySink` uses `misc_op`); the rest color other text.
 34 |     fn register(&self) -> Color;
 35 |     fn program_counter(&self) -> Color;
 36 |     fn number(&self) -> Color;
 37 |     fn zero(&self) -> Color;
 38 |     fn one(&self) -> Color;
 39 |     fn minus_one(&self) -> Color;
 40 |     fn address(&self) -> Color;
 41 |     fn symbol(&self) -> Color;
 42 |     fn function(&self) -> Color;
 43 | }
 44 | 
 45 | /// support for colorizing text with ANSI control sequences.
 46 | ///
 47 | /// the most useful item in this module is [`ansi::AnsiDisplaySink`], which interprets span entry
 48 | /// and exit as points at which ANSI sequences may need to be written into the output it wraps -
 49 | /// that output may be any type implementing [`crate::display::DisplaySink`], including
 50 | /// [`crate::display::FmtSink`] to adapt any implementer of `fmt::Write` such as standard out.
 51 | ///
 52 | /// ## example
 53 | ///
 54 | /// to write colored text to standard out:
 55 | ///
 56 | /// ```
 57 | /// # #[cfg(feature="alloc")]
 58 | /// # {
 59 | /// # extern crate alloc;
 60 | /// # use alloc::string::String;
 61 | /// use yaxpeax_arch::color_new::DefaultColors;
 62 | /// use yaxpeax_arch::color_new::ansi::AnsiDisplaySink;
 63 | /// use yaxpeax_arch::display::FmtSink;
 64 | ///
 65 | /// let mut s = String::new();
 66 | /// let mut s_sink = FmtSink::new(&mut s);
 67 | ///
 68 | /// let mut writer = AnsiDisplaySink::new(&mut s_sink, DefaultColors);
 69 | ///
 70 | /// // this might be a yaxpeax crate's `display_into`, or other library implementation code
 71 | /// mod fake_yaxpeax_crate {
 72 | ///     use yaxpeax_arch::display::DisplaySink;
 73 | ///
 74 | ///     pub fn format_memory_operand<T: DisplaySink>(out: &mut T) -> core::fmt::Result {
 75 | ///         out.span_start_immediate();
 76 | ///         out.write_prefixed_u8(0x80)?;
 77 | ///         out.span_end_immediate();
 78 | ///         out.write_fixed_size("(")?;
 79 | ///         out.span_start_register();
 80 | ///         out.write_fixed_size("rbp")?;
 81 | ///         out.span_end_register();
 82 | ///         out.write_fixed_size(")")?;
 83 | ///         Ok(())
 84 | ///     }
 85 | /// }
 86 | ///
 87 | /// // this might be how a user uses `AnsiDisplaySink`, which will write ANSI-ful text to `s` and
 88 | /// // print it.
 89 | ///
 90 | /// fake_yaxpeax_crate::format_memory_operand(&mut writer).expect("write succeeds");
 91 | ///
 92 | /// println!("{}", s);
 93 | /// # }
 94 | /// ```
 95 | pub mod ansi {
 96 |     use crate::color_new::Color;
 97 | 
 98 |     // color sequences as described by ECMA-48 and, apparently, `man 4 console_codes`
 99 |     /// translate a [`Color`] to the ANSI control code that changes the
100 |     /// foreground color to match.
101 |     #[allow(dead_code)] // allowing this to be dead code because if colors are enabled and alloc is not, there will not be an AnsiDisplaySink, which is the sole user of this function.
102 |     fn color2ansi(color: Color) -> &'static str {
103 |         // for most of these, in 256 color space the darker color can be picked by the same color
104 |         // index as the brighter form (from the 8 color command set). dark grey is an outlier,
105 |         // where 38;5;0 and 30 both are black. there is no "grey" in the shorter command set to
106 |         // map to. but it turns out that 38;5;8m is exactly the darker grey to use.
107 |         match color {
108 |             Color::Black => "\x1b[30m",
109 |             Color::DarkGrey => "\x1b[38;5;8m",
110 |             Color::Red => "\x1b[31m",
111 |             Color::DarkRed => "\x1b[38;5;1m",
112 |             Color::Green => "\x1b[32m",
113 |             Color::DarkGreen => "\x1b[38;5;2m",
114 |             Color::Yellow => "\x1b[33m",
115 |             Color::DarkYellow => "\x1b[38;5;3m",
116 |             Color::Blue => "\x1b[34m",
117 |             Color::DarkBlue => "\x1b[38;5;4m",
118 |             Color::Magenta => "\x1b[35m",
119 |             Color::DarkMagenta => "\x1b[38;5;5m",
120 |             Color::Cyan => "\x1b[36m",
121 |             Color::DarkCyan => "\x1b[38;5;6m",
122 |             Color::White => "\x1b[37m",
123 |             Color::Grey => "\x1b[38;5;7m",
124 |         }
125 |     }
126 | 
127 |     // could reasonably be always present, but only used if feature="alloc"
128 |     #[cfg(feature="alloc")]
129 |     const DEFAULT_FG: &'static str = "\x1b[39m";
130 | 
131 |     #[cfg(feature="alloc")]
132 |     mod ansi_display_sink {
133 |         use crate::color_new::{Color, YaxColors};
134 |         use crate::display::DisplaySink;
135 | 
136 |         /// adapter to insert ANSI color command sequences in formatted text to style printed
137 |         /// instructions.
138 |         ///
139 |         /// this enables similar behavior as the deprecated [`crate::Colorize`] trait,
140 |         /// for outputs that can process ANSI color commands.
141 |         ///
142 |         /// `AnsiDisplaySink` silently ignores errors when writing color sequences to the underlying
143 |         /// `T: DisplaySink`. when writing to a string or other growable buffer, errors are likely
144 |         /// inseparable from `abort()`. when writing to stdout or stderr, write failures likely
145 |         /// mean output is piped to a process which has closed the pipe but are otherwise harmless.
146 |         /// `span_start_*` and `span_end_*` don't have error reporting mechanisms in their return
147 |         /// type, so the only available error mechanism would be to also `abort()`.
148 |         ///
149 |         /// if this turns out to be a bad decision, it'll have to be rethought!
150 |         pub struct AnsiDisplaySink<'sink, T: DisplaySink, Y: YaxColors> {
151 |             out: &'sink mut T,
152 |             span_stack: alloc::vec::Vec<Color>,
153 |             colors: Y
154 |         }
155 | 
156 |         impl<'sink, T: DisplaySink, Y: YaxColors> AnsiDisplaySink<'sink, T, Y> {
157 |             /// wrap `out`, taking span colors from `colors`. no span is open initially.
158 |             pub fn new(out: &'sink mut T, colors: Y) -> Self {
159 |                 let span_stack = alloc::vec::Vec::new();
160 |                 Self { out, span_stack, colors }
161 |             }
162 | 
163 |             /// enter a span: remember `color`, then write its ANSI sequence (errors ignored).
164 |             fn push_color(&mut self, color: Color) {
165 |                 self.span_stack.push(color);
166 |                 let _ = self.out.write_fixed_size(super::color2ansi(color));
167 |             }
168 | 
169 |             /// leave the innermost span and re-emit the enclosing color (or the default foreground).
170 |             fn restore_prev_color(&mut self) {
171 |                 let _ = self.span_stack.pop();
172 |                 let sequence = match self.span_stack.last() {
173 |                     Some(&enclosing) => super::color2ansi(enclosing),
174 |                     None => super::DEFAULT_FG,
175 |                 };
176 |                 let _ = self.out.write_fixed_size(sequence);
177 |             }
178 |         }
179 | 
180 |         impl<'sink, T: DisplaySink, Y: YaxColors> core::fmt::Write for AnsiDisplaySink<'sink, T, Y> {
181 |             // plain text gets no styling of its own: both methods hand their input to the wrapped
182 |             // sink unchanged and return whatever it reports.
183 |             fn write_str(&mut self, s: &str) -> core::fmt::Result { self.out.write_str(s) }
184 | 
185 |             // forwarded explicitly so the wrapped sink's own `write_char` is the one that runs.
186 |             fn write_char(&mut self, c: char) -> core::fmt::Result { self.out.write_char(c) }
187 |         }
188 | 
189 |         impl<'sink, T: DisplaySink, Y: YaxColors> DisplaySink for AnsiDisplaySink<'sink, T, Y> {
190 |             fn span_start_immediate(&mut self) { self.push_color(self.colors.number()) }
191 |             fn span_end_immediate(&mut self) { self.restore_prev_color() }
192 | 
193 |             fn span_start_register(&mut self) { self.push_color(self.colors.register()) }
194 |             fn span_end_register(&mut self) { self.restore_prev_color() }
195 | 
196 |             // opcodes all get the `misc_op` color for now. picking a per-kind color would mean
197 |             // buffering the text written inside the opcode span and asking a user-provided table to
198 |             // classify the mnemonic, which is unfortunate. a dedicated span such as
199 |             // `opcode_kind(u8)`, letting impls report the kind of opcode they emit, may be better.
200 |             fn span_start_opcode(&mut self) { self.push_color(self.colors.misc_op()) }
201 |             fn span_end_opcode(&mut self) { self.restore_prev_color() }
202 | 
203 |             fn span_start_program_counter(&mut self) { self.push_color(self.colors.program_counter()) }
204 |             fn span_end_program_counter(&mut self) { self.restore_prev_color() }
205 | 
206 |             fn span_start_number(&mut self) { self.push_color(self.colors.number()) }
207 |             fn span_end_number(&mut self) { self.restore_prev_color() }
208 | 
209 |             fn span_start_address(&mut self) { self.push_color(self.colors.address()) }
210 |             fn span_end_address(&mut self) { self.restore_prev_color() }
211 | 
212 |             fn span_start_function_expr(&mut self) { self.push_color(self.colors.function()) }
213 |             fn span_end_function_expr(&mut self) { self.restore_prev_color() }
214 |         }
215 |     }
216 |     #[cfg(feature="alloc")]
217 |     pub use ansi_display_sink::AnsiDisplaySink;
218 | }
219 | /// a ready-made [`YaxColors`] implementation with one fixed color for every category.
220 | pub struct DefaultColors;
221 | 
222 | impl YaxColors for DefaultColors {
223 |     fn arithmetic_op(&self) -> Color {
224 |         Color::Yellow
225 |     }
226 |     fn stack_op(&self) -> Color {
227 |         Color::DarkMagenta
228 |     }
229 |     fn nop_op(&self) -> Color {
230 |         Color::DarkBlue
231 |     }
232 |     fn stop_op(&self) -> Color {
233 |         Color::Red
234 |     }
235 |     fn control_flow_op(&self) -> Color {
236 |         Color::DarkGreen
237 |     }
238 |     fn data_op(&self) -> Color {
239 |         Color::Magenta
240 |     }
241 |     fn comparison_op(&self) -> Color {
242 |         Color::DarkYellow
243 |     }
244 |     fn invalid_op(&self) -> Color {
245 |         Color::DarkRed
246 |     }
247 |     fn platform_op(&self) -> Color {
248 |         Color::DarkCyan
249 |     }
250 |     fn misc_op(&self) -> Color {
251 |         Color::Cyan
252 |     }
253 |     // methods are listed in the same order as the `YaxColors` trait declares them.
254 |     fn register(&self) -> Color {
255 |         Color::DarkCyan
256 |     }
257 |     fn program_counter(&self) -> Color {
258 |         Color::DarkRed
259 |     }
260 |     fn number(&self) -> Color {
261 |         Color::White
262 |     }
263 |     fn zero(&self) -> Color {
264 |         Color::White
265 |     }
266 |     fn one(&self) -> Color {
267 |         Color::White
268 |     }
269 |     fn minus_one(&self) -> Color {
270 |         Color::White
271 |     }
272 |     fn address(&self) -> Color {
273 |         Color::DarkGreen
274 |     }
275 |     fn symbol(&self) -> Color {
276 |         Color::Green
277 |     }
278 |     fn function(&self) -> Color {
279 |         Color::Green
280 |     }
281 | }
282 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | #![no_std]
  2 | #![doc = include_str!("../README.md")]
  3 | 
  4 | #[cfg(feature = "alloc")]
  5 | extern crate alloc;
  6 | 
  7 | use core::fmt::{self, Debug, Display};
  8 | use core::hash::Hash;
  9 | 
 10 | #[cfg(feature="use-serde")]
 11 | #[macro_use] extern crate serde_derive;
 12 | #[cfg(feature="use-serde")]
 13 | use serde::{Serialize, Deserialize};
 14 | 
 15 | mod address;
 16 | pub use address::{Address, AddressBase, AddressDiff, AddressDiffAmount, AddressDisplay};
 17 | pub use address::{AddressDisplayUsize, AddressDisplayU64, AddressDisplayU32, AddressDisplayU16};
 18 | #[cfg(feature="address-parse")]
 19 | pub use address::AddrParse;
 20 | 
 21 | pub mod annotation;
 22 | 
 23 | #[deprecated(since="0.3.0", note="yaxpeax_arch::color conflates output mechanism and styling, leaving it brittle and overly-restrictive. see `yaxpeax_arch::color_new`, which will replace `color` in a future version.")]
 24 | mod color;
 25 | #[allow(deprecated)] // allow exporting the deprecated items here to not break downstreams even further...
 26 | pub use color::{Colorize, NoColors, YaxColors};
 27 | #[cfg(feature="color-new")]
 28 | pub mod color_new;
 29 | 
 30 | pub mod display;
 31 | 
 32 | mod reader;
 33 | pub use reader::{Reader, ReaderBuilder, ReadError, U8Reader, U16le, U16be, U32le, U32be, U64le, U64be};
 34 | 
 35 | pub mod safer_unchecked;
 36 | 
 37 | pub mod testkit;
 38 | 
/// the minimum set of errors a `yaxpeax-arch` disassembler may produce.
///
/// it is permissible for an implementer of `DecodeError` to have items that return `false` for
/// all these functions; decoders are permitted to error in a way that `yaxpeax-arch` does not
/// know about.
pub trait DecodeError: PartialEq + Display + Debug + Send + Sync + 'static {
    /// did the decoder fail because it reached the end of input?
    fn data_exhausted(&self) -> bool;
    /// did the decoder error because the instruction's opcode is invalid?
    ///
    /// this may not be a sensical question for some instruction sets - `bad_opcode` should
    /// generally indicate an issue with the instruction itself. this is in contrast to one
    /// specific operand being invalid for the instruction, or some other issue to do with decoding
    /// data beyond the top-level instruction. the "opcode"/"operand" distinction is often fuzzy
    /// and left as best-effort for decoder implementers.
    fn bad_opcode(&self) -> bool;
    /// did the decoder error because an operand of the instruction to decode is invalid?
    ///
    /// similar to [`DecodeError::bad_opcode`], this is a subjective distinction and best-effort on
    /// the part of implementers.
    fn bad_operand(&self) -> bool;
    /// a human-friendly description of this decode error.
    ///
    /// the return type is `&'static str`, so the text cannot carry details computed at runtime.
    fn description(&self) -> &'static str;
}
 63 | 
/// a minimal enum implementing `DecodeError`. this is intended to be enough for a low effort,
/// low-fidelity error taxonomy, without boilerplate of a `DecodeError` implementation.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum StandardDecodeError {
    /// the input ended before decoding finished; the variant for which
    /// [`DecodeError::data_exhausted`] is `true`.
    ExhaustedInput,
    /// the variant for which [`DecodeError::bad_opcode`] is `true`.
    InvalidOpcode,
    /// the variant for which [`DecodeError::bad_operand`] is `true`.
    InvalidOperand,
}
 72 | 
/// a slightly less minimal enum `DecodeError`. similar to `StandardDecodeError`, this is an
/// anti-boilerplate measure. it additionally provides `IncompleteDecoder`, making it suitable to
/// represent error kinds for decoders that are ... not yet complete.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum StandardPartialDecoderError {
    /// the input ended before decoding finished; the variant for which
    /// [`DecodeError::data_exhausted`] is `true`.
    ExhaustedInput,
    /// the variant for which [`DecodeError::bad_opcode`] is `true`.
    InvalidOpcode,
    /// the variant for which [`DecodeError::bad_operand`] is `true`.
    InvalidOperand,
    /// the decoder itself is unfinished. none of the three `DecodeError` predicates return
    /// `true` for this variant.
    IncompleteDecoder,
}
 83 | 
 84 | #[cfg(feature = "std")]
 85 | extern crate std;
// `std::error::Error` for `std` builds; `Display` and `Debug` are provided separately.
#[cfg(feature = "std")]
impl std::error::Error for StandardDecodeError {
    // reuse the `DecodeError` text. the fully-qualified call selects `DecodeError::description`
    // explicitly, rather than this method of the same name.
    fn description(&self) -> &str {
        <Self as DecodeError>::description(self)
    }
}
// `std::error::Error` for `std` builds; `Display` and `Debug` are provided separately.
#[cfg(feature = "std")]
impl std::error::Error for StandardPartialDecoderError {
    // reuse the `DecodeError` text. the fully-qualified call selects `DecodeError::description`
    // explicitly, rather than this method of the same name.
    fn description(&self) -> &str {
        <Self as DecodeError>::description(self)
    }
}
 98 | 
 99 | impl fmt::Display for StandardDecodeError {
100 |     fn fmt(&self, f:  &mut fmt::Formatter) -> fmt::Result {
101 |         f.write_str(self.description())
102 |     }
103 | }
104 | 
105 | impl fmt::Display for StandardPartialDecoderError {
106 |     fn fmt(&self, f:  &mut fmt::Formatter) -> fmt::Result {
107 |         f.write_str(self.description())
108 |     }
109 | }
110 | 
111 | impl DecodeError for StandardDecodeError {
112 |     fn data_exhausted(&self) -> bool { *self == StandardDecodeError::ExhaustedInput }
113 |     fn bad_opcode(&self) -> bool { *self == StandardDecodeError::InvalidOpcode }
114 |     fn bad_operand(&self) -> bool { *self == StandardDecodeError::InvalidOperand }
115 |     fn description(&self) -> &'static str {
116 |         match self {
117 |             StandardDecodeError::ExhaustedInput => "exhausted input",
118 |             StandardDecodeError::InvalidOpcode => "invalid opcode",
119 |             StandardDecodeError::InvalidOperand => "invalid operand",
120 |         }
121 |     }
122 | }
123 | 
124 | impl DecodeError for StandardPartialDecoderError {
125 |     fn data_exhausted(&self) -> bool { *self == StandardPartialDecoderError::ExhaustedInput }
126 |     fn bad_opcode(&self) -> bool { *self == StandardPartialDecoderError::InvalidOpcode }
127 |     fn bad_operand(&self) -> bool { *self == StandardPartialDecoderError::InvalidOperand }
128 |     fn description(&self) -> &'static str {
129 |         match self {
130 |             StandardPartialDecoderError::ExhaustedInput => "exhausted input",
131 |             StandardPartialDecoderError::InvalidOpcode => "invalid opcode",
132 |             StandardPartialDecoderError::InvalidOperand => "invalid operand",
133 |             StandardPartialDecoderError::IncompleteDecoder => "incomplete decoder",
134 |         }
135 |     }
136 | }
137 | 
138 | /*
139 | #[derive(Copy, Clone)]
140 | struct NoDescription {}
141 | 
142 | impl fmt::Display for NoDescription {
143 |     fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
144 |         Ok(())
145 |     }
146 | }
147 | */
148 | 
149 | /// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s. errors are
150 | /// the architecture-defined [`DecodeError`] implemention.
151 | pub trait Decoder<A: Arch + ?Sized> {
152 |     /// decode one instruction for this architecture from the [`crate::Reader`] of this
153 |     /// architecture's `Word`.
154 |     fn decode<T: Reader<A::Address, A::Word>>(&self, words: &mut T) -> Result<A::Instruction, A::DecodeError> {
155 |         let mut inst = A::Instruction::default();
156 |         self.decode_into(&mut inst, words).map(|_: ()| inst)
157 |     }
158 | 
159 |     /// decode one instruction for this architecture from the [`crate::Reader`] of this
160 |     /// architecture's `Word`, writing into the provided `inst`.
161 |     ///
162 |     /// SAFETY:
163 |     ///
164 |     /// while `inst` MUST be left in a state that does not violate Rust's safety guarantees,
165 |     /// implementers are NOT obligated to leave `inst` in a semantically meaningful state if
166 |     /// decoding fails. if `decode_into` returns an error, callers may find contradictory and
167 |     /// useless information in `inst`, as well as *stale data* from whatever was passed in.
168 |     fn decode_into<T: Reader<A::Address, A::Word>>(&self, inst: &mut A::Instruction, words: &mut T) -> Result<(), A::DecodeError>;
169 | }
170 | 
// `AddressBounds` is the bound `Arch::Address` must satisfy. it has no items of its own and is
// blanket-implemented below for every type meeting the listed bounds. with `use-serde` enabled
// the bounds additionally include `Serialize` and `Deserialize`.
#[cfg(feature = "use-serde")]
pub trait AddressBounds: Address + Debug + Hash + PartialEq + Eq + Serialize + for<'de> Deserialize<'de> {}
#[cfg(not(feature = "use-serde"))]
pub trait AddressBounds: Address + Debug + Hash + PartialEq + Eq {}

// blanket impls: any type with the right bounds is automatically an `AddressBounds`.
#[cfg(feature = "use-serde")]
impl<T> AddressBounds for T where T: Address + Debug + Hash + PartialEq + Eq + Serialize + for<'de> Deserialize<'de> {}
#[cfg(not(feature = "use-serde"))]
impl<T> AddressBounds for T where T: Address + Debug + Hash + PartialEq + Eq {}
180 | 
#[cfg(feature = "std")]
/// this is not a particularly interesting trait. it just exists to add a `std::error::Error`
/// bound onto `DecodeError` for `std` builds.
pub trait DecodeErrorBounds: std::error::Error + DecodeError {}
// blanket impl: every `DecodeError` that is also a `std::error::Error` qualifies.
#[cfg(feature = "std")]
impl<T: std::error::Error + DecodeError> DecodeErrorBounds for T {}
#[cfg(not(feature = "std"))]
/// this is not a particularly interesting trait. it just exists to add a `std::error::Error`
/// bound onto `DecodeError` for `std` builds. in this (non-`std`) configuration it requires
/// nothing beyond `DecodeError`.
pub trait DecodeErrorBounds: DecodeError {}
// blanket impl: without `std`, every `DecodeError` qualifies.
#[cfg(not(feature = "std"))]
impl<T: DecodeError> DecodeErrorBounds for T {}
193 | 
194 | 
/// a collection of associated type parameters that constitute the definitions for an instruction
/// set. `Arch` provides an `Instruction` and its associated `Operand`s, which is guaranteed to be
/// decodable by this `Arch::Decoder`. `Arch::Decoder` can always be constructed with a `Default`
/// implementation, and decodes from a `Reader<Arch::Address, Arch::Word>`.
///
/// `Arch` is suitable as the foundational trait to implement more complex logic on top of; for
/// example, it would be entirely expected to have a
/// ```text
/// pub fn emulate<A: Arch, E: Emulator<A>>(
///     reader: &mut Reader<A::Address, A::Word>,
///     emu: &mut E
/// ) -> Result<A::Address, DecodeOrEvaluationError>;
/// ```
///
/// in some library built on top of `yaxpeax-arch`.
pub trait Arch {
    /// the unit a `Reader` yields to this architecture's decoder.
    type Word: Debug + Display + PartialEq + Eq;
    /// the address type for this architecture; also the `Address` parameter of the `Reader`.
    type Address: AddressBounds;
    /// a decoded instruction. its `LengthedInstruction::Unit` is an `AddressDiff` over
    /// `Self::Address`, and `Default` is required so `Decoder::decode` can create one to fill in.
    type Instruction: Instruction + LengthedInstruction<Unit=AddressDiff<Self::Address>> + Debug + Default + Sized;
    /// the error type this architecture's decoder reports.
    type DecodeError: DecodeErrorBounds + Debug + Display;
    /// the decoder for this architecture, constructible through `Default`.
    type Decoder: Decoder<Self> + Default;
    /// the operand type associated with `Self::Instruction`. no bounds are required of it here.
    type Operand;
}
218 | 
/// instructions have lengths, and minimum possible sizes for advancing a decoder on error.
///
/// unfortunately, this means calling `x.len()` for some `Arch::Instruction` requires importing
/// this trait. sorry.
pub trait LengthedInstruction {
    /// the type lengths are expressed in; `Arch` requires this to be `AddressDiff<Arch::Address>`.
    type Unit;
    /// the length, in terms of `Unit`, of this instruction. because `Unit` will be a diff of an
    /// architecture's `Address` type, this almost always is a number of bytes. implementations
    /// should indicate if this is ever not the case.
    fn len(&self) -> Self::Unit;
    /// the length, in terms of `Unit`, of the shortest possible instruction in a given
    /// architecture. because `Unit` will be a diff of an architecture's `Address` type, this
    /// almost always is a number of bytes. implementations should indicate if this is ever not the
    /// case.
    fn min_size() -> Self::Unit;
}
235 | 
/// the minimal interface every `Arch::Instruction` must provide.
pub trait Instruction {
    // NOTE(review): the meaning of "well defined" is not stated anywhere in this file; presumably
    // it reports whether the decoded instruction has architecturally-defined behavior - confirm
    // with implementers before relying on it.
    fn well_defined(&self) -> bool;
}
239 | 
// deprecated (see the attribute's note). `contextualize` receives a color scheme, an address, an
// optional context, and an output sink; presumably implementers render `self` into `out` - the
// exact output contract is not specified here.
#[allow(deprecated)]
#[deprecated(since="0.3.0", note="ShowContextual ties YaxColors and fmt::Write in a way that only sometimes composes. simultaneously, it is too generic on Ctx, making it difficult to implement and use. it will be revisited in the future.")]
pub trait ShowContextual<Addr, Ctx: ?Sized, T: fmt::Write, Y: YaxColors> {
    fn contextualize(&self, colors: &Y, address: Addr, context: Option<&Ctx>, out: &mut T) -> fmt::Result;
}
245 | 
246 | /*
247 | impl <C: ?Sized, T: fmt::Write, U: Colorize<T>> ShowContextual<C, T> for U {
248 |     fn contextualize(&self, colors: Option<&ColorSettings>, context: Option<&C>, out: &mut T) -> fmt::Result {
249 |         self.colorize(colors, out)
250 |     }
251 | }
252 | */
253 | 


--------------------------------------------------------------------------------
/src/reader.rs:
--------------------------------------------------------------------------------
  1 | use crate::{StandardDecodeError, StandardPartialDecoderError};
  2 | 
  3 | impl From<ReadError> for StandardDecodeError {
  4 |     fn from(_: ReadError) -> StandardDecodeError {
  5 |         StandardDecodeError::ExhaustedInput
  6 |     }
  7 | }
  8 | 
  9 | impl From<ReadError> for StandardPartialDecoderError {
 10 |     fn from(_: ReadError) -> StandardPartialDecoderError {
 11 |         StandardPartialDecoderError::ExhaustedInput
 12 |     }
 13 | }
 14 | 
/// the ways a [`Reader`] can fail to produce items.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum ReadError {
    /// the reader could not supply as many items as were requested.
    ExhaustedInput,
    /// some other failure, carried as a static message.
    // NOTE(review): nothing in this file constructs `IOError`; presumably it is reserved for
    // readers backed by real I/O - confirm.
    IOError(&'static str),
}
 20 | 
/// a trait defining how `Item`-sized words are read at `Address`-positioned offsets into some
/// stream of data. for *most* uses, [`crate::U8Reader`] probably is sufficient. when
/// reading from data sources that aren't `&[u8]`, `Address` isn't a multiple of `u8`, or `Item`
/// isn't a multiple of 8 bits, `U8Reader` won't be sufficient.
pub trait Reader<Address, Item> {
    /// read a single `Item` from this reader.
    ///
    /// the `U8Reader` implementations return `ReadError::ExhaustedInput` when fewer bytes than
    /// one whole item remain.
    fn next(&mut self) -> Result<Item, ReadError>;
    /// read `buf.len()`-many items from this reader in bulk.
    ///
    /// if `Reader` cannot read `buf.len()`-many items, return `ReadError::ExhaustedInput`.
    fn next_n(&mut self, buf: &mut [Item]) -> Result<(), ReadError>;
    /// mark the current position as where to measure `offset` against.
    fn mark(&mut self);
    /// the difference, in `Address`, between the current `Reader` position and its last `mark`.
    /// when created, a `Reader`'s initial position is `mark`ed, so creating a `Reader` and
    /// immediately calling `offset()` must return `Address::zero()`.
    fn offset(&mut self) -> Address;
    /// the difference, in `Address`, between the current `Reader` position and the initial offset
    /// when constructed.
    fn total_offset(&mut self) -> Address;
}
 41 | 
 42 | /// a trait defining how to build a `Reader<Address, Item>` from some data source (`Self`).
 43 | /// definitions of `ReaderBuilder` are provided for `U8Reader` on `Address` and `Word` types that
 44 | /// `yaxpeax_arch` provides - external decoder implementations should also provide `ReaderBuilder`
 45 | /// impls if they use custom `Reader` types.
 46 | pub trait ReaderBuilder<Address: crate::AddressBase, Item> where Self: Sized {
 47 |     type Result: Reader<Address, Item>;
 48 | 
 49 |     /// construct a reader from `data` beginning at `addr` from its beginning.
 50 |     fn read_at(data: Self, addr: Address) -> Self::Result;
 51 |     /// construct a reader from `data` beginning at the start of `data`.
 52 |     fn read_from(data: Self) -> Self::Result {
 53 |         Self::read_at(data, Address::zero())
 54 |     }
 55 | }
 56 | 
 57 | /// a struct for `Reader` impls that can operate on units of `u8`.
 58 | pub struct U8Reader<'a> {
 59 |     start: *const u8,
 60 |     data: *const u8,
 61 |     end: *const u8,
 62 |     mark: *const u8,
 63 |     _lifetime: core::marker::PhantomData<&'a [u8]>,
 64 | }
 65 | 
 66 | impl<'a> U8Reader<'a> {
 67 |     pub fn new(data: &'a [u8]) -> U8Reader<'a> {
 68 | 
 69 |         // WHY: either on <64b systems we panic on `data.len() > isize::MAX`, or we compute end
 70 |         // without `offset` (which would be UB for such huge slices)
 71 |         #[cfg(not(target_pointer_width = "64"))]
 72 |         let end = data.as_ptr().wrapping_add(data.len());
 73 | 
 74 |         // SAFETY: the slice was valid, so data + data.len() does not overflow. at the moment,
 75 |         // there aren't 64-bit systems with 63 bits of virtual address space, so it's not possible
 76 |         // to have a slice length larger than 64-bit isize::MAX.
 77 |         #[cfg(target_pointer_width = "64")]
 78 |         let end = unsafe { data.as_ptr().offset(data.len() as isize) };
 79 | 
 80 |         U8Reader {
 81 |             start: data.as_ptr(),
 82 |             data: data.as_ptr(),
 83 |             end,
 84 |             mark: data.as_ptr(),
 85 |             _lifetime: core::marker::PhantomData,
 86 |         }
 87 |     }
 88 | }
 89 | 
 90 | /* a `std::io::Read`-friendly `Reader` would take some thought. this was an old impl, and now would
 91 |  * require something like
 92 |  * ```
 93 |  * pub struct IoReader<'io, T: std::io::Read> {
 94 |  *   io: &io mut T,
 95 |  *   count: u64,
 96 |  *   start: u64,
 97 |  * }
 98 |  * ```
 99 |  */
100 | /*
101 | #[cfg(feature = "std")]
102 | impl<T: std::io::Read> Reader<u8> for T {
103 |     fn next(&mut self) -> Result<u8, ReadError> {
104 |         let mut buf = [0u8];
105 |         match self.read(&mut buf) {
106 |             Ok(0) => { Err(ReadError::ExhaustedInput) }
107 |             Ok(1) => { Ok(buf[0]) }
108 |             Err(_) => {
109 |                 Err(ReadError::IOError("error"))
110 |             }
111 |         }
112 |     }
113 | }
114 | */
115 | 
// define a newtype `$name` around the integer type `$underlying`.
//
// the wrapper is `#[repr(transparent)]` so that it is guaranteed to have the same size, alignment
// and layout as `$underlying`; the bulk `next_n` readers below copy raw bytes straight into
// slices of these wrappers and depend on that guarantee.
macro_rules! word_wrapper {
    ($name:ident, $underlying:ident) => {
        #[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Copy, Clone)]
        #[repr(transparent)]
        pub struct $name(pub $underlying);

        impl core::fmt::Display for $name {
            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                // delegate to the inner integer so width, fill, sign and similar formatter flags
                // are honored instead of being silently dropped.
                core::fmt::Display::fmt(&self.0, f)
            }
        }
    }
}

// the `le`/`be` suffix names the byte order the word is read from in the input; the wrapped
// value itself is an ordinary host integer.
word_wrapper!(U16le, u16);
word_wrapper!(U16be, u16);
word_wrapper!(U32le, u32);
word_wrapper!(U32be, u32);
word_wrapper!(U64le, u64);
word_wrapper!(U64be, u64);
135 | 
// implement `Reader<$addr_size, $word>` for `U8Reader`, and `ReaderBuilder<$addr_size, $word>`
// for `&[u8]`.
//
// - `$word_from_slice` is called as `f(ptr: *const u8) -> $word`, and may read
//   `size_of::<$word>()` bytes starting at `ptr`.
// - `$words_from_slice` is called as `f(ptr: *const u8, buf: &mut [$word])`, and may read
//   `buf.len() * size_of::<$word>()` bytes starting at `ptr`.
//
// both are only called after the generated code has checked that that many bytes remain.
macro_rules! u8reader_reader_impl {
    ($addr_size:ident, $word:ident, $word_from_slice:expr, $words_from_slice:expr) => {
        impl Reader<$addr_size, $word> for U8Reader<'_> {
            #[inline]
            fn next(&mut self) -> Result<$word, ReadError> {
                let data_size = self.end as usize - self.data as usize;

                if core::mem::size_of::<$word>() > data_size {
                    return Err(ReadError::ExhaustedInput);
                }

                // `word_from_slice` knows that we have bounds-checked that `word`-many bytes are
                // available.
                let word = $word_from_slice(self.data);
                // SAFETY: the check above established that `size_of::<$word>()` bytes remain
                // before `self.end`, so the advanced pointer is at most one past the end of the
                // underlying slice.
                unsafe {
                    self.data = self.data.add(core::mem::size_of::<$word>());
                }
                Ok(word)
            }
            #[inline]
            fn next_n(&mut self, buf: &mut [$word]) -> Result<(), ReadError> {
                let data_size = self.end as usize - self.data as usize;

                let words_size_bytes = buf.len() * core::mem::size_of::<$word>();
                if words_size_bytes > data_size {
                    return Err(ReadError::ExhaustedInput);
                }

                // `words_from_slice` knows that we have bounds-checked that `buf`-many words are
                // available.
                $words_from_slice(self.data, buf);
                // SAFETY: the check above established that `words_size_bytes` bytes remain
                // before `self.end`, so the advanced pointer is at most one past the end of the
                // underlying slice.
                unsafe {
                    self.data = self.data.add(words_size_bytes);
                }
                Ok(())
            }
            #[inline]
            fn mark(&mut self) {
                self.mark = self.data;
            }
            #[inline]
            fn offset(&mut self) -> $addr_size {
                // divide in `usize` *before* narrowing to the address type: narrowing the byte
                // count first can truncate it even when the resulting word count would fit.
                let bytes = self.data as usize - self.mark as usize;
                (bytes / core::mem::size_of::<$word>()) as $addr_size
            }
            #[inline]
            fn total_offset(&mut self) -> $addr_size {
                // as in `offset`: divide first, then narrow.
                let bytes = self.data as usize - self.start as usize;
                (bytes / core::mem::size_of::<$word>()) as $addr_size
            }
        }

        impl<'data> ReaderBuilder<$addr_size, $word> for &'data [u8] {
            type Result = U8Reader<'data>;

            // `addr` is used as a byte index into `data`; slicing panics if it is larger than
            // `data.len()`.
            fn read_at(data: Self, addr: $addr_size) -> Self::Result {
                U8Reader::new(&data[(addr as usize)..])
            }
        }
    }
}
197 | 
// expand `u8reader_reader_impl!` for `$word` once per supported address type: `u64`, `u32`, and
// `u16`. the two closure expressions are passed through unchanged to each expansion.
macro_rules! u8reader_each_addr_size {
    ($word:ident, $word_from_slice:expr, $words_from_slice:expr) => {
        u8reader_reader_impl!(u64, $word, $word_from_slice, $words_from_slice);
        u8reader_reader_impl!(u32, $word, $word_from_slice, $words_from_slice);
        u8reader_reader_impl!(u16, $word, $word_from_slice, $words_from_slice);
    }
}
205 | u8reader_each_addr_size!(u8,
206 |     |ptr: *const u8| { unsafe { core::ptr::read(ptr) } },
207 |     |ptr: *const u8, buf: &mut [u8]| {
208 |         unsafe {
209 |             core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr(), buf.len())
210 |         }
211 |     }
212 | );
213 | 
214 | u8reader_each_addr_size!(U16le,
215 |     |ptr: *const u8| {
216 |         let mut word = [0u8; 2];
217 |         unsafe {
218 |             core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len());
219 |         }
220 |         U16le(u16::from_le_bytes(word))
221 |     },
222 |     |ptr: *const u8, buf: &mut [U16le]| {
223 |         // `U16le` are layout-identical to u16, so we can just copy into buf
224 |         unsafe {
225 |             core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::<U16le>())
226 |         }
227 |     }
228 | );
229 | 
230 | u8reader_each_addr_size!(U32le,
231 |     |ptr: *const u8| {
232 |         let mut word = [0u8; 4];
233 |         unsafe {
234 |             core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len());
235 |         }
236 |         U32le(u32::from_le_bytes(word))
237 |     },
238 |     |ptr: *const u8, buf: &mut [U32le]| {
239 |         // `U32le` are layout-identical to u32, so we can just copy into buf
240 |         unsafe {
241 |             core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::<U32le>())
242 |         }
243 |     }
244 | );
245 | 
246 | u8reader_each_addr_size!(U64le,
247 |     |ptr: *const u8| {
248 |         let mut word = [0u8; 8];
249 |         unsafe {
250 |             core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len());
251 |         }
252 |         U64le(u64::from_le_bytes(word))
253 |     },
254 |     |ptr: *const u8, buf: &mut [U64le]| {
255 |         // `U64le` are layout-identical to u64, so we can just copy into buf
256 |         unsafe {
257 |             core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::<U64le>())
258 |         }
259 |     }
260 | );
261 | 
262 | u8reader_each_addr_size!(U16be,
263 |     |ptr: *const u8| {
264 |         let mut word = [0u8; 2];
265 |         unsafe {
266 |             core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len());
267 |         }
268 |         U16be(u16::from_be_bytes(word))
269 |     },
270 |     |ptr: *const u8, buf: &mut [U16be]| {
271 |         // `U16be` are layout-identical to u16, so we can just copy into buf
272 |         unsafe {
273 |             core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::<U16be>())
274 |         }
275 | 
276 |         // but now we have to bswap all the words
277 |         for i in 0..buf.len() {
278 |             buf[i] = U16be(buf[i].0.swap_bytes());
279 |         }
280 |     }
281 | );
282 | 
283 | u8reader_each_addr_size!(U32be,
284 |     |ptr: *const u8| {
285 |         let mut word = [0u8; 4];
286 |         unsafe {
287 |             core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len());
288 |         }
289 |         U32be(u32::from_be_bytes(word))
290 |     },
291 |     |ptr: *const u8, buf: &mut [U32be]| {
292 |         // `U32be` are layout-identical to u32, so we can just copy into buf
293 |         unsafe {
294 |             core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::<U32be>())
295 |         }
296 | 
297 |         // but now we have to bswap all the words
298 |         for i in 0..buf.len() {
299 |             buf[i] = U32be(buf[i].0.swap_bytes());
300 |         }
301 |     }
302 | );
303 | 
304 | u8reader_each_addr_size!(U64be,
305 |     |ptr: *const u8| {
306 |         let mut word = [0u8; 8];
307 |         unsafe {
308 |             core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len());
309 |         }
310 |         U64be(u64::from_be_bytes(word))
311 |     },
312 |     |ptr: *const u8, buf: &mut [U64be]| {
313 |         // `U64be` are layout-identical to u64, so we can just copy into buf
314 |         unsafe {
315 |             core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::<U64be>())
316 |         }
317 | 
318 |         // but now we have to bswap all the words
319 |         for i in 0..buf.len() {
320 |             buf[i] = U64be(buf[i].0.swap_bytes());
321 |         }
322 |     }
323 | );
324 | 


--------------------------------------------------------------------------------
/src/address/mod.rs:
--------------------------------------------------------------------------------
  1 | use core::hash::Hash;
  2 | 
  3 | use core::fmt;
  4 | 
  5 | use core::ops::{Add, Sub, AddAssign, SubAssign};
  6 | 
  7 | use num_traits::identities;
  8 | use num_traits::{Bounded, WrappingAdd, WrappingSub, CheckedAdd, Zero, One};
  9 | 
 10 | #[cfg(feature="use-serde")]
 11 | use serde::{Deserialize, Serialize};
 12 | 
#[cfg(feature="use-serde")]
/// the bounds required of an address type's `Diff`: a copyable, totally-ordered value with
/// `Zero` and `One` identities that can also be serialized and deserialized.
pub trait AddressDiffAmount: Copy + Clone + PartialEq + PartialOrd + Eq + Ord + identities::Zero + identities::One + Serialize + for<'de> Deserialize<'de> {}
#[cfg(not(feature="use-serde"))]
/// the bounds required of an address type's `Diff`: a copyable, totally-ordered value with
/// `Zero` and `One` identities.
pub trait AddressDiffAmount: Copy + Clone + PartialEq + PartialOrd + Eq + Ord + identities::Zero + identities::One {}

// the unsigned integer types usable as a diff amount.
impl AddressDiffAmount for u64 {}
impl AddressDiffAmount for u32 {}
impl AddressDiffAmount for u16 {}
impl AddressDiffAmount for usize {}
 22 | 
/// a struct describing the difference between some pair of `A: Address`. this is primarily useful
/// in describing the size of an instruction, or the relative offset of a branch.
///
/// for any address type `A`, the following must hold:
/// ```rust
/// use yaxpeax_arch::AddressBase;
/// fn diff_check<A: AddressBase + core::fmt::Debug>(left: A, right: A) {
///     let diff = left.diff(&right);
///     if let Some(offset) = diff {
///         assert_eq!(left.wrapping_offset(offset), right);
///     }
/// }
/// ```
///
/// which is to say, `yaxpeax` assumes associativity holds when `diff` yields a `Some`.
///
/// NOTE(review): the `u16` implementation in this module computes `left.diff(&right)` as
/// `left.wrapping_sub(right)`, for which the identity that holds is
/// `right.wrapping_offset(offset) == left`. confirm which direction the example intends.
#[cfg(feature="use-serde")]
#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize)]
pub struct AddressDiff<T: AddressBase> {
    // the AddressDiffAmount trait fools `Deserialize`'s proc macro, so we have to explicitly write
    // the bound serde should use.
    #[serde(bound(deserialize = "T::Diff: AddressDiffAmount"))]
    amount: T::Diff,
}
/// a struct describing the difference between some pair of `A: Address`. this is primarily useful
/// in describing the size of an instruction, or the relative offset of a branch.
///
/// for any address type `A`, the following must hold:
/// ```rust
/// use yaxpeax_arch::AddressBase;
/// fn diff_check<A: AddressBase + core::fmt::Debug>(left: A, right: A) {
///     let diff = left.diff(&right);
///     if let Some(offset) = diff {
///         assert_eq!(left.wrapping_offset(offset), right);
///     }
/// }
/// ```
///
/// which is to say, `yaxpeax` assumes associativity holds when `diff` yields a `Some`.
///
/// NOTE(review): the `u16` implementation in this module computes `left.diff(&right)` as
/// `left.wrapping_sub(right)`, for which the identity that holds is
/// `right.wrapping_offset(offset) == left`. confirm which direction the example intends.
#[cfg(not(feature="use-serde"))]
#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)]
pub struct AddressDiff<T: AddressBase> {
    // the raw magnitude of the difference, in the address type's own `Diff` unit.
    amount: T::Diff,
}
 66 | 
 67 | impl<T: Address> AddressDiff<T> {
 68 |     pub fn from_const(amount: T::Diff) -> Self {
 69 |         AddressDiff { amount }
 70 |     }
 71 |     pub fn to_const(&self) -> T::Diff {
 72 |         self.amount
 73 |     }
 74 | }
 75 | 
 76 | impl<T: Address> fmt::Debug for AddressDiff<T> where T::Diff: fmt::Debug {
 77 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 78 |         write!(f, "AddressDiff({:?})", self.amount)
 79 |     }
 80 | }
 81 | 
 82 | impl<T: Address> AddressDiff<T> {
 83 |     pub fn one() -> Self {
 84 |         AddressDiff {
 85 |             amount: <T as AddressBase>::Diff::one(),
 86 |         }
 87 |     }
 88 | 
 89 |     pub fn zero() -> Self {
 90 |         AddressDiff {
 91 |             amount: <T as AddressBase>::Diff::zero(),
 92 |         }
 93 |     }
 94 | }
 95 | 
 96 | impl Sub<AddressDiff<u16>> for u16 {
 97 |     type Output = Self;
 98 | 
 99 |     fn sub(self, other: AddressDiff<Self>) -> Self::Output {
100 |         self - other.amount
101 |     }
102 | }
103 | 
104 | impl Sub<AddressDiff<u32>> for u32 {
105 |     type Output = Self;
106 | 
107 |     fn sub(self, other: AddressDiff<Self>) -> Self::Output {
108 |         self - other.amount
109 |     }
110 | }
111 | 
112 | impl Sub<AddressDiff<u64>> for u64 {
113 |     type Output = Self;
114 | 
115 |     fn sub(self, other: AddressDiff<Self>) -> Self::Output {
116 |         self - other.amount
117 |     }
118 | }
119 | 
120 | impl Sub<AddressDiff<usize>> for usize {
121 |     type Output = Self;
122 | 
123 |     fn sub(self, other: AddressDiff<Self>) -> Self::Output {
124 |         self - other.amount
125 |     }
126 | }
127 | 
128 | impl Add<AddressDiff<u16>> for u16 {
129 |     type Output = Self;
130 | 
131 |     fn add(self, other: AddressDiff<Self>) -> Self::Output {
132 |         self + other.amount
133 |     }
134 | }
135 | 
136 | impl Add<AddressDiff<u32>> for u32 {
137 |     type Output = Self;
138 | 
139 |     fn add(self, other: AddressDiff<Self>) -> Self::Output {
140 |         self + other.amount
141 |     }
142 | }
143 | 
144 | impl Add<AddressDiff<u64>> for u64 {
145 |     type Output = Self;
146 | 
147 |     fn add(self, other: AddressDiff<Self>) -> Self::Output {
148 |         self + other.amount
149 |     }
150 | }
151 | 
152 | impl Add<AddressDiff<usize>> for usize {
153 |     type Output = Self;
154 | 
155 |     fn add(self, other: AddressDiff<Self>) -> Self::Output {
156 |         self + other.amount
157 |     }
158 | }
159 | 
160 | impl SubAssign<AddressDiff<u16>> for u16 {
161 |     fn sub_assign(&mut self, other: AddressDiff<Self>) {
162 |         *self -= other.amount;
163 |     }
164 | }
165 | 
166 | impl SubAssign<AddressDiff<u32>> for u32 {
167 |     fn sub_assign(&mut self, other: AddressDiff<Self>) {
168 |         *self -= other.amount;
169 |     }
170 | }
171 | 
172 | impl SubAssign<AddressDiff<u64>> for u64 {
173 |     fn sub_assign(&mut self, other: AddressDiff<Self>) {
174 |         *self -= other.amount;
175 |     }
176 | }
177 | 
178 | impl SubAssign<AddressDiff<usize>> for usize {
179 |     fn sub_assign(&mut self, other: AddressDiff<Self>) {
180 |         *self -= other.amount;
181 |     }
182 | }
183 | 
184 | impl AddAssign<AddressDiff<u16>> for u16 {
185 |     fn add_assign(&mut self, other: AddressDiff<Self>) {
186 |         *self += other.amount;
187 |     }
188 | }
189 | 
190 | impl AddAssign<AddressDiff<u32>> for u32 {
191 |     fn add_assign(&mut self, other: AddressDiff<Self>) {
192 |         *self += other.amount;
193 |     }
194 | }
195 | 
196 | impl AddAssign<AddressDiff<u64>> for u64 {
197 |     fn add_assign(&mut self, other: AddressDiff<Self>) {
198 |         *self += other.amount;
199 |     }
200 | }
201 | 
202 | impl AddAssign<AddressDiff<usize>> for usize {
203 |     fn add_assign(&mut self, other: AddressDiff<Self>) {
204 |         *self += other.amount;
205 |     }
206 | }
207 | 
/// the core operations and bounds of an address type: it is copyable, hashable, totally ordered,
/// bounded, has a zero, and can be offset by an [`AddressDiff`] of itself through `+`, `-`, `+=`
/// and `-=`.
pub trait AddressBase where Self:
    AddressDisplay +
    Copy + Clone + Sized + Hash +
    Ord + Eq + PartialEq + Bounded +
    Add<AddressDiff<Self>, Output=Self> + Sub<AddressDiff<Self>, Output=Self> +
    AddAssign<AddressDiff<Self>> + SubAssign<AddressDiff<Self>> +
    identities::Zero +
    Hash {
    /// the scalar type an `AddressDiff<Self>` stores.
    type Diff: AddressDiffAmount;
    /// this address as a `usize`.
    // NOTE(review): the `u16` impl is a plain `as usize` cast; whether wider or non-scalar
    // address types may lose information here is not stated - confirm.
    fn to_linear(&self) -> usize;

    /// compute the `AddressDiff` between `self` and `other`.
    ///
    /// may return `None` if the two addresses aren't comparable. for example, if a pair of
    /// addresses are a data-space address and code-space address, there may be no scalar that can
    /// describe the difference between them.
    fn diff(&self, other: &Self) -> Option<AddressDiff<Self>>;
    /*
    {
        Some(AddressDiff { amount: self.wrapping_sub(other) })
    }
    */

    /// offset this address by `other`; the sketch below wraps around on overflow.
    fn wrapping_offset(&self, other: AddressDiff<Self>) -> Self;
    /*
    {
        self.wrapping_add(&other.amount)
    }
    */

    /// offset this address by `other`; the sketch below uses a checked add, which yields `None`
    /// on overflow.
    fn checked_offset(&self, other: AddressDiff<Self>) -> Option<Self>;
    /*
    {
        self.checked_add(&other.amount)
    }
    */
}
245 | 
/// an [`AddressBase`] that can also be serialized, deserialized, and parsed from text.
///
/// this is the definition used when both the `use-serde` and `address-parse` features are on.
#[cfg(all(feature="use-serde", feature="address-parse"))]
pub trait Address: AddressBase + Serialize + for<'de> Deserialize<'de> + AddrParse {}
252 | 
/// an [`AddressBase`] that can also be serialized and deserialized.
///
/// this is the definition used when `use-serde` is on and `address-parse` is off.
#[cfg(all(feature="use-serde", not(feature="address-parse")))]
pub trait Address: AddressBase + Serialize + for<'de> Deserialize<'de> {}
258 | 
/// an [`AddressBase`] that can also be parsed from text.
///
/// this is the definition used when `address-parse` is on and `use-serde` is off.
#[cfg(all(not(feature="use-serde"), feature="address-parse"))]
pub trait Address: AddressBase + AddrParse {}
263 | 
264 | #[cfg(all(not(feature="use-serde"), not(feature="address-parse")))]
265 | pub trait Address where Self: AddressBase { }
266 | 
267 | impl AddressBase for u16 {
268 |     type Diff = Self;
269 |     fn to_linear(&self) -> usize { *self as usize }
270 | 
271 |     fn diff(&self, other: &Self) -> Option<AddressDiff<Self>> {
272 |         Some(AddressDiff { amount: self.wrapping_sub(other) })
273 |     }
274 |     fn wrapping_offset(&self, other: AddressDiff<Self>) -> Self {
275 |         self.wrapping_add(&other.amount)
276 |     }
277 | 
278 |     fn checked_offset(&self, other: AddressDiff<Self>) -> Option<Self> {
279 |         self.checked_add(&other.amount)
280 |     }
281 | }
282 | 
283 | impl Address for u16 {}
284 | 
285 | impl AddressBase for u32 {
286 |     type Diff = Self;
287 |     fn to_linear(&self) -> usize { *self as usize }
288 | 
289 |     fn diff(&self, other: &Self) -> Option<AddressDiff<Self>> {
290 |         Some(AddressDiff { amount: self.wrapping_sub(other) })
291 |     }
292 |     fn wrapping_offset(&self, other: AddressDiff<Self>) -> Self {
293 |         self.wrapping_add(&other.amount)
294 |     }
295 | 
296 |     fn checked_offset(&self, other: AddressDiff<Self>) -> Option<Self> {
297 |         self.checked_add(&other.amount)
298 |     }
299 | }
300 | 
301 | impl Address for u32 {}
302 | 
303 | impl AddressBase for u64 {
304 |     type Diff = Self;
305 |     fn to_linear(&self) -> usize { *self as usize }
306 | 
307 |     fn diff(&self, other: &Self) -> Option<AddressDiff<Self>> {
308 |         Some(AddressDiff { amount: self.wrapping_sub(other) })
309 |     }
310 |     fn wrapping_offset(&self, other: AddressDiff<Self>) -> Self {
311 |         self.wrapping_add(&other.amount)
312 |     }
313 | 
314 |     fn checked_offset(&self, other: AddressDiff<Self>) -> Option<Self> {
315 |         self.checked_add(&other.amount)
316 |     }
317 | }
318 | 
319 | impl Address for u64 {}
320 | 
321 | impl AddressBase for usize {
322 |     type Diff = Self;
323 |     fn to_linear(&self) -> usize { *self }
324 | 
325 |     fn diff(&self, other: &Self) -> Option<AddressDiff<Self>> {
326 |         Some(AddressDiff { amount: self.wrapping_sub(other) })
327 |     }
328 |     fn wrapping_offset(&self, other: AddressDiff<Self>) -> Self {
329 |         self.wrapping_add(&other.amount)
330 |     }
331 | 
332 |     fn checked_offset(&self, other: AddressDiff<Self>) -> Option<Self> {
333 |         self.checked_add(&other.amount)
334 |     }
335 | }
336 | 
337 | impl Address for usize {}
338 | 
/// gives an address type a form that can be formatted with [`fmt::Display`].
pub trait AddressDisplay {
    /// the displayable value produced by [`AddressDisplay::show`].
    type Show: fmt::Display;
    /// produce the displayable form of this address.
    fn show(&self) -> Self::Show;
}
343 | 
344 | impl AddressDisplay for usize {
345 |     type Show = AddressDisplayUsize;
346 | 
347 |     fn show(&self) -> AddressDisplayUsize {
348 |         AddressDisplayUsize(*self)
349 |     }
350 | }
351 | 
/// displayable form of a `usize` address: lowercase hexadecimal with a leading `0x`.
#[repr(transparent)]
pub struct AddressDisplayUsize(usize);

impl fmt::Display for AddressDisplayUsize {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self(address) = self;
        f.write_fmt(format_args!("{:#x}", address))
    }
}
360 | 
361 | impl AddressDisplay for u64 {
362 |     type Show = AddressDisplayU64;
363 | 
364 |     fn show(&self) -> AddressDisplayU64 {
365 |         AddressDisplayU64(*self)
366 |     }
367 | }
368 | 
/// displayable form of a `u64` address: lowercase hexadecimal with a leading `0x`.
#[repr(transparent)]
pub struct AddressDisplayU64(u64);

impl fmt::Display for AddressDisplayU64 {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self(address) = self;
        f.write_fmt(format_args!("{:#x}", address))
    }
}
377 | 
378 | impl AddressDisplay for u32 {
379 |     type Show = AddressDisplayU32;
380 | 
381 |     fn show(&self) -> AddressDisplayU32 {
382 |         AddressDisplayU32(*self)
383 |     }
384 | }
385 | 
/// displayable form of a `u32` address: lowercase hexadecimal with a leading `0x`.
#[repr(transparent)]
pub struct AddressDisplayU32(u32);

impl fmt::Display for AddressDisplayU32 {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self(address) = self;
        f.write_fmt(format_args!("{:#x}", address))
    }
}
394 | 
395 | impl AddressDisplay for u16 {
396 |     type Show = AddressDisplayU16;
397 | 
398 |     fn show(&self) -> AddressDisplayU16 {
399 |         AddressDisplayU16(*self)
400 |     }
401 | }
402 | 
/// displayable form of a `u16` address: lowercase hexadecimal with a leading `0x`.
#[repr(transparent)]
pub struct AddressDisplayU16(u16);

impl fmt::Display for AddressDisplayU16 {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self(address) = self;
        f.write_fmt(format_args!("{:#x}", address))
    }
}
411 | 
412 | /*
413 |  * TODO: this should be FromStr.
414 |  * that would require newtyping address primitives, though
415 |  *
416 |  * this is not out of the question, BUT is way more work than
417 |  * i want to put in right now
418 |  *
419 |  * this is one of those "clean it up later" situations
420 |  */
421 | #[cfg(feature="address-parse")]
422 | use core::str::FromStr;
423 | 
/// parse an address from a string.
///
/// only available with the `address-parse` feature.
#[cfg(feature="address-parse")]
pub trait AddrParse: Sized {
    /// the error returned when parsing fails.
    type Err;
    /// parse `s` as an address of this type.
    fn parse_from(s: &str) -> Result<Self, Self::Err>;
}
429 | 
#[cfg(feature="address-parse")]
impl AddrParse for usize {
    type Err = core::num::ParseIntError;

    /// input starting with `0x` has the rest parsed as base 16; any other input goes through
    /// `FromStr`.
    fn parse_from(s: &str) -> Result<Self, Self::Err> {
        match s.strip_prefix("0x") {
            Some(hex_digits) => usize::from_str_radix(hex_digits, 16),
            None => usize::from_str(s),
        }
    }
}
441 | 
#[cfg(feature="address-parse")]
impl AddrParse for u64 {
    type Err = core::num::ParseIntError;

    /// input starting with `0x` has the rest parsed as base 16; any other input goes through
    /// `FromStr`.
    fn parse_from(s: &str) -> Result<Self, Self::Err> {
        match s.strip_prefix("0x") {
            Some(hex_digits) => u64::from_str_radix(hex_digits, 16),
            None => u64::from_str(s),
        }
    }
}
453 | 
#[cfg(feature="address-parse")]
impl AddrParse for u32 {
    type Err = core::num::ParseIntError;

    /// input starting with `0x` has the rest parsed as base 16; any other input goes through
    /// `FromStr`.
    fn parse_from(s: &str) -> Result<Self, Self::Err> {
        match s.strip_prefix("0x") {
            Some(hex_digits) => u32::from_str_radix(hex_digits, 16),
            None => u32::from_str(s),
        }
    }
}
465 | 
#[cfg(feature="address-parse")]
impl AddrParse for u16 {
    type Err = core::num::ParseIntError;

    /// input starting with `0x` has the rest parsed as base 16; any other input goes through
    /// `FromStr`.
    fn parse_from(s: &str) -> Result<Self, Self::Err> {
        match s.strip_prefix("0x") {
            Some(hex_digits) => u16::from_str_radix(hex_digits, 16),
            None => u16::from_str(s),
        }
    }
}
477 | 


--------------------------------------------------------------------------------
/src/color.rs:
--------------------------------------------------------------------------------
  1 | use core::fmt::{self, Display, Formatter};
  2 | #[cfg(feature="colors")]
  3 | use crossterm::style;
  4 | 
  5 | #[cfg(feature="colors")]
  6 | pub enum Colored<T: Display> {
  7 |     Color(T, style::Color),
  8 |     Just(T)
  9 | }
 10 | 
 11 | #[cfg(feature="colors")]
 12 | impl <T: Display> Display for Colored<T> {
 13 |     fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
 14 |         match self {
 15 |             Colored::Color(t, before) => {
 16 |                 use crossterm::style::Stylize;
 17 |                 write!(fmt, "{}", style::style(t).with(*before))
 18 |             },
 19 |             Colored::Just(t) => {
 20 |                 write!(fmt, "{}", t)
 21 |             }
 22 |         }
 23 |     }
 24 | }
 25 | 
 26 | #[cfg(not(feature="colors"))]
 27 | pub enum Colored<T: Display> {
 28 |     Just(T)
 29 | }
 30 | 
 31 | #[cfg(not(feature="colors"))]
 32 | impl <T: Display> Display for Colored<T> {
 33 |     fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
 34 |         match self {
 35 |             Colored::Just(t) => {
 36 |                 write!(fmt, "{}", t)
 37 |             }
 38 |         }
 39 |     }
 40 | }
 41 | 
/// a palette for formatted output.
///
/// every method takes a displayable value and returns it wrapped in [`Colored`]; the method
/// called selects which category the value is presented as. implementations decide whether a
/// category gets a color at all.
pub trait YaxColors {
    // categories of operation
    fn arithmetic_op<T: Display>(&self, t: T) -> Colored<T>;
    fn stack_op<T: Display>(&self, t: T) -> Colored<T>;
    fn nop_op<T: Display>(&self, t: T) -> Colored<T>;
    fn stop_op<T: Display>(&self, t: T) -> Colored<T>;
    fn control_flow_op<T: Display>(&self, t: T) -> Colored<T>;
    fn data_op<T: Display>(&self, t: T) -> Colored<T>;
    fn comparison_op<T: Display>(&self, t: T) -> Colored<T>;
    fn invalid_op<T: Display>(&self, t: T) -> Colored<T>;
    fn platform_op<T: Display>(&self, t: T) -> Colored<T>;
    fn misc_op<T: Display>(&self, t: T) -> Colored<T>;

    // categories of operand and other text
    fn register<T: Display>(&self, t: T) -> Colored<T>;
    fn program_counter<T: Display>(&self, t: T) -> Colored<T>;
    fn number<T: Display>(&self, t: T) -> Colored<T>;
    fn zero<T: Display>(&self, t: T) -> Colored<T>;
    fn one<T: Display>(&self, t: T) -> Colored<T>;
    fn minus_one<T: Display>(&self, t: T) -> Colored<T>;
    fn address<T: Display>(&self, t: T) -> Colored<T>;
    fn symbol<T: Display>(&self, t: T) -> Colored<T>;
    fn function<T: Display>(&self, t: T) -> Colored<T>;
}
 64 | 
 65 | pub struct NoColors;
 66 | 
 67 | impl YaxColors for NoColors {
 68 |     fn arithmetic_op<T: Display>(&self, t: T) -> Colored<T> {
 69 |         Colored::Just(t)
 70 |     }
 71 |     fn stack_op<T: Display>(&self, t: T) -> Colored<T> {
 72 |         Colored::Just(t)
 73 |     }
 74 |     fn nop_op<T: Display>(&self, t: T) -> Colored<T> {
 75 |         Colored::Just(t)
 76 |     }
 77 |     fn stop_op<T: Display>(&self, t: T) -> Colored<T> {
 78 |         Colored::Just(t)
 79 |     }
 80 |     fn control_flow_op<T: Display>(&self, t: T) -> Colored<T> {
 81 |         Colored::Just(t)
 82 |     }
 83 |     fn data_op<T: Display>(&self, t: T) -> Colored<T> {
 84 |         Colored::Just(t)
 85 |     }
 86 |     fn comparison_op<T: Display>(&self, t: T) -> Colored<T> {
 87 |         Colored::Just(t)
 88 |     }
 89 |     fn invalid_op<T: Display>(&self, t: T) -> Colored<T> {
 90 |         Colored::Just(t)
 91 |     }
 92 |     fn platform_op<T: Display>(&self, t: T) -> Colored<T> {
 93 |         Colored::Just(t)
 94 |     }
 95 |     fn misc_op<T: Display>(&self, t: T) -> Colored<T> {
 96 |         Colored::Just(t)
 97 |     }
 98 |     fn register<T: Display>(&self, t: T) -> Colored<T> {
 99 |         Colored::Just(t)
100 |     }
101 |     fn program_counter<T: Display>(&self, t: T) -> Colored<T> {
102 |         Colored::Just(t)
103 |     }
104 |     fn number<T: Display>(&self, t: T) -> Colored<T> {
105 |         Colored::Just(t)
106 |     }
107 |     fn zero<T: Display>(&self, t: T) -> Colored<T> {
108 |         Colored::Just(t)
109 |     }
110 |     fn one<T: Display>(&self, t: T) -> Colored<T> {
111 |         Colored::Just(t)
112 |     }
113 |     fn minus_one<T: Display>(&self, t: T) -> Colored<T> {
114 |         Colored::Just(t)
115 |     }
116 |     fn address<T: Display>(&self, t: T) -> Colored<T> {
117 |         Colored::Just(t)
118 |     }
119 |     fn symbol<T: Display>(&self, t: T) -> Colored<T> {
120 |         Colored::Just(t)
121 |     }
122 |     fn function<T: Display>(&self, t: T) -> Colored<T> {
123 |         Colored::Just(t)
124 |     }
125 | }
126 | 
/// implemented by values that can write themselves to a [`fmt::Write`] using a [`YaxColors`]
/// palette.
pub trait Colorize<T: fmt::Write, Y: YaxColors + ?Sized> {
    /// write `self` to `out`, with `colors` available to wrap pieces of the output.
    fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result;
}
130 | 
131 | #[cfg(feature="colors")]
132 | pub use termion_color::ColorSettings;
133 | 
#[cfg(feature="colors")]
mod termion_color {
    use core::fmt::Display;

    use crossterm::style;

    use crate::color::{Colored, YaxColors};

    /// one `crossterm` color for each category that [`YaxColors`] distinguishes.
    pub struct ColorSettings {
        arithmetic: style::Color,
        stack: style::Color,
        nop: style::Color,
        stop: style::Color,
        control: style::Color,
        data: style::Color,
        comparison: style::Color,
        invalid: style::Color,
        platform: style::Color,
        misc: style::Color,

        register: style::Color,
        program_counter: style::Color,

        number: style::Color,
        zero: style::Color,
        one: style::Color,
        minus_one: style::Color,

        function: style::Color,
        symbol: style::Color,
        address: style::Color,
    }

    #[cfg(feature="use-serde")]
    impl serde::Serialize for ColorSettings {
        fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            use serde::ser::SerializeStruct;
            // the struct is declared to the serializer with zero fields, and none are written.
            serializer.serialize_struct("ColorSettings", 0)?.end()
        }
    }

    impl Default for ColorSettings {
        fn default() -> Self {
            use crossterm::style::Color;

            Self {
                arithmetic: Color::Yellow,
                stack: Color::DarkMagenta,
                nop: Color::DarkBlue,
                stop: Color::Red,
                control: Color::DarkGreen,
                data: Color::Magenta,
                comparison: Color::DarkYellow,
                invalid: Color::DarkRed,
                platform: Color::DarkCyan,
                misc: Color::Cyan,

                register: Color::DarkCyan,
                program_counter: Color::DarkRed,

                number: Color::White,
                zero: Color::White,
                one: Color::White,
                minus_one: Color::White,

                function: Color::Green,
                symbol: Color::Green,
                address: Color::DarkGreen,
            }
        }
    }

    // generates `YaxColors` methods that wrap the value with the color stored in the named field.
    macro_rules! paint_with_field {
        ($($method:ident => $field:ident),+ $(,)?) => {
            $(
                fn $method<T: Display>(&self, t: T) -> Colored<T> {
                    Colored::Color(t, self.$field)
                }
            )+
        };
    }

    impl YaxColors for ColorSettings {
        paint_with_field! {
            arithmetic_op => arithmetic,
            stack_op => stack,
            nop_op => nop,
            stop_op => stop,
            control_flow_op => control,
            data_op => data,
            comparison_op => comparison,
            invalid_op => invalid,
            misc_op => misc,
            platform_op => platform,

            register => register,
            program_counter => program_counter,
            number => number,
            zero => zero,
            one => one,
            minus_one => minus_one,
            address => address,
            symbol => symbol,
            function => function,
        }
    }

    // generates `YaxColors` methods that defer to the wrapped settings when there are some, and
    // otherwise leave the value uncolored.
    macro_rules! forward_or_plain {
        ($($method:ident),+ $(,)?) => {
            $(
                fn $method<T: Display>(&self, t: T) -> Colored<T> {
                    match self {
                        Some(colors) => colors.$method(t),
                        None => Colored::Just(t),
                    }
                }
            )+
        };
    }

    impl <'a> YaxColors for Option<&'a ColorSettings> {
        forward_or_plain! {
            arithmetic_op,
            stack_op,
            nop_op,
            stop_op,
            control_flow_op,
            data_op,
            comparison_op,
            invalid_op,
            misc_op,
            platform_op,

            register,
            program_counter,
            number,
            zero,
            one,
            minus_one,
            address,
            symbol,
            function,
        }
    }
}
384 | 
385 | /*
386 |  * can this be a derivable trait or something?
387 |  */
388 | /*
389 | impl <T: Colorize> Display for T {
390 |     fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
391 |         self.colorize(None, fmt)
392 |     }
393 | }
394 | */
395 | 
396 | /*
397 |  * and make this auto-derive from a ShowContextual impl?
398 |  */
399 | /*
400 | impl <T, U> Colorize for T where T: ShowContextual<Ctx=U> {
401 |     fn colorize(&self, colors: Option<&ColorSettings>, fmt: &mut Formatter) -> fmt::Result {
402 |         self.contextualize(colors, None, fmt)
403 |     }
404 | }
405 | */
406 | 


--------------------------------------------------------------------------------
/src/display/display_sink.rs:
--------------------------------------------------------------------------------
   1 | use core::fmt;
   2 | 
   3 | // `imp_x86.rs` has `asm!()` macros, and so is not portable at all.
   4 | #[cfg(all(feature="alloc", target_arch = "x86_64"))]
   5 | #[path="./display_sink/imp_x86.rs"]
   6 | mod imp;
   7 | 
   8 | // for other architectures, fall back on possibly-slower portable functions.
   9 | #[cfg(all(feature="alloc", not(target_arch = "x86_64")))]
  10 | #[path="./display_sink/imp_generic.rs"]
  11 | mod imp;
  12 | 
  13 | 
  14 | /// `DisplaySink` allows client code to collect output and minimal markup. this is currently used
  15 | /// in formatting instructions for two reasons:
  16 | /// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at
  17 | ///   the same time as collecting output itself.
  18 | /// * `DisplaySink` implementations provide specialized functions for writing strings in
  19 | ///   circumstances where a simple "use `core::fmt`" might incur unwanted overhead.
  20 | ///
  21 | /// ## spans
  22 | ///
  23 | /// spans are out-of-band indicators for the meaning of data written to this sink. when a
  24 | /// `span_start_<foo>` function is called, data written until a matching `span_end_<foo>` can be
  25 | /// considered the text corresponding to `<foo>`.
  26 | ///
  27 | /// spans are entered and exited in a FILO manner. implementations of `DisplaySink` are explicitly
  28 | /// allowed to depend on this fact. functions writing to a `DisplaySink` must exit spans in reverse
  29 | /// order to when they are entered. a function that has a call sequence like
  30 | /// ```text
  31 | /// sink.span_start_operand();
  32 | /// sink.span_start_immediate();
  33 | /// sink.span_end_operand();
  34 | /// ```
  35 | /// is in error.
  36 | ///
  37 | /// spans are reported through the `span_start_*` and `span_end_*` families of functions to avoid
  38 | /// constraining implementations into tracking current output offset (which may not be knowable) or
  39 | /// span size (which may be knowable, but incur additional overhead to compute or track). if the
  40 | /// task for a span is to simply emit VT100 color codes, for example, implementations avoid the
  41 | /// overhead of tracking offsets.
  42 | ///
  43 | /// default implementations of the `span_start_*` and `span_end_*` functions are to do nothing. a
  44 | /// no-op `span_start_*` or `span_end_*` allows rustc to eliminate such calls at compile time for
  45 | /// `DisplaySink` that are uninterested in the corresponding span type.
  46 | ///
  47 | /// # write helpers (`write_*`)
  48 | ///
  49 | /// the `write_*` helpers on `DisplaySink` may be able to take advantage of constraints described in
  50 | /// documentation here to better support writing some kinds of inputs than a fully-general solution
  51 | /// (such as `core::fmt`) might be able to yield.
  52 | ///
  53 | /// currently there are two motivating factors for `write_*` helpers:
  54 | ///
  55 | /// instruction formatting often involves writing small but variable-size strings, such as register
  56 | /// names, which is something of a pathological case for string appending as Rust currently exists:
  57 | /// this often becomes `memcpy` and specifically a call to the platform's `memcpy` (rather than an
  58 | /// inlined `rep movsb`) just to move 3-5 bytes. one relevant Rust issue for reference:
  59 | /// <https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232>
  60 | ///
  61 | /// there are similar papercuts around formatting integers as base-16 numbers, such as
  62 | /// <https://github.com/rust-lang/rust/pull/122770>. in isolation and in most applications these are
  63 | /// not a significant source of overhead. but for programs bounded on decoding and printing
  64 | /// instructions, these can add up to significant overhead - on the order of 10-20% of total
  65 | /// runtime.
  66 | ///
  67 | /// ## example
  68 | ///
  69 | /// a simple call sequence to `DisplaySink` might look something like:
  70 | /// ```compile_fail
  71 | /// sink.span_start_operand()
  72 | /// sink.write_char('[')
  73 | /// sink.span_start_register()
  74 | /// sink.write_fixed_size("rbp")
  75 | /// sink.span_end_register()
  76 | /// sink.write_char(']')
  77 | /// sink.span_end_operand()
  78 | /// ```
  79 | /// which writes the text `[rbp]`, telling sinks that the operand begins at `[`, ends after `]`,
  80 | /// and `rbp` is a register in that operand.
  81 | ///
  82 | /// ## extensibility
  83 | ///
  84 | /// additional `span_{start,end}_*` helpers may be added over time - in the above example, one
  85 | /// future addition might be to add a new `effective_address` span that is started before
  86 | /// `register` and ended after `register. for an operand like `\[rbp\]` the effective address span
  87 | /// would exactly match a corresponding register span, but in more complicated scenarios like
  88 | /// `[rsp + rdi * 4 + 0x50]` the effective address would be all of `rsp + rdi * 4 + 0x50`.
  89 | ///
  90 | /// additional spans are expected to be added as needed. it is not yet clear how more
  91 | /// architecture-specific concepts (such as itanium predicate registers) would be
  92 | /// supported, and so architecture-specific concepts may be expressed on `DisplaySink` if the
  93 | /// need arises.
  94 | ///
  95 | /// new `span_{start,end}_*` helpers will be defaulted as no-op. additions to this trait will be
  96 | /// minor version bumps, so users should take care to not add custom functions starting with
  97 | /// `span_start_` or `span_end_` to structs implementing `DisplaySink`.
  98 | pub trait DisplaySink: fmt::Write {
  99 |     #[inline(always)]
 100 |     fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> {
 101 |         self.write_str(s)
 102 |     }
 103 | 
 104 |     /// write a string to this sink that is less than 32 bytes. this is provided for optimization
 105 |     /// opportunities when writing a variable-length string with known max size.
 106 |     ///
 107 |     /// SAFETY: the provided `s` must be less than 32 bytes. if the provided string is longer than
 108 |     /// 31 bytes, implementations may only copy part of a multi-byte codepoint while writing to a
 109 |     /// utf-8 string. this may corrupt Rust strings.
 110 |     unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> {
 111 |         self.write_str(s)
 112 |     }
 113 |     /// write a string to this sink that is less than 16 bytes. this is provided for optimization
 114 |     /// opportunities when writing a variable-length string with known max size.
 115 |     ///
 116 |     /// SAFETY: the provided `s` must be less than 16 bytes. if the provided string is longer than
 117 |     /// 15 bytes, implementations may only copy part of a multi-byte codepoint while writing to a
 118 |     /// utf-8 string. this may corrupt Rust strings.
 119 |     unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), core::fmt::Error> {
 120 |         self.write_str(s)
 121 |     }
 122 |     /// write a string to this sink that is less than 8 bytes. this is provided for optimization
 123 |     /// opportunities when writing a variable-length string with known max size.
 124 |     ///
 125 |     /// SAFETY: the provided `s` must be less than 8 bytes. if the provided string is longer than
 126 |     /// 7 bytes, implementations may only copy part of a multi-byte codepoint while writing to a
 127 |     /// utf-8 string. this may corrupt Rust strings.
 128 |     unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> {
 129 |         self.write_str(s)
 130 |     }
 131 | 
 132 |     /// write a u8 to the output as a base-16 integer.
 133 |     ///
 134 |     /// this corresponds to the Rust format specifier `{:x}` - see [`std::fmt::LowerHex`] for more.
 135 |     ///
 136 |     /// this is provided for optimization opportunities when the formatted integer can be written
 137 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 138 |     /// followup step)
 139 |     fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> {
 140 |         write!(self, "{:x}", v)
 141 |     }
 142 |     /// write a u8 to the output as a base-16 integer with leading `0x`.
 143 |     ///
 144 |     /// this corresponds to the Rust format specifier `{#:x}` - see [`std::fmt::LowerHex`] for more.
 145 |     ///
 146 |     /// this is provided for optimization opportunities when the formatted integer can be written
 147 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 148 |     /// followup step)
 149 |     fn write_prefixed_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> {
 150 |         self.write_fixed_size("0x")?;
 151 |         self.write_u8(v)
 152 |     }
 153 |     /// write an i8 to the output as a base-16 integer with leading `0x`, and leading `-` if the
 154 |     /// value is negative.
 155 |     ///
 156 |     /// there is no matching `std` formatter, so some examples here:
 157 |     /// ```text
 158 |     /// sink.write_prefixed_i8(-0x60); // writes `-0x60` to the sink
 159 |     /// sink.write_prefixed_i8(127); // writes `0x7f` to the sink
 160 |     /// sink.write_prefixed_i8(-128); // writes `-0x80` to the sink
 161 |     /// ```
 162 |     ///
 163 |     /// this is provided for optimization opportunities when the formatted integer can be written
 164 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 165 |     /// followup step)
 166 |     fn write_prefixed_i8(&mut self, v: i8) -> Result<(), core::fmt::Error> {
 167 |         let v = if v < 0 {
 168 |             self.write_char('-')?;
 169 |             v.unsigned_abs()
 170 |         } else {
 171 |             v as u8
 172 |         };
 173 |         self.write_prefixed_u8(v)
 174 |     }
 175 |     /// write a u16 to the output as a base-16 integer.
 176 |     ///
 177 |     /// this is provided for optimization opportunities when the formatted integer can be written
 178 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 179 |     /// followup step)
 180 |     fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> {
 181 |         write!(self, "{:x}", v)
 182 |     }
 183 |     /// write a u16 to the output as a base-16 integer with leading `0x`.
 184 |     ///
 185 |     /// this corresponds to the Rust format specifier `{#:x}` - see [`std::fmt::LowerHex`] for more.
 186 |     ///
 187 |     /// this is provided for optimization opportunities when the formatted integer can be written
 188 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 189 |     /// followup step)
 190 |     fn write_prefixed_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> {
 191 |         self.write_fixed_size("0x")?;
 192 |         self.write_u16(v)
 193 |     }
 194 |     /// write an i16 to the output as a base-16 integer with leading `0x`, and leading `-` if the
 195 |     /// value is negative.
 196 |     ///
 197 |     /// there is no matching `std` formatter, so some examples here:
 198 |     /// ```text
 199 |     /// sink.write_prefixed_i16(-0x60); // writes `-0x60` to the sink
 200 |     /// sink.write_prefixed_i16(127); // writes `0x7f` to the sink
 201 |     /// sink.write_prefixed_i16(-128); // writes `-0x80` to the sink
 202 |     /// ```
 203 |     ///
 204 |     /// this is provided for optimization opportunities when the formatted integer can be written
 205 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 206 |     /// followup step)
 207 |     fn write_prefixed_i16(&mut self, v: i16) -> Result<(), core::fmt::Error> {
 208 |         let v = if v < 0 {
 209 |             self.write_char('-')?;
 210 |             v.unsigned_abs()
 211 |         } else {
 212 |             v as u16
 213 |         };
 214 |         self.write_prefixed_u16(v)
 215 |     }
 216 |     /// write a u32 to the output as a base-16 integer.
 217 |     ///
 218 |     /// this is provided for optimization opportunities when the formatted integer can be written
 219 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 220 |     /// followup step)
 221 |     fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> {
 222 |         write!(self, "{:x}", v)
 223 |     }
 224 |     /// write a u32 to the output as a base-16 integer with leading `0x`.
 225 |     ///
 226 |     /// this corresponds to the Rust format specifier `{#:x}` - see [`std::fmt::LowerHex`] for more.
 227 |     ///
 228 |     /// this is provided for optimization opportunities when the formatted integer can be written
 229 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 230 |     /// followup step)
 231 |     fn write_prefixed_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> {
 232 |         self.write_fixed_size("0x")?;
 233 |         self.write_u32(v)
 234 |     }
 235 |     /// write an i32 to the output as a base-32 integer with leading `0x`, and leading `-` if the
 236 |     /// value is negative.
 237 |     ///
 238 |     /// there is no matching `std` formatter, so some examples here:
 239 |     /// ```text
 240 |     /// sink.write_prefixed_i32(-0x60); // writes `-0x60` to the sink
 241 |     /// sink.write_prefixed_i32(127); // writes `0x7f` to the sink
 242 |     /// sink.write_prefixed_i32(-128); // writes `-0x80` to the sink
 243 |     /// ```
 244 |     ///
 245 |     /// this is provided for optimization opportunities when the formatted integer can be written
 246 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 247 |     /// followup step)
 248 |     fn write_prefixed_i32(&mut self, v: i32) -> Result<(), core::fmt::Error> {
 249 |         let v = if v < 0 {
 250 |             self.write_char('-')?;
 251 |             v.unsigned_abs()
 252 |         } else {
 253 |             v as u32
 254 |         };
 255 |         self.write_prefixed_u32(v)
 256 |     }
 257 |     /// write a u64 to the output as a base-16 integer.
 258 |     ///
 259 |     /// this is provided for optimization opportunities when the formatted integer can be written
 260 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 261 |     /// followup step)
 262 |     fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> {
 263 |         write!(self, "{:x}", v)
 264 |     }
 265 |     /// write a u64 to the output as a base-16 integer with leading `0x`.
 266 |     ///
 267 |     /// this corresponds to the Rust format specifier `{#:x}` - see [`std::fmt::LowerHex`] for more.
 268 |     ///
 269 |     /// this is provided for optimization opportunities when the formatted integer can be written
 270 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 271 |     /// followup step)
 272 |     fn write_prefixed_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> {
 273 |         self.write_fixed_size("0x")?;
 274 |         self.write_u64(v)
 275 |     }
 276 |     /// write an i64 to the output as a base-64 integer with leading `0x`, and leading `-` if the
 277 |     /// value is negative.
 278 |     ///
 279 |     /// there is no matching `std` formatter, so some examples here:
 280 |     /// ```text
 281 |     /// sink.write_prefixed_i64(-0x60); // writes `-0x60` to the sink
 282 |     /// sink.write_prefixed_i64(127); // writes `0x7f` to the sink
 283 |     /// sink.write_prefixed_i64(-128); // writes `-0x80` to the sink
 284 |     /// ```
 285 |     ///
 286 |     /// this is provided for optimization opportunities when the formatted integer can be written
 287 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 288 |     /// followup step)
 289 |     fn write_prefixed_i64(&mut self, v: i64) -> Result<(), core::fmt::Error> {
 290 |         let v = if v < 0 {
 291 |             self.write_char('-')?;
 292 |             v.unsigned_abs()
 293 |         } else {
 294 |             v as u64
 295 |         };
 296 |         self.write_prefixed_u64(v)
 297 |     }
 298 | 
 299 |     /// enter a region inside which output corresponds to an immediate.
 300 |     fn span_start_immediate(&mut self) { }
 301 |     /// end a region where an immediate was written. see docs on [`DisplaySink`] for more.
 302 |     fn span_end_immediate(&mut self) { }
 303 | 
 304 |     /// enter a region inside which output corresponds to a register.
 305 |     fn span_start_register(&mut self) { }
 306 |     /// end a region where a register was written. see docs on [`DisplaySink`] for more.
 307 |     fn span_end_register(&mut self) { }
 308 | 
 309 |     /// enter a region inside which output corresponds to an opcode.
 310 |     fn span_start_opcode(&mut self) { }
 311 |     /// end a region where an opcode was written. see docs on [`DisplaySink`] for more.
 312 |     fn span_end_opcode(&mut self) { }
 313 | 
 314 |     /// enter a region inside which output corresponds to the program counter.
 315 |     fn span_start_program_counter(&mut self) { }
 316 |     /// end a region where the program counter was written. see docs on [`DisplaySink`] for more.
 317 |     fn span_end_program_counter(&mut self) { }
 318 | 
 319 |     /// enter a region inside which output corresponds to a number, such as a memory offset or
 320 |     /// immediate.
 321 |     fn span_start_number(&mut self) { }
 322 |     /// end a region where a number was written. see docs on [`DisplaySink`] for more.
 323 |     fn span_end_number(&mut self) { }
 324 | 
 325 |     /// enter a region inside which output corresponds to an address. this is a best guess;
 326 |     /// instructions like x86's `lea` may involve an "address" that is not, and arithmetic
 327 |     /// instructions may operate on addresses held in registers.
 328 |     ///
 329 |     /// where possible, the presence of this span will be informed by ISA semantics - if an
 330 |     /// instruction has a memory operand, the effective address calculation of that operand should
 331 |     /// be in an address span.
 332 |     fn span_start_address(&mut self) { }
 333 |     /// end a region where an address was written. the specifics of an "address" are ambiguous and
 334 |     /// best-effort; see [`DisplaySink::span_start_address`] for more about this. otherwise, see
 335 |     /// docs on [`DisplaySink`] for more about spans.
 336 |     fn span_end_address(&mut self) { }
 337 | 
 338 |     /// enter a region inside which output corresponds to a function address, or expression
 339 |     /// evaluating to a function address. this is a best guess; instructions like `call` may call
 340 |     /// to a non-function address, `jmp` may jump to a function (as with tail calls), function
 341 |     /// addresses may be computed via table lookup without semantic hints.
 342 |     ///
 343 |     /// where possible, the presence of this span will be informed by ISA semantics - if an
 344 |     /// instruction is like a "call", an address operand should be a `function` span. if other
 345 |     /// instructions can be expected to handle subroutine starting addresses purely from ISA
 346 |     /// semantics, address operand(s) should be in a `function` span.
 347 |     fn span_start_function_expr(&mut self) { }
 348 |     /// end a region where function address expression was written. the specifics of a "function
 349 |     /// address" are ambiguous and best-effort; see [`DisplaySink::span_start_function_expr`] for more
 350 |     /// about this. otherwise, see docs on [`DisplaySink`] for more about spans.
 351 |     fn span_end_function_expr(&mut self) { }
 352 | }
 353 | 
 354 | /// `FmtSink` can be used to adapt any `fmt::Write`-implementing type into a `DisplaySink` to
 355 | /// format an instruction while discarding all span information at zero cost.
 356 | pub struct FmtSink<'a, T: fmt::Write> {
 357 |     out: &'a mut T,
 358 | }
 359 | 
 360 | impl<'a, T: fmt::Write> FmtSink<'a, T> {
 361 |     pub fn new(f: &'a mut T) -> Self {
 362 |         Self { out: f }
 363 |     }
 364 | 
 365 |     pub fn inner_ref(&self) -> &T {
 366 |         &self.out
 367 |     }
 368 | }
 369 | 
 370 | /// blanket impl that discards all span information, forwards writes to the underlying `fmt::Write`
 371 | /// type.
 372 | impl<'a, T: fmt::Write> DisplaySink for FmtSink<'a, T> { }
 373 | 
 374 | impl<'a, T: fmt::Write> fmt::Write for FmtSink<'a, T> {
 375 |     fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> {
 376 |         self.out.write_str(s)
 377 |     }
 378 |     fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> {
 379 |         self.out.write_char(c)
 380 |     }
 381 |     fn write_fmt(&mut self, f: fmt::Arguments) -> Result<(), core::fmt::Error> {
 382 |         self.out.write_fmt(f)
 383 |     }
 384 | }
 385 | 
 386 | #[cfg(feature = "alloc")]
 387 | mod instruction_text_sink {
 388 |     use core::fmt;
 389 | 
 390 |     use super::{DisplaySink, u8_to_hex};
 391 | 
 392 |     /// this is an implementation detail of yaxpeax-arch and related crates. if you are a user of the
 393 |     /// disassemblers, do not use this struct. do not depend on this struct existing. this struct is
 394 |     /// not stable. this struct is not safe for general use. if you use this struct you and your
 395 |     /// program will be eaten by gremlins.
 396 |     ///
 397 |     /// if you are implementing an instruction formatter for the yaxpeax family of crates: this struct
 398 |     /// is guaranteed to contain a string that is long enough to hold a fully-formatted instruction.
 399 |     /// because the buffer is guaranteed to be long enough, writes through `InstructionTextSink` are
 400 |     /// not bounds-checked, and the buffer is never grown.
 401 |     ///
 402 |     /// this is wildly dangerous in general use. the public constructor of `InstructionTextSink` is
 403 |     /// unsafe as a result. as used in `InstructionFormatter`, the buffer is guaranteed to be
 404 |     /// `clear()`ed before use, `InstructionFormatter` ensures the buffer is large enough, *and*
 405 |     /// `InstructionFormatter` never allows `InstructionTextSink` to exist in a context where it would
 406 |     /// be written to without being rewound first.
 407 |     ///
 408 |     /// because this opens a very large hole through which `fmt::Write` can become unsafe, incorrect
 409 |     /// uses of this struct will be hard to debug in general. `InstructionFormatter` is probably at the
 410 |     /// limit of easily-reasoned-about lifecycle of the buffer, which "only" leaves the problem of
 411 |     /// ensuring that instruction formatting impls this buffer is passed to are appropriately sized.
 412 |     ///
 413 |     /// this is intended to be hidden in docs. if you see this in docs, it's a bug.
 414 |     #[doc(hidden)]
 415 |     pub struct InstructionTextSink<'buf> {
 416 |         buf: &'buf mut alloc::string::String
 417 |     }
 418 | 
 419 |     impl<'buf> InstructionTextSink<'buf> {
 420 |         /// create an `InstructionTextSink` using the provided buffer for storage.
 421 |         ///
 422 |         /// SAFETY: callers must ensure that this sink will never have more content written than
 423 |         /// this buffer can hold. while the buffer may appear growable, `write_*` methods here may
 424 |         /// *bypass bounds checks* and so will never trigger the buffer to grow. writing more data
 425 |         /// than the buffer's size when provided to `new` will cause out-of-bounds writes and
 426 |         /// memory corruption.
 427 |         pub unsafe fn new(buf: &'buf mut alloc::string::String) -> Self {
 428 |             Self { buf }
 429 |         }
 430 |     }
 431 | 
 432 |     impl<'buf> fmt::Write for InstructionTextSink<'buf> {
 433 |         fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> {
 434 |             self.buf.write_str(s)
 435 |         }
 436 |         fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> {
 437 |             if cfg!(debug_assertions) {
 438 |                 if self.buf.capacity() < self.buf.len() + 1 {
 439 |                     panic!("InstructionTextSink::write_char would overflow output");
 440 |                 }
 441 |             }
 442 | 
 443 |             // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()`
 444 |             // is valid for writing, but may be uninitialized.
 445 |             //
 446 |             // this function is essentially equivalent to `Vec::push` specialized for the case that
 447 |             // `len < buf.capacity()`:
 448 |             // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006
 449 |             unsafe {
 450 |                 let underlying = self.buf.as_mut_vec();
 451 |                 // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to
 452 |                 // write single ASCII characters. this is wrong in the general case, but `write_char`
 453 |                 // here is not going to be used in the general case.
 454 |                 if cfg!(debug_assertions) {
 455 |                     if c > '\x7f' {
 456 |                         panic!("InstructionTextSink::write_char would truncate output");
 457 |                     }
 458 |                 }
 459 |                 let to_push = c as u8;
 460 |                 // `ptr::write` here because `underlying.add(underlying.len())` may not point to an
 461 |                 // initialized value, which would mean that turning that pointer into a `&mut u8` to
 462 |                 // store through would be UB. `ptr::write` avoids taking the mut ref.
 463 |                 underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push);
 464 |                 // we have initialized all (one) bytes that `set_len` is increasing the length to
 465 |                 // include.
 466 |                 underlying.set_len(underlying.len() + 1);
 467 |             }
 468 |             Ok(())
 469 |         }
 470 |     }
 471 | 
 472 |     impl<'buf> DisplaySink for InstructionTextSink<'buf> {
        /// append `s` to the buffer without a bounds check, copying it one byte at a time.
        ///
        /// intended for strings whose size is fixed at the call site; see the comment on the
        /// copy loop below. an empty `s` leaves the buffer untouched. debug builds panic if `s`
        /// does not fit in the buffer's remaining capacity; other builds do not check.
        #[inline(always)]
        fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> {
            if cfg!(debug_assertions) {
                if self.buf.capacity() < self.buf.len() + s.len() {
                    panic!("InstructionTextSink::write_fixed_size would overflow output");
                }
            }

            // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to
            // be valid utf8
            let buf = unsafe { self.buf.as_mut_vec() };
            let new_bytes = s.as_bytes();

            // the copy below reads `new_bytes[0]` unconditionally, so an empty string has to
            // return before reaching it.
            if new_bytes.len() == 0 {
                return Ok(());
            }

            // Safety: `new()` requires callers promise the buffer has room for everything
            // written, so the `new_bytes.len()` bytes starting at `dest` are valid for writes.
            unsafe {
                let dest = buf.as_mut_ptr().offset(buf.len() as isize);

                // this used to be enough to bamboozle llvm away from
                // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232
                // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped
                // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s`
                // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this
                // unrolls into some kind of appropriate series of `mov`.
                dest.offset(0 as isize).write(new_bytes[0]);
                for i in 1..new_bytes.len() {
                    dest.offset(i as isize).write(new_bytes[i]);
                }

                // every byte from the old length through the new length was initialized above.
                buf.set_len(buf.len() + new_bytes.len());
            }

            Ok(())
        }
 509 |         unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> {
 510 |             if cfg!(debug_assertions) {
 511 |                 if self.buf.capacity() < self.buf.len() + s.len() {
 512 |                     panic!("InstructionTextSink::write_lt_32 would overflow output");
 513 |                 }
 514 |             }
 515 | 
 516 |             // Safety: `new` requires callers promise there is enough space to hold `s`.
 517 |             unsafe {
 518 |                 super::imp::append_string_lt_32_unchecked(&mut self.buf, s);
 519 |             }
 520 | 
 521 |             Ok(())
 522 |         }
 523 |         unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> {
 524 |             if cfg!(debug_assertions) {
 525 |                 if self.buf.capacity() < self.buf.len() + s.len() {
 526 |                     panic!("InstructionTextSink::write_lt_16 would overflow output");
 527 |                 }
 528 |             }
 529 | 
 530 |             // Safety: `new` requires callers promise there is enough space to hold `s`.
 531 |             unsafe {
 532 |                 super::imp::append_string_lt_16_unchecked(&mut self.buf, s);
 533 |             }
 534 | 
 535 |             Ok(())
 536 |         }
 537 |         unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> {
 538 |             if cfg!(debug_assertions) {
 539 |                 if self.buf.capacity() < self.buf.len() + s.len() {
 540 |                     panic!("InstructionTextSink::write_lt_8 would overflow output");
 541 |                 }
 542 |             }
 543 | 
 544 |             // Safety: `new` requires callers promise there is enough space to hold `s`.
 545 |             unsafe {
 546 |                 super::imp::append_string_lt_8_unchecked(&mut self.buf, s);
 547 |             }
 548 | 
 549 |             Ok(())
 550 |         }
 551 |         /// write a u8 to the output as a base-16 integer.
 552 |         ///
 553 |         /// this is provided for optimization opportunities when the formatted integer can be written
 554 |         /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 555 |         /// followup step)
 556 |         #[inline(always)]
 557 |         fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> {
 558 |             if v == 0 {
 559 |                 return self.write_fixed_size("0");
 560 |             }
 561 |             // we can fairly easily predict the size of a formatted string here with lzcnt, which also
 562 |             // means we can write directly into the correct offsets of the output string.
 563 |             let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize;
 564 | 
 565 |             if cfg!(debug_assertions) {
 566 |                 if self.buf.capacity() < self.buf.len() + printed_size {
 567 |                     panic!("InstructionTextSink::write_u8 would overflow output");
 568 |                 }
 569 |             }
 570 | 
 571 |             // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to
 572 |             // be valid utf8
 573 |             let buf = unsafe { self.buf.as_mut_vec() };
 574 |             let new_len = buf.len() + printed_size;
 575 | 
 576 |             // Safety: there is no way to exit this function without initializing all bytes up to
 577 |             // `new_len`
 578 |             unsafe {
 579 |                 buf.set_len(new_len);
 580 |             }
 581 |             // Safety: `new()` requires callers promise there is space through to `new_len`
 582 |             let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
 583 | 
 584 |             loop {
 585 |                 let digit = v % 16;
 586 |                 let c = u8_to_hex(digit as u8);
 587 |                 // Safety: `p` will not move before `buf`'s length at function entry, so `p` points
 588 |                 // to a location valid for writing.
 589 |                 unsafe {
 590 |                     p = p.offset(-1);
 591 |                     p.write(c);
 592 |                 }
 593 |                 v = v / 16;
 594 |                 if v == 0 {
 595 |                     break;
 596 |                 }
 597 |             }
 598 | 
 599 |             Ok(())
 600 |         }
 601 |         /// write a u16 to the output as a base-16 integer.
 602 |         ///
 603 |         /// this is provided for optimization opportunities when the formatted integer can be written
 604 |         /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 605 |         /// followup step)
 606 |         #[inline(always)]
 607 |         fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> {
 608 |             if v == 0 {
 609 |                 return self.write_fixed_size("0");
 610 |             }
 611 | 
 612 |             // we can fairly easily predict the size of a formatted string here with lzcnt, which also
 613 |             // means we can write directly into the correct offsets of the output string.
 614 |             let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize;
 615 | 
 616 |             if cfg!(debug_assertions) {
 617 |                 if self.buf.capacity() < self.buf.len() + printed_size {
 618 |                     panic!("InstructionTextSink::write_u16 would overflow output");
 619 |                 }
 620 |             }
 621 | 
 622 |             // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to
 623 |             // be valid utf8
 624 |             let buf = unsafe { self.buf.as_mut_vec() };
 625 |             let new_len = buf.len() + printed_size;
 626 | 
 627 |             // Safety: there is no way to exit this function without initializing all bytes up to
 628 |             // `new_len`
 629 |             unsafe {
 630 |                 buf.set_len(new_len);
 631 |             }
 632 |             // Safety: `new()` requires callers promise there is space through to `new_len`
 633 |             let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
 634 | 
 635 |             loop {
 636 |                 let digit = v % 16;
 637 |                 let c = u8_to_hex(digit as u8);
 638 |                 // Safety: `p` will not move before `buf`'s length at function entry, so `p` points
 639 |                 // to a location valid for writing.
 640 |                 unsafe {
 641 |                     p = p.offset(-1);
 642 |                     p.write(c);
 643 |                 }
 644 |                 v = v / 16;
 645 |                 if v == 0 {
 646 |                     break;
 647 |                 }
 648 |             }
 649 | 
 650 |             Ok(())
 651 |         }
 652 |         /// write a u32 to the output as a base-16 integer.
 653 |         ///
 654 |         /// this is provided for optimization opportunities when the formatted integer can be written
 655 |         /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 656 |         /// followup step)
 657 |         #[inline(always)]
 658 |         fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> {
 659 |             if v == 0 {
 660 |                 return self.write_fixed_size("0");
 661 |             }
 662 | 
 663 |             // we can fairly easily predict the size of a formatted string here with lzcnt, which also
 664 |             // means we can write directly into the correct offsets of the output string.
 665 |             let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize;
 666 | 
 667 |             if cfg!(debug_assertions) {
 668 |                 if self.buf.capacity() < self.buf.len() + printed_size {
 669 |                     panic!("InstructionTextSink::write_u32 would overflow output");
 670 |                 }
 671 |             }
 672 | 
 673 |             // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to
 674 |             // be valid utf8
 675 |             let buf = unsafe { self.buf.as_mut_vec() };
 676 |             let new_len = buf.len() + printed_size;
 677 | 
 678 |             // Safety: there is no way to exit this function without initializing all bytes up to
 679 |             // `new_len`
 680 |             unsafe {
 681 |                 buf.set_len(new_len);
 682 |             }
 683 |             // Safety: `new()` requires callers promise there is space through to `new_len`
 684 |             let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
 685 | 
 686 |             loop {
 687 |                 let digit = v % 16;
 688 |                 let c = u8_to_hex(digit as u8);
 689 |                 // Safety: `p` will not move before `buf`'s length at function entry, so `p` points
 690 |                 // to a location valid for writing.
 691 |                 unsafe {
 692 |                     p = p.offset(-1);
 693 |                     p.write(c);
 694 |                 }
 695 |                 v = v / 16;
 696 |                 if v == 0 {
 697 |                     break;
 698 |                 }
 699 |             }
 700 | 
 701 |             Ok(())
 702 |         }
 703 |         /// write a u64 to the output as a base-16 integer.
 704 |         ///
 705 |         /// this is provided for optimization opportunities when the formatted integer can be written
 706 |         /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 707 |         /// followup step)
 708 |         #[inline(always)]
 709 |         fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> {
 710 |             if v == 0 {
 711 |                 return self.write_fixed_size("0");
 712 |             }
 713 | 
 714 |             // we can fairly easily predict the size of a formatted string here with lzcnt, which also
 715 |             // means we can write directly into the correct offsets of the output string.
 716 |             let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize;
 717 | 
 718 |             if cfg!(debug_assertions) {
 719 |                 if self.buf.capacity() < self.buf.len() + printed_size {
 720 |                     panic!("InstructionTextSink::write_u64 would overflow output");
 721 |                 }
 722 |             }
 723 | 
 724 |             // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to
 725 |             // be valid utf8
 726 |             let buf = unsafe { self.buf.as_mut_vec() };
 727 |             let new_len = buf.len() + printed_size;
 728 | 
 729 |             // Safety: there is no way to exit this function without initializing all bytes up to
 730 |             // `new_len`
 731 |             unsafe {
 732 |                 buf.set_len(new_len);
 733 |             }
 734 |             // Safety: `new()` requires callers promise there is space through to `new_len`
 735 |             let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
 736 | 
 737 |             loop {
 738 |                 let digit = v % 16;
 739 |                 let c = u8_to_hex(digit as u8);
 740 |                 // Safety: `p` will not move before `buf`'s length at function entry, so `p` points
 741 |                 // to a location valid for writing.
 742 |                 unsafe {
 743 |                     p = p.offset(-1);
 744 |                     p.write(c);
 745 |                 }
 746 |                 v = v / 16;
 747 |                 if v == 0 {
 748 |                     break;
 749 |                 }
 750 |             }
 751 | 
 752 |             Ok(())
 753 |         }
 754 |     }
 755 | }
 756 | #[cfg(feature = "alloc")]
 757 | pub use instruction_text_sink::InstructionTextSink;
 758 | 
 759 | 
 760 | #[cfg(feature = "alloc")]
 761 | use crate::display::u8_to_hex;
 762 | 
 763 | /// this [`DisplaySink`] impl exists to support somewhat more performant buffering of the kinds of
 764 | /// strings `yaxpeax-x86` uses in formatting instructions.
 765 | ///
 766 | /// span information is discarded at zero cost.
 767 | #[cfg(feature = "alloc")]
 768 | impl DisplaySink for alloc::string::String {
 769 |     /// append `s` to this string, reserving room for it first.
 770 |     #[inline(always)]
 771 |     fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> {
 772 |         self.reserve(s.len());
 773 |         // Safety: we are appending only valid utf8 strings to `self`, as `s` is known to
 774 |         // be valid utf8
 775 |         let buf = unsafe { self.as_mut_vec() };
 776 |         let new_bytes = s.as_bytes();
 777 | 
 778 |         if new_bytes.len() == 0 {
 779 |             return Ok(());
 780 |         }
 781 | 
 782 |         // Safety: we have reserved space for all `new_bytes` bytes, above.
 783 |         unsafe {
 784 |             let dest = buf.as_mut_ptr().offset(buf.len() as isize);
 785 | 
 786 |             // this used to be enough to bamboozle llvm away from
 787 |             // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232
 788 |             // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped
 789 |             // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s`
 790 |             // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this
 791 |             // unrolls into some kind of appropriate series of `mov`.
 792 |             dest.offset(0 as isize).write(new_bytes[0]);
 793 |             for i in 1..new_bytes.len() {
 794 |                 dest.offset(i as isize).write(new_bytes[i]);
 795 |             }
 796 | 
 797 |             // Safety: we have initialized all bytes from where `self` initially ended, through to
 798 |             // all `new_bytes` additional elements.
 799 |             buf.set_len(buf.len() + new_bytes.len());
 800 |         }
 801 | 
 802 |         Ok(())
 803 |     }
 804 |     /// reserve room for `s`, then append it via `imp::append_string_lt_32_unchecked`.
 805 |     unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> {
 806 |         self.reserve(s.len());
 807 | 
 808 |         // Safety: we have reserved enough space for `s`.
 809 |         unsafe {
 810 |             imp::append_string_lt_32_unchecked(self, s);
 811 |         }
 812 | 
 813 |         Ok(())
 814 |     }
 815 |     /// reserve room for `s`, then append it via `imp::append_string_lt_16_unchecked`.
 816 |     unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> {
 817 |         self.reserve(s.len());
 818 | 
 819 |         // Safety: we have reserved enough space for `s`.
 820 |         unsafe {
 821 |             imp::append_string_lt_16_unchecked(self, s);
 822 |         }
 823 | 
 824 |         Ok(())
 825 |     }
 826 |     /// reserve room for `s`, then append it via `imp::append_string_lt_8_unchecked`.
 827 |     unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> {
 828 |         self.reserve(s.len());
 829 | 
 830 |         // Safety: we have reserved enough space for `s`.
 831 |         unsafe {
 832 |             imp::append_string_lt_8_unchecked(self, s);
 833 |         }
 834 | 
 835 |         Ok(())
 836 |     }
 837 |     /// write a u8 to the output as a base-16 integer.
 838 |     ///
 839 |     /// this is provided for optimization opportunities when the formatted integer can be written
 840 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 841 |     /// followup step)
 842 |     #[inline(always)]
 843 |     fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> {
 844 |         if v == 0 {
 845 |             return self.write_fixed_size("0");
 846 |         }
 847 |         // we can fairly easily predict the size of a formatted string here with lzcnt, which also
 848 |         // means we can write directly into the correct offsets of the output string.
 849 |         let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize;
 850 | 
 851 |         self.reserve(printed_size);
 852 | 
 853 |         // Safety: only bytes produced by `u8_to_hex` are appended below; those are expected to be
 854 |         // ASCII hex digits, so `self` remains valid utf8
 855 |         let buf = unsafe { self.as_mut_vec() };
 856 |         let new_len = buf.len() + printed_size;
 857 | 
 858 |         // Safety: we have reserved space through to `new_len` by calling `reserve` above.
 859 |         let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
 860 | 
 861 |         loop {
 862 |             let digit = v % 16;
 863 |             let c = u8_to_hex(digit as u8);
 864 |             // Safety: `p` will not move before `buf`'s length at function entry, so `p` points
 865 |             // into the space reserved above, which is valid for writing.
 866 |             unsafe {
 867 |                 p = p.offset(-1);
 868 |                 p.write(c);
 869 |             }
 870 |             v /= 16;
 871 |             if v == 0 {
 872 |                 break;
 873 |             }
 874 |         }
 875 | 
 876 |         // Safety: the loop above wrote one hex digit for each of the `printed_size` bytes below
 877 |         // `new_len`, so everything covered by the new length is initialized.
 878 |         unsafe { buf.set_len(new_len) };
 879 | 
 880 |         Ok(())
 881 |     }
 882 |     /// write a u16 to the output as a base-16 integer.
 883 |     ///
 884 |     /// this is provided for optimization opportunities when the formatted integer can be written
 885 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 886 |     /// followup step)
 887 |     #[inline(always)]
 888 |     fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> {
 889 |         if v == 0 {
 890 |             return self.write_fixed_size("0");
 891 |         }
 892 |         // we can fairly easily predict the size of a formatted string here with lzcnt, which also
 893 |         // means we can write directly into the correct offsets of the output string.
 894 |         let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize;
 895 | 
 896 |         self.reserve(printed_size);
 897 | 
 898 |         // Safety: only bytes produced by `u8_to_hex` are appended below; those are expected to be
 899 |         // ASCII hex digits, so `self` remains valid utf8
 900 |         let buf = unsafe { self.as_mut_vec() };
 901 |         let new_len = buf.len() + printed_size;
 902 | 
 903 |         // Safety: we have reserved space through to `new_len` by calling `reserve` above.
 904 |         let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
 905 | 
 906 |         loop {
 907 |             let digit = v % 16;
 908 |             let c = u8_to_hex(digit as u8);
 909 |             // Safety: `p` will not move before `buf`'s length at function entry, so `p` points
 910 |             // into the space reserved above, which is valid for writing.
 911 |             unsafe {
 912 |                 p = p.offset(-1);
 913 |                 p.write(c);
 914 |             }
 915 |             v /= 16;
 916 |             if v == 0 {
 917 |                 break;
 918 |             }
 919 |         }
 920 | 
 921 |         // Safety: the loop above wrote one hex digit for each of the `printed_size` bytes below
 922 |         // `new_len`, so everything covered by the new length is initialized.
 923 |         unsafe { buf.set_len(new_len) };
 924 | 
 925 |         Ok(())
 926 |     }
 927 |     /// write a u32 to the output as a base-16 integer.
 928 |     ///
 929 |     /// this is provided for optimization opportunities when the formatted integer can be written
 930 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 931 |     /// followup step)
 932 |     #[inline(always)]
 933 |     fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> {
 934 |         if v == 0 {
 935 |             return self.write_fixed_size("0");
 936 |         }
 937 |         // we can fairly easily predict the size of a formatted string here with lzcnt, which also
 938 |         // means we can write directly into the correct offsets of the output string.
 939 |         let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize;
 940 | 
 941 |         self.reserve(printed_size);
 942 | 
 943 |         // Safety: only bytes produced by `u8_to_hex` are appended below; those are expected to be
 944 |         // ASCII hex digits, so `self` remains valid utf8
 945 |         let buf = unsafe { self.as_mut_vec() };
 946 |         let new_len = buf.len() + printed_size;
 947 | 
 948 |         // Safety: we have reserved space through to `new_len` by calling `reserve` above.
 949 |         let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
 950 | 
 951 |         loop {
 952 |             let digit = v % 16;
 953 |             let c = u8_to_hex(digit as u8);
 954 |             // Safety: `p` will not move before `buf`'s length at function entry, so `p` points
 955 |             // into the space reserved above, which is valid for writing.
 956 |             unsafe {
 957 |                 p = p.offset(-1);
 958 |                 p.write(c);
 959 |             }
 960 |             v /= 16;
 961 |             if v == 0 {
 962 |                 break;
 963 |             }
 964 |         }
 965 | 
 966 |         // Safety: the loop above wrote one hex digit for each of the `printed_size` bytes below
 967 |         // `new_len`, so everything covered by the new length is initialized.
 968 |         unsafe { buf.set_len(new_len) };
 969 | 
 970 |         Ok(())
 971 |     }
 972 |     /// write a u64 to the output as a base-16 integer.
 973 |     ///
 974 |     /// this is provided for optimization opportunities when the formatted integer can be written
 975 |     /// directly to the sink (rather than formatted to an intermediate buffer and output as a
 976 |     /// followup step)
 977 |     #[inline(always)]
 978 |     fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> {
 979 |         if v == 0 {
 980 |             return self.write_fixed_size("0");
 981 |         }
 982 |         // we can fairly easily predict the size of a formatted string here with lzcnt, which also
 983 |         // means we can write directly into the correct offsets of the output string.
 984 |         let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize;
 985 | 
 986 |         self.reserve(printed_size);
 987 | 
 988 |         // Safety: only bytes produced by `u8_to_hex` are appended below; those are expected to be
 989 |         // ASCII hex digits, so `self` remains valid utf8
 990 |         let buf = unsafe { self.as_mut_vec() };
 991 |         let new_len = buf.len() + printed_size;
 992 | 
 993 |         // Safety: we have reserved space through to `new_len` by calling `reserve` above.
 994 |         let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
 995 | 
 996 |         loop {
 997 |             let digit = v % 16;
 998 |             let c = u8_to_hex(digit as u8);
 999 |             // Safety: `p` will not move before `buf`'s length at function entry, so `p` points
1000 |             // into the space reserved above, which is valid for writing.
1001 |             unsafe {
1002 |                 p = p.offset(-1);
1003 |                 p.write(c);
1004 |             }
1005 |             v /= 16;
1006 |             if v == 0 {
1007 |                 break;
1008 |             }
1009 |         }
1010 | 
1011 |         // Safety: the loop above wrote one hex digit for each of the `printed_size` bytes below
1012 |         // `new_len`, so everything covered by the new length is initialized.
1013 |         unsafe { buf.set_len(new_len) };
1014 | 
1015 |         Ok(())
1016 |     }
1017 | }
1018 | 


--------------------------------------------------------------------------------