├── .github ├── FUNDING.yml └── workflows │ └── rust.yml ├── .gitignore ├── .vscode └── settings.json ├── Cargo.toml ├── README.md ├── fuzz ├── .gitignore ├── Cargo.toml └── fuzz_targets │ └── from_utf8_lossy.rs ├── rustfmt.toml ├── src ├── _zstring.rs ├── array_zstring.rs ├── char_decoder.rs ├── lib.rs └── zstr.rs └── tests └── all_tests.rs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [Lokathor] 4 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: {} 5 | pull_request: {} 6 | 7 | env: 8 | RUST_BACKTRACE: 1 9 | 10 | jobs: 11 | test: 12 | name: Test Rust ${{ matrix.rust }} on ${{ matrix.os }} 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | include: 17 | - { rust: stable, os: ubuntu-latest } 18 | - { rust: beta, os: ubuntu-latest } 19 | - { rust: nightly, os: ubuntu-latest } 20 | steps: 21 | - uses: hecrj/setup-rust-action@v1 22 | with: 23 | rust-version: ${{ matrix.rust }} 24 | - uses: actions/checkout@v3 25 | - run: | 26 | cargo test --verbose 27 | cargo test --verbose --all-features 28 | 29 | miri-test: 30 | name: Test with miri 31 | runs-on: ubuntu-latest 32 | env: 33 | MIRIFLAGS: -Zmiri-tag-raw-pointers 34 | steps: 35 | - uses: hecrj/setup-rust-action@v1 36 | with: 37 | rust-version: nightly 38 | components: miri 39 | - uses: actions/checkout@v3 40 | - run: | 41 | cargo miri test --verbose 42 | cargo miri test --verbose --all-features 43 | 44 | sanitizer-test: 45 | name: Test with -Zsanitizer=${{ matrix.sanitizer }} 46 | runs-on: ubuntu-latest 47 | strategy: 48 | fail-fast: false 49 | matrix: 50 | sanitizer: [address, memory, leak] 51 | steps: 52 | - uses: actions/checkout@v3 53 | - uses: hecrj/setup-rust-action@v1 54 | with: 55 | rust-version: 
nightly 56 | components: rust-src 57 | - name: Test with sanitizer 58 | env: 59 | RUSTFLAGS: -Zsanitizer=${{ matrix.sanitizer }} 60 | RUSTDOCFLAGS: -Zsanitizer=${{ matrix.sanitizer }} 61 | ASAN_OPTIONS: detect_stack_use_after_return=1 62 | RUST_BACKTRACE: 0 63 | run: | 64 | cargo test -Zbuild-std --verbose --target=x86_64-unknown-linux-gnu --no-default-features 65 | cargo test -Zbuild-std --verbose --target=x86_64-unknown-linux-gnu --all-features 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "rust-analyzer.cargo.features": "all" 3 | } -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zstring" 3 | description = "Zero-terminated string lib, for use with C FFI." 4 | repository = "https://github.com/Lokathor/zstring" 5 | version = "0.2.4" 6 | edition = "2021" 7 | license = "Zlib OR Apache-2.0 OR MIT" 8 | 9 | [dependencies] 10 | ptr_iter = "0.1.0" 11 | 12 | [dev-dependencies] 13 | bstr = "1" 14 | 15 | [features] 16 | default = [] 17 | alloc = [] 18 | 19 | [package.metadata.docs.rs] 20 | all-features = true 21 | rustdoc-args = ["--cfg","docs_rs"] 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Docs.rs](https://docs.rs/zstring) 2 | 3 | # zstring 4 | 5 | Zero-terminated string types. 6 | 7 | For working with FFI. 
8 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | coverage 5 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zstring-fuzz" 3 | version = "0.0.0" 4 | publish = false 5 | edition = "2021" 6 | 7 | [package.metadata] 8 | cargo-fuzz = true 9 | 10 | [dependencies] 11 | libfuzzer-sys = "0.4" 12 | 13 | [dependencies.zstring] 14 | path = ".." 15 | 16 | # Prevent this from interfering with workspaces 17 | [workspace] 18 | members = ["."] 19 | 20 | [profile.release] 21 | debug = 1 22 | 23 | [[bin]] 24 | name = "from_utf8_lossy" 25 | path = "fuzz_targets/from_utf8_lossy.rs" 26 | test = false 27 | doc = false 28 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/from_utf8_lossy.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use libfuzzer_sys::fuzz_target; 4 | 5 | fuzz_target!(|data: &[u8]| { 6 | let std_result = std::string::String::from_utf8_lossy(data); 7 | let zstring_iter = zstring::CharDecoder::from(data.iter().copied()); 8 | assert!(zstring_iter.eq(std_result.chars())) 9 | }); 10 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | 2 | # Stable 3 | edition = "2018" 4 | fn_args_layout = "Compressed" 5 | max_width = 80 6 | tab_spaces = 2 7 | use_field_init_shorthand = true 8 | use_try_shorthand = true 9 | use_small_heuristics = "Max" 10 | 11 | # Unstable 12 | format_code_in_doc_comments = true 13 | imports_granularity = "Crate" 14 | wrap_comments = true 15 | 
-------------------------------------------------------------------------------- /src/_zstring.rs: -------------------------------------------------------------------------------- 1 | use core::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull}; 2 | 3 | use alloc::{boxed::Box, string::String, vec::Vec}; 4 | 5 | use crate::{ZStr, ZStringError}; 6 | 7 | /// Owning and non-null pointer to zero-terminated textual data. 8 | /// 9 | /// Because this is a thin pointer it's suitable for direct FFI usage. 10 | /// 11 | /// The bytes pointed to *should* be utf-8 encoded, but the [`CharDecoder`] used 12 | /// to convert the bytes to `char` values is safe to use even when the bytes are 13 | /// not proper utf-8. 14 | /// 15 | /// ## Safety 16 | /// * This is `repr(transparent)` over a [`NonNull`]. 17 | /// * The wrapped pointer points at a sequence of valid-to-read non-zero byte 18 | /// values followed by at least one zero byte. 19 | /// * The `ZString` owns the data, and will free it on drop. 
20 | #[repr(transparent)] 21 | #[cfg_attr(docs_rs, doc(cfg(feature = "alloc")))] 22 | pub struct ZString { 23 | pub(crate) nn: NonNull, 24 | } 25 | impl Drop for ZString { 26 | #[inline] 27 | fn drop(&mut self) { 28 | let len = 1 + self.bytes().count(); 29 | let slice_ptr: *mut [u8] = 30 | core::ptr::slice_from_raw_parts_mut(self.nn.as_ptr(), len); 31 | drop(unsafe { Box::from_raw(slice_ptr) }) 32 | } 33 | } 34 | impl Clone for ZString { 35 | /// Clones the value 36 | /// 37 | /// ``` 38 | /// # use zstring::*; 39 | /// // 40 | /// let zstring1 = ZString::try_from("abc").unwrap(); 41 | /// let zstring2 = zstring1.clone(); 42 | /// assert!(zstring1.chars().eq(zstring2.chars())); 43 | /// ``` 44 | #[inline] 45 | #[must_use] 46 | fn clone(&self) -> Self { 47 | let len = 1 + self.bytes().count(); 48 | let slice_ptr: &[u8] = 49 | unsafe { core::slice::from_raw_parts(self.nn.as_ptr(), len) }; 50 | let vec = Vec::from(slice_ptr); 51 | // Safety: we know this will be utf-8 data because you can only safely 52 | // create a `ZString` from utf-8 sources (`&str` and `String`). 53 | let string = unsafe { String::from_utf8_unchecked(vec) }; 54 | let boxed_str = string.into_boxed_str(); 55 | // Safety: This data is cloned from an existing `ZString`. 56 | unsafe { Self::new_unchecked(boxed_str) } 57 | } 58 | } 59 | impl ZString { 60 | /// Converts a [`Box`] into a [`ZString`] without any additional 61 | /// checking. 62 | /// 63 | /// ## Safety 64 | /// * The data **must** have *exactly* one null byte at the end. 65 | /// * The data **must not** contain interior null bytes. 66 | /// 67 | /// Breaking either of the above rules will cause the wrong amount to be freed 68 | /// when the `ZString` drops. 69 | #[inline] 70 | #[must_use] 71 | pub unsafe fn new_unchecked(b: Box) -> Self { 72 | let p: *mut u8 = Box::leak(b).as_mut_ptr(); 73 | let nn: NonNull = unsafe { NonNull::new_unchecked(p) }; 74 | Self { nn } 75 | } 76 | 77 | /// Borrows this `ZString` as a `ZStr`. 
78 | #[inline] 79 | #[must_use] 80 | pub const fn as_zstr(&self) -> ZStr<'_> { 81 | ZStr { nn: self.nn, life: PhantomData } 82 | } 83 | 84 | /// Gets the raw pointer to this data. 85 | #[inline] 86 | #[must_use] 87 | pub const fn as_ptr(&self) -> *const u8 { 88 | self.nn.as_ptr() 89 | } 90 | 91 | /// An iterator over the bytes of this `ZString`. 92 | /// 93 | /// * This iterator *excludes* the terminating 0 byte. 94 | #[inline] 95 | pub fn bytes(&self) -> impl Iterator + '_ { 96 | self.as_zstr().bytes() 97 | } 98 | 99 | /// An iterator over the decoded `char` values of this `ZString`. 100 | #[inline] 101 | pub fn chars(&self) -> impl Iterator + '_ { 102 | self.as_zstr().chars() 103 | } 104 | } 105 | impl From> for ZString { 106 | /// This is like a "to owned" style operation. 107 | /// 108 | /// ```rust 109 | /// # use zstring::*; 110 | /// const FOO: ZStr<'static> = ZStr::from_lit("foo\0"); 111 | /// let zstring = ZString::from(FOO); 112 | /// assert_eq!(zstring, FOO); 113 | /// ``` 114 | #[inline] 115 | #[must_use] 116 | fn from(value: ZStr<'_>) -> Self { 117 | let other: ManuallyDrop = 118 | ManuallyDrop::new(ZString { nn: value.nn }); 119 | let other_ref: &ZString = &other; 120 | other_ref.clone() 121 | } 122 | } 123 | impl FromIterator for ZString { 124 | #[inline] 125 | fn from_iter>(iter: T) -> Self { 126 | let iter = iter.into_iter(); 127 | let no_nulls = iter.map(|ch| { 128 | assert_ne!(ch, '\0'); 129 | ch 130 | }); 131 | let null_on_the_end = no_nulls.chain(['\0']); 132 | let s = String::from_iter(null_on_the_end); 133 | // Safety: We've ensured that there are no nulls within the source iteration, 134 | // and that we've added a single null to the end of the iteration. 135 | unsafe { ZString::new_unchecked(s.into_boxed_str()) } 136 | } 137 | } 138 | impl TryFrom<&str> for ZString { 139 | type Error = ZStringError; 140 | /// Trims any trailing nulls and then makes a [`ZString`] from what's left. 
141 | /// 142 | /// ``` 143 | /// # use zstring::*; 144 | /// let zstring1 = ZString::try_from("abc").unwrap(); 145 | /// assert!("abc".chars().eq(zstring1.chars())); 146 | /// 147 | /// let zstring2 = ZString::try_from("foo\0\0\0\0").unwrap(); 148 | /// assert!("foo".chars().eq(zstring2.chars())); 149 | /// ``` 150 | /// 151 | /// ## Failure 152 | /// * If there are any interior nulls. 153 | /// 154 | /// ``` 155 | /// # use zstring::*; 156 | /// assert!(ZString::try_from("ab\0c").is_err()); 157 | /// ``` 158 | #[inline] 159 | fn try_from(value: &str) -> Result { 160 | let trimmed = value.trim_end_matches('\0'); 161 | if trimmed.contains('\0') { 162 | Err(ZStringError::InteriorNulls) 163 | } else { 164 | Ok(trimmed.chars().collect()) 165 | } 166 | } 167 | } 168 | impl core::fmt::Display for ZString { 169 | /// Display formats the string (without outer `"`). 170 | /// 171 | /// ```rust 172 | /// # use zstring::*; 173 | /// let zstring = ZString::try_from("foo").unwrap(); 174 | /// let s = format!("{zstring}"); 175 | /// assert_eq!("foo", s); 176 | /// ``` 177 | #[inline] 178 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 179 | core::fmt::Display::fmt(&self.as_zstr(), f) 180 | } 181 | } 182 | impl core::fmt::Debug for ZString { 183 | /// Debug formats with outer `"` around the string. 184 | /// 185 | /// ```rust 186 | /// # use zstring::*; 187 | /// let zstring = ZString::try_from("foo").unwrap(); 188 | /// let s = format!("{zstring:?}"); 189 | /// assert_eq!("\"foo\"", s); 190 | /// ``` 191 | #[inline] 192 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 193 | core::fmt::Debug::fmt(&self.as_zstr(), f) 194 | } 195 | } 196 | impl core::fmt::Pointer for ZString { 197 | /// Formats the wrapped pointer value. 
198 | #[inline] 199 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 200 | core::fmt::Pointer::fmt(&self.as_zstr(), f) 201 | } 202 | } 203 | 204 | impl PartialEq for ZString { 205 | /// Two `ZString` are considered equal if they point at the exact same *byte 206 | /// sequence*. 207 | /// 208 | /// This is much faster to compute when the bytes are valid UTF-8, though it 209 | /// is stricter if the bytes are not valid UTF-8 (the character replacement 210 | /// process during decoding *could* make two different byte sequences have the 211 | /// same character sequence). 212 | #[inline] 213 | #[must_use] 214 | fn eq(&self, other: &ZString) -> bool { 215 | self.as_zstr().eq(&other.as_zstr()) 216 | } 217 | } 218 | impl PartialOrd for ZString { 219 | /// Compares based on the *byte sequence* pointed to. 220 | #[inline] 221 | #[must_use] 222 | fn partial_cmp(&self, other: &ZString) -> Option { 223 | self.as_zstr().partial_cmp(&other.as_zstr()) 224 | } 225 | } 226 | 227 | impl PartialEq<&str> for ZString { 228 | /// A `ZStr` equals a `&str` if the bytes match. 229 | #[inline] 230 | #[must_use] 231 | fn eq(&self, other: &&str) -> bool { 232 | self.bytes().eq(other.as_bytes().iter().copied()) 233 | } 234 | } 235 | impl PartialOrd<&str> for ZString { 236 | /// Compares based on the *byte sequence* pointed to. 237 | #[inline] 238 | #[must_use] 239 | fn partial_cmp(&self, other: &&str) -> Option { 240 | Some(self.bytes().cmp(other.as_bytes().iter().copied())) 241 | } 242 | } 243 | 244 | impl PartialEq> for ZString { 245 | /// A `ZString` equals a `ZStr` by calling `ZString::as_zstr` 246 | #[inline] 247 | #[must_use] 248 | fn eq(&self, other: &ZStr<'_>) -> bool { 249 | self.as_zstr().eq(other) 250 | } 251 | } 252 | impl PartialOrd> for ZString { 253 | /// Compares based on the *byte sequence* pointed to. 
254 | #[inline] 255 | #[must_use] 256 | fn partial_cmp(&self, other: &ZStr<'_>) -> Option { 257 | self.as_zstr().partial_cmp(other) 258 | } 259 | } 260 | 261 | impl core::hash::Hash for ZString { 262 | #[inline] 263 | fn hash(&self, state: &mut H) { 264 | for b in self.bytes() { 265 | state.write_u8(b) 266 | } 267 | } 268 | } 269 | 270 | /// Re-view a slice of [ZString] as a slice of [ZStr] 271 | /// 272 | /// ``` 273 | /// # use zstring::*; 274 | /// let zstrings = 275 | /// [ZString::try_from("hello").unwrap(), ZString::try_from("world").unwrap()]; 276 | /// let s: &[ZStr<'_>] = zstrings_as_zstrs(&zstrings); 277 | /// let mut iter = s.iter(); 278 | /// assert!("hello".chars().eq(iter.next().unwrap().chars())); 279 | /// assert!("world".chars().eq(iter.next().unwrap().chars())); 280 | /// ``` 281 | #[inline] 282 | #[must_use] 283 | pub fn zstrings_as_zstrs(zstrings: &[ZString]) -> &[ZStr<'_>] { 284 | // Safety: The two types have identical layout. 285 | // What differs is that one is borrowed and one 286 | // is owned. However, behind a slice reference that 287 | // doesn't have any significance. 288 | unsafe { 289 | core::slice::from_raw_parts(zstrings.as_ptr().cast(), zstrings.len()) 290 | } 291 | } 292 | -------------------------------------------------------------------------------- /src/array_zstring.rs: -------------------------------------------------------------------------------- 1 | use crate::{CharDecoder, ZStr, ZStringError}; 2 | 3 | /// An array holding textual data that's zero terminated. 4 | /// 5 | /// This is a newtype over a byte array, with a const generic length `N`. 6 | /// 7 | /// The bytes contained *should* be utf-8 encoded, but the [`CharDecoder`] used 8 | /// to convert the bytes to `char` values is safe to use even when the bytes are 9 | /// not proper utf-8. 10 | /// 11 | /// ## Safety 12 | /// * The [`as_zstr`](ArrayZString::as_zstr) method assumes that there's a 13 | /// null somewhere before the end of the array. 
Safe code cannot break this 14 | /// rule, but unsafe code must be sure to maintain this invariant. The array 15 | /// has size `N`, but only `N-1` of the bytes are usable, because there has to 16 | /// be at least one `'\0'` before the end of the array. 17 | #[derive(Clone, Copy)] 18 | #[repr(transparent)] 19 | pub struct ArrayZString([u8; N]); 20 | impl ArrayZString { 21 | /// Gives a zeroed array. 22 | /// 23 | /// This is the same as [`default`](ArrayZString::default), but `const fn`. 24 | #[inline] 25 | #[must_use] 26 | pub const fn const_default() -> Self { 27 | Self([0_u8; N]) 28 | } 29 | 30 | /// Gets a [`ZStr`] to this data. 31 | /// 32 | /// ## Panics 33 | /// * If the length `N` is zero, this will panic. 34 | #[inline] 35 | #[must_use] 36 | pub const fn as_zstr(&self) -> ZStr<'_> { 37 | assert!(N > 0); 38 | unsafe { core::mem::transmute::<*const u8, ZStr<'_>>(self.0.as_ptr()) } 39 | } 40 | 41 | /// View the data as a rust `&str`. 42 | /// 43 | /// ## Panics 44 | /// * If somehow the bytes in the array aren't utf-8 this will panic. Safe 45 | /// code cannot cause this to happen. Also panics if the array holds no 0 byte (e.g. when `N` is zero). 46 | #[inline] 47 | #[must_use] 48 | #[track_caller] 49 | pub fn as_str(&self) -> &str { 50 | let null_position = self.0.iter().position(|&b| b == 0).unwrap(); 51 | core::str::from_utf8(&self.0[..null_position]).unwrap() 52 | } 53 | 54 | /// An iterator over the bytes of this `ArrayZString`. 55 | /// 56 | /// * This iterator *excludes* the terminating 0 byte. 57 | #[inline] 58 | pub fn bytes(&self) -> impl Iterator + '_ { 59 | self.0.iter().copied().take_while(|&b| b != 0) 60 | } 61 | 62 | /// An iterator over the decoded `char` values of this `ArrayZString`. 63 | #[inline] 64 | pub fn chars(&self) -> impl Iterator + '_ { 65 | CharDecoder::from(self.bytes()) 66 | } 67 | 68 | /// Gets the raw pointer to this data. 
69 | #[inline] 70 | #[must_use] 71 | pub const fn as_ptr(self) -> *const u8 { 72 | self.0.as_ptr() 73 | } 74 | } 75 | impl Default for ArrayZString { 76 | #[inline] 77 | #[must_use] 78 | fn default() -> Self { 79 | Self::const_default() 80 | } 81 | } 82 | impl TryFrom<&str> for ArrayZString { 83 | type Error = Option; 84 | /// Attempts to make an `ArrayZString` from a `&str` 85 | /// 86 | /// ``` 87 | /// # use zstring::*; 88 | /// let arr_str: ArrayZString<16> = ArrayZString::try_from("hello").unwrap(); 89 | /// assert_eq!(arr_str.as_str(), "hello"); 90 | /// ``` 91 | /// 92 | /// ## Failure 93 | /// The error type is unfortunately awkward here because 0.2 released with an 94 | /// exhaustive error type. So instead we get an `Option`, where 95 | /// `Some` is an actual [`ZStringError`] and `None` indicates that there was 96 | /// no zstring related issue, just a lack of capacity. 97 | /// 98 | /// * Any number of trailing nulls are allowed, and will be trimmed. 99 | /// * Interior nulls are not allowed (err: 100 | /// `Some(ZStringError::InteriorNulls)`). 101 | /// * The trimmed byte length must be less than or equal to `N-1` (err: 102 | /// `None`). 103 | /// 104 | /// ``` 105 | /// # use zstring::*; 106 | /// let interior_null_err: Option = 107 | /// ArrayZString::<16>::try_from("hel\0lo").unwrap_err(); 108 | /// assert_eq!(interior_null_err, Some(ZStringError::InteriorNulls)); 109 | /// 110 | /// // strings equal to or greater than the array size won't fit. 111 | /// let capacity_err: Option = 112 | /// ArrayZString::<5>::try_from("hello").unwrap_err(); 113 | /// assert_eq!(capacity_err, None); 114 | /// 115 | /// // if the array size exceeds the string size it will fit. 
116 | /// assert!(ArrayZString::<6>::try_from("hello").is_ok()); 117 | /// ``` 118 | #[inline] 119 | fn try_from(value: &str) -> Result { 120 | let trimmed = value.trim_end_matches('\0'); 121 | if trimmed.as_bytes().iter().copied().any(|b| b == 0) { 122 | Err(Some(ZStringError::InteriorNulls)) 123 | } else if trimmed.len() < N { // same as `<= N - 1` for N >= 1, but cannot underflow when N == 0 124 | let mut out = Self::const_default(); 125 | out.0[..trimmed.len()].copy_from_slice(trimmed.as_bytes()); 126 | Ok(out) 127 | } else { 128 | Err(None) 129 | } 130 | } 131 | } 132 | impl core::fmt::Display for ArrayZString { 133 | /// Display formats the string (without outer `"`). 134 | /// 135 | /// ```rust 136 | /// # use zstring::*; 137 | /// let arr_str: ArrayZString<16> = ArrayZString::try_from("foo").unwrap(); 138 | /// let s = format!("{arr_str}"); 139 | /// assert_eq!("foo", s); 140 | /// ``` 141 | #[inline] 142 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 143 | core::fmt::Display::fmt(&self.as_zstr(), f) 144 | } 145 | } 146 | impl core::fmt::Debug for ArrayZString { 147 | /// Debug formats with outer `"` around the string. 148 | /// 149 | /// ```rust 150 | /// # use zstring::*; 151 | /// let arr_str: ArrayZString<16> = ArrayZString::try_from("foo").unwrap(); 152 | /// let s = format!("{arr_str:?}"); 153 | /// assert_eq!("\"foo\"", s); 154 | /// ``` 155 | #[inline] 156 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 157 | core::fmt::Debug::fmt(&self.as_zstr(), f) 158 | } 159 | } 160 | 161 | impl PartialEq> 162 | for ArrayZString 163 | { 164 | /// Two `ArrayZString` are equal when the bytes "in" their strings are the 165 | /// same, regardless of capacity differences. 
166 | /// 167 | /// ``` 168 | /// # use zstring::*; 169 | /// assert_eq!( 170 | /// ArrayZString::<6>::try_from("hello").unwrap(), 171 | /// ArrayZString::<10>::try_from("hello").unwrap(), 172 | /// ); 173 | /// ``` 174 | #[inline] 175 | #[must_use] 176 | fn eq(&self, other: &ArrayZString) -> bool { 177 | self.bytes().eq(other.bytes()) 178 | } 179 | } 180 | impl PartialOrd> 181 | for ArrayZString 182 | { 183 | /// Two `ArrayZString` are compared by the bytes "in" their strings, 184 | /// regardless of capacity differences. 185 | /// 186 | /// ``` 187 | /// # use zstring::*; 188 | /// # use core::cmp::{PartialOrd, Ordering}; 189 | /// let abc = ArrayZString::<6>::try_from("abc").unwrap(); 190 | /// let def = ArrayZString::<10>::try_from("def").unwrap(); 191 | /// assert_eq!(abc.partial_cmp(&def), Some(Ordering::Less)); 192 | /// ``` 193 | #[inline] 194 | #[must_use] 195 | fn partial_cmp( 196 | &self, other: &ArrayZString, 197 | ) -> Option { 198 | Some(self.bytes().cmp(other.bytes())) 199 | } 200 | } 201 | 202 | impl Eq for ArrayZString {} 203 | impl Ord for ArrayZString { 204 | #[inline] 205 | #[must_use] 206 | fn cmp(&self, other: &Self) -> core::cmp::Ordering { 207 | self.partial_cmp(other).unwrap() 208 | } 209 | } 210 | 211 | impl PartialEq> for ArrayZString { 212 | /// An `ArrayZString` equals a `ZStr` by bytes. 213 | #[inline] 214 | #[must_use] 215 | fn eq(&self, other: &ZStr<'_>) -> bool { 216 | self.bytes().eq(other.bytes()) 217 | } 218 | } 219 | impl PartialOrd> for ArrayZString { 220 | /// An `ArrayZString` compares to a `ZStr` by bytes. 221 | #[inline] 222 | #[must_use] 223 | fn partial_cmp(&self, other: &ZStr<'_>) -> Option { 224 | Some(self.bytes().cmp(other.bytes())) 225 | } 226 | } 227 | 228 | #[cfg(feature = "alloc")] 229 | impl PartialEq for ArrayZString { 230 | /// An `ArrayZString` equals a `ZString` when they contain the same bytes. 
231 | #[inline] 232 | #[must_use] 233 | fn eq(&self, other: &crate::ZString) -> bool { 234 | self.eq(&other.as_zstr()) 235 | } 236 | } 237 | #[cfg(feature = "alloc")] 238 | impl PartialOrd for ArrayZString { 239 | /// An `ArrayZString` compares to a `ZString` by bytes. 240 | #[inline] 241 | #[must_use] 242 | fn partial_cmp(&self, other: &crate::ZString) -> Option { 243 | self.partial_cmp(&other.as_zstr()) 244 | } 245 | } 246 | 247 | impl core::hash::Hash for ArrayZString { 248 | #[inline] 249 | fn hash(&self, state: &mut H) { 250 | for b in self.bytes() { 251 | state.write_u8(b) 252 | } 253 | } 254 | } 255 | -------------------------------------------------------------------------------- /src/char_decoder.rs: -------------------------------------------------------------------------------- 1 | #![forbid(unsafe_code)] 2 | 3 | /// Decodes byte sequences as if they were utf-8. 4 | /// 5 | /// If the bytes are not utf-8 you'll automatically get the 6 | /// [`REPLACEMENT_CHARACTER`](char::REPLACEMENT_CHARACTER) within the output, as 7 | /// necessary. 8 | /// 9 | /// Construct this iterator using `from` on any other iterator over `u8`. 10 | /// 11 | /// ```rust 12 | /// # use zstring::CharDecoder; 13 | /// let decoder1 = CharDecoder::from([32, 33, 34].into_iter()); 14 | /// let decoder2 = CharDecoder::from("foobar".as_bytes().iter().copied()); 15 | /// ``` 16 | pub struct CharDecoder> { 17 | iter: core::iter::Peekable, 18 | } 19 | impl> From for CharDecoder { 20 | #[inline] 21 | #[must_use] 22 | fn from(i: I) -> Self { 23 | Self { iter: i.peekable() } 24 | } 25 | } 26 | impl> CharDecoder { 27 | /// Returns the next continuation bits (pre-masked), only if the next byte is 28 | /// a continuation byte. 29 | #[inline] 30 | #[must_use] 31 | fn next_continuation_bits(&mut self) -> Option { 32 | match self.iter.peek()? { 33 | x if x >> 6 == 0b10 => Some((self.iter.next()? 
as u32) & 0b111111), 34 | _ => None, 35 | } 36 | } 37 | } 38 | impl> Iterator for CharDecoder { 39 | type Item = char; 40 | 41 | #[inline] 42 | #[must_use] 43 | fn next(&mut self) -> Option { 44 | let x = u32::from(self.iter.next()?); 45 | if x < 128 { 46 | // fast path for ascii 47 | Some(x as u8 as char) 48 | } else { 49 | match UTF8_CHAR_WIDTH[x as usize] { 50 | 2 => { 51 | let Some(y) = self.next_continuation_bits() else { 52 | return Some(char::REPLACEMENT_CHARACTER); 53 | }; 54 | let u = ((x & 0b11111) << 6) | y; 55 | Some(char::from_u32(u).unwrap_or(char::REPLACEMENT_CHARACTER)) 56 | } 57 | 3 => { 58 | let Some(y) = self.next_continuation_bits() else { 59 | return Some(char::REPLACEMENT_CHARACTER); 60 | }; 61 | let Some(z) = self.next_continuation_bits() else { 62 | return Some(char::REPLACEMENT_CHARACTER); 63 | }; 64 | let u = ((x & 0b1111) << 12) | y << 6 | z; 65 | Some(char::from_u32(u).unwrap_or(char::REPLACEMENT_CHARACTER)) 66 | } 67 | 4 => { 68 | let Some(y) = self.next_continuation_bits() else { 69 | return Some(char::REPLACEMENT_CHARACTER); 70 | }; 71 | let Some(z) = self.next_continuation_bits() else { 72 | return Some(char::REPLACEMENT_CHARACTER); 73 | }; 74 | let Some(w) = self.next_continuation_bits() else { 75 | return Some(char::REPLACEMENT_CHARACTER); 76 | }; 77 | let u = ((x & 0b111) << 18) | y << 12 | z << 6 | w; 78 | Some(char::from_u32(u).unwrap_or(char::REPLACEMENT_CHARACTER)) 79 | } 80 | _ => Some(char::REPLACEMENT_CHARACTER), 81 | } 82 | } 83 | } 84 | } 85 | 86 | /// You can't copyright facts 87 | const UTF8_CHAR_WIDTH: &[u8; 256] = &[ 88 | // 1 2 3 4 5 6 7 8 9 A B C D E F 89 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 90 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 91 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 92 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 93 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 94 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 95 | 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 96 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 97 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8 98 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9 99 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A 100 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B 101 | 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C 102 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D 103 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E 104 | 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // F 105 | ]; 106 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![warn(missing_docs)] 3 | #![cfg_attr(docs_rs, feature(doc_cfg))] 4 | #![warn(clippy::missing_inline_in_public_items)] 5 | 6 | //! A crate for "thin pointer" zero-terminated string data. 7 | //! 8 | //! Unlike the [`CStr`](core::ffi::CStr) and [`CString`](alloc::ffi::CString) 9 | //! types, these types are compatible with direct FFI usage. 10 | 11 | #[cfg(feature = "alloc")] 12 | extern crate alloc; 13 | 14 | use ptr_iter::*; 15 | 16 | mod char_decoder; 17 | pub use char_decoder::*; 18 | 19 | mod zstr; 20 | pub use zstr::*; 21 | 22 | mod array_zstring; 23 | pub use array_zstring::*; 24 | 25 | #[cfg(feature = "alloc")] 26 | mod _zstring; 27 | #[cfg(feature = "alloc")] 28 | pub use _zstring::*; 29 | -------------------------------------------------------------------------------- /src/zstr.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use core::{cmp::Ordering, fmt::Write, marker::PhantomData, ptr::NonNull}; 3 | 4 | /// Borrowed and non-null pointer to zero-terminated text data. 5 | /// 6 | /// Because this is a thin pointer it's suitable for direct FFI usage. 
7 | /// 8 | /// The bytes pointed to *should* be utf-8 encoded, but the [`CharDecoder`] used 9 | /// to convert the bytes to `char` values is safe to use even when the bytes are 10 | /// not proper utf-8. 11 | /// 12 | /// ## Safety 13 | /// * This is `repr(transparent)` over a [`NonNull`]. 14 | /// * The wrapped pointer points at a sequence of valid-to-read non-zero byte 15 | /// values followed by at least one zero byte. 16 | /// * When you create a `ZStr<'a>` value the pointer must be valid for at least 17 | /// as long as the lifetime `'a`. 18 | #[derive(Clone, Copy)] 19 | #[repr(transparent)] 20 | pub struct ZStr<'a> { 21 | pub(crate) nn: NonNull, 22 | pub(crate) life: PhantomData<&'a [u8]>, 23 | } 24 | impl<'a> ZStr<'a> { 25 | /// Makes a `ZStr<'static>` from a `&'static str` 26 | /// 27 | /// This is *intended* for use with string literals, but if you leak a runtime 28 | /// string into a static string I guess that works too. 29 | /// 30 | /// ```rust 31 | /// # use zstring::*; 32 | /// const FOO: ZStr<'static> = ZStr::from_lit("foo\0"); 33 | /// ``` 34 | /// 35 | /// ## Panics 36 | /// * If `try_from` would return an error, this will panic instead. Because 37 | /// this is intended for compile time constants, the panic will "just" 38 | /// trigger a build error. 39 | #[inline] 40 | #[track_caller] 41 | pub const fn from_lit(s: &'static str) -> ZStr<'static> { 42 | let bytes = s.as_bytes(); 43 | let mut end = bytes.len(); // length once trailing nulls are trimmed; may reach 0 (e.g. for "\0") 44 | while end > 0 && bytes[end - 1] == 0 { 45 | end -= 1; 46 | } 47 | assert!(end < bytes.len(), "No trailing nulls."); 48 | let mut i = 0; 49 | while i < end { 50 | if bytes[i] == 0 { 51 | panic!("Input contains interior null."); 52 | } 53 | i += 1; 54 | } 55 | ZStr { 56 | // Safety: References can't ever be null. 57 | nn: unsafe { NonNull::new_unchecked(s.as_ptr() as *mut u8) }, 58 | life: PhantomData, 59 | } 60 | } 61 | 62 | /// An iterator over the bytes of this `ZStr`. 
63 | /// 64 | /// * This iterator **excludes** the terminating 0 byte. 65 | #[inline] 66 | pub fn bytes(self) -> impl Iterator + 'a { 67 | // Safety: per the type safety docs, whoever made this `ZStr` promised that 68 | // we can read the pointer's bytes until we find a 0 byte. 69 | unsafe { ConstPtrIter::read_until_default(self.nn.as_ptr()) } 70 | } 71 | 72 | /// An iterator over the decoded `char` values of this `ZStr`. 73 | #[inline] 74 | pub fn chars(self) -> impl Iterator + 'a { 75 | CharDecoder::from(self.bytes()) 76 | } 77 | 78 | /// Gets the raw pointer to this data. 79 | #[inline] 80 | #[must_use] 81 | pub const fn as_ptr(self) -> *const u8 { 82 | self.nn.as_ptr() 83 | } 84 | } 85 | impl<'a> TryFrom<&'a str> for ZStr<'a> { 86 | type Error = ZStringError; 87 | /// Converts the value in place. 88 | /// 89 | /// The trailing nulls of the source `&str` will not "be in" the output 90 | /// sequence of the returned `ZStr`. 91 | /// 92 | /// ```rust 93 | /// # use zstring::*; 94 | /// let z1 = ZStr::try_from("abcd\0").unwrap(); 95 | /// assert!(z1.chars().eq("abcd".chars())); 96 | /// 97 | /// let z2 = ZStr::try_from("abcd\0\0\0").unwrap(); 98 | /// assert!(z2.chars().eq("abcd".chars())); 99 | /// ``` 100 | /// 101 | /// ## Failure 102 | /// * There must be at least one trailing null in the input `&str`. 103 | /// * There must be no nulls followed by a non-null ("interior nulls"). This 104 | /// second condition is not a strict requirement of the type, more of a 105 | /// correctness lint. If interior nulls were allowed then `"ab\0cd\0"` 106 | /// converted to a `ZStr` would only be read as `"ab"`, and the second half 107 | /// of the string would effectively be erased. 
108 | #[inline] 109 | fn try_from(value: &'a str) -> Result { 110 | let trimmed = value.trim_end_matches('\0'); 111 | if value.len() == trimmed.len() { 112 | Err(ZStringError::NoTrailingNulls) 113 | } else if trimmed.contains('\0') { 114 | Err(ZStringError::InteriorNulls) 115 | } else { 116 | // Note: We have verified that the starting `str` value contains at 117 | // least one 0 byte. 118 | Ok(Self { 119 | nn: NonNull::new(value.as_ptr() as *mut u8).unwrap(), 120 | life: PhantomData, 121 | }) 122 | } 123 | } 124 | } 125 | impl core::fmt::Display for ZStr<'_> { 126 | /// Display formats the string (without outer `"`). 127 | /// 128 | /// ```rust 129 | /// # use zstring::*; 130 | /// const FOO: ZStr<'static> = ZStr::from_lit("foo\0"); 131 | /// let s = format!("{FOO}"); 132 | /// assert_eq!(s, "foo"); 133 | /// ``` 134 | #[inline] 135 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 136 | for ch in self.chars() { 137 | write!(f, "{ch}")?; 138 | } 139 | Ok(()) 140 | } 141 | } 142 | impl core::fmt::Debug for ZStr<'_> { 143 | /// Debug formats with outer `"` around the string. 144 | /// 145 | /// ```rust 146 | /// # use zstring::*; 147 | /// const FOO: ZStr<'static> = ZStr::from_lit("foo\0"); 148 | /// let s = format!("{FOO:?}"); 149 | /// assert_eq!(s, "\"foo\""); 150 | /// ``` 151 | #[inline] 152 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 153 | f.write_char('"')?; 154 | core::fmt::Display::fmt(self, f)?; 155 | f.write_char('"')?; 156 | Ok(()) 157 | } 158 | } 159 | impl core::fmt::Pointer for ZStr<'_> { 160 | /// Formats the wrapped pointer value. 161 | #[inline] 162 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 163 | core::fmt::Pointer::fmt(&self.nn, f) 164 | } 165 | } 166 | 167 | impl PartialEq> for ZStr<'_> { 168 | /// Two `ZStr` are considered equal if they point at the exact same *byte 169 | /// sequence*. 
170 | /// 171 | /// This is much faster to compute when the bytes are valid UTF-8, though it 172 | /// is stricter if the bytes are not valid UTF-8 (the character replacement 173 | /// process during decoding *could* make two different byte sequences have the 174 | /// same character sequence). 175 | /// 176 | /// ```rust 177 | /// # use zstring::*; 178 | /// const FOO1: ZStr<'static> = ZStr::from_lit("foo\0"); 179 | /// const FOO2: ZStr<'static> = ZStr::from_lit("foo\0"); 180 | /// assert_eq!(FOO1, FOO2); 181 | /// ``` 182 | #[inline] 183 | #[must_use] 184 | fn eq(&self, other: &ZStr<'_>) -> bool { 185 | if self.nn == other.nn { 186 | true 187 | } else { 188 | self.bytes().eq(other.bytes()) 189 | } 190 | } 191 | } 192 | impl PartialOrd> for ZStr<'_> { 193 | /// Compares based on the *byte sequence* pointed to. 194 | /// 195 | /// ```rust 196 | /// # use zstring::*; 197 | /// # use core::cmp::{PartialOrd, Ordering}; 198 | /// const ABC: ZStr<'static> = ZStr::from_lit("abc\0"); 199 | /// const DEF: ZStr<'static> = ZStr::from_lit("def\0"); 200 | /// const GHI: ZStr<'static> = ZStr::from_lit("ghi\0"); 201 | /// assert_eq!(ABC.partial_cmp(&DEF), Some(Ordering::Less)); 202 | /// assert_eq!(DEF.partial_cmp(&GHI), Some(Ordering::Less)); 203 | /// assert_eq!(GHI.partial_cmp(&ABC), Some(Ordering::Greater)); 204 | /// ``` 205 | #[inline] 206 | #[must_use] 207 | fn partial_cmp(&self, other: &ZStr<'_>) -> Option { 208 | if self.nn == other.nn { 209 | Some(Ordering::Equal) 210 | } else { 211 | Some(self.bytes().cmp(other.bytes())) 212 | } 213 | } 214 | } 215 | 216 | impl PartialEq<&str> for ZStr<'_> { 217 | /// A `ZStr` equals a `&str` if the bytes match. 218 | #[inline] 219 | #[must_use] 220 | fn eq(&self, other: &&str) -> bool { 221 | self.bytes().eq(other.as_bytes().iter().copied()) 222 | } 223 | } 224 | impl PartialOrd<&str> for ZStr<'_> { 225 | /// Compares based on the *byte sequence* pointed to. 
226 | #[inline] 227 | #[must_use] 228 | fn partial_cmp(&self, other: &&str) -> Option { 229 | Some(self.bytes().cmp(other.as_bytes().iter().copied())) 230 | } 231 | } 232 | 233 | #[cfg(feature = "alloc")] 234 | impl PartialEq for ZStr<'_> { 235 | /// A `ZStr` equals a `ZString` by calling `ZString::as_zstr` 236 | #[inline] 237 | #[must_use] 238 | fn eq(&self, other: &ZString) -> bool { 239 | self.eq(&other.as_zstr()) 240 | } 241 | } 242 | #[cfg(feature = "alloc")] 243 | impl PartialOrd for ZStr<'_> { 244 | /// Compares based on the *byte sequence* pointed to. 245 | #[inline] 246 | #[must_use] 247 | fn partial_cmp(&self, other: &ZString) -> Option { 248 | self.partial_cmp(&other.as_zstr()) 249 | } 250 | } 251 | 252 | impl core::hash::Hash for ZStr<'_> { 253 | #[inline] 254 | fn hash(&self, state: &mut H) { 255 | for b in self.bytes() { 256 | state.write_u8(b) 257 | } 258 | } 259 | } 260 | 261 | /// An error occurred while trying to make a [`ZStr`] or [`ZString`]. 262 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 263 | pub enum ZStringError { 264 | /// The provided data didn't have any trailing nulls (`'\0'`). 265 | NoTrailingNulls, 266 | /// The provided data had interior nulls (non-null data *after* a null). 
267 | InteriorNulls, 268 | } 269 | -------------------------------------------------------------------------------- /tests/all_tests.rs: -------------------------------------------------------------------------------- 1 | use zstring::CharDecoder; 2 | 3 | #[test] 4 | fn bstr_example() { 5 | let bytes = *b"a\xF0\x9F\x87z"; 6 | let chars: Vec = CharDecoder::from(bytes.iter().copied()).collect(); 7 | assert_eq!(vec!['a', '\u{FFFD}', 'z'], chars); 8 | } 9 | 10 | #[test] 11 | fn fuzz_found_data() { 12 | use bstr::ByteSlice; 13 | 14 | let bytes = [0b11110101, 0b10101111]; 15 | 16 | let s_lossy = String::from_utf8_lossy(&bytes); 17 | let s_bstr = bytes.chars().collect::(); 18 | assert_eq!(s_lossy, s_bstr); // passes, they agree 19 | 20 | let s_decoded = CharDecoder::from(bytes.iter().copied()).collect::(); 21 | assert_eq!(s_lossy, s_decoded); 22 | 23 | // Note: Other byte sequences will still fail! 24 | } 25 | --------------------------------------------------------------------------------