├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── LICENSE.md ├── README.md ├── allman ├── Cargo.toml ├── README.md └── src │ ├── layout.rs │ ├── lib.rs │ └── render.rs ├── buf-trait ├── Cargo.toml ├── README.md └── src │ └── lib.rs ├── byteyarn ├── Cargo.toml ├── README.md └── src │ ├── boxed.rs │ ├── convert.rs │ ├── lib.rs │ ├── raw.rs │ ├── reffed.rs │ └── utf8.rs ├── gilded ├── Cargo.toml ├── README.md ├── attr │ ├── Cargo.toml │ └── lib.rs └── src │ ├── doc │ ├── json.rs │ ├── mod.rs │ └── yaml.rs │ └── lib.rs ├── ilex ├── Cargo.toml ├── README.md ├── attr │ ├── Cargo.toml │ └── lib.rs ├── src │ ├── file │ │ ├── context.rs │ │ └── mod.rs │ ├── fp.rs │ ├── ice.rs │ ├── lib.rs │ ├── report │ │ ├── builtin.rs │ │ ├── diagnostic.rs │ │ ├── mod.rs │ │ └── render.rs │ ├── rt │ │ ├── dfa.rs │ │ ├── emit2.rs │ │ ├── lexer.rs │ │ ├── mod.rs │ │ └── unicode.rs │ ├── rule.rs │ ├── spec.rs │ └── token │ │ ├── mod.rs │ │ ├── stream.rs │ │ └── summary.rs └── tests │ ├── greedy │ ├── greedy.tokens.yaml │ ├── greedy.txt │ ├── main.rs │ ├── newlines.tokens.yaml │ └── newlines.txt │ ├── json │ ├── array.ast.txt │ ├── array.json │ ├── array.tokens.yaml │ ├── main.rs │ ├── null.ast.txt │ ├── null.json │ ├── null.tokens.yaml │ ├── obj.ast.txt │ ├── obj.json │ └── obj.tokens.yaml │ ├── llvm │ ├── main.rs │ ├── smoke.ll │ └── smoke.tokens.yaml │ ├── numbers │ ├── main.rs │ ├── numbers.fp64.txt │ ├── numbers.tokens.yaml │ └── numbers.txt │ └── ui │ ├── ambiguous │ ├── idents.stderr │ ├── idents.txt │ ├── no_xid_after_br.stderr │ ├── no_xid_after_br.txt │ ├── no_xid_after_cm.stderr │ ├── no_xid_after_cm.txt │ ├── no_xid_after_id.stderr │ ├── no_xid_after_id.txt │ ├── no_xid_after_kw.stderr │ ├── no_xid_after_kw.txt │ ├── no_xid_after_nm.stderr │ ├── no_xid_after_nm.txt │ ├── no_xid_after_st.stderr │ ├── no_xid_after_st.txt │ ├── nums.stderr │ ├── nums.txt │ ├── symbols_after_comment.tokens.yaml │ ├── symbols_after_comment.txt │ ├── 
symbols_after_quoted.tokens.yaml │ └── symbols_after_quoted.txt │ ├── digital │ ├── invalid.stderr │ ├── invalid.txt │ ├── missing.stderr │ ├── missing.txt │ ├── points.stderr │ ├── points.txt │ ├── separators.stderr │ └── separators.txt │ ├── eof │ ├── bracket.stderr │ ├── bracket.txt │ ├── bracket_multiline.stderr │ ├── bracket_multiline.txt │ ├── comment.stderr │ ├── comment.txt │ ├── comment_multiline.stderr │ ├── comment_multiline.txt │ ├── mixed_brackets.stderr │ ├── mixed_brackets.txt │ ├── mixed_brackets_multiline.stderr │ ├── mixed_brackets_multiline.txt │ ├── quoted.stderr │ ├── quoted.txt │ ├── quoted_multiline.stderr │ └── quoted_multiline.txt │ ├── main.rs │ ├── too_small │ ├── cxx_tag.stderr │ ├── cxx_tag.txt │ ├── ident.stderr │ ├── ident.txt │ ├── rust_hashes.stderr │ └── rust_hashes.txt │ └── unrecognized │ ├── unrecognized.stderr │ └── unrecognized.txt ├── proc2decl ├── Cargo.toml ├── README.md └── src │ └── lib.rs ├── rust-toolchain.toml ├── rustfmt.toml └── twie ├── Cargo.toml ├── README.md └── src ├── impls.rs ├── lib.rs ├── poison_trie.txt └── raw ├── dump.rs ├── entries.rs ├── iter.rs ├── mod.rs └── nodes.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | on: 3 | push: 4 | branches: [ main ] 5 | pull_request: 6 | branches: [ main ] 7 | 8 | env: 9 | CARGO_TERM_COLOR: always 10 | NIGHTLY: 'nightly-2025-01-01' 11 | 12 | jobs: 13 | check_lints: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: Check format 19 | run: cargo fmt -- --check --files-with-diff 20 | 21 | - name: Check clippy lints 22 | run: cargo clippy --all-targets --verbose 23 | 24 | build_and_test: 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v2 28 | 29 | - name: Build with default settings 30 | run: | 31 | cargo build -v 32 | cargo build --release -v 33 | 34 | - name: Build docs 35 | run: cargo doc --verbose 36 | 37 | - name: Run tests 
38 | run: cargo test --verbose 39 | 40 | miri: 41 | runs-on: ubuntu-latest 42 | steps: 43 | - uses: actions/checkout@v2 44 | 45 | - name: Install Miri 46 | run: rustup +$NIGHTLY component add miri 47 | 48 | - name: Run tests under Miri 49 | run: cargo +$NIGHTLY miri test --workspace --exclude ilex 50 | 51 | # Most of ilex's tests are extremely slow under Miri. 52 | # The LLVM syntax test alone takes 10 minutes or so on a GH runner. 53 | - name: Run some `ilex` tests under Miri 54 | run: | 55 | cargo +$NIGHTLY miri test -p ilex --lib 56 | cargo +$NIGHTLY miri test -p ilex --test greedy -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "allman", 4 | "byteyarn", 5 | "buf-trait", 6 | "gilded", "gilded/attr", 7 | "ilex", "ilex/attr", 8 | "proc2decl", 9 | "twie", 10 | ] 11 | resolver = "2" 12 | 13 | [workspace.package] 14 | edition = "2021" 15 | 16 | authors = ["Miguel Young de la Sota "] 17 | homepage = "https://github.com/mcy/strings" 18 | repository = "https://github.com/mcy/strings" 19 | keywords = ["string", "text", "binary"] 20 | 21 | license = "Apache-2.0" 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rust String Libraries by mcyoung 2 | 3 | For some reason or another, I keep accumulating libraries for operating on 4 | strings of data. Rather than continue to generate disparate repos, this one repo 5 | will hold all of my strings libraries, which will make it easier for them to 6 | depend on each other. 
7 | 8 | ## Table of Contents 9 | 10 | - 📜 [`buf-trait`](https://github.com/mcy/strings/tree/main/buf-trait) - A trait 11 | for abstracting over buffers of POD data. 12 | 13 | - 🧶 [`byteyarn`](https://github.com/mcy/strings/tree/main/byteyarn) - 14 | Space-efficient byte strings. 15 | 16 | - 🌲 [`twie`](https://github.com/mcy/strings/tree/main/twie) - Fast and compact 17 | prefix tries. 18 | 19 | - ⛩️ [`ilex`](https://github.com/mcy/strings/tree/main/ilex) - The last lexer I 20 | ever want to write. 21 | 22 | - 🗒️ [`allman`](https://github.com/mcy/strings/tree/main/allman) - A DOM for 23 | code formatters. 24 | 25 | - 👑 [`gilded`](https://github.com/mcy/strings/tree/main/gilded) - How I learned 26 | to stop worrying and love golden testing. 27 | 28 | - 💢 [`proc2decl`](https://github.com/mcy/strings/tree/main/proc2decl) - Proc 29 | macros suck! 30 | 31 | --- 32 | 33 | All libraries are Apache-2.0 licensed. 34 | -------------------------------------------------------------------------------- /allman/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "allman" 3 | version = "0.1.0" 4 | description = "source code formatting and line reflowing toolkit" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | byteyarn = { path = "../byteyarn" } 15 | 16 | unicode-width = "0.2.0" -------------------------------------------------------------------------------- /allman/README.md: -------------------------------------------------------------------------------- 1 | # allman 2 | 3 | `allman` - A code formatting and line reflowing toolkit. 🗒️🖋️ 4 | 5 | `allman::Doc` is a DOM-like structure that specifies how indentation, 6 | like breaking, and reflowing should be handled. It is a tree of `Tag`s 7 | that dictate layout information for the source code to format. 
8 | 9 | For example, the Allman brace style (for which this crate is named) can 10 | be implemented as follows: 11 | 12 | ```rust 13 | // flat: fn foo() { ... } 14 | // 15 | // broken: 16 | // fn foo() 17 | // { 18 | // // ... 19 | // } 20 | Doc::new() 21 | .tag("fn") 22 | .tag(Tag::Space) 23 | .tag("foo") 24 | .tag("(").tag(")") 25 | .tag_with(Tag::Group(40), |doc| { 26 | doc 27 | .tag_if(Tag::Space, If::Flat) 28 | .tag_if(Tag::Break(1), If::Broken) 29 | .tag("{") 30 | .tag_if(Tag::Space, If::Flat) 31 | .tag_if(Tag::Break(1), If::Broken) 32 | .tag_with(Tag::Indent(2), |doc| { 33 | // Brace contents here... 34 | }) 35 | .tag_if(Tag::Space, If::Flat) 36 | .tag_if(Tag::Break(1), If::Broken) 37 | .tag("}"); 38 | }); 39 | ``` 40 | 41 | When calling `Doc::render()`, the layout algorithm will determine whether 42 | `Tag::Group`s should be "broken", i.e., laid out with newlines inside. 43 | -------------------------------------------------------------------------------- /allman/src/layout.rs: -------------------------------------------------------------------------------- 1 | //! Layout algorithm implementation. 2 | //! 3 | //! The only thing the layout algorithm *actually* has to decide is whether each 4 | //! group breaks or not. The algorithm is as follows. 5 | //! 6 | //! 1. Measure the width of each element recursively. Elements which span 7 | //! multiple lines are treated as being of infinite width. 8 | //! 9 | //! 2. Mark groups as broken recursively: for each group, if at its current 10 | //! position, it would overflow the maximum column length, break it, and 11 | //! recurse into it. 
12 | 13 | use unicode_width::UnicodeWidthStr; 14 | 15 | use crate::Cursor; 16 | use crate::Doc; 17 | use crate::If; 18 | use crate::Measure; 19 | use crate::Options; 20 | use crate::Tag; 21 | use crate::TagInfo; 22 | 23 | impl Doc<'_> { 24 | pub(crate) fn do_layout(&self, opts: &Options) { 25 | for (t, c) in self.cursor() { 26 | measure(t, c); 27 | } 28 | 29 | LayoutState { opts, indent: 0, column: 0 }.do_layout(self.cursor()); 30 | } 31 | } 32 | 33 | struct LayoutState<'a> { 34 | opts: &'a Options, 35 | 36 | /// The column to start the next line at. 37 | indent: usize, 38 | 39 | /// The next column that we would be writing at. 40 | column: usize, 41 | } 42 | 43 | impl LayoutState<'_> { 44 | /// Advances state for rendering a tag within a broken group. 45 | fn do_layout(&mut self, cursor: Cursor) { 46 | for (tag, cursor) in cursor { 47 | let cond = tag.cond != Some(If::Flat); 48 | 49 | let mut m = tag.measure.get(); 50 | m.column = self.column; 51 | match &tag.tag { 52 | Tag::Text(text) => match text.rfind("\n") { 53 | Some(nl) => self.column = self.indent + text[nl..].width(), 54 | None => self.column += m.width.unwrap(), 55 | }, 56 | 57 | Tag::Space => self.column += 1, 58 | Tag::Break(0) => {} 59 | Tag::Break(_) => self.column = self.indent, 60 | 61 | Tag::Group(max) => { 62 | let mut width = 63 | m.width.filter(|w| self.column + w <= self.opts.max_columns); 64 | 65 | if width.is_some_and(|w| w > *max) { 66 | width = None; 67 | } 68 | 69 | if let Some(w) = width { 70 | // Don't need to do layout here: everything already fits. 
71 | self.column += w; 72 | } else { 73 | m.width = None; 74 | 75 | self.do_layout(cursor); 76 | } 77 | } 78 | 79 | Tag::Indent(columns) => { 80 | if cond { 81 | let prev = self.indent; 82 | self.indent = self.indent.saturating_add_signed(*columns); 83 | self.do_layout(cursor); 84 | self.indent = prev; 85 | } 86 | } 87 | } 88 | tag.measure.set(m); 89 | } 90 | } 91 | } 92 | 93 | /// Calculates the width of each element if it was laid out in one line. 94 | fn measure(tag: &TagInfo, cursor: Cursor) { 95 | let tag_width = match &tag.tag { 96 | _ if tag.cond == Some(If::Broken) => Some(0), 97 | 98 | Tag::Text(text) => (!text.contains("\n")).then(|| text.width()), 99 | Tag::Space => Some(1), 100 | Tag::Break(_) => None, 101 | 102 | _ => Some(0), 103 | }; 104 | 105 | let width = cursor 106 | .map(|(t, c)| { 107 | measure(t, c); 108 | t.measure.get().width 109 | }) 110 | .fold(tag_width, |a, b| a?.checked_add(b?)); 111 | 112 | tag.measure.set(Measure { width, column: 0 }); 113 | } 114 | -------------------------------------------------------------------------------- /allman/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `allman` - A code formatting and line reflowing toolkit. 🗒️🖋️ 2 | //! 3 | //! [`allman::Doc`][Doc] is a DOM-like structure that specifies how indentation, 4 | //! like breaking, and reflowing should be handled. It is a tree of [`Tag`]s 5 | //! that dictate layout information for the source code to format. 6 | //! 7 | //! For example, the Allman brace style (for which this crate is named) can 8 | //! be implemented as follows: 9 | //! 10 | //! ``` 11 | //! # use allman::*; 12 | //! // flat: fn foo() { ... } 13 | //! // 14 | //! // broken: 15 | //! // fn foo() 16 | //! // { 17 | //! // // ... 18 | //! // } 19 | //! Doc::new() 20 | //! .tag("fn") 21 | //! .tag(Tag::Space) 22 | //! .tag("foo") 23 | //! .tag("(").tag(")") 24 | //! .tag_with(Tag::Group(40), |doc| { 25 | //! doc 26 | //! 
.tag_if(Tag::Space, If::Flat) 27 | //! .tag_if(Tag::Break(1), If::Broken) 28 | //! .tag("{") 29 | //! .tag_if(Tag::Space, If::Flat) 30 | //! .tag_if(Tag::Break(1), If::Broken) 31 | //! .tag_with(Tag::Indent(2), |doc| { 32 | //! // Brace contents here... 33 | //! }) 34 | //! .tag_if(Tag::Space, If::Flat) 35 | //! .tag_if(Tag::Break(1), If::Broken) 36 | //! .tag("}"); 37 | //! }); 38 | //! ``` 39 | //! 40 | //! When calling [`Doc::render()`], the layout algorithm will determine whether 41 | //! [`Tag::Group`]s should be "broken", i.e., laid out with newlines inside. 42 | 43 | use core::slice; 44 | use std::cell::Cell; 45 | use std::fmt; 46 | use std::io; 47 | 48 | use byteyarn::YarnBox; 49 | 50 | mod layout; 51 | mod render; 52 | 53 | /// A source code document, which can be rendered as formatted text. 54 | /// 55 | /// A [`Doc`] is analogous to an HTML DOM, which is text along with markup for 56 | /// laying out that text. The difference being that rather than being converted 57 | /// into raster graphics by a browser engine, a [`Doc`] is rendered as a text 58 | /// file. 59 | #[derive(Clone, Default)] 60 | pub struct Doc<'text> { 61 | /// This is a flattened tree: each node specifies how many elements after it 62 | /// make up its children. The `Cursor` type implements walking this tree. 63 | tags: Vec>, 64 | } 65 | 66 | /// A condition that can be applied to a tag. 67 | /// 68 | /// If a condition is set on a tag, and the condition is false, the tag is 69 | /// treated as a no-op: its contents are not printed. 70 | #[derive(Clone, Copy, PartialEq, Eq, Debug)] 71 | pub enum If { 72 | /// True when the containing group is printed on one line. 73 | Flat, 74 | /// True when the containing group does not fit on one line. 75 | Broken, 76 | } 77 | 78 | /// Options for [`Doc::render()`]. 79 | pub struct Options { 80 | /// The maximum number of columns in a line. 81 | pub max_columns: usize, 82 | } 83 | 84 | impl<'text> Doc<'text> { 85 | /// Returns a new, empty document. 
86 | pub fn new() -> Self { 87 | Self::default() 88 | } 89 | 90 | /// Renders this document to the given writer. 91 | pub fn render( 92 | &self, 93 | out: &mut dyn io::Write, 94 | options: &Options, 95 | ) -> io::Result<()> { 96 | self.do_layout(options); 97 | render::Printer::new(out).render(self.cursor(), options, true) 98 | } 99 | 100 | /// Inserts a new self-closing tag into this doc. 101 | pub fn tag(&mut self, tag: impl Into>) -> &mut Self { 102 | self.tag_if_with(tag, None, |_| {}) 103 | } 104 | 105 | /// Inserts a new tag into this doc. The given closure can be used to insert 106 | /// tags into it. 107 | /// 108 | /// # Panics 109 | /// 110 | /// Panics if children are inserted and [`Tag::can_have_children()`] is false. 111 | pub fn tag_with( 112 | &mut self, 113 | tag: impl Into>, 114 | body: impl FnOnce(&mut Self), 115 | ) -> &mut Self { 116 | self.tag_if_with(tag, None, body) 117 | } 118 | 119 | /// Inserts a new tag into this doc, with an optional condition. 120 | pub fn tag_if( 121 | &mut self, 122 | tag: impl Into>, 123 | cond: impl Into>, 124 | ) -> &mut Self { 125 | self.tag_if_with(tag, cond, |_| {}) 126 | } 127 | 128 | /// Inserts a new tag into this doc, with an optional condition. The given 129 | /// closure can be used to insert tags into it. 130 | /// 131 | /// # Panics 132 | /// 133 | /// Panics if children are inserted and [`Tag::can_have_children()`] is false. 
134 | pub fn tag_if_with( 135 | &mut self, 136 | tag: impl Into>, 137 | cond: impl Into>, 138 | body: impl FnOnce(&mut Self), 139 | ) -> &mut Self { 140 | let tag = tag.into(); 141 | let compound = tag.can_have_children(); 142 | 143 | let consolidate = matches!( 144 | (&tag, self.tags.last().map(|t| &t.tag)), 145 | (Tag::Space, Some(Tag::Space)) 146 | ); 147 | 148 | let idx = self.tags.len(); 149 | self.tags.push(TagInfo { 150 | tag, 151 | len: 0, 152 | cond: cond.into(), 153 | measure: Cell::default(), 154 | }); 155 | body(self); 156 | 157 | let len = self.tags.len() - idx - 1; 158 | assert!( 159 | compound || len == 0, 160 | "inserted children for {:?}", 161 | &self.tags[idx].tag 162 | ); 163 | 164 | if consolidate { 165 | self.tags.pop(); 166 | } 167 | 168 | self.tags[idx].len = len; 169 | self 170 | } 171 | 172 | fn cursor(&self) -> Cursor { 173 | Cursor { iter: self.tags.iter() } 174 | } 175 | } 176 | 177 | #[derive(Clone, Debug)] 178 | struct TagInfo<'text> { 179 | tag: Tag<'text>, 180 | len: usize, 181 | cond: Option, 182 | 183 | measure: Cell, 184 | } 185 | 186 | #[derive(Copy, Clone, Default, Debug)] 187 | struct Measure { 188 | /// The number of columns this tag takes up when it is formatted on one line. 189 | /// 190 | /// None if its width should be treated as infinite. 191 | width: Option, 192 | column: usize, 193 | } 194 | 195 | /// An element of a [`Doc`]. 196 | #[derive(Clone, PartialEq, Eq, Debug)] 197 | pub enum Tag<'text> { 198 | /// Verbatim text. Line breaks inside of this text cause any groups that 199 | /// contain it to be broken. 200 | Text(YarnBox<'text, str>), 201 | 202 | /// Inserts a space, except if it would end a line. This is intended for 203 | /// ensuring lines do not have trailing whitespace. [`Tag::Text`] containing 204 | /// a space can be used to force a space at the end of a line. 205 | /// 206 | /// Consecutive space tags are consolidated into one. 
207 | Space, 208 | 209 | /// Inserts the given number of newlines, and breaks the surrounding group. 210 | /// 211 | /// Consecutive breaks are consolidated into one. A `Break(0)` can be used 212 | /// to force a break without inserting an actual newline. 213 | Break(usize), 214 | 215 | /// A sequence of tags that may either be rendered as one line, or broken into 216 | /// multiple lines if it does not fit. 217 | /// 218 | /// The group will also break itself if it is wider than the given width; 219 | /// use [`usize::MAX`] to disable this. 220 | Group(usize), 221 | 222 | /// Change indentation by the given number of columns. 223 | Indent(isize), 224 | } 225 | 226 | impl Tag<'_> { 227 | /// Returns whether or not this tag can contain child tags. 228 | pub fn can_have_children(&self) -> bool { 229 | matches!(self, Self::Group(..) | Self::Indent(..)) 230 | } 231 | } 232 | 233 | impl<'text, Y: Into>> From for Tag<'text> { 234 | fn from(yarn: Y) -> Self { 235 | Self::Text(yarn.into()) 236 | } 237 | } 238 | 239 | /// A cursor over a piece of a [`Doc`]. 240 | struct Cursor<'a> { 241 | iter: slice::Iter<'a, TagInfo<'a>>, 242 | } 243 | 244 | impl<'a> Iterator for Cursor<'a> { 245 | type Item = (&'a TagInfo<'a>, Cursor<'a>); 246 | 247 | fn next(&mut self) -> Option { 248 | let next = self.iter.next()?; 249 | if next.len == 0 { 250 | // Fast path that avoids an extra bounds check. 
251 | return Some((next, Cursor { iter: [].iter() })); 252 | } 253 | 254 | let (contents, rest) = self.iter.as_slice().split_at(next.len); 255 | self.iter = rest.iter(); 256 | Some((next, Cursor { iter: contents.iter() })) 257 | } 258 | } 259 | 260 | impl fmt::Debug for Doc<'_> { 261 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 262 | fn fmt( 263 | indent: usize, 264 | cursor: Cursor, 265 | f: &mut fmt::Formatter, 266 | ) -> fmt::Result { 267 | for (tag, cursor) in cursor { 268 | write!(f, "{:<1$}", "\n", indent + 1)?; 269 | match &tag.tag { 270 | Tag::Text(y) => write!(f, "{y:?}")?, 271 | Tag::Space => write!(f, "")?, 272 | Tag::Break(n) => write!(f, "")?, 273 | Tag::Group(w) => { 274 | if cursor.iter.as_slice().is_empty() { 275 | write!(f, "")?; 276 | continue; 277 | } 278 | 279 | write!(f, "")?; 280 | fmt(indent + 2, cursor, f)?; 281 | write!(f, "")?; 282 | } 283 | Tag::Indent(c) => { 284 | if cursor.iter.as_slice().is_empty() { 285 | write!(f, "")?; 286 | continue; 287 | } 288 | 289 | write!(f, "")?; 290 | fmt(indent + 2, cursor, f)?; 291 | write!(f, "")?; 292 | } 293 | } 294 | } 295 | write!(f, "{:<1$}", "\n", indent - 2 + 1)?; 296 | Ok(()) 297 | } 298 | 299 | fmt(0, self.cursor(), f) 300 | } 301 | } 302 | -------------------------------------------------------------------------------- /allman/src/render.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::io::Write; 3 | use std::mem; 4 | 5 | use crate::If; 6 | use crate::Options; 7 | use crate::Tag; 8 | 9 | /// An indentation-aware pretty-printer. 10 | pub struct Printer<'a> { 11 | out: &'a mut dyn io::Write, 12 | indent: usize, 13 | space: bool, 14 | newlines: usize, 15 | } 16 | 17 | impl<'a> Printer<'a> { 18 | /// Returns a new printer with the given output and options. 
19 | pub fn new(out: &'a mut dyn io::Write) -> Self { 20 | Self { 21 | out, 22 | indent: 0, 23 | space: false, 24 | newlines: 0, 25 | } 26 | } 27 | 28 | /// Updates the indentation level with the given diff. 29 | pub fn with_indent( 30 | &mut self, 31 | diff: isize, 32 | body: impl FnOnce(&mut Self) -> R, 33 | ) -> R { 34 | let prev = self.indent; 35 | self.indent = self.indent.saturating_add_signed(diff); 36 | let r = body(self); 37 | self.indent = prev; 38 | r 39 | } 40 | 41 | /// Writes indentation, if necessary. 42 | pub fn write_indent(&mut self) -> io::Result<()> { 43 | if mem::take(&mut self.newlines) == 0 { 44 | return Ok(()); 45 | } 46 | 47 | self.write_spaces(self.indent) 48 | } 49 | 50 | /// Writes len ASCII spaces to the output. 51 | pub fn write_spaces(&mut self, mut len: usize) -> io::Result<()> { 52 | const SPACES: &[u8; 32] = b" "; 53 | 54 | while len > SPACES.len() { 55 | self.out.write_all(SPACES)?; 56 | len -= SPACES.len(); 57 | } 58 | self.out.write_all(&SPACES[..len])?; 59 | Ok(()) 60 | } 61 | 62 | pub fn render( 63 | &mut self, 64 | cursor: crate::Cursor, 65 | _options: &Options, 66 | parent_is_broken: bool, 67 | ) -> io::Result<()> { 68 | for (tag, cursor) in cursor { 69 | let cond = match tag.cond { 70 | Some(If::Broken) => parent_is_broken, 71 | Some(If::Flat) => !parent_is_broken, 72 | None => true, 73 | }; 74 | 75 | match &tag.tag { 76 | Tag::Text(text) => { 77 | if cond { 78 | write!(self, "{text}")?; 79 | } 80 | } 81 | 82 | Tag::Space => self.space |= cond, 83 | Tag::Break(n) => { 84 | if cond { 85 | for _ in self.newlines..*n { 86 | writeln!(self)?; 87 | } 88 | } 89 | } 90 | 91 | Tag::Group(..) 
=> { 92 | let m = tag.measure.get(); 93 | self.render(cursor, _options, m.width.is_none())?; 94 | } 95 | 96 | Tag::Indent(columns) => { 97 | if cond { 98 | self.with_indent(*columns, |p| { 99 | p.render(cursor, _options, parent_is_broken) 100 | })?; 101 | } 102 | } 103 | } 104 | } 105 | 106 | Ok(()) 107 | } 108 | } 109 | 110 | impl io::Write for Printer<'_> { 111 | fn write(&mut self, buf: &[u8]) -> io::Result { 112 | if buf.is_empty() { 113 | return Ok(0); 114 | } 115 | 116 | if mem::take(&mut self.space) && !buf.starts_with(b"\n") { 117 | self.write_all(b" ")?; 118 | } 119 | 120 | for line in buf.split_inclusive(|&b| b == b'\n') { 121 | if line == b"\n" { 122 | self.newlines += 1; 123 | self.out.write_all(line)?; 124 | continue; 125 | } 126 | 127 | self.write_indent()?; 128 | self.out.write_all(line)?; 129 | if line.ends_with(b"\n") { 130 | self.newlines = 1; 131 | } 132 | } 133 | Ok(buf.len()) 134 | } 135 | 136 | fn flush(&mut self) -> io::Result<()> { 137 | self.out.flush() 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /buf-trait/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "buf-trait" 3 | version = "0.4.1" 4 | description = "abstract over [u8], str, and friends" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | zerocopy = "0.7" 15 | -------------------------------------------------------------------------------- /buf-trait/README.md: -------------------------------------------------------------------------------- 1 | # buf-trait 2 | 3 | The `Buf` trait. 4 | 5 | This crate provides a trait for abstracting over buffer-like types, such 6 | as `str` and `[u8]`. This is a much stronger property than, say, 7 | implementing [`AsRef<[u8]>`]. 
These are variable-length types that you might 8 | want to store as a raw byte buffer and then transmute to and from `&[u8]`. 9 | 10 | This crate provides all the functionality necessary for doing so safely, 11 | correctly, and in `const`. 12 | -------------------------------------------------------------------------------- /buf-trait/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! The `Buf` trait. 2 | //! 3 | //! This crate provides a trait for abstracting over buffer-like types, such 4 | //! as `str` and `[u8]`. This is a much stronger property than, say, 5 | //! implementing [`AsRef<[u8]>`]. These are variable-length types that you might 6 | //! want to store as a raw byte buffer and then transmute to and from `&[u8]`. 7 | //! 8 | //! This crate provides all the functionality necessary for doing so safely, 9 | //! correctly, and in `const`. 10 | 11 | #![no_std] 12 | 13 | use core::alloc::Layout; 14 | use core::mem; 15 | use core::slice; 16 | use core::slice::SliceIndex; 17 | 18 | /// A trait for abstracting over `str`, `[u8]`, and other byte-string-like 19 | /// types. 20 | /// 21 | /// See the [crate docs](self) for more information. 22 | /// 23 | /// # Safety 24 | /// 25 | /// This trait should only be implemented on types that are, essentially, a 26 | /// `repr(transpartent)` wrapper over a `[T]` for some Copy type `T`. 27 | /// 28 | /// In particular, `B: Buf` the requires that the following must hold: 29 | /// 30 | /// 1. Transmute `&B` to `&[T]`, where `T` is [`zerocopy::AsBytes`]. Transmute 31 | /// here is quite literal: `mem::transmute<&B, &[T]>` MUST be a valid way 32 | /// to convert between them. 33 | /// 34 | /// 2. Transmute `&[T]` to `&B` if the contents of that `&[T]` originated from 35 | /// operation (1). 36 | /// 37 | /// 3. Byte-copy `&B` to a `T`-aligned buffer, and then transmute 38 | /// the resulting `&[T]` to `&B` again. 39 | /// 40 | /// 4. 
`x == y` implies that `x.as_bytes() == y.as_bytes()`. 41 | /// 42 | /// 5. `B::from_bytes(&[])` and `B::from_bytes_mut(&mut [])` always produce 43 | /// valid values. 44 | /// 45 | /// Notably, none of `CStr`, `OsStr`, or `Path` can implement `Buf` because 46 | /// their layout as slices is not part of their interface. 47 | /// 48 | /// `T` may be zero-sized, but functions will panic in this case. 49 | pub unsafe trait Buf { 50 | /// The element type of the underlying type. This is used for computing e.g. 51 | /// alignment and stride. 52 | type Element: zerocopy::AsBytes + Copy; 53 | 54 | /// The length of this value, in elements. 55 | fn elem_len(&self) -> usize { 56 | mem::size_of_val(self) / mem::size_of::() 57 | } 58 | 59 | /// The length of this value, in bytes. 60 | fn byte_len(&self) -> usize { 61 | mem::size_of_val(self) 62 | } 63 | 64 | /// Creates a new empty [`Buf`]. 65 | fn empty<'a, B: ?Sized + Buf>() -> &'a B { 66 | empty() 67 | } 68 | 69 | /// Converts a reference to a [`Buf`] into its underlying bytes. 70 | fn as_bytes(&self) -> &[u8] { 71 | as_bytes(self) 72 | } 73 | 74 | /// Converts a byte slice to a reference to a [`Buf`]. 75 | /// 76 | /// # Safety 77 | /// 78 | /// `bytes` must have been either constructed via transmuting from `&Self`, 79 | /// or a bytewise copy of a `Self`. 80 | unsafe fn from_bytes(bytes: &[u8]) -> &Self { 81 | as_buf(bytes) 82 | } 83 | 84 | /// Converts a reference to a [`Buf`] into its underlying bytes. 85 | fn as_bytes_mut(&mut self) -> &mut [u8] { 86 | as_bytes_mut(self) 87 | } 88 | 89 | /// Converts a byte slice to a reference to a [`Buf`]. 90 | /// 91 | /// # Safety 92 | /// 93 | /// `bytes` must have been either constructed via transmuting from `&Self`, 94 | /// or a bytewise copy of a `Self`. 95 | unsafe fn from_bytes_mut(bytes: &mut [u8]) -> &mut Self { 96 | as_buf_mut(bytes) 97 | } 98 | 99 | /// Performs a slicing operation on `self` with respect to byte indices. 
100 | /// 101 | /// # Safety 102 | /// 103 | /// This function does not perform any checking beyonds bounds checking. For 104 | /// example, if called on `str`, this function may slice through a multi-byte 105 | /// Unicode scalar, producing a `&str` that violate's `str`'s validity 106 | /// constraints (i.e., Undefined Behavior). 107 | unsafe fn slice_along_bytes(&self, index: Idx) -> Option<&Self> 108 | where 109 | Idx: SliceIndex<[u8], Output = [u8]>, 110 | { 111 | self.as_bytes().get(index).map(|b| Self::from_bytes(b)) 112 | } 113 | } 114 | 115 | unsafe impl Buf for [T] { 116 | type Element = T; 117 | } 118 | 119 | unsafe impl Buf for str { 120 | type Element = u8; 121 | } 122 | 123 | /// Computes the layout of `buf`. 124 | /// 125 | /// This function is `const`, unlike [`Layout::for_value()`]. 126 | pub const fn layout_of(buf: &B) -> Layout { 127 | unsafe { 128 | Layout::from_size_align_unchecked( 129 | as_bytes(buf).len(), 130 | mem::align_of::(), 131 | ) 132 | } 133 | } 134 | 135 | /// Creates a new empty [`Buf`]. 136 | /// 137 | /// Unlike [`Buf::empty()`], this function is `const`. 138 | pub const fn empty<'a, B: ?Sized + Buf>() -> &'a B { 139 | unsafe { as_buf(&[]) } 140 | } 141 | 142 | /// Converts a reference to a [`Buf`] into its underlying bytes. 143 | /// 144 | /// Unlike [`Buf::as_bytes()`], this function is `const`. 145 | pub const fn as_bytes(buf: &B) -> &[u8] { 146 | assert!( 147 | mem::size_of::() > 0, 148 | "buf-trait: cannot use ZST as in type-erased context" 149 | ); 150 | 151 | let ptr = &buf as *const &_ as *const &[B::Element]; 152 | 153 | unsafe { 154 | let buf = *ptr; 155 | // SAFETY: The safety rules of `Buf` make this valid. 156 | let ptr = buf as *const _ as *const u8; 157 | let len = buf.len() * mem::size_of::(); 158 | slice::from_raw_parts(ptr, len) 159 | } 160 | } 161 | 162 | /// Converts a mutable reference to a [`Buf`] into its underlying bytes. 
163 | pub fn as_bytes_mut(mut buf: &mut B) -> &mut [u8] { 164 | assert!( 165 | mem::size_of::() > 0, 166 | "buf-trait: cannot use ZST as in type-erased context" 167 | ); 168 | 169 | let ptr = &mut buf as *mut &mut _ as *mut &mut [B::Element]; 170 | 171 | unsafe { 172 | let buf = &mut *ptr; 173 | // SAFETY: The safety rules of `Buf` make this valid. 174 | let ptr = buf as *mut _ as *mut u8; 175 | slice::from_raw_parts_mut(ptr, mem::size_of_val(&**buf)) 176 | } 177 | } 178 | 179 | /// Converts a byte slice to a reference to a [`Buf`]. 180 | /// 181 | /// Unlike [`Buf::from_bytes()`], this function is `const`. 182 | /// 183 | /// # Safety 184 | /// 185 | /// See [`Buf::from_bytes()`]. 186 | pub const unsafe fn as_buf(bytes: &[u8]) -> &B { 187 | assert!( 188 | mem::size_of::() > 0, 189 | "buf-trait: cannot use ZST as in type-erased context" 190 | ); 191 | 192 | let buf = slice::from_raw_parts( 193 | bytes.as_ptr().cast::(), 194 | bytes.len() / mem::size_of::(), 195 | ); 196 | 197 | let ptr = &buf as *const &[_] as *const &B; 198 | *ptr 199 | } 200 | 201 | /// Converts a mutable byte slice to a reference to a [`Buf`]. 202 | /// 203 | /// # Safety 204 | /// 205 | /// See [`Buf::from_bytes()`]. 
206 | pub unsafe fn as_buf_mut(bytes: &mut [u8]) -> &mut B { 207 | assert!( 208 | mem::size_of::() > 0, 209 | "buf-trait: cannot use ZST as in type-erased context" 210 | ); 211 | 212 | let mut buf = slice::from_raw_parts_mut( 213 | bytes.as_mut_ptr().cast::(), 214 | bytes.len() / mem::size_of::(), 215 | ); 216 | 217 | let ptr = &mut buf as *mut &mut [_] as *mut &mut B; 218 | *ptr 219 | } 220 | -------------------------------------------------------------------------------- /byteyarn/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "byteyarn" 3 | version = "0.5.1" 4 | description = "hyper-compact strings" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | buf-trait = { version = "0.4", path = "../buf-trait" } 15 | -------------------------------------------------------------------------------- /byteyarn/README.md: -------------------------------------------------------------------------------- 1 | # byteyarn 2 | 3 | `byteyarn` - Space-efficient byte strings 🧶🐈‍⬛ 4 | 5 | A `Yarn` is a highly optimized string type that provides a number of 6 | useful properties over `String`: 7 | 8 | * Always two pointers wide, so it is always passed into and out of functions 9 | in registers. 10 | * Small string optimization (SSO) up to 15 bytes on 64-bit architectures. 11 | * Can be either an owned buffer or a borrowed buffer (like `Cow`). 12 | * Can be upcast to `'static` lifetime if it was constructed from a 13 | known-static string. 14 | * `Option` has the same size and ABI as `Yarn`. 
15 | 16 | The main caveat is that `Yarn`s cannot be easily appended to, since they 17 | do not track an internal capacity, and the slice returned by 18 | `Yarn::as_slice()` does not have the same pointer stability properties as 19 | `String` (these are rarely needed, though). 20 | 21 | --- 22 | 23 | Yarns are useful for situations in which a copy-on-write string is necessary 24 | and most of the strings are relatively small. Although `Yarn` itself is 25 | not `Copy`, there is a separate `YarnRef` type that is. These types 26 | have equivalent representations, and can be cheaply cast between each other. 27 | 28 | The easiest way to create a yarn is with the `yarn!()` 29 | macro, which is similar to `format!()`. 30 | 31 | ```rust 32 | // Create a new yarn via `fmt`ing. 33 | let yarn = yarn!("Answer: {}", 42); 34 | 35 | // Convert that yarn into a reference. 36 | let ry: YarnRef = yarn.as_ref(); 37 | 38 | // Try up-casting the yarn into an "immortal yarn" without copying. 39 | let copy: YarnRef<'static, str> = ry.immortalize().unwrap(); 40 | 41 | assert_eq!(yarn, copy); 42 | ``` 43 | 44 | Yarns are intended for storing text, either as UTF-8 or as 45 | probably-UTF-8 bytes; `Yarn` and `Yarn` serve these purposes, 46 | and can be inter-converted with each other. The `Yarn::utf8_chunks()` 47 | function can be used to iterate over definitely-valid-UTF-8 chunks within 48 | a string. 49 | 50 | Both kinds of yarns can be `Debug`ed and `Display`ed, and will print out as 51 | strings would. In particular, invalid UTF-8 is converted into either `\xNN` 52 | escapes or replacement characters (for `Debug` and `Display` respectively). 
53 | 54 | ```rust 55 | let invalid = ByteYarn::from_byte(0xff); 56 | assert_eq!(format!("{invalid:?}"), r#""\xFF""#); 57 | assert_eq!(format!("{invalid}"), "�"); 58 | ``` 59 | -------------------------------------------------------------------------------- /byteyarn/src/convert.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Borrow; 2 | use std::fmt; 3 | use std::str::Utf8Error; 4 | 5 | use crate::YarnBox; 6 | use crate::YarnRef; 7 | 8 | #[derive(Clone, Debug)] 9 | pub struct NonCopy(()); 10 | 11 | impl fmt::Display for NonCopy { 12 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 13 | f.write_str("cannot convert yarn to non-owning yarn") 14 | } 15 | } 16 | 17 | impl<'a, Buf> TryFrom> for YarnRef<'a, Buf> 18 | where 19 | Buf: crate::Buf + ?Sized, 20 | { 21 | type Error = NonCopy; 22 | 23 | fn try_from(y: YarnBox<'a, Buf>) -> Result { 24 | y.to_ref().ok_or(NonCopy(())) 25 | } 26 | } 27 | 28 | impl<'a> TryFrom> for YarnBox<'a, str> { 29 | type Error = Utf8Error; 30 | 31 | fn try_from(y: YarnBox<'a, [u8]>) -> Result { 32 | y.to_utf8() 33 | } 34 | } 35 | 36 | impl<'a> TryFrom> for YarnRef<'a, str> { 37 | type Error = Utf8Error; 38 | 39 | fn try_from(y: YarnRef<'a, [u8]>) -> Result { 40 | y.to_utf8() 41 | } 42 | } 43 | 44 | impl<'a> From> for YarnBox<'a, [u8]> { 45 | fn from(y: YarnBox<'a, str>) -> Self { 46 | y.into_bytes() 47 | } 48 | } 49 | 50 | impl<'a> From> for YarnRef<'a, [u8]> { 51 | fn from(y: YarnRef<'a, str>) -> Self { 52 | y.into_bytes() 53 | } 54 | } 55 | 56 | impl From for YarnBox<'_, [u8]> { 57 | fn from(c: u8) -> Self { 58 | Self::from_byte(c) 59 | } 60 | } 61 | 62 | impl From for YarnRef<'_, [u8]> { 63 | fn from(c: u8) -> Self { 64 | Self::from_byte(c) 65 | } 66 | } 67 | 68 | impl From for YarnBox<'_, Buf> 69 | where 70 | Buf: crate::Buf + ?Sized, 71 | { 72 | fn from(c: char) -> Self { 73 | Self::from_char(c) 74 | } 75 | } 76 | 77 | impl From for YarnRef<'_, Buf> 78 | where 79 | Buf: 
crate::Buf + ?Sized, 80 | { 81 | fn from(c: char) -> Self { 82 | Self::from_char(c) 83 | } 84 | } 85 | 86 | impl<'a, Buf> From<&'a Buf> for YarnBox<'a, Buf> 87 | where 88 | Buf: crate::Buf + ?Sized, 89 | { 90 | fn from(s: &'a Buf) -> Self { 91 | Self::new(s) 92 | } 93 | } 94 | 95 | impl From<[T; N]> for YarnBox<'_, [T]> 96 | where 97 | [T]: crate::Buf, 98 | { 99 | fn from(s: [T; N]) -> Self { 100 | YarnBox::from(s.as_slice()).immortalize() 101 | } 102 | } 103 | 104 | impl<'a, Buf> From<&'a YarnBox<'_, Buf>> for YarnBox<'a, Buf> 105 | where 106 | Buf: crate::Buf + ?Sized, 107 | { 108 | fn from(s: &'a YarnBox<'a, Buf>) -> Self { 109 | s.aliased() 110 | } 111 | } 112 | 113 | impl<'a, Buf> From<&'a YarnBox<'_, Buf>> for YarnRef<'a, Buf> 114 | where 115 | Buf: crate::Buf + ?Sized, 116 | { 117 | fn from(s: &'a YarnBox<'a, Buf>) -> Self { 118 | s.as_ref() 119 | } 120 | } 121 | 122 | impl<'a, Buf> From<&'a Buf> for YarnRef<'a, Buf> 123 | where 124 | Buf: crate::Buf + ?Sized, 125 | { 126 | fn from(s: &'a Buf) -> Self { 127 | Self::new(s) 128 | } 129 | } 130 | 131 | impl From> for YarnBox<'_, Buf> 132 | where 133 | Buf: crate::Buf + ?Sized, 134 | { 135 | fn from(s: Box) -> Self { 136 | Self::from_box(s) 137 | } 138 | } 139 | 140 | impl From> for YarnBox<'_, [T]> 141 | where 142 | [T]: crate::Buf, 143 | { 144 | fn from(s: Vec) -> Self { 145 | Self::from_vec(s) 146 | } 147 | } 148 | 149 | impl From> for YarnBox<'_, [u8]> { 150 | fn from(s: Box) -> Self { 151 | Self::from_boxed_str(s) 152 | } 153 | } 154 | 155 | impl From for YarnBox<'_, str> { 156 | fn from(s: String) -> Self { 157 | Self::from_string(s) 158 | } 159 | } 160 | 161 | impl From for YarnBox<'_, [u8]> { 162 | fn from(s: String) -> Self { 163 | Self::from_string(s) 164 | } 165 | } 166 | 167 | impl From> for Box<[u8]> 168 | where 169 | Buf: crate::Buf + ?Sized, 170 | { 171 | fn from(y: YarnBox) -> Self { 172 | y.into_boxed_bytes() 173 | } 174 | } 175 | 176 | impl From> for Box<[u8]> 177 | where 178 | Buf: crate::Buf 
+ ?Sized, 179 | { 180 | fn from(y: YarnRef) -> Self { 181 | y.to_boxed_bytes() 182 | } 183 | } 184 | 185 | impl From> for Vec 186 | where 187 | Buf: crate::Buf + ?Sized, 188 | { 189 | fn from(y: YarnBox) -> Self { 190 | y.into_byte_vec() 191 | } 192 | } 193 | 194 | impl From> for Vec 195 | where 196 | Buf: crate::Buf + ?Sized, 197 | { 198 | fn from(y: YarnRef) -> Self { 199 | y.to_byte_vec() 200 | } 201 | } 202 | 203 | impl From> for Box { 204 | fn from(y: YarnBox) -> Self { 205 | y.into_boxed_str() 206 | } 207 | } 208 | 209 | impl From> for Box { 210 | fn from(y: YarnRef) -> Self { 211 | y.to_boxed_str() 212 | } 213 | } 214 | 215 | impl From> for String { 216 | fn from(y: YarnBox) -> Self { 217 | y.into_string() 218 | } 219 | } 220 | 221 | impl From> for String { 222 | fn from(y: YarnRef) -> Self { 223 | y.to_string() 224 | } 225 | } 226 | 227 | // AsRef / Borrow 228 | 229 | impl AsRef for YarnBox<'_, Buf> 230 | where 231 | Buf: crate::Buf + ?Sized, 232 | { 233 | fn as_ref(&self) -> &Buf { 234 | self.as_slice() 235 | } 236 | } 237 | 238 | impl AsRef for YarnRef<'_, Buf> 239 | where 240 | Buf: crate::Buf + ?Sized, 241 | { 242 | fn as_ref(&self) -> &Buf { 243 | self.as_slice() 244 | } 245 | } 246 | 247 | impl Borrow for YarnBox<'_, Buf> 248 | where 249 | Buf: crate::Buf + ?Sized, 250 | { 251 | fn borrow(&self) -> &Buf { 252 | self.as_slice() 253 | } 254 | } 255 | 256 | impl Borrow for YarnRef<'_, Buf> 257 | where 258 | Buf: crate::Buf + ?Sized, 259 | { 260 | fn borrow(&self) -> &Buf { 261 | self.as_slice() 262 | } 263 | } 264 | -------------------------------------------------------------------------------- /byteyarn/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `byteyarn` - Space-efficient byte strings 🧶🐈‍⬛ 2 | //! 3 | //! A [`Yarn`] is a highly optimized string type that provides a number of 4 | //! useful properties over [`String`]: 5 | //! 6 | //! 
* Always two pointers wide, so it is always passed into and out of functions 7 | //! in registers. 8 | //! * Small string optimization (SSO) up to 15 bytes on 64-bit architectures. 9 | //! * Can be either an owned buffer or a borrowed buffer (like [`Cow`]). 10 | //! * Can be upcast to `'static` lifetime if it was constructed from a 11 | //! known-static string. 12 | //! * `Option` has the same size and ABI as `Yarn`. 13 | //! 14 | //! The main caveat is that [`Yarn`]s cannot be easily appended to, since they 15 | //! do not track an internal capacity, and the slice returned by 16 | //! [`Yarn::as_slice()`] does not have the same pointer stability properties as 17 | //! [`String`] (these are rarely needed, though). 18 | //! 19 | //! --- 20 | //! 21 | //! Yarns are useful for situations in which a copy-on-write string is necessary 22 | //! and most of the strings are relatively small. Although [`Yarn`] itself is 23 | //! not [`Copy`], there is a separate [`YarnRef`] type that is. These types 24 | //! have equivalent representations, and can be cheaply cast between each other. 25 | //! 26 | //! The easiest way to create a yarn is with the [`yarn!()`] 27 | //! macro, which is similar to [`format!()`]. 28 | //! 29 | //! ``` 30 | //! # use byteyarn::*; 31 | //! // Create a new yarn via `fmt`ing. 32 | //! let yarn = yarn!("Answer: {}", 42); 33 | //! 34 | //! // Convert that yarn into a reference. 35 | //! let ry: YarnRef = yarn.as_ref(); 36 | //! 37 | //! // Try up-casting the yarn into an "immortal yarn" without copying. 38 | //! let copy: YarnRef<'static, str> = ry.immortalize().unwrap(); 39 | //! 40 | //! assert_eq!(yarn, copy); 41 | //! ``` 42 | //! 43 | //! Yarns are intended for storing text, either as UTF-8 or as 44 | //! probably-UTF-8 bytes; [`Yarn`] and [`Yarn<[u8]>`] serve these purposes, 45 | //! and can be inter-converted with each other. The [`Yarn::utf8_chunks()`] 46 | //! function can be used to iterate over definitely-valid-UTF-8 chunks within 47 | //! 
a string. 48 | //! 49 | //! Both kinds of yarns can be `Debug`ed and `Display`ed, and will print out as 50 | //! strings would. In particular, invalid UTF-8 is converted into either `\xNN` 51 | //! escapes or replacement characters (for `Debug` and `Display` respectively). 52 | //! 53 | //! ``` 54 | //! # use byteyarn::*; 55 | //! let invalid = ByteYarn::from_byte(0xff); 56 | //! assert_eq!(format!("{invalid:?}"), r#""\xFF""#); 57 | //! assert_eq!(format!("{invalid}"), "�"); 58 | //! ``` 59 | //! 60 | //! That said, they will support anything that implements the [`Buf`] trait. 61 | //! For example, you can have 16-bit yarns: 62 | //! 63 | //! ``` 64 | //! # use byteyarn::*; 65 | //! 66 | //! let sixteen = YarnBox::<[u16]>::from([1, 2, 3, 4, 5, 6, 8, 9, 10, 11]); 67 | //! assert_eq!(sixteen[2], 3u16); 68 | //! ``` 69 | 70 | #![deny(missing_docs)] 71 | 72 | #[cfg(doc)] 73 | use std::borrow::Cow; 74 | 75 | mod boxed; 76 | mod convert; 77 | mod raw; 78 | mod reffed; 79 | mod utf8; 80 | 81 | pub use boxed::YarnBox; 82 | pub use reffed::YarnRef; 83 | pub use utf8::Utf8Chunks; 84 | 85 | pub use buf_trait::Buf; 86 | 87 | // Macro stuff. 88 | #[doc(hidden)] 89 | pub mod m { 90 | pub extern crate std; 91 | } 92 | 93 | /// An optimized Unicode string. 94 | /// 95 | /// See [`YarnBox`] for full type documentation. 96 | pub type Yarn = YarnBox<'static, str>; 97 | 98 | /// An optimized raw byte string. 99 | /// 100 | /// See [`YarnBox`] for full type documentation. 101 | pub type ByteYarn = YarnBox<'static, [u8]>; 102 | 103 | /// Similar to [`format!()`], but returns a [`Yarn`], instead. 104 | /// 105 | /// This macro calls out to [`Yarn::from_fmt()`] internally. 106 | #[macro_export] 107 | macro_rules! 
yarn { 108 | ($($args:tt)*) => { 109 | $crate::Yarn::from_fmt($crate::m::std::format_args!($($args)*)) 110 | }; 111 | } 112 | -------------------------------------------------------------------------------- /byteyarn/src/utf8.rs: -------------------------------------------------------------------------------- 1 | //! UTF-8 utilities not provided by the standard library. 2 | 3 | use std::str; 4 | 5 | #[cfg(doc)] 6 | use crate::*; 7 | 8 | /// An iterator over UTF-8 chunks in a byte buffer. 9 | /// 10 | /// Any time non-UTF-8 bytes are encountered, they are returned as `Err`s 11 | /// from the iterator. 12 | /// 13 | /// See [`Yarn::utf8_chunks()`]. 14 | #[derive(Copy, Clone)] 15 | pub struct Utf8Chunks<'a> { 16 | buf: &'a [u8], 17 | invalid_prefix: Option, 18 | } 19 | 20 | impl<'a> Utf8Chunks<'a> { 21 | /// Returns the rest of the underlying byte buffer that has not been yielded. 22 | pub fn rest(self) -> &'a [u8] { 23 | self.buf 24 | } 25 | 26 | pub(crate) fn new(buf: &'a [u8]) -> Self { 27 | Self { buf, invalid_prefix: None } 28 | } 29 | 30 | unsafe fn take(&mut self, len: usize) -> &'a [u8] { 31 | debug_assert!(len <= self.buf.len()); 32 | 33 | let pre = self.buf.get_unchecked(..len); 34 | self.buf = self.buf.get_unchecked(len..); 35 | pre 36 | } 37 | } 38 | 39 | impl<'a> Iterator for Utf8Chunks<'a> { 40 | type Item = Result<&'a str, &'a [u8]>; 41 | 42 | fn next(&mut self) -> Option { 43 | if let Some(prefix) = self.invalid_prefix.take() { 44 | let bytes = unsafe { 45 | // SAFETY: self.invalid_prefix is only ever written to in this function, 46 | // where it gets set to a value that is known to be in-range. 47 | self.take(prefix) 48 | }; 49 | 50 | return Some(Err(bytes)); 51 | } 52 | 53 | if self.buf.is_empty() { 54 | return None; 55 | } 56 | 57 | let utf8 = match str::from_utf8(self.buf) { 58 | Ok(utf8) => { 59 | self.buf = &[]; 60 | utf8 61 | } 62 | Err(e) => { 63 | let bytes = unsafe { 64 | // SAFETY: valid_up_to() always returns a value in range of self.buf. 
65 | self.take(e.valid_up_to()) 66 | }; 67 | 68 | let utf8 = match cfg!(debug_assertions) { 69 | true => str::from_utf8(bytes).unwrap(), 70 | 71 | // SAFETY: the value of valid_up_to() delimits valid UTF-8, by 72 | // definition. 73 | false => unsafe { str::from_utf8_unchecked(bytes) }, 74 | }; 75 | 76 | self.invalid_prefix = match e.error_len() { 77 | Some(len) => Some(len), 78 | None => Some(self.buf.len()), 79 | }; 80 | 81 | if utf8.is_empty() { 82 | return self.next(); 83 | } 84 | 85 | utf8 86 | } 87 | }; 88 | 89 | Some(Ok(utf8)) 90 | } 91 | } 92 | 93 | /// `const`-enabled UTF-8 encoding. 94 | /// 95 | /// Returns the encoded bytes in a static array, and the number of those bytes 96 | /// that are pertinent. 97 | pub const fn encode_utf8(c: char) -> ([u8; 4], usize) { 98 | const CONT: u8 = 0b1000_0000; 99 | const CONT_MASK: u8 = !CONT >> 1; 100 | 101 | const B1: u8 = 0b0000_0000; 102 | const B1_MASK: u8 = !B1 >> 1; 103 | 104 | const B2: u8 = 0b1100_0000; 105 | const B2_MASK: u8 = !B2 >> 1; 106 | 107 | const B3: u8 = 0b1110_0000; 108 | const B3_MASK: u8 = !B3 >> 1; 109 | 110 | const B4: u8 = 0b1111_0000; 111 | const B4_MASK: u8 = !B4 >> 1; 112 | 113 | const fn sextet(c: char, idx: u32) -> u8 { 114 | ((c as u32) >> (idx * 6)) as u8 115 | } 116 | 117 | match c.len_utf8() { 118 | 1 => ([sextet(c, 0) & B1_MASK | B1, 0, 0, 0], 1), 119 | 2 => { 120 | ([sextet(c, 1) & B2_MASK | B2, sextet(c, 0) & CONT_MASK | CONT, 0, 0], 2) 121 | } 122 | 3 => ( 123 | [ 124 | sextet(c, 2) & B3_MASK | B3, 125 | sextet(c, 1) & CONT_MASK | CONT, 126 | sextet(c, 0) & CONT_MASK | CONT, 127 | 0, 128 | ], 129 | 3, 130 | ), 131 | 4 => ( 132 | [ 133 | sextet(c, 3) & B4_MASK | B4, 134 | sextet(c, 2) & CONT_MASK | CONT, 135 | sextet(c, 1) & CONT_MASK | CONT, 136 | sextet(c, 0) & CONT_MASK | CONT, 137 | ], 138 | 4, 139 | ), 140 | _ => unreachable!(), 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /gilded/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "gilded" 3 | version = "0.1.0" 4 | description = "Dead simple golden tests" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | gilded-attr = { path = "attr" } 15 | 16 | allman = { path = "../allman" } 17 | byteyarn = { path = "../byteyarn" } 18 | 19 | camino = "1.1.9" 20 | diffy = "0.4.0" 21 | nu-glob = "0.101.0" 22 | unicode-width = "0.2.0" 23 | -------------------------------------------------------------------------------- /gilded/README.md: -------------------------------------------------------------------------------- 1 | # gilded 2 | 3 | `gilded` - Easy-peesy golden testing. 👑 4 | 5 | ## Why Golden Testing? 6 | 7 | A "golden test" is a test that transforms data in some way, and validates it 8 | by diffing it against an expected result: the "golden". 9 | 10 | This is especially useful for testing scenarios that consume an input file 11 | (say, a source code file, for testing a compiler) and generate structured, 12 | diffable textual output (such as JSON or CSV data, or even a `Debug`). 13 | 14 | Golden tests are best for cases where the output must be deterministic, and 15 | where capturing fine-grained detail is valuable. 16 | 17 | Because they simply compare the result to an expected value byte-for-byte, 18 | changes can quickly regenerate the test output by using the output of the 19 | test itself. Diffs can be examined in code review directly. 20 | 21 | This crate also provides the `doc::Doc` type, enabling quick-and-dirty 22 | construction of highly readable structured tree data for golden outputs. 23 | 24 | ## Defining a Test 25 | 26 | A `gilded` test is defined like so: 27 | 28 | ```rust 29 | #[gilded::test("testdata/**/*.txt")] 30 | fn my_test(test: &gilded::Test) { 31 | // ... 
32 | } 33 | ``` 34 | 35 | `my_test` will be run as a separate unit test for every file (relative to 36 | the crate root) which matches the glob passed to the attribute. The input 37 | file's path and contents can be accessed through the `Test` accessors. 38 | 39 | To specify golden outputs, use `Test::outputs()`. This specifies the 40 | file extension for the golden, and its computed contents. The extension is 41 | used to construct the path of the result. If the input is `foo/bar.txt`, and 42 | the extension for this output is `csv`, the output will be read/written to 43 | `foo/bar.csv`. 44 | 45 | Panicking within the test body will fail the test as normal, tests should 46 | not contain output assertions; those are handled by the framework. 47 | 48 | ## Generating Goldens 49 | 50 | Once the test is created, simply set the `GILDED_REGENERATE` environment 51 | variable: `GILDED_REGENERATE=1 cargo test`. 52 | 53 | To regenerate a specific test, simply pass its name as a filter to the test. 54 | See `cargo test -- --help` for available flags.` 55 | 56 | Regenerating goldens will cause a `GILDED_CHANGED` file to be crated at the 57 | crate root, which will cause all `gilded` tests in the crate to fail until 58 | it is deleted. Deleting it forces the user to acknowledge that goldens have 59 | been regenerated, to avoid blindly committing them. 60 | 61 | ## Known Issues 62 | 63 | Golden tests can run under MIRI but have extremely large overhead. For the 64 | time being, they are `#[cfg]`'d out in MIRI mode. 
65 | -------------------------------------------------------------------------------- /gilded/attr/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "gilded-attr" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | proc2decl = { path = "../../proc2decl" } 8 | 9 | [lib] 10 | path = "lib.rs" 11 | proc-macro = true -------------------------------------------------------------------------------- /gilded/attr/lib.rs: -------------------------------------------------------------------------------- 1 | //! Implementation detail of `gilded`. 2 | 3 | proc2decl::fs_bridge! { 4 | /// Turns a function into a golden test suite. 5 | /// 6 | /// See the [crate documentation][crate] for more information on how to use 7 | /// this attribute. 8 | /// 9 | /// [crate]: https://docs.rs/gilded 10 | macro #[test] => gilded::__test__; 11 | } 12 | -------------------------------------------------------------------------------- /gilded/src/doc/json.rs: -------------------------------------------------------------------------------- 1 | //! Output implementation for JSON. 
2 | 3 | use std::fmt; 4 | 5 | use allman::If; 6 | use allman::Tag; 7 | use byteyarn::YarnRef; 8 | 9 | use crate::doc::Doc; 10 | use crate::doc::Elem; 11 | use crate::doc::Options; 12 | 13 | pub fn build<'t>(options: &Options, doc: &Doc<'t>, out: &mut allman::Doc<'t>) { 14 | let is_array = doc.entries.iter().all(|(k, _)| k.is_none()); 15 | if is_array { 16 | out.tag_with(Tag::Group(options.max_array_width), |out| { 17 | out 18 | .tag("[") 19 | .tag_with(Tag::Indent(options.tab_width as isize), |out| { 20 | for (i, (_, entry)) in doc.entries.iter().enumerate() { 21 | if i > 0 { 22 | out.tag(","); 23 | out.tag_if(Tag::Space, If::Flat); 24 | } 25 | out.tag_if("\n", If::Broken); 26 | value(options, entry, out); 27 | } 28 | }) 29 | .tag_if("\n", If::Broken) 30 | .tag("]"); 31 | }); 32 | } else { 33 | out.tag_with(Tag::Group(options.max_object_width), |out| { 34 | out 35 | .tag("{") 36 | .tag_with(Tag::Indent(options.tab_width as isize), |out| { 37 | for (i, (key, entry)) in doc.entries.iter().enumerate() { 38 | if i > 0 { 39 | out.tag(","); 40 | out.tag_if(Tag::Space, If::Flat); 41 | } 42 | out 43 | .tag_if("\n", If::Broken) 44 | .tag( 45 | Escape(key.as_deref().unwrap_or_default().as_bytes()) 46 | .to_string(), 47 | ) 48 | .tag(":") 49 | .tag(Tag::Space); 50 | value(options, entry, out); 51 | } 52 | }) 53 | .tag_if("\n", If::Broken) 54 | .tag("}"); 55 | }); 56 | } 57 | } 58 | 59 | fn value<'t>(options: &Options, v: &Elem<'t>, out: &mut allman::Doc<'t>) { 60 | match v { 61 | Elem::Bool(v) => { 62 | out.tag(v.to_string()); 63 | } 64 | Elem::Int(v) => { 65 | out.tag(v.to_string()); 66 | } 67 | Elem::UInt(v) => { 68 | out.tag(v.to_string()); 69 | } 70 | Elem::Fp(v) => { 71 | out.tag(v.to_string()); 72 | } 73 | Elem::String(v) => { 74 | out.tag(Escape(v).to_string()); 75 | } 76 | Elem::Doc(v) => build(options, v, out), 77 | } 78 | } 79 | 80 | /// A displayable that prints the given data as a JSON string. 
81 | pub struct Escape<'a>(&'a [u8]); 82 | 83 | impl fmt::Display for Escape<'_> { 84 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 85 | write!(f, "\"")?; 86 | for chunk in YarnRef::new(self.0).utf8_chunks() { 87 | let chunk = match chunk { 88 | Ok(s) => s, 89 | Err(e) => { 90 | for b in e { 91 | write!(f, "<{b:02x}>")?; 92 | } 93 | continue; 94 | } 95 | }; 96 | 97 | for c in chunk.chars() { 98 | match c { 99 | '\n' => write!(f, "\\n")?, 100 | '\r' => write!(f, "\\r")?, 101 | '\t' => write!(f, "\\t")?, 102 | '\\' => write!(f, "\\\\")?, 103 | '\"' => write!(f, "\\\"")?, 104 | c if !c.is_control() => write!(f, "{c}")?, 105 | c => { 106 | for u in c.encode_utf16(&mut [0, 0]) { 107 | write!(f, "\\u{u:04x}")?; 108 | } 109 | } 110 | } 111 | } 112 | } 113 | 114 | write!(f, "\"") 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /gilded/src/doc/mod.rs: -------------------------------------------------------------------------------- 1 | //! Readable test output generating from tree-structured data. 2 | 3 | use std::io; 4 | use std::io::Write; 5 | 6 | use byteyarn::YarnBox; 7 | 8 | mod json; 9 | mod yaml; 10 | 11 | /// A tree-shaped document that can be pretty-printed, for generating goldens. 12 | /// 13 | /// Golden tests that output tree-shaped data can use `Doc` to generate 14 | /// diff-friendly, readable output. 15 | #[derive(Clone)] 16 | pub struct Doc<'a> { 17 | entries: Vec<(Option>, Elem<'a>)>, 18 | } 19 | 20 | /// The format output to use when rendering a document. 21 | #[derive(Clone, Copy, PartialEq, Eq, Debug)] 22 | pub enum Format { 23 | /// Output as YAML. 24 | Yaml, 25 | /// Output as JSON. 26 | Json, 27 | } 28 | 29 | impl Default for Format { 30 | fn default() -> Self { 31 | Self::Yaml 32 | } 33 | } 34 | 35 | /// Options for rendering a [`Doc`] as a string. 36 | pub struct Options { 37 | // The format to output in; defaults to YAML. 
38 | pub format: Format, 39 | // The number of spaces to use for indentation. 40 | pub tab_width: usize, 41 | 42 | // The maximum number of columns to have before wrapping occurs. 43 | pub max_columns: usize, 44 | // The maximum number of columns for a one-line array. 45 | pub max_array_width: usize, 46 | // The maximum number of columns for a one-line object. 47 | pub max_object_width: usize, 48 | } 49 | 50 | impl Default for Options { 51 | fn default() -> Self { 52 | Self { 53 | format: Format::default(), 54 | tab_width: 2, 55 | max_columns: 80, 56 | max_array_width: 50, 57 | max_object_width: 40, 58 | } 59 | } 60 | } 61 | 62 | /// A type which can be an element of a [`Doc`]. 63 | /// 64 | /// All of the primitive number types and types which convert to `YarnBox<[u8]>` 65 | /// can be used as `Doc` values. `Option` for `T: DocValue` can also be 66 | /// used, and will only be inserted if it is `Some`. 67 | pub trait Value<'a> { 68 | fn append_to(self, doc: &mut Doc<'a>); 69 | } 70 | 71 | impl<'a> Doc<'a> { 72 | /// Returns a new, empty `Doc`. 73 | pub fn new() -> Self { 74 | Self { entries: Vec::new() } 75 | } 76 | 77 | /// Returns a new `Doc` with a single entry. 78 | pub fn single( 79 | name: impl Into>, 80 | value: impl Value<'a>, 81 | ) -> Self { 82 | Self::new().entry(name, value) 83 | } 84 | 85 | /// Appends a sequence of values to this document. 86 | pub fn push(mut self, elements: impl IntoIterator>) -> Self { 87 | for e in elements { 88 | e.append_to(&mut self); 89 | } 90 | self 91 | } 92 | 93 | /// Appends an entry with the given name to this document. 94 | pub fn entry( 95 | mut self, 96 | name: impl Into>, 97 | value: impl Value<'a>, 98 | ) -> Self { 99 | let prev = self.entries.len(); 100 | value.append_to(&mut self); 101 | if prev < self.entries.len() { 102 | self.entries.last_mut().unwrap().0 = Some(name.into()); 103 | } 104 | self 105 | } 106 | 107 | /// Appends an entry which is an array with the given elements. 
108 | pub fn array( 109 | self, 110 | name: impl Into>, 111 | elements: impl IntoIterator>, 112 | ) -> Self { 113 | self.entry(name, Self::new().push(elements)) 114 | } 115 | 116 | // Converts this document into a string, using the given options. 117 | pub fn to_string(&self, options: &Options) -> String { 118 | let mut out = Vec::new(); 119 | let _ = self.render(&mut out, options); 120 | String::from_utf8(out).unwrap() 121 | } 122 | 123 | /// Converts this document into a string, writing it to the given output with 124 | /// the given options. 125 | pub fn render( 126 | &self, 127 | out: &mut dyn Write, 128 | options: &Options, 129 | ) -> io::Result<()> { 130 | let mut doc = allman::Doc::new(); 131 | 132 | match options.format { 133 | Format::Yaml => yaml::build( 134 | yaml::Args { options, root: true, in_list: false }, 135 | self, 136 | &mut doc, 137 | ), 138 | Format::Json => json::build(options, self, &mut doc), 139 | } 140 | 141 | doc.render(out, &allman::Options { max_columns: options.max_columns }) 142 | } 143 | } 144 | 145 | impl Default for Doc<'_> { 146 | fn default() -> Self { 147 | Self::new() 148 | } 149 | } 150 | 151 | #[derive(Clone)] 152 | enum Elem<'a> { 153 | Bool(bool), 154 | Int(i128), 155 | UInt(u128), 156 | Fp(f64), 157 | String(YarnBox<'a>), 158 | Doc(Doc<'a>), 159 | } 160 | 161 | impl<'a, T: Value<'a>> Value<'a> for Option { 162 | fn append_to(self, doc: &mut Doc<'a>) { 163 | if let Some(v) = self { 164 | v.append_to(doc) 165 | } 166 | } 167 | } 168 | impl<'a> Value<'a> for Doc<'a> { 169 | fn append_to(self, doc: &mut Doc<'a>) { 170 | doc.entries.push((None, Elem::Doc(self))) 171 | } 172 | } 173 | 174 | macro_rules! impl_from { 175 | ($({$($T:ty),*} => $V:ident,)*) => {$($( 176 | impl<'a> Value<'a> for $T { 177 | fn append_to(self, doc: &mut Doc<'a>) { 178 | doc.entries.push((None, Elem::$V(self as _))) 179 | } 180 | } 181 | )*)*} 182 | } 183 | 184 | impl_from! 
{ 185 | {bool} => Bool, 186 | {i8, i16, i32, i64, i128, isize} => Int, 187 | {u8, u16, u32, u64, u128, usize} => UInt, 188 | {f32, f64} => Fp, 189 | } 190 | 191 | macro_rules! impl_from_yarn { 192 | ($(for<$lt:lifetime> $($T:ty),* => $U:ty,)*) => {$($( 193 | impl<$lt> Value<$lt> for $T { 194 | fn append_to(self, doc: &mut Doc<$lt>) { 195 | doc.entries.push((None, Elem::String(<$U>::from(self).into_bytes()))) 196 | } 197 | } 198 | )*)*} 199 | } 200 | 201 | impl_from_yarn! { 202 | for<'a> &'a [u8], Vec, YarnBox<'a, [u8]> => YarnBox<'a, [u8]>, 203 | for<'a> char, &'a str, String, YarnBox<'a, str> => YarnBox<'a, str>, 204 | } 205 | -------------------------------------------------------------------------------- /gilded/src/doc/yaml.rs: -------------------------------------------------------------------------------- 1 | //! Output implementation for YAML. 2 | 3 | use std::fmt; 4 | 5 | use allman::If; 6 | use allman::Tag; 7 | use byteyarn::YarnRef; 8 | 9 | use crate::doc::Doc; 10 | use crate::doc::Elem; 11 | use crate::doc::Options; 12 | 13 | pub struct Args<'a> { 14 | pub root: bool, 15 | pub in_list: bool, 16 | pub options: &'a Options, 17 | } 18 | 19 | pub fn build<'t>(args: Args, doc: &'t Doc<'t>, out: &mut allman::Doc<'t>) { 20 | let is_array = doc.entries.iter().all(|(k, _)| k.is_none()); 21 | if is_array { 22 | out.tag_with(Tag::Group(args.options.max_array_width), |out| { 23 | out.tag_if("[", If::Flat); 24 | if !args.root { 25 | out.tag_if(Tag::Break(1), If::Broken); 26 | } 27 | for (i, (_, entry)) in doc.entries.iter().enumerate() { 28 | if i > 0 { 29 | out.tag_if(",", If::Flat); 30 | out.tag_if(Tag::Space, If::Flat); 31 | } 32 | 33 | out.tag_if("-", If::Broken); 34 | out.tag_if(Tag::Space, If::Broken); 35 | //out.tag_with(Tag::Indent(args.options.tab_width as isize), |out| { 36 | value(Args { root: false, in_list: true, ..args }, entry, out); 37 | //}); 38 | 39 | out.tag_if(Tag::Break(1), If::Broken); 40 | } 41 | out.tag_if("]", If::Flat); 42 | }); 43 | } else 
{ 44 | out.tag_with(Tag::Group(args.options.max_object_width), |out| { 45 | let in_map = !args.root && !args.in_list; 46 | if in_map { 47 | out.tag_if(Tag::Break(1), If::Broken); 48 | } 49 | out 50 | .tag_if("{", If::Flat) 51 | .tag_with(Tag::Indent(args.options.tab_width as isize), |out| { 52 | for (i, (key, entry)) in doc.entries.iter().enumerate() { 53 | if i > 0 { 54 | out.tag_if(",", If::Flat); 55 | out.tag_if(Tag::Space, If::Flat); 56 | } 57 | 58 | let key_bytes = key.as_deref().unwrap_or_default().as_bytes(); 59 | let ident = is_ident(key_bytes); 60 | 61 | if let Some(ident) = ident { 62 | out.tag(ident.to_box()); 63 | 64 | let mut entry = entry; 65 | while let Elem::Doc(d) = entry { 66 | let [(Some(k), v)] = d.entries.as_slice() else { break }; 67 | let Some(ident) = is_ident(k.as_bytes()) else { break }; 68 | 69 | out.tag(".").tag(ident.to_box()); 70 | entry = v; 71 | } 72 | } else { 73 | out.tag(Escape(key_bytes).to_string()); 74 | } 75 | out.tag(":").tag(Tag::Space); 76 | 77 | value(Args { root: false, in_list: false, ..args }, entry, out); 78 | out.tag_if(Tag::Break(1), If::Broken); 79 | } 80 | }) 81 | .tag_if("}", If::Flat); 82 | }); 83 | } 84 | } 85 | 86 | fn value<'t>(args: Args, v: &'t Elem<'t>, out: &mut allman::Doc<'t>) { 87 | match v { 88 | Elem::Bool(v) => { 89 | out.tag(v.to_string()); 90 | } 91 | Elem::Int(v) => { 92 | out.tag(v.to_string()); 93 | } 94 | Elem::UInt(v) => { 95 | out.tag(v.to_string()); 96 | } 97 | Elem::Fp(v) => { 98 | out.tag(v.to_string()); 99 | } 100 | Elem::String(v) => { 101 | if is_raw_string(v.as_ref()) { 102 | out.tag("|").tag(Tag::Break(1)).tag_with( 103 | Tag::Indent(args.options.tab_width as isize), 104 | |out| { 105 | out.tag(v.as_ref().to_utf8().unwrap().to_box()); 106 | }, 107 | ); 108 | return; 109 | } 110 | out.tag(Escape(v).to_string()); 111 | } 112 | Elem::Doc(v) => build(args, v, out), 113 | } 114 | } 115 | 116 | /// A displayable that prints the given data as a JSON string. 
117 | pub struct Escape<'a>(&'a [u8]); 118 | 119 | impl fmt::Display for Escape<'_> { 120 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 121 | write!(f, "\"")?; 122 | for chunk in YarnRef::new(self.0).utf8_chunks() { 123 | let chunk = match chunk { 124 | Ok(s) => s, 125 | Err(e) => { 126 | for b in e { 127 | write!(f, "\\x{b:02x}")?; 128 | } 129 | continue; 130 | } 131 | }; 132 | 133 | for c in chunk.chars() { 134 | match c { 135 | '\0' => write!(f, "\\0")?, 136 | '\n' => write!(f, "\\n")?, 137 | '\r' => write!(f, "\\r")?, 138 | '\t' => write!(f, "\\t")?, 139 | '\\' => write!(f, "\\\\")?, 140 | '\"' => write!(f, "\\\"")?, 141 | c if !c.is_control() => write!(f, "{c}")?, 142 | c => { 143 | for u in c.encode_utf16(&mut [0, 0]) { 144 | write!(f, "\\u{u:04x}")?; 145 | } 146 | } 147 | } 148 | } 149 | } 150 | 151 | write!(f, "\"") 152 | } 153 | } 154 | 155 | fn is_raw_string(data: YarnRef<[u8]>) -> bool { 156 | data.to_utf8().is_ok_and(|s| { 157 | s.contains("\n") && s.chars().all(|c| c == '\n' || !c.is_control()) 158 | }) 159 | } 160 | 161 | fn is_ident(data: &[u8]) -> Option> { 162 | fn is_start(c: char) -> bool { 163 | c.is_alphabetic() || c == '_' || c == '-' 164 | } 165 | fn is_continue(c: char) -> bool { 166 | is_start(c) || c.is_numeric() 167 | } 168 | 169 | let s = YarnRef::from(data).to_utf8().ok()?; 170 | 171 | let mut chars = s.chars(); 172 | let is_ident = chars.next().is_some_and(is_start) && chars.all(is_continue); 173 | is_ident.then_some(s) 174 | } 175 | -------------------------------------------------------------------------------- /ilex/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ilex" 3 | version = "0.6.0" 4 | description = "quick and easy lexers for C-like languages" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 
13 | [dependencies] 14 | byteyarn = { version = "0.5", path = "../byteyarn" } 15 | gilded = { path = "../gilded" } 16 | twie = { version = "0.5", path = "../twie" } 17 | 18 | ilex-attr = { version = "0.5.0", path = "attr" } 19 | 20 | annotate-snippets = "0.10.0" 21 | camino = "1.1.6" 22 | num-traits = "0.2.17" 23 | regex-syntax = "0.8.2" 24 | regex-automata = "0.4.3" # Bless Andrew for his patience. 25 | rustc_apfloat = "0.2.0" # By eddyb's recommendation. 26 | unicode-xid = "0.2.4" 27 | bitvec = "1.0.1" 28 | -------------------------------------------------------------------------------- /ilex/README.md: -------------------------------------------------------------------------------- 1 | # ilex 2 | 3 | `ilex` - painless lexing for C-like languages. ⛩️🎋 4 | 5 | This crate provides a general lexer for a "C-like language", also sometimes 6 | called a "curly brace language". It is highly configurable and has comprehensive 7 | [`Span`] support. This library is based off of a specific parser stack I have 8 | copied from project to project and re-written verbatim many times over in my 9 | career. 10 | 11 | Internally it uses lazy DFAs from [`regex_automata`] for much of the 12 | heavy-lifting, so it should be reasonably performant, although speed is not a 13 | priority. 14 | 15 | The goals of this library are as follows. 16 | 17 | - **Predictably greedy.** Always parse the longest token at any particular 18 | position, with user-defined disambiguation between same-length tokens. 19 | 20 | - **Easy to set up.** Writing lexers is a bunch of pain, and they all look the 21 | same more-or-less, and you want to be "in and out". 22 | 23 | - **Flexible.** It can lex a reasonably large number of grammars. It should be 24 | able to do any language with a cursory resemblance to C, such as Rust, 25 | JavaScript (and JSON), LLVM IR, Go, Protobuf, Perl, and so on. 26 | 27 | - Some exotic lexemes are not supported. 
This includes Python and YAML 28 | significant whitespace, user-defined operators that mess with the lexer like 29 | in Haskell, and ALGOL-style `end` when there isn't a clear pair of tokens to 30 | lex as a pair of open/close delimiters (Ruby has this problem). 31 | 32 | - **Unicode support.** This means that e.g. `エルフーン` is an identifier by 33 | default. ASCII-only filters exist for backwards compatibility with old stuff. 34 | `ilex` will only support UTF-8-encoded input files, and always uses the 35 | Unicode definition of whitespace for delimiting tokens, not just ASCII 36 | whitespace (`" \t\n\t"`). 37 | 38 | - **Diagnostics and spans.** The lexer should be able to generate pretty good 39 | diagnostics, and this API is exposed for tools built on top of the lexer to 40 | emit diagnostics. Spans are interned automatically. 41 | 42 | - Custom error recovery is hard, so I don't plan to support that. 43 | 44 | - **Token trees.** Token trees are a far better abstraction than token streams, 45 | because many LR(k) curly-brace languages become regular or close to regular if 46 | you decide that every pair of braces or parentheses with unknown contents is 47 | inside 48 | 49 | This library also provides basic software float support. You should _never_ 50 | convert user-provided text into hardware floats if you care about byte-for-byte 51 | portability. This library helps with that. 52 | 53 | ### Stability Ground Rules 54 | 55 | I have tried to define exactly how rules map onto the internal finite automata, 56 | but breaking changes happen! I will try not to break things across patch 57 | releases, but I can't promise perfect stability across even minor releases. 58 | 59 | Write good tests for your frontend and don't expose your `ilex` guts if you can. 60 | This will make it easier for you to just pin a version and avoid thinking about 61 | this problem. 62 | 63 | Diagnostics are completely unstable. Don't try to parse them, don't write golden 64 | tests against them. 
If you must, use [`testing::check_report()`] so that you can 65 | regenerate them. 66 | -------------------------------------------------------------------------------- /ilex/attr/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ilex-attr" 3 | version = "0.5.0" 4 | description = "attributes for the ilex crate" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [lib] 14 | path = "lib.rs" 15 | proc-macro = true 16 | 17 | [dependencies] 18 | proc2decl = { path = "../../proc2decl" } -------------------------------------------------------------------------------- /ilex/attr/lib.rs: -------------------------------------------------------------------------------- 1 | //! Implementation detail of `ilex`. 2 | 3 | use proc_macro::TokenStream; 4 | 5 | // This helper exists only to make the #[spec] field attributes inert. 6 | #[doc(hidden)] 7 | #[proc_macro_derive(derive_hack, attributes(named, rule))] 8 | pub fn derive(_: TokenStream) -> TokenStream { 9 | TokenStream::new() 10 | } 11 | 12 | proc2decl::bridge! { 13 | /// Generates a lexer spec struct. 14 | /// 15 | /// This macro generates the type of struct described in the 16 | /// [crate documentation][crate]. The syntax is as follows. 17 | /// 18 | /// ```ignore 19 | /// use ilex::rule::Keyword; 20 | /// use ilex::Lexeme; 21 | /// 22 | /// /// My cool spec. 23 | /// #[ilex::spec] 24 | /// struct MySpec { 25 | /// #[named("...")] 26 | /// #[rule(/* ... */)] 27 | /// dollar: Lexeme = "$", 28 | /// } 29 | /// ``` 30 | /// 31 | /// The type of each field must be a [`Lexeme`] with a [`Rule`] type as its 32 | /// parameter. There are two special attributes that can follow. 33 | /// 34 | /// - `#[named]` makes the rule into a *named* rule. 
This name can be used by 35 | /// diagnostics, and corresponds to calling `Spec::named_rule()`. 36 | /// 37 | /// - `#[rule]` is the value to use to construct the rule, which must be 38 | /// `Into`, where `R` is the type inside `Lexeme` (so, above, the rule 39 | /// value must be `Into`). By default, this value is the name of the 40 | /// rule, to make the common case of declaring a keyword as simple as writing 41 | /// `nullptr: Lexeme`, assuming Rust itself doesn't already use that 42 | /// keyword. 43 | /// 44 | /// Note that *order matters* for the fields: when breaking a tie between two 45 | /// potential tokens of the same length, the first one in the struct will win. 46 | /// In practice, this means you should put keywords before identifiers. 47 | /// 48 | /// Additionally, the following functions will be defined for the `MySpec` type. 49 | /// 50 | /// ``` 51 | /// # struct Spec; 52 | /// # struct MySpec; 53 | /// # fn norun(_: i32) { 54 | /// impl MySpec { 55 | /// /// Gets the global instance of this spec. 56 | /// pub fn get() -> &'static Self { 57 | /// // ... 58 | /// # todo!() 59 | /// } 60 | /// 61 | /// /// Gets the actual compiled spec. 62 | /// pub fn spec(&self) -> &Spec { 63 | /// // ... 64 | /// # todo!() 65 | /// } 66 | /// } 67 | /// # } 68 | /// ``` 69 | /// 70 | // God cross-trait links suck. 
71 | /// [`Lexeme`]: https://docs.rs/ilex/latest/ilex/struct.Lexeme.html 72 | /// [`Rule`]: https://docs.rs/ilex/latest/ilex/rule/trait.Rule.html 73 | /// [crate]: https://docs.rs/ilex 74 | macro #[spec] => ilex::__spec__; 75 | } 76 | -------------------------------------------------------------------------------- /ilex/src/file/context.rs: -------------------------------------------------------------------------------- 1 | use std::fs; 2 | use std::sync::Arc; 3 | use std::sync::RwLock; 4 | 5 | use camino::Utf8Path; 6 | 7 | use crate::f; 8 | use crate::file::File; 9 | use crate::file::CTX_FOR_SPAN_DEBUG; 10 | use crate::report; 11 | use crate::report::Fatal; 12 | use crate::report::Report; 13 | 14 | #[cfg(doc)] 15 | use crate::Span; 16 | 17 | /// A source context, which owns source code files. 18 | /// 19 | /// A `Context` contains the full text of all the loaded source files, which 20 | /// [`Span`]s ultimately refer to. 21 | #[derive(Default)] 22 | pub struct Context { 23 | state: Arc>, 24 | } 25 | 26 | #[derive(Default)] 27 | pub struct State { 28 | // Each file is laid out as the length of the text, followed by the text data, 29 | // followed by the path. 30 | // 31 | // TODO(mcyoung): Be smarter about this and use something something concurrent 32 | // vector? We don't need to have all this stuff behind a lock I think. 33 | files: Vec<(usize, String)>, 34 | } 35 | 36 | unsafe impl Send for Context {} 37 | unsafe impl Sync for Context {} 38 | 39 | impl Context { 40 | /// Creates a new source context. 41 | pub fn new() -> Self { 42 | Self::default() 43 | } 44 | 45 | pub(crate) fn copy(&self) -> Context { 46 | Self { state: self.state.clone() } 47 | } 48 | 49 | /// Sets this thread to use this [`Context`] in `fmt::Debug`. 50 | /// 51 | /// By default, `dbg!(some_span)` produces a string like `""`, since 52 | /// spans do not know what context they came from. 
This function sets a thread 53 | /// local that `` looks at when printing; this is useful 54 | /// for when dumping e.g. an AST when debugging. 55 | /// 56 | /// Returns an RAII type that undoes the effects of this function when leaving 57 | /// scope, so that if the caller also called this function, it doesn't get 58 | /// clobbered. 59 | #[must_use = "Context::use_for_debugging_spans() returns an RAII object"] 60 | pub fn use_for_debugging_spans(&self) -> impl Drop { 61 | struct Replacer(Option); 62 | impl Drop for Replacer { 63 | fn drop(&mut self) { 64 | CTX_FOR_SPAN_DEBUG.with(|v| *v.borrow_mut() = self.0.take()) 65 | } 66 | } 67 | 68 | Replacer(CTX_FOR_SPAN_DEBUG.with(|v| v.replace(Some(self.copy())))) 69 | } 70 | 71 | /// Creates a new [`Report`] based on this context. 72 | pub fn new_report(&self) -> Report { 73 | Report::new(self, Default::default()) 74 | } 75 | 76 | /// Creates a new [`Report`] based on this context, with the specified 77 | /// options. 78 | pub fn new_report_with(&self, options: report::Options) -> Report { 79 | Report::new(self, options) 80 | } 81 | 82 | /// Adds a new file to this source context. 83 | pub fn new_file<'a>( 84 | &self, 85 | path: impl Into<&'a Utf8Path>, 86 | text: impl Into, 87 | ) -> File { 88 | let mut text = text.into(); 89 | text.push(' '); // This space only exists to be somewhere for an EOF span 90 | // to point to in diagnostics; user code will never see 91 | // it. 92 | let len = text.len(); 93 | text.push_str(path.into().as_str()); 94 | 95 | let idx = { 96 | let mut state = self.state.write().unwrap(); 97 | state.files.push((len, text)); 98 | state.files.len() - 1 99 | }; 100 | 101 | self.file(idx).unwrap() 102 | } 103 | 104 | /// Adds a new file to this source context, validating that it is valid 105 | /// UTF-8. 
106 | pub fn new_file_from_bytes<'a>( 107 | &self, 108 | path: impl Into<&'a Utf8Path>, 109 | text: impl Into>, 110 | report: &Report, 111 | ) -> Result { 112 | let path = path.into(); 113 | let text = String::from_utf8(text.into()).map_err(|e| { 114 | let n = e.utf8_error().valid_up_to(); 115 | let b = e.as_bytes()[n]; 116 | 117 | report 118 | .error(f!("input file `{path}` was not valid UTF-8")) 119 | .note(f!("encountered non-UTF-8 byte {b:#02x} at offset {n}")); 120 | report.fatal().unwrap() 121 | })?; 122 | 123 | Ok(self.new_file(path, text)) 124 | } 125 | 126 | /// Adds a new file to this source context by opening `name` and reading it 127 | /// from the file system. 128 | pub fn open_file<'a>( 129 | &self, 130 | path: impl Into<&'a Utf8Path>, 131 | report: &Report, 132 | ) -> Result { 133 | let path = path.into(); 134 | 135 | let bytes = match fs::read(path) { 136 | Ok(bytes) => bytes, 137 | Err(e) => { 138 | report.error(f!("could not open input file `{path}`: {e}")); 139 | return report.fatal(); 140 | } 141 | }; 142 | 143 | self.new_file_from_bytes(path, bytes, report) 144 | } 145 | 146 | /// Gets the `idx`th file in this source context. 147 | pub fn file(&self, idx: usize) -> Option { 148 | let state = self.state.read().unwrap(); 149 | let (len, text) = state.files.get(idx)?; 150 | let text = unsafe { 151 | // SAFETY: The pointer to the file's text is immutable and pointer-stable, 152 | // so we can safely extend its lifetime here. 153 | &*(text.as_str() as *const str) 154 | }; 155 | 156 | Some(File { len: *len, text, ctx: self, idx }) 157 | } 158 | 159 | /// Gets the number of files currently tracked by this source context. 160 | pub fn file_count(&self) -> usize { 161 | self.state.read().unwrap().files.len() 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /ilex/src/ice.rs: -------------------------------------------------------------------------------- 1 | //! 
Helpers for working with internal compiler errors (ICEs). 2 | //! 3 | //! This module provides types and other things to make sure you can provide 4 | //! useful crash reports for your users. 5 | 6 | use std::backtrace::Backtrace; 7 | use std::backtrace::BacktraceStatus; 8 | use std::io; 9 | use std::panic; 10 | use std::panic::AssertUnwindSafe; 11 | use std::panic::PanicHookInfo; 12 | use std::panic::UnwindSafe; 13 | use std::sync::Mutex; 14 | use std::thread; 15 | 16 | use crate::f; 17 | use crate::file::Context; 18 | use crate::report::Report; 19 | 20 | /// Executes a "compiler main function". 21 | /// 22 | /// This function takes care of setting up a panic hook for us that will catch 23 | /// any [`Ice`]s for us. 24 | /// 25 | /// Generally, the way using this function would look is something like this: 26 | /// 27 | // Delete "should_panic" to see what the ICE errors look like. 28 | /// ```should_panic 29 | /// use ilex::ice; 30 | /// use ilex::report; 31 | /// 32 | /// fn compile(ctx: &mut ilex::Context) -> Result<(), report::Fatal> { 33 | /// panic!("its not done yet, im too busy writing a lexer library !!! 😡") 34 | /// } 35 | /// 36 | /// fn main() { 37 | /// let mut ctx = ilex::Context::new(); 38 | /// let report = ctx.new_report(); 39 | /// # let report = ctx.new_report_with(report::Options { 40 | /// # color: true, 41 | /// # show_report_locations: false, 42 | /// # }); 43 | /// 44 | /// let opts = ice::Options { 45 | /// what_panicked: Some("my test".to_string()), 46 | /// report_bugs_at: Some("https://github.com/mcy/strings/issues".into()), 47 | /// extra_notes: vec![format!("ilex {}", env!("CARGO_PKG_VERSION"))], 48 | /// ..ice::Options::default() 49 | /// }; 50 | /// 51 | /// let result = ice::handle(&mut ctx, &report, opts, |ctx| { 52 | /// // Business logic that may panic. 
53 | /// compile(ctx) 54 | /// }); 55 | /// 56 | /// if let Err(fatal) = result { 57 | /// fatal.terminate(); 58 | /// } 59 | /// } 60 | /// ``` 61 | #[allow(clippy::needless_doctest_main)] 62 | pub fn handle( 63 | ctx: &mut Context, 64 | report: &Report, 65 | options: Options, 66 | callback: Cb, 67 | ) -> R 68 | where 69 | Cb: FnOnce(&mut Context) -> R, 70 | Cb: UnwindSafe, 71 | { 72 | static ICE: Mutex> = Mutex::new(None); 73 | 74 | let options2 = options.clone(); 75 | panic::set_hook(Box::new(move |panic| { 76 | // We currently generate ICEs from any thread. It may be useful to mark 77 | // threads that will catch their panics? Unclear. 78 | 79 | // Generate an ICE and save it for later, if this panic actually makes it 80 | // out to the main function. 81 | *ICE.lock().unwrap() = Some(Ice::generate(panic, options2.clone())); 82 | })); 83 | 84 | panic::catch_unwind(AssertUnwindSafe(|| callback(ctx))).unwrap_or_else(|e| { 85 | let ice = ICE 86 | .lock() 87 | .unwrap() 88 | .take() 89 | .unwrap_or_else(|| Ice::with_no_context(options)); 90 | ice.report(report); 91 | // We have to do this here, and not in, say, the panic hook, because we want 92 | // the report to be silently dropped. 93 | let _ignored = report.write_out(io::stderr()); 94 | panic::resume_unwind(e) 95 | }) 96 | } 97 | 98 | /// An internal compiler error (ICE), captured from a panic handler. 99 | /// 100 | /// This is a separate type that can be ferried around between locations, 101 | /// because the panic hook executes *before* unwinding, but you may not want to 102 | /// print that as a diagnostic unless that panic bubbles all the way up to your 103 | /// main function. 104 | #[derive(Default)] 105 | pub struct Ice { 106 | what: Option, 107 | where_: Option<(String, Option)>, 108 | why: Option, 109 | options: Options, 110 | } 111 | 112 | /// Options for generating an ICE. 113 | #[derive(Default, Clone)] 114 | pub struct Options { 115 | /// Whether to show a backtrace. 
By default, uses the same rules as normal 116 | /// Rust (i.e. `RUST_BACKTRACE`). You may want to override it with something 117 | /// more in-style for your project. 118 | pub show_backtrace: Option, 119 | 120 | /// Configures what "unexpectedly panicked" in the output. Defaults to 121 | /// something generic like "the compiler". 122 | pub what_panicked: Option, 123 | 124 | /// Configures a link to show users after the "unexpectedly panicked" message. 125 | /// This should probably look like `https://github.com/me/my-project/issues`. 126 | pub report_bugs_at: Option, 127 | 128 | /// A static list of notes to append to an error before the backtrace. 129 | /// For example, rustc's ICE handler shows a GitHub link for filing issues, 130 | /// the version, git commit, and date the compiler was built at, and some 131 | /// subset of the flags of the compiler. 132 | pub extra_notes: Vec, 133 | } 134 | 135 | impl Ice { 136 | /// Generates an ICE with no context. Useful for when you caught a panic but 137 | /// didn't stow an ICE as expected. 138 | pub fn with_no_context(options: Options) -> Self { 139 | Self { 140 | what: None, 141 | where_: None, 142 | why: None, 143 | options, 144 | } 145 | } 146 | 147 | /// Generates an ICE from a panic message. 148 | /// 149 | /// The results are "best effort". The Rust backtrace API is incomplete, so we 150 | /// make do with some... cleverness around parsing the backtrace itself. 
151 | pub fn generate(panic: &PanicHookInfo, options: Options) -> Self { 152 | let msg = panic.payload(); 153 | let msg = Option::or( 154 | msg.downcast_ref::<&str>().copied().map(str::to_string), 155 | msg.downcast_ref::().cloned(), 156 | ); 157 | 158 | let thread = thread::current(); 159 | let thread_name = match thread.name() { 160 | Some(name) => name.into(), 161 | _ => format!("{:?}", thread.id()), 162 | }; 163 | let location = panic.location().map(ToString::to_string); 164 | 165 | let backtrace = if options.show_backtrace.is_none() { 166 | Some(Backtrace::capture()) 167 | .filter(|bt| bt.status() == BacktraceStatus::Captured) 168 | } else if options.show_backtrace == Some(true) { 169 | Some(Backtrace::force_capture()) 170 | } else { 171 | None 172 | }; 173 | 174 | Self { 175 | what: msg, 176 | where_: Some((thread_name, location)), 177 | why: backtrace, 178 | options, 179 | } 180 | } 181 | 182 | /// Dumps this ICE into a report. 183 | pub fn report(self, report: &Report) { 184 | report.error(f!( 185 | "internal compiler error: {}", 186 | self.what.as_deref().unwrap_or("unknown panic") 187 | )); 188 | 189 | report.note(f!( 190 | "{} unexpectedly panicked. 
this is a bug", 191 | self 192 | .options 193 | .what_panicked 194 | .as_deref() 195 | .unwrap_or("the compiler"), 196 | )); 197 | 198 | if let Some(at) = self.options.report_bugs_at { 199 | report.note(f!("please file a bug at: {at}")); 200 | } 201 | 202 | for note in self.options.extra_notes { 203 | report.note(f!("{note}")); 204 | } 205 | 206 | if let Some(bt) = self.why { 207 | match self.where_ { 208 | Some((thread, Some(loc))) => { 209 | report.note(f!("thread \"{thread}\" panicked at {loc}\n{bt}")) 210 | } 211 | Some((thread, _)) => { 212 | report.note(f!("thread \"{thread}\" panicked\n{bt}")) 213 | } 214 | None => report.note(f!("backtrace:\n{bt}")), 215 | }; 216 | } 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /ilex/src/report/diagnostic.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::mem; 3 | use std::panic; 4 | 5 | use crate::file; 6 | use crate::file::Spanned; 7 | use crate::report::Report; 8 | 9 | /// A diagnostic that is being built up. 10 | /// 11 | /// [`Diagnostic`]s are not committed to the report that owns them until they 12 | /// are dropped. In general, this is not a problem because diagnostics are 13 | /// almost always temporaries, e.g. 14 | /// 15 | /// ``` 16 | /// # fn x(report: &ilex::Report, span: ilex::Span) { 17 | /// report.error("my error message") 18 | /// .saying(span, "this is bad code"); 19 | /// # } 20 | /// ``` 21 | /// 22 | /// However, holding a diagnostic in a variable will delay it until the end of 23 | /// the scope, or until [`Diagnostic::commit()`] is called. Once a diagnostic 24 | /// is added to a report, it cannot be modified. 25 | /// 26 | /// See e.g. [`Report::error()`]. 
27 | pub struct Diagnostic { 28 | pub(super) report: Report, 29 | pub(super) info: Info, 30 | pub(super) speculative: bool, 31 | } 32 | 33 | pub use annotate_snippets::AnnotationType as Kind; 34 | 35 | pub struct Info { 36 | pub kind: Kind, 37 | pub message: String, 38 | pub snippets: Vec>, 39 | pub notes: Vec<(String, Kind)>, 40 | pub reported_at: Option<&'static panic::Location<'static>>, 41 | } 42 | 43 | impl Diagnostic { 44 | pub(super) fn new(report: Report, kind: Kind, message: String) -> Self { 45 | Diagnostic { 46 | report, 47 | speculative: false, 48 | info: Info { 49 | message, 50 | kind, 51 | snippets: Vec::new(), 52 | notes: Vec::new(), 53 | reported_at: None, 54 | }, 55 | } 56 | } 57 | 58 | /// Marks this diagnostic as "speculative", meaning that it will not be 59 | /// applied until [`Diagnostic::commit()`] is called. 60 | pub fn speculate(mut self) -> Self { 61 | self.speculative = true; 62 | self 63 | } 64 | 65 | /// Commits this diagnostic to its report, even if it was marked as 66 | /// speculative. 67 | pub fn commit(mut self) { 68 | self.speculative = false; 69 | drop(self); 70 | } 71 | 72 | /// Adds a new relevant snippet at the given location. 73 | pub fn at<'s>(self, span: impl Spanned<'s>) -> Self { 74 | self.saying(span, "") 75 | } 76 | 77 | /// Adds a new diagnostic location, with the given message attached to it. 78 | pub fn saying<'s>( 79 | self, 80 | span: impl Spanned<'s>, 81 | message: impl fmt::Display, 82 | ) -> Self { 83 | self.snippet(span, message, None) 84 | } 85 | 86 | /// Like `saying`, but the underline is as for a "note" rather than the 87 | /// overall diagnostic. 
88 | pub fn remark<'s>( 89 | self, 90 | span: impl Spanned<'s>, 91 | message: impl fmt::Display, 92 | ) -> Self { 93 | self.snippet(span, message, Some(Kind::Help)) 94 | } 95 | 96 | fn snippet<'s>( 97 | mut self, 98 | span: impl Spanned<'s>, 99 | message: impl fmt::Display, 100 | kind: Option, 101 | ) -> Self { 102 | if self.info.snippets.is_empty() { 103 | self.info.snippets = vec![vec![]]; 104 | } 105 | 106 | self.info.snippets.last_mut().unwrap().push(( 107 | span.span().span3(), 108 | message.to_string(), 109 | kind.unwrap_or(self.info.kind), 110 | )); 111 | self 112 | } 113 | 114 | /// Starts a new snippet, even if the next range is in the same file. 115 | pub fn new_snippet(mut self) -> Self { 116 | self.info.snippets.push(Vec::new()); 117 | self 118 | } 119 | 120 | /// Appends a note to the bottom of the diagnostic. 121 | pub fn note(mut self, message: impl fmt::Display) -> Self { 122 | // HACK: annotate-snippets really likes to convert __ into bold, like 123 | // Markdown, which is a problem for display correctness. We work around this 124 | // by inserting a zero-width space between every two underscores. 125 | let mut note = message.to_string(); 126 | note = note.replace("__", "_\u{200b}_"); 127 | 128 | self.info.notes.push((note, Kind::Note)); 129 | self 130 | } 131 | 132 | /// Appends a help tip to the bottom of the diagnostic. 133 | pub fn help(mut self, message: impl fmt::Display) -> Self { 134 | // HACK: annotate-snippets really likes to convert __ into bold, like 135 | // Markdown, which is a problem for display correctness. We work around this 136 | // by inserting a zero-width space between every two underscores. 137 | let mut note = message.to_string(); 138 | note = note.replace("__", "_\u{200b}_"); 139 | 140 | self.info.notes.push((note, Kind::Help)); 141 | self 142 | } 143 | 144 | /// Updates the "reported at" information for this diagnostic. 
145 | /// 146 | /// This information is only intended to be used for tool developers to 147 | /// debug where diagnostics are being emitted. 148 | pub fn reported_at(mut self, at: &'static panic::Location<'static>) -> Self { 149 | if self.report.state.opts.show_report_locations { 150 | self.info.reported_at = Some(at) 151 | } 152 | self 153 | } 154 | } 155 | 156 | impl Drop for Diagnostic { 157 | fn drop(&mut self) { 158 | if !self.speculative { 159 | self.report.state.insert_diagnostic(mem::replace( 160 | &mut self.info, 161 | Info { 162 | message: "".to_string(), 163 | kind: Kind::Error, 164 | snippets: Vec::new(), 165 | notes: Vec::new(), 166 | reported_at: None, 167 | }, 168 | )); 169 | } 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /ilex/src/report/mod.rs: -------------------------------------------------------------------------------- 1 | //! Diagnostics and error reports. 2 | //! 3 | //! This module contains types for generating an *error report*: a collection of 4 | //! diagnostics that describe why an operation failed in detail. Diagnostics 5 | //! are basically fancy compiler errors: they use [`Span`]s to present faulty 6 | //! input in context. 7 | //! 8 | //! The [`Report`] type is a reference-counted list of diagnostics, which is 9 | //! typically passed by reference into functions, but can be copied to simplify 10 | //! lifetimes, since it's reference-counted. 11 | 12 | use std::fmt; 13 | use std::io; 14 | use std::panic; 15 | use std::panic::Location; 16 | use std::process; 17 | use std::sync::Arc; 18 | 19 | use crate::file::Context; 20 | use crate::spec::Spec; 21 | 22 | mod builtin; 23 | mod diagnostic; 24 | mod render; 25 | 26 | pub use builtin::Builtins; 27 | pub use builtin::Expected; 28 | pub use diagnostic::Diagnostic; 29 | use diagnostic::Kind; 30 | 31 | #[cfg(doc)] 32 | use crate::Span; 33 | 34 | /// A collection of errors can may built up over the course of an operation. 
35 | /// 36 | /// To construct a report, see [`Context::new_report()`]. The context that 37 | /// constructs a report is the only one whose [`Span`]s should be passed into 38 | /// it; doing otherwise will result in unspecified output (or probably a panic). 39 | pub struct Report { 40 | ctx: Context, 41 | state: Arc, 42 | } 43 | 44 | /// Options for a [`Report`]. 45 | pub struct Options { 46 | /// Whether to color the output when rendered. 47 | pub color: bool, 48 | /// Whether to add a note to each diagnostic showing where in the source 49 | /// code it was reported. `ilex` makes a best-case effort to ensure this 50 | /// location is in *your* code. 51 | pub show_report_locations: bool, 52 | } 53 | 54 | impl Default for Options { 55 | fn default() -> Self { 56 | Self { 57 | color: true, 58 | show_report_locations: cfg!(debug_assertions), 59 | } 60 | } 61 | } 62 | 63 | impl Report { 64 | pub(crate) fn copy(&self) -> Report { 65 | Self { 66 | ctx: self.ctx.copy(), 67 | state: self.state.clone(), 68 | } 69 | } 70 | 71 | /// Returns a wrapper for accessing commonly-used, built-in message types. 72 | /// 73 | /// See [`Builtins`]. 74 | pub fn builtins<'a>(&'a self, spec: &'a Spec) -> Builtins<'a> { 75 | Builtins { report: self, spec } 76 | } 77 | 78 | /// Adds a new error to this report. 79 | /// 80 | /// The returned [`Diagnostic`] object can be used to add spans, notes, and 81 | /// remarks, to generate a richer diagnostic. 82 | #[track_caller] 83 | pub fn error(&self, message: impl fmt::Display) -> Diagnostic { 84 | self.new_diagnostic(Kind::Error, message.to_string()) 85 | } 86 | 87 | /// Adds a new warning to this report. 88 | /// 89 | /// The returned [`Diagnostic`] object can be used to add spans, notes, and 90 | /// remarks, to generate a richer diagnostic. 
91 | #[track_caller] 92 | pub fn warn(&self, message: impl fmt::Display) -> Diagnostic { 93 | self.new_diagnostic(Kind::Warning, message.to_string()) 94 | } 95 | 96 | /// Adds a new top-level note to this report. 97 | /// 98 | /// The returned [`Diagnostic`] object can be used to add spans, notes, and 99 | /// remarks, to generate a richer diagnostic. 100 | #[track_caller] 101 | pub fn note(&self, message: impl fmt::Display) -> Diagnostic { 102 | self.new_diagnostic(Kind::Note, message.to_string()) 103 | } 104 | 105 | #[track_caller] 106 | fn new_diagnostic(&self, kind: Kind, message: String) -> Diagnostic { 107 | Diagnostic::new(self.copy(), kind, message).reported_at(Location::caller()) 108 | } 109 | 110 | /// Returns a [`Fatal`] regardless of whether this report contains any errors. 111 | pub fn fatal(&self) -> Result { 112 | Err(Fatal(self.copy())) 113 | } 114 | 115 | /// If this report contains any errors, returns [`Err(Fatal)`][Fatal]; 116 | /// otherwise, it returns `Ok(ok)`. 117 | /// 118 | /// This is a useful function for completing some operation that could have 119 | /// generated error diagnostics. 120 | /// 121 | /// See [`Fatal`]. 122 | pub fn fatal_or(&self, ok: T) -> Result { 123 | if !self.state.has_error() { 124 | return Ok(ok); 125 | } 126 | 127 | self.fatal() 128 | } 129 | 130 | /// Collates all of the "unsorted diagnostics" into the "sorted diagnostics", 131 | /// sorting them by thread id. 132 | /// 133 | /// This ensures that all diagnostics coming from a particular thread are 134 | /// together. 135 | pub fn collate(&self) { 136 | self.state.collate() 137 | } 138 | 139 | /// Writes out the contents of this diagnostic to `sink`. 
140 | pub fn write_out(&self, sink: impl io::Write) -> io::Result<()> { 141 | render::finish(self, sink) 142 | } 143 | 144 | pub(crate) fn new(ctx: &Context, opts: Options) -> Self { 145 | Self { 146 | ctx: ctx.copy(), 147 | state: Arc::new(render::State::new(opts)), 148 | } 149 | } 150 | } 151 | 152 | /// An error type for making returning a [`Result`] that will trigger 153 | /// diagnostics printing when unwrapped. 154 | /// 155 | /// This is useful for functions that are [`Result`]-ey, like reading a file, 156 | /// but which want to generate diagnostics, too. 157 | pub struct Fatal(Report); 158 | 159 | impl Fatal { 160 | /// Prints all diagnostics to stderr and terminates the program. 161 | pub fn terminate(self) -> ! { 162 | eprintln!("{self}"); 163 | process::exit(1); 164 | } 165 | 166 | /// Panics with the [`Report`]'s diagnostics as the panic message. 167 | pub fn panic(self) -> ! { 168 | panic::panic_any(self.to_string()) 169 | } 170 | } 171 | 172 | impl fmt::Debug for Fatal { 173 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 174 | render::render_fmt(&self.0, &self.0.state.opts, f) 175 | } 176 | } 177 | 178 | impl fmt::Display for Fatal { 179 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 180 | fmt::Debug::fmt(self, f) 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /ilex/src/report/render.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::io; 3 | use std::mem; 4 | use std::sync::atomic::AtomicBool; 5 | use std::sync::atomic::AtomicU64; 6 | use std::sync::atomic::Ordering; 7 | use std::sync::Mutex; 8 | 9 | use annotate_snippets::renderer::AnsiColor; 10 | use annotate_snippets::renderer::Style; 11 | use annotate_snippets::Annotation; 12 | use annotate_snippets::AnnotationType; 13 | use annotate_snippets::Renderer; 14 | use annotate_snippets::Slice; 15 | use annotate_snippets::Snippet; 16 | use 
annotate_snippets::SourceAnnotation; 17 | 18 | use crate::report::diagnostic; 19 | use crate::report::diagnostic::Info; 20 | use crate::report::diagnostic::Kind; 21 | use crate::report::Options; 22 | use crate::report::Report; 23 | 24 | pub struct State { 25 | pub opts: Options, 26 | has_error: AtomicBool, 27 | sorted_diagnostics: Mutex>, 28 | recent_diagnostics: Mutex>, 29 | } 30 | 31 | impl State { 32 | pub fn new(opts: Options) -> Self { 33 | Self { 34 | opts, 35 | has_error: AtomicBool::new(false), 36 | sorted_diagnostics: Default::default(), 37 | recent_diagnostics: Default::default(), 38 | } 39 | } 40 | 41 | pub fn has_error(&self) -> bool { 42 | self.has_error.load(Ordering::SeqCst) 43 | } 44 | 45 | /// Collates all of the "unsorted diagnostics" into the "sorted diagnostics", 46 | /// sorting them by thread id. This ensures that all diagnostics coming from 47 | /// a particular thread are together. 48 | pub fn collate(&self) { 49 | let mut recent = self.recent_diagnostics.lock().unwrap(); 50 | let mut sorted = self.sorted_diagnostics.lock().unwrap(); 51 | 52 | recent.sort_by_key(|&(id, _)| id); 53 | sorted.extend(recent.drain(..).map(|(_, i)| i)); 54 | } 55 | 56 | pub fn insert_diagnostic(&self, info: Info) { 57 | if info.kind == Kind::Error { 58 | self.has_error.store(true, Ordering::SeqCst); 59 | } 60 | 61 | static COUNTER: AtomicU64 = AtomicU64::new(0); 62 | thread_local! { 63 | static ID: u64 = COUNTER.fetch_add(1, Ordering::Relaxed); 64 | }; 65 | 66 | let mut recent = self.recent_diagnostics.lock().unwrap(); 67 | recent.push((ID.with(|&x| x), info)) 68 | } 69 | } 70 | 71 | /// Consumes this `Report` and dumps its diagnostics to `sink`. 
72 | pub fn finish(report: &Report, sink: impl io::Write) -> io::Result<()> { 73 | struct Writer { 74 | sink: W, 75 | error: Option, 76 | } 77 | 78 | impl fmt::Write for Writer { 79 | fn write_str(&mut self, s: &str) -> fmt::Result { 80 | self.sink.write_all(s.as_bytes()).map_err(|e| { 81 | self.error = Some(e); 82 | fmt::Error 83 | }) 84 | } 85 | } 86 | 87 | let mut out = Writer { sink, error: None }; 88 | render_fmt(report, &report.state.opts, &mut out).map_err(|_| { 89 | if let Some(e) = out.error.take() { 90 | return e; 91 | } 92 | 93 | io::Error::new(io::ErrorKind::Other, "formatter error") 94 | }) 95 | } 96 | 97 | /// Dumps this collection of errors as user-displayable text into `sink`. 98 | pub fn render_fmt( 99 | report: &Report, 100 | opts: &Options, 101 | sink: &mut dyn fmt::Write, 102 | ) -> fmt::Result { 103 | report.state.collate(); 104 | let mut errors = 0; 105 | 106 | let mut renderer = Renderer::plain(); 107 | #[rustfmt::skip] 108 | #[allow(clippy::let_unit_value)] 109 | let _ = if opts.color { 110 | renderer = Renderer::styled() 111 | .error(Style::new().fg_color(Some(AnsiColor::BrightRed.into())).bold()) 112 | .warning(Style::new().fg_color(Some(AnsiColor::BrightYellow.into())).bold()) 113 | .note(Style::new().fg_color(Some(AnsiColor::BrightGreen.into())).bold()) 114 | .info(Style::new().fg_color(Some(AnsiColor::BrightBlue.into())).bold()) 115 | .help(Style::new().fg_color(Some(AnsiColor::BrightBlue.into())).bold()); 116 | }; 117 | 118 | for e in report.state.sorted_diagnostics.lock().unwrap().iter() { 119 | if e.kind == Kind::Error { 120 | errors += 1; 121 | }; 122 | 123 | let mut snippet = Snippet { 124 | title: Some(Annotation { 125 | id: None, 126 | label: Some(&e.message), 127 | annotation_type: e.kind, 128 | }), 129 | footer: Vec::new(), 130 | slices: Vec::new(), 131 | }; 132 | 133 | for snips in &e.snippets { 134 | let mut cur_file = None; 135 | let mut cur_slice = None::; 136 | let mut has_eof = false; 137 | for (span, text, kind) in snips 
{ 138 | let span = span.get(&report.ctx); 139 | let file = span.file(); 140 | if cur_file != Some(file) { 141 | cur_file = Some(file); 142 | if let Some(mut slice) = cur_slice.take() { 143 | if !mem::take(&mut has_eof) { 144 | slice.source = &slice.source[..slice.source.len() - 1]; 145 | } 146 | snippet.slices.push(slice); 147 | } 148 | 149 | cur_slice = Some(Slice { 150 | source: file.text_with_extra_space(), 151 | line_start: 1, 152 | origin: Some(file.path().as_str()), 153 | annotations: Vec::new(), 154 | fold: true, 155 | }); 156 | } 157 | 158 | let slice = cur_slice.as_mut().unwrap(); 159 | let mut start = span.start(); 160 | let mut end = span.end(); 161 | 162 | // Ensure that all ranges have length at least one, and try to get them 163 | // to point just after non-whitespace. 164 | // If this is the EOF, it will point at the extra space. 165 | if start == end { 166 | let chunk = &slice.source[..end]; 167 | let ws_suf = 168 | chunk.len() - chunk.trim_end_matches(char::is_whitespace).len(); 169 | start -= ws_suf; 170 | end -= ws_suf; 171 | end += 1; 172 | has_eof |= end == slice.source.len(); 173 | } else { 174 | // Crop a range so that it does not contain leading or trailing 175 | // whitespace. 176 | let chunk = &slice.source[start..end]; 177 | let ws_pre = 178 | chunk.len() - chunk.trim_start_matches(char::is_whitespace).len(); 179 | let ws_suf = 180 | chunk.len() - chunk.trim_end_matches(char::is_whitespace).len(); 181 | start += ws_pre; 182 | end -= ws_suf; 183 | } 184 | 185 | slice.annotations.push(SourceAnnotation { 186 | range: (start, end), 187 | label: text, 188 | annotation_type: *kind, 189 | }); 190 | } 191 | 192 | if let Some(mut slice) = cur_slice.take() { 193 | if !mem::take(&mut has_eof) { 194 | slice.source = &slice.source[..slice.source.len() - 1]; 195 | } 196 | snippet.slices.push(slice); 197 | } 198 | } 199 | 200 | // Crop the starts of each slice to only incorporate the annotations. 
201 | for slice in &mut snippet.slices { 202 | let earliest_start = slice 203 | .annotations 204 | .iter() 205 | .map(|a| a.range.0) 206 | .min() 207 | .unwrap_or(0); 208 | let (count, start_idx) = slice.source[..earliest_start] 209 | .bytes() 210 | .enumerate() 211 | .filter_map(|(i, c)| (c == b'\n').then_some(i + 1)) 212 | .enumerate() 213 | .map(|(i, j)| (i + 1, j)) 214 | .last() 215 | .unwrap_or_default(); 216 | 217 | slice.line_start = count + 1; 218 | slice.source = &slice.source[start_idx..]; 219 | for a in &mut slice.annotations { 220 | a.range.0 -= start_idx; 221 | a.range.1 -= start_idx; 222 | } 223 | } 224 | 225 | for (note, kind) in &e.notes { 226 | snippet.footer.push(Annotation { 227 | id: None, 228 | label: Some(note), 229 | annotation_type: *kind, 230 | }); 231 | } 232 | 233 | let footer; 234 | if opts.show_report_locations { 235 | footer = format!("reported at: {}", e.reported_at.unwrap()); 236 | snippet.footer.push(Annotation { 237 | id: None, 238 | label: Some(&footer), 239 | annotation_type: AnnotationType::Note, 240 | }); 241 | } 242 | 243 | write!(sink, "{}\n\n", renderer.render(snippet))?; 244 | } 245 | 246 | if errors != 0 { 247 | let message = match errors { 248 | 1 => "aborting due to previous error".into(), 249 | n => format!("aborting due to {n} errors"), 250 | }; 251 | 252 | let aborting = Snippet { 253 | title: Some(Annotation { 254 | id: None, 255 | label: Some(&message), 256 | annotation_type: AnnotationType::Error, 257 | }), 258 | footer: Vec::new(), 259 | slices: Vec::new(), 260 | }; 261 | 262 | writeln!(sink, "{}", renderer.render(aborting))?; 263 | } 264 | 265 | Ok(()) 266 | } 267 | -------------------------------------------------------------------------------- /ilex/src/rt/mod.rs: -------------------------------------------------------------------------------- 1 | //! The lexer runtime. 
2 | 3 | use std::cell::Cell; 4 | 5 | use crate::file::File; 6 | use crate::file::Span; 7 | use crate::file::Span2; 8 | use crate::report::Fatal; 9 | use crate::report::Report; 10 | use crate::rule; 11 | use crate::rule::Sign; 12 | use crate::spec::Lexeme; 13 | use crate::spec::Spec; 14 | use crate::token; 15 | 16 | mod emit2; 17 | pub mod lexer; 18 | mod unicode; 19 | 20 | mod dfa; 21 | pub use dfa::compile; 22 | pub use dfa::Dfa; 23 | 24 | pub fn lex<'ctx>( 25 | file: File<'ctx>, 26 | report: &Report, 27 | spec: &'ctx Spec, 28 | ) -> Result, Fatal> { 29 | let mut lexer = lexer::Lexer::new(file, report, spec); 30 | 31 | let unexpected = Cell::new(None); 32 | let diagnose_unexpected = |end: usize| { 33 | let Some(start) = unexpected.take() else { return }; 34 | report 35 | .builtins(spec) 36 | .unexpected_token(file.span(start..end)); 37 | }; 38 | 39 | loop { 40 | let start = lexer.cursor(); 41 | if lexer.skip_whitespace() { 42 | diagnose_unexpected(start); 43 | } 44 | 45 | let start = lexer.cursor(); 46 | let Some(next) = lexer.text(lexer.cursor()..).chars().next() else { break }; 47 | 48 | lexer.pop_closer(); 49 | if lexer.cursor() > start { 50 | diagnose_unexpected(start); 51 | continue; 52 | } 53 | 54 | emit2::emit(&mut lexer); 55 | if lexer.cursor() > start { 56 | diagnose_unexpected(start); 57 | continue; 58 | } 59 | 60 | lexer.add_token(UNEXPECTED, next.len_utf8(), None); 61 | if unexpected.get().is_none() { 62 | unexpected.set(Some(start)) 63 | } 64 | } 65 | 66 | report.fatal_or(lexer.finish()) 67 | } 68 | 69 | /// The internal representation of a token inside of a token stream. 
70 | #[derive(Clone)] 71 | pub struct Token { 72 | pub lexeme: Lexeme, 73 | pub end: u32, 74 | } 75 | #[derive(Clone, Default)] 76 | pub struct Metadata { 77 | pub kind: Option, 78 | pub comments: Vec, 79 | } 80 | 81 | #[derive(Clone)] 82 | pub enum Kind { 83 | Quoted(Quoted), 84 | Digital(Digital), 85 | Offset { cursor: i32, meta: i32 }, 86 | } 87 | 88 | #[derive(Clone)] 89 | pub struct Quoted { 90 | // Offsets for the components of the string. First mark is the end of the 91 | // open quote; following are alternating marks for textual and escape content. 92 | // Adjacent escapes are separated by empty text content. 93 | // 94 | // Each text component consists of one mark, its end. Each escape consists of 95 | // four marks, which refer to the end of the escape sequence prefix, the start of extra data, its end, and the 96 | // end of the whole escape. This means that when we encounter \xNN, the 97 | // positions of the marks are \x||NN||. When we encounter \u{NN}, the positions 98 | // are \u|{|NN|}|. For \n, the positions are \n||||. 
99 | pub marks: Vec, 100 | } 101 | 102 | #[derive(Clone, Default)] 103 | pub struct Digital { 104 | pub digits: DigitBlocks, 105 | pub exponents: Vec, 106 | } 107 | 108 | #[derive(Clone, Default)] 109 | pub struct DigitBlocks { 110 | pub prefix: Span2, 111 | pub sign: Option<(Sign, Span2)>, 112 | pub blocks: Vec, 113 | pub which_exp: usize, 114 | } 115 | 116 | impl DigitBlocks { 117 | pub fn prefix<'ctx>(&self, file: File<'ctx>) -> Option> { 118 | if self.prefix == Span2::default() { 119 | return None; 120 | } 121 | Some(self.prefix.get(file)) 122 | } 123 | 124 | pub fn sign<'ctx>(&self, file: File<'ctx>) -> Option> { 125 | self.sign.map(|(_, s)| s.get(file)) 126 | } 127 | 128 | pub fn blocks<'a, 'ctx: 'a>( 129 | &'a self, 130 | file: File<'ctx>, 131 | ) -> impl Iterator> + 'a { 132 | self.blocks.iter().map(move |s| s.get(file)) 133 | } 134 | } 135 | 136 | pub const WHITESPACE: Lexeme = Lexeme::new(-1); 137 | pub const UNEXPECTED: Lexeme = Lexeme::new(-2); 138 | pub const PREFIX: Lexeme = Lexeme::new(-3); 139 | pub const SUFFIX: Lexeme = Lexeme::new(-4); 140 | -------------------------------------------------------------------------------- /ilex/src/token/summary.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of `Stream::summary()`. 2 | 3 | use gilded::doc::Doc; 4 | 5 | use crate::file::Span; 6 | use crate::file::Spanned; 7 | use crate::token::Any; 8 | use crate::token::Cursor; 9 | use crate::token::Stream; 10 | 11 | use crate::token::Sign; 12 | use crate::token::Token; 13 | 14 | use super::Content; 15 | 16 | impl Stream<'_> { 17 | /// Returns a string that summarizes the contents of this token stream. 
18 | pub fn summary(&self) -> String { 19 | self.cursor().summary().to_string(&Default::default()) 20 | } 21 | } 22 | 23 | impl<'a> Cursor<'a> { 24 | fn summary(&self) -> Doc<'a> { 25 | Doc::new().push({ *self }.map(|token| { 26 | let doc = Doc::new() 27 | .entry("lexeme", token.lexeme().index()) 28 | .entry("span", span2doc(token.span())); 29 | 30 | match token { 31 | Any::Eof(..) => Doc::single("eof", doc), 32 | Any::Keyword(..) => Doc::single("keyword", doc), 33 | Any::Bracket(tok) => Doc::single( 34 | "bracket", 35 | doc 36 | .array("delims", tok.delimiters().into_iter().map(span2doc)) 37 | .entry("contents", tok.contents().summary()), 38 | ), 39 | 40 | Any::Ident(tok) => Doc::single( 41 | "ident", 42 | doc 43 | .entry("prefix", tok.prefix().map(span2doc)) 44 | .entry("suffix", tok.suffix().map(span2doc)) 45 | .entry("name", span2doc(tok.name())), 46 | ), 47 | 48 | Any::Digital(tok) => Doc::single( 49 | "ident", 50 | doc 51 | .entry("prefix", tok.prefix().map(span2doc)) 52 | .entry("suffix", tok.suffix().map(span2doc)) 53 | .entry("radix", tok.radix()) 54 | .entry("sign", tok.sign().map(sign2str)) 55 | .array("blocks", tok.digit_blocks().map(span2doc)) 56 | .array( 57 | "exponents", 58 | tok.exponents().map(|exp| { 59 | Doc::new() 60 | .entry("span", span2doc(exp.span())) 61 | .entry("prefix", exp.prefix().map(span2doc)) 62 | .entry("radix", exp.radix()) 63 | .entry("sign", exp.sign().map(sign2str)) 64 | .array("blocks", exp.digit_blocks().map(span2doc)) 65 | }), 66 | ), 67 | ), 68 | 69 | Any::Quoted(tok) => Doc::single( 70 | "quoted", 71 | doc 72 | .entry("prefix", tok.prefix().map(span2doc)) 73 | .entry("suffix", tok.suffix().map(span2doc)) 74 | .array("delims", tok.delimiters().into_iter().map(span2doc)) 75 | .array( 76 | "contents", 77 | tok.raw_content().map(|c| match c { 78 | Content::Lit(lit) => Doc::single("text", span2doc(lit)), 79 | Content::Esc(esc, data) => Doc::new() 80 | .entry("esc", span2doc(esc)) 81 | .entry("data", data.map(span2doc)), 82 | 
}), 83 | ), 84 | ), 85 | } 86 | })) 87 | } 88 | } 89 | 90 | fn span2doc(span: Span) -> Doc { 91 | Doc::new() 92 | .array("span", [span.start(), span.end()]) 93 | .entry("text", span.text()) 94 | } 95 | 96 | fn sign2str(s: Sign) -> &'static str { 97 | match s { 98 | Sign::Pos => "+", 99 | Sign::Neg => "-", 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /ilex/tests/greedy/greedy.tokens.yaml: -------------------------------------------------------------------------------- 1 | - keyword: 2 | lexeme: 3 3 | span: {span: [0, 6], text: "poison"} 4 | - keyword: 5 | lexeme: 5 6 | span: 7 | span: [6, 7] 8 | text: | 9 | 10 | - ident: 11 | lexeme: 4 12 | span: {span: [7, 16], text: "poisonous"} 13 | name: {span: [7, 16], text: "poisonous"} 14 | - keyword: 15 | lexeme: 5 16 | span: 17 | span: [16, 17] 18 | text: | 19 | 20 | - quoted: 21 | lexeme: 0 22 | span: 23 | span: [17, 32] 24 | text: "poisonous[xyz]>" 25 | delims: 26 | - {span: [17, 27], text: "poisonous["} 27 | - {span: [30, 32], text: "]>"} 28 | contents: [{text: {span: [27, 30], text: "xyz"}}] 29 | - keyword: 30 | lexeme: 5 31 | span: 32 | span: [32, 33] 33 | text: | 34 | 35 | - quoted: 36 | lexeme: 0 37 | span: 38 | span: [33, 56] 39 | text: "poisonous#%#%[xyz]#%#%>" 40 | delims: 41 | - {span: [33, 47], text: "poisonous#%#%["} 42 | - {span: [50, 56], text: "]#%#%>"} 43 | contents: [{text: {span: [47, 50], text: "xyz"}}] 44 | - keyword: 45 | lexeme: 5 46 | span: 47 | span: [56, 57] 48 | text: | 49 | 50 | - ident: 51 | lexeme: 4 52 | span: {span: [57, 66], text: "poisonous"} 53 | name: {span: [57, 66], text: "poisonous"} 54 | - bracket: 55 | lexeme: 2 56 | span: {span: [67, 72], text: "[xyz]"} 57 | delims: 58 | - {span: [67, 68], text: "["} 59 | - {span: [71, 72], text: "]"} 60 | contents: 61 | - ident: 62 | lexeme: 4 63 | span: {span: [68, 71], text: "xyz"} 64 | name: {span: [68, 71], text: "xyz"} 65 | - keyword: 66 | lexeme: 5 67 | span: 68 | span: [72, 73] 69 | 
text: | 70 | 71 | - quoted: 72 | lexeme: 1 73 | span: 74 | span: [73, 93] 75 | text: "R\"cc(some c++)\" )cc\"" 76 | delims: 77 | - {span: [73, 78], text: "R\"cc("} 78 | - {span: [89, 93], text: ")cc\""} 79 | contents: [{text: {span: [78, 89], text: "some c++)\" "}}] 80 | - eof: 81 | lexeme: 2147483647 82 | span: {span: [93, 93], text: ""} 83 | -------------------------------------------------------------------------------- /ilex/tests/greedy/greedy.txt: -------------------------------------------------------------------------------- 1 | poison 2 | poisonous 3 | poisonous[xyz]> 4 | poisonous#%#%[xyz]#%#%> 5 | poisonous [xyz] 6 | R"cc(some c++)" )cc" -------------------------------------------------------------------------------- /ilex/tests/greedy/main.rs: -------------------------------------------------------------------------------- 1 | use ilex::rule::*; 2 | use ilex::Context; 3 | use ilex::Lexeme; 4 | 5 | #[gilded::test("tests/greedy/*.txt")] 6 | fn greedy(test: &gilded::Test) { 7 | // This test verifies that lexing is greedy in *most* cases. 
8 | 9 | #[ilex::spec] 10 | struct Greedy { 11 | #[rule(Quoted::with(Bracket::rust_style( 12 | "#%", 13 | ("poisonous", "["), 14 | ("]", ">"), 15 | )))] 16 | rust_like: Lexeme, 17 | 18 | #[rule(Quoted::with(Bracket::cxx_style( 19 | Ident::new(), 20 | ("R\"", "("), 21 | (")", "\""), 22 | )))] 23 | cpp_like: Lexeme, 24 | 25 | #[rule("[", "]")] 26 | array: Lexeme, 27 | 28 | poison: Lexeme, 29 | 30 | #[rule(Ident::new())] 31 | ident: Lexeme, 32 | 33 | #[rule(LineEnd::cancellable("\\"))] 34 | nl: Lexeme, 35 | 36 | #[rule(Comment::line("//"))] 37 | comment: Lexeme, 38 | } 39 | 40 | let ctx = Context::new(); 41 | let report = ctx.new_report(); 42 | let file = ctx 43 | .new_file_from_bytes(test.path(), test.text(), &report) 44 | .unwrap(); 45 | 46 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 47 | match file.lex(Greedy::get().spec(), &report) { 48 | Ok(stream) => tokens(stream.summary()), 49 | Err(fatal) => stderr(fatal.to_string()), 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /ilex/tests/greedy/newlines.tokens.yaml: -------------------------------------------------------------------------------- 1 | - keyword: 2 | lexeme: 3 3 | span: {span: [0, 6], text: "poison"} 4 | - keyword: 5 | lexeme: 5 6 | span: 7 | span: [6, 7] 8 | text: | 9 | 10 | - keyword: 11 | lexeme: 3 12 | span: {span: [7, 13], text: "poison"} 13 | - keyword: 14 | lexeme: 3 15 | span: {span: [16, 22], text: "poison"} 16 | - keyword: 17 | lexeme: 3 18 | span: {span: [36, 42], text: "poison"} 19 | - keyword: 20 | lexeme: 5 21 | span: 22 | span: [53, 54] 23 | text: | 24 | 25 | - keyword: 26 | lexeme: 3 27 | span: {span: [54, 60], text: "poison"} 28 | - eof: 29 | lexeme: 2147483647 30 | span: {span: [60, 60], text: ""} 31 | -------------------------------------------------------------------------------- /ilex/tests/greedy/newlines.txt: -------------------------------------------------------------------------------- 1 | poison 2 | poison \ 
3 | poison \ // comment 4 | poison // comment 5 | poison -------------------------------------------------------------------------------- /ilex/tests/json/array.ast.txt: -------------------------------------------------------------------------------- 1 | Arr( 2 | [ 3 | Null, 4 | Bool( 5 | true, 6 | ), 7 | Bool( 8 | false, 9 | ), 10 | ], 11 | ) -------------------------------------------------------------------------------- /ilex/tests/json/array.json: -------------------------------------------------------------------------------- 1 | [null, true, false] -------------------------------------------------------------------------------- /ilex/tests/json/array.tokens.yaml: -------------------------------------------------------------------------------- 1 | - bracket: 2 | lexeme: 5 3 | span: 4 | span: [0, 19] 5 | text: "[null, true, false]" 6 | delims: 7 | - {span: [0, 1], text: "["} 8 | - {span: [18, 19], text: "]"} 9 | contents: 10 | - keyword: 11 | lexeme: 4 12 | span: {span: [1, 5], text: "null"} 13 | - keyword: 14 | lexeme: 0 15 | span: {span: [5, 6], text: ","} 16 | - keyword: 17 | lexeme: 2 18 | span: {span: [7, 11], text: "true"} 19 | - keyword: 20 | lexeme: 0 21 | span: {span: [11, 12], text: ","} 22 | - keyword: 23 | lexeme: 3 24 | span: {span: [13, 18], text: "false"} 25 | - eof: 26 | lexeme: 2147483647 27 | span: {span: [19, 19], text: ""} 28 | -------------------------------------------------------------------------------- /ilex/tests/json/main.rs: -------------------------------------------------------------------------------- 1 | use ilex::fp::Fp64; 2 | use ilex::report::Expected; 3 | use ilex::report::Report; 4 | use ilex::rule::*; 5 | use ilex::token; 6 | use ilex::token::Cursor; 7 | use ilex::Context; 8 | use ilex::Lexeme; 9 | 10 | #[ilex::spec] 11 | struct JsonSpec { 12 | #[rule(",")] 13 | comma: Lexeme, 14 | 15 | #[rule(":")] 16 | colon: Lexeme, 17 | 18 | #[rule("true")] 19 | true_: Lexeme, 20 | 21 | #[rule("false")] 22 | false_: Lexeme, 23 | 24 | 
#[rule("null")] 25 | null: Lexeme, 26 | 27 | #[named] 28 | #[rule("[", "]")] 29 | array: Lexeme, 30 | 31 | #[named] 32 | #[rule("{", "}")] 33 | object: Lexeme, 34 | 35 | #[named] 36 | #[rule(Quoted::new('"') 37 | .invalid_escape(r"\") 38 | .escapes([ 39 | "\\\"", r"\\", r"\/", 40 | r"\b", r"\f", r"\n", r"\t", r"\r", 41 | ]) 42 | .fixed_length_escape(r"\u", 4))] 43 | string: Lexeme, 44 | 45 | #[named] 46 | #[rule(Digital::new(10) 47 | .minus() 48 | .point_limit(0..2) 49 | .exponents(["e", "E"], Digits::new(10).plus().minus()))] 50 | number: Lexeme, 51 | } 52 | 53 | #[gilded::test("tests/json/*.json")] 54 | fn check_tokens(test: &gilded::Test) { 55 | let ctx = Context::new(); 56 | let report = ctx.new_report(); 57 | let file = ctx 58 | .new_file_from_bytes(test.path(), test.text(), &report) 59 | .unwrap(); 60 | 61 | let [tokens, ast, stderr] = 62 | test.outputs(["tokens.yaml", "ast.txt", "stderr"]); 63 | 64 | let stream = match file.lex(JsonSpec::get().spec(), &report) { 65 | Ok(stream) => stream, 66 | Err(fatal) => { 67 | stderr(fatal.to_string()); 68 | return; 69 | } 70 | }; 71 | 72 | tokens(stream.summary()); 73 | 74 | let json = parse(&report, JsonSpec::get(), &mut stream.cursor()); 75 | ast(format!("{json:#?}")); 76 | 77 | if let Err(fatal) = report.fatal_or(()) { 78 | stderr(fatal.to_string()); 79 | } 80 | } 81 | 82 | #[derive(Clone, Debug, PartialEq)] 83 | enum Json { 84 | Null, 85 | Num(f64), 86 | Bool(bool), 87 | Str(String), 88 | Arr(Vec), 89 | Obj(Vec<(String, Json)>), 90 | } 91 | 92 | fn parse(report: &Report, json: &JsonSpec, cursor: &mut Cursor) -> Json { 93 | let quote2str = |str: token::Quoted| -> String { 94 | str.to_utf8(|key, data, buf| { 95 | let char = match key.text() { 96 | "\\\"" => '\"', 97 | r"\\" => '\\', 98 | r"\/" => '/', 99 | r"\b" => '\x08', 100 | r"\f" => '\x0c', 101 | r"\n" => '\n', 102 | r"\t" => '\t', 103 | r"\r" => '\r', 104 | // This is sloppy about surrogates but this is just an example. 
105 | r"\u" => { 106 | let data = data.unwrap(); 107 | let code = 108 | u16::from_str_radix(data.text(), 16).unwrap_or_else(|_| { 109 | report.builtins(json.spec()).expected( 110 | [Expected::Name("hex-encoded u16".into())], 111 | data.text(), 112 | data, 113 | ); 114 | 0 115 | }); 116 | for c in char::decode_utf16([code]) { 117 | buf.push(c.unwrap_or('😢')) 118 | } 119 | return; 120 | } 121 | esc => panic!("{}", esc), 122 | }; 123 | buf.push(char); 124 | }) 125 | }; 126 | 127 | let value = token::switch() 128 | .case(json.null, |_, _| Json::Null) 129 | .case(json.false_, |_, _| Json::Bool(false)) 130 | .case(json.true_, |_, _| Json::Bool(true)) 131 | .case(json.string, |str: token::Quoted, _| Json::Str(quote2str(str))) 132 | .case(json.number, |num: token::Digital, _| { 133 | Json::Num(num.to_float::(.., report).unwrap().to_hard()) 134 | }) 135 | .case(json.array, |array: token::Bracket, _| { 136 | let mut trailing = None; 137 | let vec = array 138 | .contents() 139 | .delimited(json.comma, |c| Some(parse(report, json, c))) 140 | .map(|(e, c)| { 141 | trailing = c; 142 | e 143 | }) 144 | .collect(); 145 | 146 | if let Some(comma) = trailing { 147 | report 148 | .error("trailing commas are not allowed in JSON") 149 | .saying(comma, "remove this comma"); 150 | } 151 | 152 | Json::Arr(vec) 153 | }) 154 | .case(json.object, |object: token::Bracket, _| { 155 | let mut trailing = None; 156 | let vec = object 157 | .contents() 158 | .delimited(json.comma, |c| { 159 | let key = c 160 | .take(json.string, report) 161 | .map(|q| quote2str(q)) 162 | .unwrap_or("😢".into()); 163 | c.take(json.colon, report); 164 | let value = parse(report, json, c); 165 | Some((key, value)) 166 | }) 167 | .map(|(e, c)| { 168 | trailing = c; 169 | e 170 | }) 171 | .collect(); 172 | 173 | if let Some(comma) = trailing { 174 | report 175 | .error("trailing commas are not allowed in JSON") 176 | .saying(comma, "remove this comma"); 177 | } 178 | 179 | Json::Obj(vec) 180 | }) 181 | .take(cursor, 
report); 182 | value.unwrap_or(Json::Null) 183 | } 184 | -------------------------------------------------------------------------------- /ilex/tests/json/null.ast.txt: -------------------------------------------------------------------------------- 1 | Null -------------------------------------------------------------------------------- /ilex/tests/json/null.json: -------------------------------------------------------------------------------- 1 | null -------------------------------------------------------------------------------- /ilex/tests/json/null.tokens.yaml: -------------------------------------------------------------------------------- 1 | - keyword: 2 | lexeme: 4 3 | span: {span: [0, 4], text: "null"} 4 | - eof: 5 | lexeme: 2147483647 6 | span: {span: [4, 4], text: ""} 7 | -------------------------------------------------------------------------------- /ilex/tests/json/obj.ast.txt: -------------------------------------------------------------------------------- 1 | Obj( 2 | [ 3 | ( 4 | "keywords", 5 | Arr( 6 | [ 7 | Null, 8 | Bool( 9 | true, 10 | ), 11 | Bool( 12 | false, 13 | ), 14 | ], 15 | ), 16 | ), 17 | ( 18 | "string", 19 | Str( 20 | "abcdefg", 21 | ), 22 | ), 23 | ( 24 | "number", 25 | Num( 26 | 42.0, 27 | ), 28 | ), 29 | ( 30 | "int", 31 | Num( 32 | 42.0, 33 | ), 34 | ), 35 | ( 36 | "frac", 37 | Num( 38 | 0.42, 39 | ), 40 | ), 41 | ( 42 | "neg", 43 | Num( 44 | -42.0, 45 | ), 46 | ), 47 | ( 48 | "exp", 49 | Num( 50 | 4.2e43, 51 | ), 52 | ), 53 | ( 54 | "nest", 55 | Obj( 56 | [ 57 | ( 58 | "escapes\n", 59 | Str( 60 | "\"\\/\u{8}\u{c}\n\t\r\0ሴ\u{ffff}", 61 | ), 62 | ), 63 | ], 64 | ), 65 | ), 66 | ], 67 | ) -------------------------------------------------------------------------------- /ilex/tests/json/obj.json: -------------------------------------------------------------------------------- 1 | { 2 | "keywords": [null, true, false], 3 | "string": "abcdefg", 4 | "number": 42, 5 | "int": 42.0, 6 | "frac": 0.42, 7 | "neg": -42, 8 | "exp": 42e+42, 9 
| "nest": { 10 | "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" 11 | } 12 | } -------------------------------------------------------------------------------- /ilex/tests/json/obj.tokens.yaml: -------------------------------------------------------------------------------- 1 | - bracket: 2 | lexeme: 6 3 | span: 4 | span: [0, 209] 5 | text: | 6 | { 7 | "keywords": [null, true, false], 8 | "string": "abcdefg", 9 | "number": 42, 10 | "int": 42.0, 11 | "frac": 0.42, 12 | "neg": -42, 13 | "exp": 42e+42, 14 | "nest": { 15 | "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" 16 | } 17 | } 18 | delims: 19 | - {span: [0, 1], text: "{"} 20 | - {span: [208, 209], text: "}"} 21 | contents: 22 | - quoted: 23 | lexeme: 7 24 | span: {span: [4, 14], text: "\"keywords\""} 25 | delims: 26 | - {span: [4, 5], text: "\""} 27 | - {span: [13, 14], text: "\""} 28 | contents: [{text: {span: [5, 13], text: "keywords"}}] 29 | - keyword: 30 | lexeme: 1 31 | span: {span: [14, 15], text: ":"} 32 | - bracket: 33 | lexeme: 5 34 | span: 35 | span: [16, 35] 36 | text: "[null, true, false]" 37 | delims: 38 | - {span: [16, 17], text: "["} 39 | - {span: [34, 35], text: "]"} 40 | contents: 41 | - keyword: 42 | lexeme: 4 43 | span: {span: [17, 21], text: "null"} 44 | - keyword: 45 | lexeme: 0 46 | span: {span: [21, 22], text: ","} 47 | - keyword: 48 | lexeme: 2 49 | span: {span: [23, 27], text: "true"} 50 | - keyword: 51 | lexeme: 0 52 | span: {span: [27, 28], text: ","} 53 | - keyword: 54 | lexeme: 3 55 | span: {span: [29, 34], text: "false"} 56 | - keyword: 57 | lexeme: 0 58 | span: {span: [35, 36], text: ","} 59 | - quoted: 60 | lexeme: 7 61 | span: {span: [39, 47], text: "\"string\""} 62 | delims: 63 | - {span: [39, 40], text: "\""} 64 | - {span: [46, 47], text: "\""} 65 | contents: [{text: {span: [40, 46], text: "string"}}] 66 | - keyword: 67 | lexeme: 1 68 | span: {span: [47, 48], text: ":"} 69 | - quoted: 70 | lexeme: 7 71 | span: {span: [49, 58], text: "\"abcdefg\""} 72 | delims: 73 | - {span: 
[49, 50], text: "\""} 74 | - {span: [57, 58], text: "\""} 75 | contents: [{text: {span: [50, 57], text: "abcdefg"}}] 76 | - keyword: 77 | lexeme: 0 78 | span: {span: [58, 59], text: ","} 79 | - quoted: 80 | lexeme: 7 81 | span: {span: [62, 70], text: "\"number\""} 82 | delims: 83 | - {span: [62, 63], text: "\""} 84 | - {span: [69, 70], text: "\""} 85 | contents: [{text: {span: [63, 69], text: "number"}}] 86 | - keyword: 87 | lexeme: 1 88 | span: {span: [70, 71], text: ":"} 89 | - ident: 90 | lexeme: 8 91 | span: {span: [72, 74], text: "42"} 92 | radix: 10 93 | blocks: [{span: [72, 74], text: "42"}] 94 | exponents: [] 95 | - keyword: 96 | lexeme: 0 97 | span: {span: [74, 75], text: ","} 98 | - quoted: 99 | lexeme: 7 100 | span: {span: [78, 83], text: "\"int\""} 101 | delims: 102 | - {span: [78, 79], text: "\""} 103 | - {span: [82, 83], text: "\""} 104 | contents: [{text: {span: [79, 82], text: "int"}}] 105 | - keyword: 106 | lexeme: 1 107 | span: {span: [83, 84], text: ":"} 108 | - ident: 109 | lexeme: 8 110 | span: {span: [85, 89], text: "42.0"} 111 | radix: 10 112 | blocks: 113 | - {span: [85, 87], text: "42"} 114 | - {span: [88, 89], text: "0"} 115 | exponents: [] 116 | - keyword: 117 | lexeme: 0 118 | span: {span: [89, 90], text: ","} 119 | - quoted: 120 | lexeme: 7 121 | span: {span: [93, 99], text: "\"frac\""} 122 | delims: 123 | - {span: [93, 94], text: "\""} 124 | - {span: [98, 99], text: "\""} 125 | contents: [{text: {span: [94, 98], text: "frac"}}] 126 | - keyword: 127 | lexeme: 1 128 | span: {span: [99, 100], text: ":"} 129 | - ident: 130 | lexeme: 8 131 | span: {span: [101, 105], text: "0.42"} 132 | radix: 10 133 | blocks: 134 | - {span: [101, 102], text: "0"} 135 | - {span: [103, 105], text: "42"} 136 | exponents: [] 137 | - keyword: 138 | lexeme: 0 139 | span: {span: [105, 106], text: ","} 140 | - quoted: 141 | lexeme: 7 142 | span: {span: [109, 114], text: "\"neg\""} 143 | delims: 144 | - {span: [109, 110], text: "\""} 145 | - {span: [113, 114], text: 
"\""} 146 | contents: [{text: {span: [110, 113], text: "neg"}}] 147 | - keyword: 148 | lexeme: 1 149 | span: {span: [114, 115], text: ":"} 150 | - ident: 151 | lexeme: 8 152 | span: {span: [116, 119], text: "-42"} 153 | radix: 10 154 | sign: "-" 155 | blocks: [{span: [117, 119], text: "42"}] 156 | exponents: [] 157 | - keyword: 158 | lexeme: 0 159 | span: {span: [119, 120], text: ","} 160 | - quoted: 161 | lexeme: 7 162 | span: {span: [123, 128], text: "\"exp\""} 163 | delims: 164 | - {span: [123, 124], text: "\""} 165 | - {span: [127, 128], text: "\""} 166 | contents: [{text: {span: [124, 127], text: "exp"}}] 167 | - keyword: 168 | lexeme: 1 169 | span: {span: [128, 129], text: ":"} 170 | - ident: 171 | lexeme: 8 172 | span: {span: [130, 136], text: "42e+42"} 173 | radix: 10 174 | blocks: [{span: [130, 132], text: "42"}] 175 | exponents: 176 | - span: {span: [130, 136], text: "42e+42"} 177 | prefix: {span: [132, 133], text: "e"} 178 | radix: 10 179 | sign: "+" 180 | blocks: [{span: [134, 136], text: "42"}] 181 | - keyword: 182 | lexeme: 0 183 | span: {span: [136, 137], text: ","} 184 | - quoted: 185 | lexeme: 7 186 | span: {span: [140, 146], text: "\"nest\""} 187 | delims: 188 | - {span: [140, 141], text: "\""} 189 | - {span: [145, 146], text: "\""} 190 | contents: [{text: {span: [141, 145], text: "nest"}}] 191 | - keyword: 192 | lexeme: 1 193 | span: {span: [146, 147], text: ":"} 194 | - bracket: 195 | lexeme: 6 196 | span: 197 | span: [148, 207] 198 | text: | 199 | { 200 | "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" 201 | } 202 | delims: 203 | - {span: [148, 149], text: "{"} 204 | - {span: [206, 207], text: "}"} 205 | contents: 206 | - quoted: 207 | lexeme: 7 208 | span: 209 | span: [154, 165] 210 | text: "\"escapes\\n\"" 211 | delims: 212 | - {span: [154, 155], text: "\""} 213 | - {span: [164, 165], text: "\""} 214 | contents: 215 | - text: {span: [155, 162], text: "escapes"} 216 | - {esc: {span: [162, 164], text: "\\n"}} 217 | - keyword: 218 | lexeme: 1 
219 | span: {span: [165, 166], text: ":"} 220 | - quoted: 221 | lexeme: 7 222 | span: 223 | span: [167, 203] 224 | text: "\"\\\"\\\\\\/\\b\\f\\n\\t\\r\\u0000\\u1234\\uffff\"" 225 | delims: 226 | - {span: [167, 168], text: "\""} 227 | - {span: [202, 203], text: "\""} 228 | contents: 229 | - {esc: {span: [168, 170], text: "\\\""}} 230 | - {esc: {span: [170, 172], text: "\\\\"}} 231 | - {esc: {span: [172, 174], text: "\\/"}} 232 | - {esc: {span: [174, 176], text: "\\b"}} 233 | - {esc: {span: [176, 178], text: "\\f"}} 234 | - {esc: {span: [178, 180], text: "\\n"}} 235 | - {esc: {span: [180, 182], text: "\\t"}} 236 | - {esc: {span: [182, 184], text: "\\r"}} 237 | - esc: {span: [184, 186], text: "\\u"} 238 | data: {span: [186, 190], text: "0000"} 239 | - esc: {span: [190, 192], text: "\\u"} 240 | data: {span: [192, 196], text: "1234"} 241 | - esc: {span: [196, 198], text: "\\u"} 242 | data: {span: [198, 202], text: "ffff"} 243 | - eof: 244 | lexeme: 2147483647 245 | span: {span: [209, 209], text: ""} 246 | -------------------------------------------------------------------------------- /ilex/tests/llvm/main.rs: -------------------------------------------------------------------------------- 1 | use ilex::rule::*; 2 | use ilex::Context; 3 | use ilex::Lexeme; 4 | 5 | #[ilex::spec] 6 | struct Llvm { 7 | #[rule(";")] 8 | comment: Lexeme, 9 | 10 | #[rule('(', ')')] 11 | parens: Lexeme, 12 | #[rule('[', ']')] 13 | brackets: Lexeme, 14 | #[rule('<', '>')] 15 | vector: Lexeme, 16 | #[rule('{', '}')] 17 | braces: Lexeme, 18 | #[rule("<{", "}>")] 19 | packed: Lexeme, 20 | #[rule("!{", "}")] 21 | meta: Lexeme, 22 | 23 | #[rule(',')] 24 | comma: Lexeme, 25 | #[rule('=')] 26 | equal: Lexeme, 27 | #[rule('*')] 28 | star: Lexeme, 29 | #[rule('x')] 30 | times: Lexeme, 31 | 32 | br: Lexeme, 33 | call: Lexeme, 34 | icmp: Lexeme, 35 | #[rule("eq")] 36 | icmp_eq: Lexeme, 37 | ret: Lexeme, 38 | unreachable: Lexeme, 39 | 40 | constant: Lexeme, 41 | declare: Lexeme, 42 | define: Lexeme, 43 | 
global: Lexeme, 44 | 45 | label: Lexeme, 46 | null: Lexeme, 47 | ptr: Lexeme, 48 | #[rule(Digital::new(10).prefix("i"))] 49 | int: Lexeme, 50 | void: Lexeme, 51 | 52 | private: Lexeme, 53 | unnamed_addr: Lexeme, 54 | nocapture: Lexeme, 55 | nounwind: Lexeme, 56 | 57 | #[named] 58 | #[rule(Quoted::new('"') 59 | .fixed_length_escape(r"\", 2) 60 | .prefixes(["", "c"]))] 61 | string: Lexeme, 62 | 63 | #[named("identifier")] 64 | #[rule(Ident::new() 65 | .ascii_only() 66 | .extra_starts(".0123456789".chars()) 67 | .suffix(":"))] 68 | label_ident: Lexeme, 69 | 70 | #[named("identifier")] 71 | #[rule(Ident::new() 72 | .ascii_only() 73 | .extra_starts(".0123456789".chars()) 74 | .prefixes(["!", "@", "%"]))] 75 | bare: Lexeme, 76 | 77 | #[named("quoted identifier")] 78 | #[rule(Quoted::new('"') 79 | .fixed_length_escape(r"\", 2) 80 | .prefixes(["!", "@", "%"]))] 81 | quoted: Lexeme, 82 | 83 | #[named = "number"] 84 | #[rule(Digital::new(10) 85 | .minus() 86 | .point_limit(0..2) 87 | .exponents(["e", "E"], Digits::new(10).plus().minus()))] 88 | dec: Lexeme, 89 | 90 | #[named = "number"] 91 | #[rule(Digital::new(16).minus().prefix("0x"))] 92 | hex: Lexeme, 93 | } 94 | 95 | #[gilded::test("tests/llvm/*.ll")] 96 | fn llvm(test: &gilded::Test) { 97 | let ctx = Context::new(); 98 | let report = ctx.new_report(); 99 | let file = ctx 100 | .new_file_from_bytes(test.path(), test.text(), &report) 101 | .unwrap(); 102 | 103 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 104 | match file.lex(Llvm::get().spec(), &report) { 105 | Ok(stream) => tokens(stream.summary()), 106 | Err(fatal) => stderr(fatal.to_string()), 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /ilex/tests/llvm/smoke.ll: -------------------------------------------------------------------------------- 1 | ; Declare the string constant as a global constant. 
2 | @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00" 3 | 4 | ; External declaration of the puts function 5 | declare i32 @"non trivial name"(ptr nocapture) nounwind 6 | 7 | ; Definition of main function 8 | define i32 @main(i32 %0, ptr %1) { 9 | ; Call puts function to write out the string to stdout. 10 | call i32 @"non trivial name"(ptr @.str) 11 | ret i32 0 12 | } 13 | 14 | ; Named metadata 15 | !0 = !{i32 42, null, !"string"} 16 | !foo = !{!0} 17 | @glb = global i8 0 18 | 19 | define void @f(ptr %a) { 20 | %c = icmp eq ptr %a, @glb 21 | br i1 %c, label %BB_EXIT, label %BB_CONTINUE ; escapes %a 22 | BB_EXIT: 23 | call void @exit() 24 | unreachable 25 | BB_CONTINUE: 26 | ret void 27 | } -------------------------------------------------------------------------------- /ilex/tests/numbers/main.rs: -------------------------------------------------------------------------------- 1 | use ilex::fp::Fp64; 2 | use ilex::report::Report; 3 | use ilex::rule::*; 4 | use ilex::token; 5 | use ilex::Context; 6 | use ilex::Lexeme; 7 | 8 | #[ilex::spec] 9 | struct Numbers { 10 | #[rule(",")] 11 | comma: Lexeme, 12 | 13 | #[named("binary number")] 14 | #[rule(Digital::new(2) 15 | .separator('_') 16 | .plus().minus() 17 | .point_limit(0..2) 18 | .exponent("2", Digits::new(2).plus().minus()) 19 | .prefixes(["0b", "0B", "%"]))] 20 | bin: Lexeme, 21 | 22 | #[named = "hexadecimal number"] 23 | #[rule(Digital::new(16) 24 | .separator('_') 25 | .plus().minus() 26 | .point_limit(0..2) 27 | .exponents(["p", "P"], Digits::new(10).plus().minus()) 28 | .prefixes(["0x", "0X", "$"]))] 29 | hex: Lexeme, 30 | 31 | #[named = "quaternary number"] 32 | #[rule(Digital::new(4) 33 | .separator('_') 34 | .plus().minus() 35 | .point_limit(0..2) 36 | .exponents(["p", "P"], Digits::new(10).plus().minus()) 37 | .prefixes(["0q", "0Q"]))] 38 | qua: Lexeme, 39 | 40 | #[named = "octal number"] 41 | #[rule(Digital::new(8) 42 | .separator('_') 43 | .plus().minus() 44 | .point_limit(0..2) 45 | 
.exponents(["p", "P"], Digits::new(10).plus().minus()) 46 | .prefixes(["0o", "0O", "0"]))] 47 | oct: Lexeme, 48 | 49 | #[named = "decimal number"] 50 | #[rule(Digital::new(10) 51 | .separator('_') 52 | .plus().minus() 53 | .point_limit(0..2) 54 | .exponents(["e", "E"], Digits::new(10).plus().minus()) 55 | .exponent("^", Digits::new(16).plus().minus()))] 56 | dec: Lexeme, 57 | } 58 | 59 | #[gilded::test("tests/numbers/*.txt")] 60 | fn numbers(test: &gilded::Test) { 61 | let ctx = Context::new(); 62 | let report = ctx.new_report(); 63 | let file = ctx 64 | .new_file_from_bytes(test.path(), test.text(), &report) 65 | .unwrap(); 66 | 67 | let [tokens, fp64, stderr] = 68 | test.outputs(["tokens.yaml", "fp64.txt", "stderr"]); 69 | 70 | match file.lex(Numbers::get().spec(), &report) { 71 | Ok(stream) => { 72 | tokens(stream.summary()); 73 | match parse(Numbers::get(), stream.cursor(), &report) { 74 | Ok(v) => fp64(format!("{v:#?}")), 75 | Err(fatal) => stderr(fatal.to_string()), 76 | } 77 | } 78 | 79 | Err(fatal) => stderr(fatal.to_string()), 80 | } 81 | } 82 | 83 | fn parse( 84 | lex: &Numbers, 85 | mut cursor: ilex::token::Cursor, 86 | report: &Report, 87 | ) -> Result, ilex::report::Fatal> { 88 | let numbers = cursor 89 | .delimited(lex.comma, |cursor| loop { 90 | let value = token::switch() 91 | .case(Lexeme::eof(), |_, _| Err(false)) 92 | .cases([lex.dec, lex.bin, lex.oct, lex.hex, lex.qua], |num, _| { 93 | Ok(num.to_float::(.., report).unwrap()) 94 | }) 95 | .take(cursor, report); 96 | match value { 97 | None => { 98 | cursor.back_up(1); 99 | return Some(Fp64::nan()); 100 | } 101 | Some(Err(false)) => return None, 102 | Some(Err(true)) => continue, 103 | Some(Ok(v)) => return Some(v), 104 | } 105 | }) 106 | .map(|(v, _)| v) 107 | .collect::>(); 108 | cursor.expect_finished(report); 109 | report.fatal_or(numbers) 110 | } 111 | -------------------------------------------------------------------------------- /ilex/tests/numbers/numbers.fp64.txt: 
-------------------------------------------------------------------------------- 1 | [ 2 | 0.0, 3 | -0.0, 4 | -0.0, 5 | 1.23456e+80, 6 | 9.0e+9, 7 | -9.0e+9, 8 | 9.0e+9, 9 | 8.9999999999999995e-9, 10 | -511.0, 11 | 511.0, 12 | 4.0, 13 | 0.0048828125, 14 | 0.0048828125, 15 | 2.03125, 16 | 4194232.0, 17 | 3735928559.0, 18 | -27.10546875, 19 | 3.0e+10, 20 | ] -------------------------------------------------------------------------------- /ilex/tests/numbers/numbers.tokens.yaml: -------------------------------------------------------------------------------- 1 | - ident: 2 | lexeme: 5 3 | span: {span: [0, 1], text: "0"} 4 | radix: 10 5 | blocks: [{span: [0, 1], text: "0"}] 6 | exponents: [] 7 | - keyword: 8 | lexeme: 0 9 | span: {span: [1, 2], text: ","} 10 | - ident: 11 | lexeme: 4 12 | span: {span: [3, 6], text: "-00"} 13 | prefix: {span: [3, 4], text: "-"} 14 | radix: 8 15 | sign: "-" 16 | blocks: [{span: [5, 6], text: "0"}] 17 | exponents: [] 18 | - keyword: 19 | lexeme: 0 20 | span: {span: [6, 7], text: ","} 21 | - ident: 22 | lexeme: 5 23 | span: {span: [8, 12], text: "-0.0"} 24 | radix: 10 25 | sign: "-" 26 | blocks: 27 | - {span: [9, 10], text: "0"} 28 | - {span: [11, 12], text: "0"} 29 | exponents: [] 30 | - keyword: 31 | lexeme: 0 32 | span: {span: [12, 13], text: ","} 33 | - ident: 34 | lexeme: 5 35 | span: {span: [14, 24], text: "123.456e78"} 36 | radix: 10 37 | blocks: 38 | - {span: [14, 17], text: "123"} 39 | - {span: [18, 21], text: "456"} 40 | exponents: 41 | - span: {span: [14, 24], text: "123.456e78"} 42 | prefix: {span: [21, 22], text: "e"} 43 | radix: 10 44 | blocks: [{span: [22, 24], text: "78"}] 45 | - keyword: 46 | lexeme: 0 47 | span: {span: [24, 25], text: ","} 48 | - ident: 49 | lexeme: 5 50 | span: {span: [26, 29], text: "9e9"} 51 | radix: 10 52 | blocks: [{span: [26, 27], text: "9"}] 53 | exponents: 54 | - span: {span: [26, 29], text: "9e9"} 55 | prefix: {span: [27, 28], text: "e"} 56 | radix: 10 57 | blocks: [{span: [28, 29], text: "9"}] 
58 | - keyword: 59 | lexeme: 0 60 | span: {span: [29, 30], text: ","} 61 | - ident: 62 | lexeme: 5 63 | span: {span: [31, 35], text: "-9e9"} 64 | radix: 10 65 | sign: "-" 66 | blocks: [{span: [32, 33], text: "9"}] 67 | exponents: 68 | - span: {span: [31, 35], text: "-9e9"} 69 | prefix: {span: [33, 34], text: "e"} 70 | radix: 10 71 | blocks: [{span: [34, 35], text: "9"}] 72 | - keyword: 73 | lexeme: 0 74 | span: {span: [35, 36], text: ","} 75 | - ident: 76 | lexeme: 5 77 | span: {span: [37, 42], text: "+9e+9"} 78 | radix: 10 79 | sign: "+" 80 | blocks: [{span: [38, 39], text: "9"}] 81 | exponents: 82 | - span: {span: [37, 42], text: "+9e+9"} 83 | prefix: {span: [39, 40], text: "e"} 84 | radix: 10 85 | sign: "+" 86 | blocks: [{span: [41, 42], text: "9"}] 87 | - keyword: 88 | lexeme: 0 89 | span: {span: [42, 43], text: ","} 90 | - ident: 91 | lexeme: 5 92 | span: {span: [44, 48], text: "9e-9"} 93 | radix: 10 94 | blocks: [{span: [44, 45], text: "9"}] 95 | exponents: 96 | - span: {span: [44, 48], text: "9e-9"} 97 | prefix: {span: [45, 46], text: "e"} 98 | radix: 10 99 | sign: "-" 100 | blocks: [{span: [47, 48], text: "9"}] 101 | - keyword: 102 | lexeme: 0 103 | span: {span: [48, 49], text: ","} 104 | - ident: 105 | lexeme: 4 106 | span: {span: [50, 55], text: "-0777"} 107 | prefix: {span: [50, 51], text: "-"} 108 | radix: 8 109 | sign: "-" 110 | blocks: [{span: [52, 55], text: "777"}] 111 | exponents: [] 112 | - keyword: 113 | lexeme: 0 114 | span: {span: [55, 56], text: ","} 115 | - ident: 116 | lexeme: 4 117 | span: {span: [57, 62], text: "0o777"} 118 | prefix: {span: [57, 59], text: "0o"} 119 | radix: 8 120 | blocks: [{span: [59, 62], text: "777"}] 121 | exponents: [] 122 | - keyword: 123 | lexeme: 0 124 | span: {span: [62, 63], text: ","} 125 | - ident: 126 | lexeme: 1 127 | span: {span: [64, 69], text: "%1210"} 128 | prefix: {span: [64, 65], text: "%"} 129 | radix: 2 130 | blocks: [{span: [65, 66], text: "1"}] 131 | exponents: 132 | - span: {span: [64, 69], text: 
"%1210"} 133 | prefix: {span: [66, 67], text: "2"} 134 | radix: 2 135 | blocks: [{span: [67, 69], text: "10"}] 136 | - keyword: 137 | lexeme: 0 138 | span: {span: [69, 70], text: ","} 139 | - ident: 140 | lexeme: 1 141 | span: {span: [71, 85], text: "0b0.0000000101"} 142 | prefix: {span: [71, 73], text: "0b"} 143 | radix: 2 144 | blocks: 145 | - {span: [73, 74], text: "0"} 146 | - {span: [75, 85], text: "0000000101"} 147 | exponents: [] 148 | - keyword: 149 | lexeme: 0 150 | span: {span: [85, 86], text: ","} 151 | - ident: 152 | lexeme: 4 153 | span: {span: [87, 95], text: "0o0.0024"} 154 | prefix: {span: [87, 89], text: "0o"} 155 | radix: 8 156 | blocks: 157 | - {span: [89, 90], text: "0"} 158 | - {span: [91, 95], text: "0024"} 159 | exponents: [] 160 | - keyword: 161 | lexeme: 0 162 | span: {span: [95, 96], text: ","} 163 | - ident: 164 | lexeme: 4 165 | span: {span: [97, 106], text: "0O1.01p01"} 166 | prefix: {span: [97, 99], text: "0O"} 167 | radix: 8 168 | blocks: 169 | - {span: [99, 100], text: "1"} 170 | - {span: [101, 103], text: "01"} 171 | exponents: 172 | - span: {span: [97, 106], text: "0O1.01p01"} 173 | prefix: {span: [103, 104], text: "p"} 174 | radix: 10 175 | blocks: [{span: [104, 106], text: "01"}] 176 | - keyword: 177 | lexeme: 0 178 | span: {span: [106, 107], text: ","} 179 | - ident: 180 | lexeme: 2 181 | span: {span: [108, 120], text: "0xfff.eep+10"} 182 | prefix: {span: [108, 110], text: "0x"} 183 | radix: 16 184 | blocks: 185 | - {span: [110, 113], text: "fff"} 186 | - {span: [114, 116], text: "ee"} 187 | exponents: 188 | - span: {span: [108, 120], text: "0xfff.eep+10"} 189 | prefix: {span: [116, 117], text: "p"} 190 | radix: 10 191 | sign: "+" 192 | blocks: [{span: [118, 120], text: "10"}] 193 | - keyword: 194 | lexeme: 0 195 | span: {span: [120, 121], text: ","} 196 | - ident: 197 | lexeme: 2 198 | span: {span: [122, 131], text: "$DEADBEEF"} 199 | prefix: {span: [122, 123], text: "$"} 200 | radix: 16 201 | blocks: [{span: [123, 131], text: 
"DEADBEEF"}] 202 | exponents: [] 203 | - keyword: 204 | lexeme: 0 205 | span: {span: [131, 132], text: ","} 206 | - ident: 207 | lexeme: 3 208 | span: {span: [133, 145], text: "-0q0123.0123"} 209 | prefix: {span: [133, 135], text: "-0"} 210 | radix: 4 211 | sign: "-" 212 | blocks: 213 | - {span: [136, 140], text: "0123"} 214 | - {span: [141, 145], text: "0123"} 215 | exponents: [] 216 | - keyword: 217 | lexeme: 0 218 | span: {span: [145, 146], text: ","} 219 | - ident: 220 | lexeme: 5 221 | span: {span: [147, 150], text: "3^a"} 222 | radix: 10 223 | blocks: [{span: [147, 148], text: "3"}] 224 | exponents: 225 | - span: {span: [147, 150], text: "3^a"} 226 | prefix: {span: [148, 149], text: "^"} 227 | radix: 16 228 | blocks: [{span: [149, 150], text: "a"}] 229 | - keyword: 230 | lexeme: 0 231 | span: {span: [150, 151], text: ","} 232 | - eof: 233 | lexeme: 2147483647 234 | span: {span: [151, 151], text: ""} 235 | -------------------------------------------------------------------------------- /ilex/tests/numbers/numbers.txt: -------------------------------------------------------------------------------- 1 | 0, 2 | -00, 3 | -0.0, 4 | 123.456e78, 5 | 9e9, 6 | -9e9, 7 | +9e+9, 8 | 9e-9, 9 | -0777, 10 | 0o777, 11 | %1210, 12 | 0b0.0000000101, 13 | 0o0.0024, 14 | 0O1.01p01, 15 | 0xfff.eep+10, 16 | $DEADBEEF, 17 | -0q0123.0123, 18 | 3^a, -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/idents.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected `b` in `/`-suffixed number 2 | --> ambiguous/idents.txt:1:6 3 | | 4 | 1 | /foo/bar/ 5 | | ^ 6 | | --- help: because this value is decimal (base 10), digits should be within '0'..='9' 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:593:34 9 | 10 | error: unexpected `a` in `/`-suffixed number 11 | --> ambiguous/idents.txt:1:7 12 | | 13 | 1 | /foo/bar/ 14 | | ^ 15 | | --- help: because this value is decimal (base 10), 
digits should be within '0'..='9' 16 | | 17 | = note: reported at: ilex/src/rt/emit2.rs:593:34 18 | 19 | error: unexpected `r` in `/`-suffixed number 20 | --> ambiguous/idents.txt:1:8 21 | | 22 | 1 | /foo/bar/ 23 | | ^ 24 | | --- help: because this value is decimal (base 10), digits should be within '0'..='9' 25 | | 26 | = note: reported at: ilex/src/rt/emit2.rs:593:34 27 | 28 | error: aborting due to 3 errors 29 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/idents.txt: -------------------------------------------------------------------------------- 1 | /foo/bar/ 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_br.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected closing `able` 2 | --> ambiguous/no_xid_after_br.txt:1:28 3 | | 4 | 1 | $[] $null[]null $null[]nullable 5 | | ^^^^ expected to be opened by `--able` 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:254:22 8 | 9 | error: aborting due to previous error 10 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_br.txt: -------------------------------------------------------------------------------- 1 | $[] $null[]null $null[]nullable 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_cm.stderr: -------------------------------------------------------------------------------- 1 | error: extraneous characters after `--null ... 
null` 2 | --> ambiguous/no_xid_after_cm.txt:1:51 3 | | 4 | 1 | --null some stuff null --null some more stuff nullnull 5 | | ^^^^ 6 | | -- help: maybe you meant to include a space here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:810:10 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_cm.txt: -------------------------------------------------------------------------------- 1 | --null some stuff null --null some more stuff nullnull -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_id.stderr: -------------------------------------------------------------------------------- 1 | error: extraneous characters after `/`-prefixed, `%q`-suffixed identifier 2 | --> ambiguous/no_xid_after_id.txt:1:22 3 | | 4 | 1 | /foo%q /null%q /foo%qua 5 | | ^^ 6 | | -- help: maybe you meant to include a space here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:810:10 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_id.txt: -------------------------------------------------------------------------------- 1 | /foo%q /null%q /foo%qua 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_kw.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected closing `nullable` 2 | --> ambiguous/no_xid_after_kw.txt:1:6 3 | | 4 | 1 | null nullable 5 | | ^^^^^^^^ expected to be opened by `--nullable` 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:254:22 8 | 9 | error: aborting due to previous error 10 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_kw.txt: 
-------------------------------------------------------------------------------- 1 | null nullable 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_nm.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected `q` in `%`-prefixed number 2 | --> ambiguous/no_xid_after_nm.txt:1:10 3 | | 4 | 1 | %123 %123qua 5 | | ^ 6 | | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:593:34 9 | 10 | error: unexpected `u` in `%`-prefixed number 11 | --> ambiguous/no_xid_after_nm.txt:1:11 12 | | 13 | 1 | %123 %123qua 14 | | ^ 15 | | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' 16 | | 17 | = note: reported at: ilex/src/rt/emit2.rs:593:34 18 | 19 | error: unexpected `a` in `%`-prefixed number 20 | --> ambiguous/no_xid_after_nm.txt:1:12 21 | | 22 | 1 | %123 %123qua 23 | | ^ 24 | | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' 25 | | 26 | = note: reported at: ilex/src/rt/emit2.rs:593:34 27 | 28 | error: aborting due to 3 errors 29 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_nm.txt: -------------------------------------------------------------------------------- 1 | %123 %123qua 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_st.stderr: -------------------------------------------------------------------------------- 1 | error: extraneous characters after `%'...'q` 2 | --> ambiguous/no_xid_after_st.txt:1:16 3 | | 4 | 1 | q'xyz'q %'xyz'qua 5 | | ^^ 6 | | -- help: maybe you meant to include a space here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:810:10 9 | 10 | error: aborting due to previous error 11 | 
-------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_st.txt: -------------------------------------------------------------------------------- 1 | q'xyz'q %'xyz'qua -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/nums.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected closing `xyz` 2 | --> ambiguous/nums.txt:1:16 3 | | 4 | 1 | 1234%1234 1234/xyz 5 | | ^^^ expected to be opened by `--xyz` 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:254:22 8 | 9 | error: aborting due to previous error 10 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/nums.txt: -------------------------------------------------------------------------------- 1 | 1234%1234 1234/xyz 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/symbols_after_comment.tokens.yaml: -------------------------------------------------------------------------------- 1 | - keyword: 2 | lexeme: 0 3 | span: {span: [14, 18], text: "null"} 4 | - keyword: 5 | lexeme: 1 6 | span: {span: [37, 42], text: "-null"} 7 | - keyword: 8 | lexeme: 0 9 | span: {span: [43, 47], text: "null"} 10 | - eof: 11 | lexeme: 2147483647 12 | span: {span: [48, 48], text: ""} 13 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/symbols_after_comment.txt: -------------------------------------------------------------------------------- 1 | -/ comment /- null -/ more comment /--null null 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/symbols_after_quoted.tokens.yaml: -------------------------------------------------------------------------------- 1 | - quoted: 2 | lexeme: 9 3 | span: {span: [0, 12], text: "qnull(a)null"} 4 | delims: 
5 | - {span: [0, 6], text: "qnull("} 6 | - {span: [7, 12], text: ")null"} 7 | contents: [{text: {span: [6, 7], text: "a"}}] 8 | - eof: 9 | lexeme: 2147483647 10 | span: {span: [13, 13], text: ""} 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/symbols_after_quoted.txt: -------------------------------------------------------------------------------- 1 | qnull(a)null 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/invalid.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected `8` in `0o`-prefixed number 2 | --> digital/invalid.txt:2:3 3 | | 4 | 2 | 0o8 5 | | ^ 6 | | --- help: because this value is octal (base 8), digits should be within '0'..='7' 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:593:34 9 | 10 | error: unexpected `8` in `0o`-prefixed number 11 | --> digital/invalid.txt:3:4 12 | | 13 | 3 | 0o08 14 | | ^ 15 | | ---- help: because this value is octal (base 8), digits should be within '0'..='7' 16 | | 17 | = note: reported at: ilex/src/rt/emit2.rs:593:34 18 | 19 | error: unexpected `a` in number 20 | --> digital/invalid.txt:4:5 21 | | 22 | 4 | 0/0/aa11g 23 | | ^ 24 | | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' 25 | | 26 | = note: reported at: ilex/src/rt/emit2.rs:593:34 27 | 28 | error: unexpected `a` in number 29 | --> digital/invalid.txt:4:6 30 | | 31 | 4 | 0/0/aa11g 32 | | ^ 33 | | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' 34 | | 35 | = note: reported at: ilex/src/rt/emit2.rs:593:34 36 | 37 | error: unexpected `g` in number 38 | --> digital/invalid.txt:4:9 39 | | 40 | 4 | 0/0/aa11g 41 | | ^ 42 | | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' 43 | | 44 | = note: reported at: ilex/src/rt/emit2.rs:593:34 45 | 46 | error: aborting due to 
5 errors 47 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/invalid.txt: -------------------------------------------------------------------------------- 1 | 0o777 2 | 0o8 3 | 0o08 4 | 0/0/aa11g 5 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/missing.stderr: -------------------------------------------------------------------------------- 1 | error: expected digits after `0x`, but found ` ` 2 | --> digital/missing.txt:2:3 3 | | 4 | 2 | 0x 0xf 5 | | ^ expected digits after `0x` 6 | | ^^ because of this prefix 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:571:18 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/missing.txt: -------------------------------------------------------------------------------- 1 | 0xdeadbeef 2 | 0x 0xf 3 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/points.stderr: -------------------------------------------------------------------------------- 1 | error: expected at least 2 `/`s 2 | --> digital/points.txt:2:7 3 | | 4 | 2 | 1/2/3/4e4/5 5 | | ^ 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:554:16 8 | 9 | error: unrecognized character 10 | --> digital/points.txt:2:6 11 | | 12 | 2 | 1/2/3/4e4/5 13 | | ^ 14 | | 15 | = note: reported at: ilex/src/rt/mod.rs:36:8 16 | 17 | error: expected at least 2 `/`s 18 | --> digital/points.txt:3:1 19 | | 20 | 3 | 1/2e4/5 21 | | ^^^ 22 | | 23 | = note: reported at: ilex/src/rt/emit2.rs:554:16 24 | 25 | error: expected at least 2 `/`s 26 | --> digital/points.txt:4:11 27 | | 28 | 4 | 1/2/3e4/5/6 29 | | ^ 30 | | 31 | = note: reported at: ilex/src/rt/emit2.rs:554:16 32 | 33 | error: unrecognized character 34 | --> digital/points.txt:4:10 35 | | 36 | 4 | 1/2/3e4/5/6 37 | | ^ 38 | | 39 | = note: reported at: 
ilex/src/rt/mod.rs:36:8 40 | 41 | error: expected at least 1 `/` 42 | --> digital/points.txt:5:6 43 | | 44 | 5 | 1/2/3e4 45 | | ^^ 46 | | 47 | = note: reported at: ilex/src/rt/emit2.rs:554:16 48 | 49 | error: aborting due to 6 errors 50 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/points.txt: -------------------------------------------------------------------------------- 1 | 1/2/3e4/5 2 | 1/2/3/4e4/5 3 | 1/2e4/5 4 | 1/2/3e4/5/6 5 | 1/2/3e4 -------------------------------------------------------------------------------- /ilex/tests/ui/digital/separators.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected digit separator in `no_prefix@`-prefixed number 2 | --> digital/separators.txt:2:11 3 | | 4 | 2 | no_prefix@_123_._456_e_789_._012_ 5 | | ^ 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:418:36 8 | 9 | error: unexpected digit separator in `no_suffix@`-prefixed number 10 | --> digital/separators.txt:3:33 11 | | 12 | 3 | no_suffix@_123_._456_e_789_._012_ 13 | | ^ 14 | | 15 | = note: reported at: ilex/src/rt/emit2.rs:505:28 16 | 17 | error: unexpected digit separator in `no_point@`-prefixed number 18 | --> digital/separators.txt:4:15 19 | | 20 | 4 | no_point@_123_._456_e_789_._012_ 21 | | ^ 22 | | 23 | = note: reported at: ilex/src/rt/emit2.rs:435:32 24 | 25 | error: unexpected digit separator in `no_point@`-prefixed number 26 | --> digital/separators.txt:4:16 27 | | 28 | 4 | no_point@_123_._456_e_789_._012_ 29 | | ^ 30 | | 31 | = note: reported at: ilex/src/rt/emit2.rs:418:36 32 | 33 | error: unexpected digit separator in `no_point@`-prefixed number 34 | --> digital/separators.txt:4:27 35 | | 36 | 4 | no_point@_123_._456_e_789_._012_ 37 | | ^ 38 | | 39 | = note: reported at: ilex/src/rt/emit2.rs:435:32 40 | 41 | error: unexpected digit separator in `no_point@`-prefixed number 42 | --> digital/separators.txt:4:28 43 | | 44 | 4 | 
no_point@_123_._456_e_789_._012_ 45 | | ^ 46 | | 47 | = note: reported at: ilex/src/rt/emit2.rs:418:36 48 | 49 | error: unexpected digit separator in `no_exp@`-prefixed number 50 | --> digital/separators.txt:5:19 51 | | 52 | 5 | no_exp@_123_._456_e_789_._012_ 53 | | ^ 54 | | 55 | = note: reported at: ilex/src/rt/emit2.rs:455:34 56 | 57 | error: unexpected digit separator in `no_exp@`-prefixed number 58 | --> digital/separators.txt:5:20 59 | | 60 | 5 | no_exp@_123_._456_e_789_._012_ 61 | | ^ 62 | | 63 | = note: reported at: ilex/src/rt/emit2.rs:418:36 64 | 65 | error: aborting due to 8 errors 66 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/separators.txt: -------------------------------------------------------------------------------- 1 | all_ok@_123_._456_e_789_._012_ 2 | no_prefix@_123_._456_e_789_._012_ 3 | no_suffix@_123_._456_e_789_._012_ 4 | no_point@_123_._456_e_789_._012_ 5 | no_exp@_123_._456_e_789_._012_ -------------------------------------------------------------------------------- /ilex/tests/ui/eof/bracket.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `]`, but found 2 | --> eof/bracket.txt:1:11 3 | | 4 | 1 | [[[]]] [[] 5 | | ^ expected `]` here 6 | | - help: previously opened here 7 | | 8 | = note: reported at: ilex/src/rt/lexer.rs:323:10 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/bracket.txt: -------------------------------------------------------------------------------- 1 | [[[]]] [[] 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/bracket_multiline.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `]`, but found 2 | --> eof/bracket_multiline.txt:3:3 3 | | 4 | 3 | ][ 5 | | ^ expected `]` here 
6 | | - help: previously opened here 7 | | 8 | = note: reported at: ilex/src/rt/lexer.rs:323:10 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/bracket_multiline.txt: -------------------------------------------------------------------------------- 1 | [ 2 | [] 3 | ][ -------------------------------------------------------------------------------- /ilex/tests/ui/eof/comment.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `*/`, but found 2 | --> eof/comment.txt:1:38 3 | | 4 | 1 | /* ok /* nested */ */ /* /* not ok */ 5 | | ^ expected `*/` here 6 | | -- help: previously opened here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:331:14 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/comment.txt: -------------------------------------------------------------------------------- 1 | /* ok /* nested */ */ /* /* not ok */ -------------------------------------------------------------------------------- /ilex/tests/ui/eof/comment_multiline.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `*/`, but found 2 | --> eof/comment_multiline.txt:4:15 3 | | 4 | 3 | /* 5 | | -- help: previously opened here 6 | 4 | /* not ok */ 7 | | ^ expected `*/` here 8 | | 9 | = note: reported at: ilex/src/rt/emit2.rs:331:14 10 | 11 | error: aborting due to previous error 12 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/comment_multiline.txt: -------------------------------------------------------------------------------- 1 | /* ok 2 | /* nested */ */ 3 | /* 4 | /* not ok */ 5 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/mixed_brackets.stderr: 
-------------------------------------------------------------------------------- 1 | error: unexpected closing `)` 2 | --> eof/mixed_brackets.txt:1:8 3 | | 4 | 1 | [] () [) (] [(]) 5 | | ^ expected to be opened by `(` 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:254:22 8 | 9 | error: expected closing `)`, but found `]` 10 | --> eof/mixed_brackets.txt:1:11 11 | | 12 | 1 | [] () [) (] [(]) 13 | | ^ expected `)` here 14 | | - help: previously opened here 15 | | 16 | = note: reported at: ilex/src/rt/lexer.rs:207:23 17 | 18 | error: expected closing `)`, but found `]` 19 | --> eof/mixed_brackets.txt:1:15 20 | | 21 | 1 | [] () [) (] [(]) 22 | | ^ expected `)` here 23 | | - help: previously opened here 24 | | 25 | = note: reported at: ilex/src/rt/lexer.rs:207:23 26 | 27 | error: expected closing `)`, but found 28 | --> eof/mixed_brackets.txt:1:17 29 | | 30 | 1 | [] () [) (] [(]) 31 | | ^ expected `)` here 32 | | - help: previously opened here 33 | | 34 | = note: reported at: ilex/src/rt/lexer.rs:323:10 35 | 36 | error: aborting due to 4 errors 37 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/mixed_brackets.txt: -------------------------------------------------------------------------------- 1 | [] () [) (] [(]) -------------------------------------------------------------------------------- /ilex/tests/ui/eof/mixed_brackets_multiline.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `)`, but found `]` 2 | --> eof/mixed_brackets_multiline.txt:6:3 3 | | 4 | 5 | ( 5 | | - help: previously opened here 6 | 6 | ] 7 | | ^ expected `)` here 8 | | 9 | = note: reported at: ilex/src/rt/lexer.rs:207:23 10 | 11 | error: unexpected closing `)` 12 | --> eof/mixed_brackets_multiline.txt:9:3 13 | | 14 | 9 | ) 15 | | ^ expected to be opened by `(` 16 | | 17 | = note: reported at: ilex/src/rt/emit2.rs:254:22 18 | 19 | error: expected closing `)`, but found `]` 20 
| --> eof/mixed_brackets_multiline.txt:11:1 21 | | 22 | 10 | ( 23 | | - help: previously opened here 24 | 11 | ] 25 | | ^ expected `)` here 26 | | 27 | = note: reported at: ilex/src/rt/lexer.rs:207:23 28 | 29 | error: expected closing `)`, but found 30 | --> eof/mixed_brackets_multiline.txt:11:2 31 | | 32 | 10 | ( 33 | | - help: previously opened here 34 | 11 | ] 35 | | ^ expected `)` here 36 | | 37 | = note: reported at: ilex/src/rt/lexer.rs:323:10 38 | 39 | error: aborting due to 4 errors 40 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/mixed_brackets_multiline.txt: -------------------------------------------------------------------------------- 1 | [ 2 | () 3 | ] 4 | [ 5 | ( 6 | ] 7 | ) 8 | [ 9 | ) 10 | ( 11 | ] -------------------------------------------------------------------------------- /ilex/tests/ui/eof/quoted.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `'`, but found 2 | --> eof/quoted.txt:1:14 3 | | 4 | 1 | 'foo' '' 'bar 5 | | ^ expected `'` here 6 | | - help: previously opened here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:722:14 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/quoted.txt: -------------------------------------------------------------------------------- 1 | 'foo' '' 'bar 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/quoted_multiline.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `'`, but found 2 | --> eof/quoted_multiline.txt:3:5 3 | | 4 | 3 | 'bar 5 | | ^ expected `'` here 6 | | - help: previously opened here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:722:14 9 | 10 | error: aborting due to previous error 11 | 
-------------------------------------------------------------------------------- /ilex/tests/ui/eof/quoted_multiline.txt: -------------------------------------------------------------------------------- 1 | 'foo' 2 | '' 3 | 'bar 4 | -------------------------------------------------------------------------------- /ilex/tests/ui/main.rs: -------------------------------------------------------------------------------- 1 | use ilex::report::Options; 2 | use ilex::rule::*; 3 | use ilex::Context; 4 | use ilex::Lexeme; 5 | 6 | #[gilded::test("tests/ui/ambiguous/*.txt")] 7 | fn ambiguous(test: &gilded::Test) { 8 | #[ilex::spec] 9 | struct Spec { 10 | #[rule("null")] 11 | kw: Lexeme, 12 | #[rule("-null")] 13 | kw2: Lexeme, 14 | #[rule(")null")] 15 | kw3: Lexeme, 16 | 17 | #[rule(Comment::nesting(Bracket::rust_style( 18 | "/", 19 | ("-", ""), 20 | ("", "-"), 21 | )))] 22 | cm: Lexeme, 23 | #[rule(Comment::nesting(Bracket::cxx_style( 24 | Ident::new().min_len(1), 25 | ("--", ""), 26 | ("", ""), 27 | )))] 28 | cm2: Lexeme, 29 | #[rule(Bracket::cxx_style( 30 | Ident::new(), 31 | ("$", "["), 32 | ("]", ""), 33 | ))] 34 | br: Lexeme, 35 | #[rule(Ident::new() 36 | .prefix("/") 37 | .suffixes(["", "%q", "/"]))] 38 | id: Lexeme, 39 | #[rule(Digital::new(10) 40 | .prefixes(["", "%"]) 41 | .suffixes(["", "%", "q", "/"]))] 42 | nm: Lexeme, 43 | #[rule(Quoted::new("'") 44 | .prefixes(["%", "q"]) 45 | .suffixes(["", "%", "q"]))] 46 | st: Lexeme, 47 | #[rule(Quoted::with(Bracket::cxx_style( 48 | Ident::new(), 49 | ("q", "("), 50 | (")", ""), 51 | )))] 52 | st2: Lexeme, 53 | } 54 | 55 | let ctx = Context::new(); 56 | let report = 57 | ctx.new_report_with(Options { color: false, ..Default::default() }); 58 | let file = ctx 59 | .new_file_from_bytes(test.path(), test.text(), &report) 60 | .unwrap(); 61 | 62 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 63 | match file.lex(Spec::get().spec(), &report) { 64 | Ok(stream) => tokens(stream.summary()), 65 | Err(fatal) => 
stderr(fatal.to_string()), 66 | } 67 | } 68 | 69 | #[gilded::test("tests/ui/digital/*.txt")] 70 | fn digital(test: &gilded::Test) { 71 | #[ilex::spec] 72 | struct Spec { 73 | #[rule(Digital::new(16).prefix("0x"))] 74 | m1: Lexeme, 75 | #[rule(Digital::new(8).prefix("0o"))] 76 | m2: Lexeme, 77 | 78 | #[rule( Digital::new(10) 79 | .point_limit(2..3) 80 | .point('/') 81 | .exponent("e", Digits::new(10).point_limit(1..2)) 82 | .separator_with("_", 83 | SeparatorCornerCases { 84 | prefix: true, 85 | suffix: true, 86 | around_point: true, 87 | around_exp: true, 88 | }))] 89 | m0: Lexeme, 90 | #[rule(Digital::new(10) 91 | .prefix("all_ok@") 92 | .point_limit(0..3) 93 | .exponent("e", Digits::new(10).point_limit(0..3)) 94 | .separator_with("_", 95 | SeparatorCornerCases { 96 | prefix: true, 97 | suffix: true, 98 | around_point: true, 99 | around_exp: true, 100 | }))] 101 | n0: Lexeme, 102 | #[rule( Digital::new(10) 103 | .prefix("no_prefix@") 104 | .point_limit(0..3) 105 | .exponent("e", Digits::new(10).point_limit(0..3)) 106 | .separator_with("_", 107 | SeparatorCornerCases { 108 | prefix: false, 109 | suffix: true, 110 | around_point: true, 111 | around_exp: true, 112 | }))] 113 | n1: Lexeme, 114 | #[rule(Digital::new(10) 115 | .prefix("no_suffix@") 116 | .point_limit(0..3) 117 | .exponent("e", Digits::new(10).point_limit(0..3)) 118 | .separator_with("_", 119 | SeparatorCornerCases { 120 | prefix: true, 121 | suffix: false, 122 | around_point: true, 123 | around_exp: true, 124 | }))] 125 | n2: Lexeme, 126 | #[rule( Digital::new(10) 127 | .prefix("no_point@") 128 | .point_limit(0..3) 129 | .exponent("e", Digits::new(10).point_limit(0..3)) 130 | .separator_with("_", 131 | SeparatorCornerCases { 132 | prefix: true, 133 | suffix: true, 134 | around_point: false, 135 | around_exp: true, 136 | }))] 137 | n3: Lexeme, 138 | #[rule(Digital::new(10) 139 | .prefix("no_exp@") 140 | .point_limit(0..3) 141 | .exponent("e", Digits::new(10).point_limit(0..3)) 142 | .separator_with("_", 
143 | SeparatorCornerCases { 144 | prefix: true, 145 | suffix: true, 146 | around_point: true, 147 | around_exp: false, 148 | }))] 149 | n4: Lexeme, 150 | } 151 | 152 | let ctx = Context::new(); 153 | let report = 154 | ctx.new_report_with(Options { color: false, ..Default::default() }); 155 | let file = ctx 156 | .new_file_from_bytes(test.path(), test.text(), &report) 157 | .unwrap(); 158 | 159 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 160 | match file.lex(Spec::get().spec(), &report) { 161 | Ok(stream) => tokens(stream.summary()), 162 | Err(fatal) => stderr(fatal.to_string()), 163 | } 164 | } 165 | 166 | #[gilded::test("tests/ui/eof/*.txt")] 167 | fn eof(test: &gilded::Test) { 168 | #[ilex::spec] 169 | struct Spec { 170 | #[rule("/*", "*/")] 171 | c1: Lexeme, 172 | 173 | #[rule("[", "]")] 174 | b1: Lexeme, 175 | 176 | #[rule("(", ")")] 177 | b2: Lexeme, 178 | 179 | #[rule(Quoted::new("'"))] 180 | q1: Lexeme, 181 | } 182 | 183 | let ctx = Context::new(); 184 | let report = 185 | ctx.new_report_with(Options { color: false, ..Default::default() }); 186 | let file = ctx 187 | .new_file_from_bytes(test.path(), test.text(), &report) 188 | .unwrap(); 189 | 190 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 191 | match file.lex(Spec::get().spec(), &report) { 192 | Ok(stream) => tokens(stream.summary()), 193 | Err(fatal) => stderr(fatal.to_string()), 194 | } 195 | } 196 | 197 | #[gilded::test("tests/ui/too_small/*.txt")] 198 | fn too_small(test: &gilded::Test) { 199 | #[ilex::spec] 200 | struct Spec { 201 | #[rule(Ident::new().prefix("%"))] 202 | i1: Lexeme, 203 | #[rule(Ident::new().prefix("$").min_len(3))] 204 | i2: Lexeme, 205 | 206 | #[rule(Bracket::rust_style("#", ("r#", "'"), ("'#", "")))] 207 | r1: Lexeme, 208 | #[rule(Bracket::rust_style("#", ("q###", "'"), ("'###", "")))] 209 | r2: Lexeme, 210 | 211 | #[rule(Bracket::cxx_style(Ident::new().min_len(1), ("R'", "("), (")", "'")))] 212 | c1: Lexeme, 213 | 
#[rule(Bracket::cxx_style(Ident::new().min_len(3), ("Q'", "("), (")", "'")))] 214 | c2: Lexeme, 215 | } 216 | 217 | let ctx = Context::new(); 218 | let report = 219 | ctx.new_report_with(Options { color: false, ..Default::default() }); 220 | let file = ctx 221 | .new_file_from_bytes(test.path(), test.text(), &report) 222 | .unwrap(); 223 | 224 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 225 | match file.lex(Spec::get().spec(), &report) { 226 | Ok(stream) => tokens(stream.summary()), 227 | Err(fatal) => stderr(fatal.to_string()), 228 | } 229 | } 230 | 231 | #[gilded::test("tests/ui/unrecognized/*.txt")] 232 | fn unrecognized(test: &gilded::Test) { 233 | #[ilex::spec] 234 | struct Spec { 235 | null: Lexeme, 236 | 237 | #[rule("[", "]")] 238 | cm: Lexeme, 239 | } 240 | 241 | let ctx = Context::new(); 242 | let report = 243 | ctx.new_report_with(Options { color: false, ..Default::default() }); 244 | let file = ctx 245 | .new_file_from_bytes(test.path(), test.text(), &report) 246 | .unwrap(); 247 | 248 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 249 | match file.lex(Spec::get().spec(), &report) { 250 | Ok(stream) => tokens(stream.summary()), 251 | Err(fatal) => stderr(fatal.to_string()), 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /ilex/tests/ui/too_small/cxx_tag.stderr: -------------------------------------------------------------------------------- 1 | error: expected at least 1 character in identifier, but found none 2 | --> too_small/cxx_tag.txt:1:14 3 | | 4 | 1 | R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' 5 | | ^ expected at least 1 here 6 | | 7 | = help: this appears to be an empty identifier 8 | = note: reported at: ilex/src/rt/emit2.rs:223:14 9 | 10 | error: expected at least 3 characters in identifier, but found only 2 11 | --> too_small/cxx_tag.txt:1:38 12 | | 13 | 1 | R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' 14 | | ^^ expected at least 3 here 15 
| | 16 | = note: reported at: ilex/src/rt/emit2.rs:223:14 17 | 18 | error: aborting due to 2 errors 19 | -------------------------------------------------------------------------------- /ilex/tests/ui/too_small/cxx_tag.txt: -------------------------------------------------------------------------------- 1 | R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/too_small/ident.stderr: -------------------------------------------------------------------------------- 1 | error: expected at least 3 characters in identifier, but found only 2 2 | --> too_small/ident.txt:1:13 3 | | 4 | 1 | %foo $bar % $oo 5 | | ^^^ expected at least 3 here 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:346:28 8 | 9 | error: aborting due to previous error 10 | -------------------------------------------------------------------------------- /ilex/tests/ui/too_small/ident.txt: -------------------------------------------------------------------------------- 1 | %foo $bar % $oo 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/too_small/rust_hashes.stderr: -------------------------------------------------------------------------------- 1 | error: unrecognized characters 2 | --> too_small/rust_hashes.txt:1:10 3 | | 4 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## 5 | | ^^^^^^ 6 | | 7 | = note: reported at: ilex/src/rt/mod.rs:36:8 8 | 9 | error: unexpected closing `'##` 10 | --> too_small/rust_hashes.txt:1:37 11 | | 12 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## 13 | | ^^^ expected to be opened by `r##'` 14 | | 15 | = note: reported at: ilex/src/rt/emit2.rs:254:22 16 | 17 | error: unrecognized characters 18 | --> too_small/rust_hashes.txt:1:30 19 | | 20 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## 21 | | ^^^^^^^ 22 | | 23 | = note: reported at: ilex/src/rt/mod.rs:36:8 24 | 25 | error: aborting due to 3 errors 26 | 
-------------------------------------------------------------------------------- /ilex/tests/ui/too_small/rust_hashes.txt: -------------------------------------------------------------------------------- 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/unrecognized/unrecognized.stderr: -------------------------------------------------------------------------------- 1 | error: unrecognized characters 2 | --> unrecognized.txt:1:1 3 | | 4 | 1 | multiple, null, [unrecognized], chunks!~ 5 | | ^^^^^^^^^ 6 | | 7 | = note: reported at: ilex/src/rt/mod.rs:36:8 8 | 9 | error: unrecognized character 10 | --> unrecognized.txt:1:15 11 | | 12 | 1 | multiple, null, [unrecognized], chunks!~ 13 | | ^ 14 | | 15 | = note: reported at: ilex/src/rt/mod.rs:36:8 16 | 17 | error: unrecognized characters 18 | --> unrecognized.txt:1:18 19 | | 20 | 1 | multiple, null, [unrecognized], chunks!~ 21 | | ^^^^^^^^^^^^ 22 | | 23 | = note: reported at: ilex/src/rt/mod.rs:36:8 24 | 25 | error: unrecognized character 26 | --> unrecognized.txt:1:31 27 | | 28 | 1 | multiple, null, [unrecognized], chunks!~ 29 | | ^ 30 | | 31 | = note: reported at: ilex/src/rt/mod.rs:36:8 32 | 33 | error: unrecognized characters 34 | --> unrecognized.txt:1:33 35 | | 36 | 1 | multiple, null, [unrecognized], chunks!~ 37 | | ^^^^^^^^ 38 | | 39 | = note: reported at: ilex/src/rt/mod.rs:36:8 40 | 41 | error: aborting due to 5 errors 42 | -------------------------------------------------------------------------------- /ilex/tests/ui/unrecognized/unrecognized.txt: -------------------------------------------------------------------------------- 1 | multiple, null, [unrecognized], chunks!~ 2 | -------------------------------------------------------------------------------- /proc2decl/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "proc2decl" 3 | version = "0.1.0" 4 | 
description = "Write less frickin' proc macro code" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | nu-glob = "0.101.0" 15 | unicode-xid = "0.2.6" 16 | walkdir = "2.5.0" 17 | -------------------------------------------------------------------------------- /proc2decl/README.md: -------------------------------------------------------------------------------- 1 | # proc2decl 2 | 3 | `proc2decl` exists for one reason only: because proc macros are a toxic 4 | ecosystem. 5 | 6 | Sometimes, you want to use an attribute to define a macro. Unfortunately, 7 | Rust does not support declarative macros (also called macros-by-example) 8 | for attributes, for reasons that essentially boil down to cookie-licking. 9 | 10 | This crate exists for one purpose only, and that is to facilitate writing 11 | declarative macros that an attribute converts into. 12 | 13 | ## How To Use 14 | 15 | 1. Define the macro-by-example you wish to use as the main implementation of 16 | your attribute or derive. 17 | 18 | 2. Create a proc-macro crate. This is where the documentation for your 19 | attribute will need to live. Your actual crate should depend on this 20 | crate. 21 | 22 | 3. Use `bridge!()` to define your bridge proc macros. These 23 | macro calls should be documented, since their doc comments are the ones 24 | that will appear in rustdoc for your macros. 25 | 26 | 4. `pub use` the macros in your actual crate. 27 | 28 | Proc macros suck!
29 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.83.0" 3 | profile = "default" 4 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 80 2 | tab_spaces = 2 3 | 4 | struct_lit_width = 40 5 | struct_variant_width = 40 6 | chain_width = 60 7 | use_small_heuristics = "Max" -------------------------------------------------------------------------------- /twie/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "twie" 3 | version = "0.5.0" 4 | description = "fast and compact prefix tries" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | buf-trait = { version = "0.4", path = "../buf-trait" } 15 | byteyarn = { version = "0.5", path = "../byteyarn" } 16 | 17 | boxy = "0.1.0" 18 | -------------------------------------------------------------------------------- /twie/README.md: -------------------------------------------------------------------------------- 1 | # twie 2 | 3 | `twie` \[twaɪ\] - Fast, compressed prefix tries. 4 | 5 | This crate provides a `Trie` type that implements an associative container 6 | with slice-like keys. It has the following properties. 7 | 8 | - Most one-shot operations are worst-case O(n), where n is the length of 9 | the key in bytes. This may require at most 2n tree hops, but the internal 10 | representation tries to minimize this where possible. 11 | 12 | - Finding all prefixes of a string that are in the trie is also O(n). These 13 | prefixes are provided in order. 
14 | 15 | - Building a trie out of, e.g., an iterator is quadratic. 16 | 17 | - Subtries of the whole trie (i.e. all entries with some particular prefix) 18 | can be operated on like regular tries (insertion is only supported from 19 | the root, unfortunately). 20 | 21 | - Memory for storing keys is shared. 22 | 23 | - The trie's internal indexing type is configurable, which allows trading 24 | off maximum key size for shrinking the size of tree nodes, and thus, 25 | memory usage. 26 | 27 | ```rust 28 | let words = Trie::::from([ 29 | ("poise", 0), 30 | ("poison", 1), 31 | ("poisonous", 2), 32 | ("poison #9", 3), 33 | ]); 34 | 35 | assert_eq!( 36 | words.prefixes("poisonous snake").map(|(k, _)| k).collect::>(), 37 | ["poison", "poisonous"], 38 | ) 39 | ``` 40 | -------------------------------------------------------------------------------- /twie/src/impls.rs: -------------------------------------------------------------------------------- 1 | /**** TRIE ****/ 2 | 3 | use std::fmt; 4 | 5 | use buf_trait::Buf; 6 | 7 | use crate::Index; 8 | use crate::Iter; 9 | use crate::IterMut; 10 | use crate::Sub; 11 | use crate::SubMut; 12 | use crate::Trie; 13 | 14 | impl Clone for Trie 15 | where 16 | K: Buf + ?Sized, 17 | V: Clone, 18 | I: Index, 19 | { 20 | fn clone(&self) -> Self { 21 | Self { raw: self.raw.clone() } 22 | } 23 | } 24 | 25 | impl Default for Trie 26 | where 27 | K: Buf + ?Sized, 28 | I: Index, 29 | { 30 | fn default() -> Self { 31 | Self::new() 32 | } 33 | } 34 | 35 | impl fmt::Debug for Trie 36 | where 37 | K: Buf + ?Sized + fmt::Debug, 38 | V: fmt::Debug, 39 | I: Index, 40 | { 41 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 42 | fmt::Debug::fmt(&self.as_ref(), f) 43 | } 44 | } 45 | 46 | impl<'a, K, V, I> IntoIterator for &'a Trie 47 | where 48 | K: Buf + ?Sized, 49 | I: Index, 50 | { 51 | type Item = (&'a K, &'a V); 52 | type IntoIter = Iter<'a, K, V, I>; 53 | 54 | fn into_iter(self) -> Self::IntoIter { 55 | self.iter() 56 | } 57 | } 58 | 59 | 
impl<'a, K, V, I> IntoIterator for &'a mut Trie 60 | where 61 | K: Buf + ?Sized, 62 | I: Index, 63 | { 64 | type Item = (&'a K, &'a mut V); 65 | type IntoIter = IterMut<'a, K, V, I>; 66 | 67 | fn into_iter(self) -> Self::IntoIter { 68 | self.iter_mut() 69 | } 70 | } 71 | 72 | impl<'a, K, V, I> FromIterator<(&'a K, V)> for Trie 73 | where 74 | K: Buf + ?Sized, 75 | I: Index, 76 | { 77 | fn from_iter(iter: Iter) -> Self 78 | where 79 | Iter: IntoIterator, 80 | { 81 | let mut trie = Trie::new(); 82 | for (k, v) in iter { 83 | trie.insert(k, v); 84 | } 85 | trie 86 | } 87 | } 88 | 89 | impl From<[(&K, V); N]> for Trie 90 | where 91 | K: Buf + ?Sized, 92 | I: Index, 93 | { 94 | fn from(value: [(&K, V); N]) -> Self { 95 | value.into_iter().collect() 96 | } 97 | } 98 | 99 | impl From<[&K; N]> for Trie 100 | where 101 | K: Buf + ?Sized, 102 | V: Default, 103 | I: Index, 104 | { 105 | fn from(value: [&K; N]) -> Self { 106 | value.into_iter().map(|k| (k, V::default())).collect() 107 | } 108 | } 109 | 110 | /**** SUB ****/ 111 | 112 | impl<'a, K, V, I> From<&'a Trie> for Sub<'a, K, V, I> 113 | where 114 | K: Buf + ?Sized, 115 | I: Index, 116 | { 117 | fn from(value: &'a Trie) -> Self { 118 | value.as_ref() 119 | } 120 | } 121 | 122 | impl<'a, K, V, I> From> for Sub<'a, K, V, I> 123 | where 124 | K: Buf + ?Sized, 125 | I: Index, 126 | { 127 | fn from(value: SubMut<'a, K, V, I>) -> Self { 128 | Sub { raw: value.raw, node: value.node } 129 | } 130 | } 131 | 132 | impl<'a, K, V, I> From<&'a SubMut<'_, K, V, I>> for Sub<'a, K, V, I> 133 | where 134 | K: Buf + ?Sized, 135 | I: Index, 136 | { 137 | fn from(value: &'a SubMut<'_, K, V, I>) -> Self { 138 | value.as_ref() 139 | } 140 | } 141 | 142 | impl Clone for Sub<'_, K, V, I> 143 | where 144 | K: Buf + ?Sized, 145 | I: Index, 146 | { 147 | fn clone(&self) -> Self { 148 | *self 149 | } 150 | } 151 | 152 | impl Copy for Sub<'_, K, V, I> 153 | where 154 | K: Buf + ?Sized, 155 | I: Index, 156 | { 157 | } 158 | 159 | impl fmt::Debug for 
Sub<'_, K, V, I> 160 | where 161 | K: Buf + ?Sized + fmt::Debug, 162 | V: fmt::Debug, 163 | I: Index, 164 | { 165 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 166 | f.debug_map().entries(self.iter()).finish() 167 | } 168 | } 169 | 170 | impl<'a, K, V, I> IntoIterator for &'a Sub<'_, K, V, I> 171 | where 172 | K: Buf + ?Sized, 173 | I: Index, 174 | { 175 | type Item = (&'a K, &'a V); 176 | type IntoIter = Iter<'a, K, V, I>; 177 | 178 | fn into_iter(self) -> Self::IntoIter { 179 | self.iter() 180 | } 181 | } 182 | 183 | impl<'a, K, V, I> IntoIterator for Sub<'a, K, V, I> 184 | where 185 | K: Buf + ?Sized, 186 | I: Index, 187 | { 188 | type Item = (&'a K, &'a V); 189 | type IntoIter = Iter<'a, K, V, I>; 190 | 191 | fn into_iter(self) -> Self::IntoIter { 192 | self.iter() 193 | } 194 | } 195 | 196 | /**** SUB MUT ****/ 197 | 198 | impl<'a, K, V, I> From<&'a mut Trie> for SubMut<'a, K, V, I> 199 | where 200 | K: Buf + ?Sized, 201 | I: Index, 202 | { 203 | fn from(value: &'a mut Trie) -> Self { 204 | value.as_mut() 205 | } 206 | } 207 | 208 | impl<'a, K, V, I> From<&'a mut SubMut<'_, K, V, I>> for SubMut<'a, K, V, I> 209 | where 210 | K: Buf + ?Sized, 211 | I: Index, 212 | { 213 | fn from(value: &'a mut SubMut<'_, K, V, I>) -> Self { 214 | value.as_mut() 215 | } 216 | } 217 | 218 | impl fmt::Debug for SubMut<'_, K, V, I> 219 | where 220 | K: Buf + ?Sized + fmt::Debug, 221 | V: fmt::Debug, 222 | I: Index, 223 | { 224 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 225 | fmt::Debug::fmt(&self.as_ref(), f) 226 | } 227 | } 228 | 229 | impl<'a, K, V, I> IntoIterator for &'a SubMut<'_, K, V, I> 230 | where 231 | K: Buf + ?Sized, 232 | I: Index, 233 | { 234 | type Item = (&'a K, &'a V); 235 | type IntoIter = Iter<'a, K, V, I>; 236 | 237 | fn into_iter(self) -> Self::IntoIter { 238 | self.as_ref().iter() 239 | } 240 | } 241 | 242 | impl<'a, K, V, I> IntoIterator for &'a mut SubMut<'_, K, V, I> 243 | where 244 | K: Buf + ?Sized, 245 | I: Index, 246 | { 247 | 
type Item = (&'a K, &'a mut V); 248 | type IntoIter = IterMut<'a, K, V, I>; 249 | 250 | fn into_iter(self) -> Self::IntoIter { 251 | self.as_mut().iter_mut() 252 | } 253 | } 254 | 255 | impl<'a, K, V, I> IntoIterator for SubMut<'a, K, V, I> 256 | where 257 | K: Buf + ?Sized, 258 | I: Index, 259 | { 260 | type Item = (&'a K, &'a mut V); 261 | type IntoIter = IterMut<'a, K, V, I>; 262 | 263 | fn into_iter(self) -> Self::IntoIter { 264 | self.iter_mut() 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /twie/src/poison_trie.txt: -------------------------------------------------------------------------------- 1 | ┬╴[0]: "" 2 | │ ptrs: --|-/-/-/1|--|-- 3 | │ 4 | └┬╴[1]: "?" 5 | │ hi: 0x7 'p'..='\u{7f}' 6 | │ ptrs: 2/-/-/-|--|--|-- 7 | │ 8 | └┬╴[2]: "p" 9 | │ lo: 0x0 'p' 10 | │ ptrs: --|-/-/3/-|--|-- 11 | │ 12 | └┬╴[3]: "p?" 13 | │ hi: 0x6 '`'..='o' 14 | │ ptrs: --|--|--|-/-/-/4 15 | │ 16 | └┬╴[4]: "po" 17 | │ lo: 0xf 'o' 18 | │ ptrs: --|-/-/5/-|--|-- 19 | │ 20 | └┬╴[5]: "po?" 21 | │ hi: 0x6 '`'..='o' 22 | │ ptrs: --|--|-/6/-/-|-- 23 | │ 24 | └┬╴[6]: "poi" 25 | │ lo: 0x9 'i' 26 | │ ptrs: --|-/-/-/7|--|-- 27 | │ 28 | └┬╴[7]: "poi?" 29 | │ hi: 0x7 'p'..='\u{7f}' 30 | │ ptrs: -/-/-/8|--|--|-- 31 | │ 32 | └┬╴[8]: "pois" 33 | │ lo: 0x3 's' 34 | │ ptrs: --|-/-/9/-|--|-- 35 | │ 36 | └┬╴[9]: "pois?" 37 | │ hi: 0x6 '`'..='o' 38 | │ ptrs: --|-/10/-/-|--|-/-/-/12 39 | │ 40 | ├─╴[10]: "poise" 41 | │ lo: 0x5 'e' 42 | │ ptrs: --|--|--|-- 43 | │ [0]: 0x55b3382cfc10 "poise" -> 0x55b3382cfc88 0 44 | │ 45 | └┬╴[12]: "poiso" 46 | │ lo: 0xf 'o' 47 | │ ptrs: --|-/-/11/-|--|-- 48 | │ 49 | └┬╴[11]: "poiso?" 50 | │ hi: 0x6 '`'..='o' 51 | │ ptrs: --|--|--|-/-/14/- 52 | │ 53 | └┬╴[14]: "poison" 54 | │ lo: 0xe 'n' 55 | │ ptrs: -/-/15/-|-/-/13/-|--|-- 56 | │ [1]: 0x55b3382d0570 "poison" -> 0x55b3382cfc94 1 57 | │ 58 | ├┬╴[15]: "poison?" 
59 | ││ hi: 0x2 ' '..='/' 60 | ││ ptrs: 18/-/-/-|--|--|-- 61 | ││ 62 | │└─╴[18]: "poison " 63 | │ lo: 0x0 ' ' 64 | │ ptrs: --|--|--|-- 65 | │ [3]: 0x55b3382cfae0 "poison #9" -> 0x55b3382cfcac 3 66 | │ 67 | └┬╴[13]: "poison?" 68 | │ hi: 0x6 '`'..='o' 69 | │ ptrs: --|--|--|-/-/-/16 70 | │ 71 | └─╴[16]: "poisono" 72 | lo: 0xf 'o' 73 | ptrs: --|--|--|-- 74 | [2]: 0x55b3382d0570 "poisonous" -> 0x55b3382cfca0 2 75 | -------------------------------------------------------------------------------- /twie/src/raw/dump.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::fmt::Write; 3 | 4 | use buf_trait::Buf; 5 | 6 | use crate::raw::entries::Entries; 7 | use crate::raw::nodes::Node; 8 | use crate::raw::nodes::Nodes; 9 | use crate::raw::RawTrie; 10 | use crate::DebugBytes; 11 | use crate::Index; 12 | 13 | /// Dumps the contents of this trie to a string in an unspecified format. 14 | pub fn dump( 15 | trie: &RawTrie, 16 | root: Node, 17 | ) -> String { 18 | if root.is_empty() { 19 | return "".to_string(); 20 | } 21 | 22 | let mut out = String::new(); 23 | let _ignored = 24 | dump0(&mut out, &trie.nodes, &trie.data, root, None, &mut Vec::new()); 25 | 26 | out.truncate(out.trim_end().len()); 27 | out 28 | } 29 | 30 | fn dump0( 31 | out: &mut String, 32 | nodes: &Nodes, 33 | entries: &Entries, 34 | node: Node, 35 | hi: Option, 36 | bus: &mut Vec, 37 | ) -> Result<(), fmt::Error> { 38 | use boxy::Char; 39 | 40 | let is_hi = hi.is_none(); 41 | let array = match hi { 42 | Some(hi) => nodes.lo(node.ptr, hi), 43 | None => nodes.hi(node.ptr), 44 | }; 45 | 46 | let has = array.iter().any(|&x| !x.is_empty()); 47 | 48 | // All this crap is just so we get a pretty tree. 
49 | let last = bus 50 | .iter() 51 | .enumerate() 52 | .filter_map(|(i, x)| x.then_some(i + 1)) 53 | .last() 54 | .unwrap_or(0); 55 | for (i, &flag) in bus.iter().enumerate().take(last) { 56 | if flag { 57 | if i == bus.len() - 1 { 58 | write!(out, "{}", Char::right_tee(boxy::Weight::Normal))?; 59 | } else { 60 | write!(out, "{}", Char::vertical(boxy::Weight::Normal))?; 61 | } 62 | } else { 63 | write!(out, "{}", Char::empty())?; 64 | } 65 | } 66 | for i in last..bus.len() { 67 | if i == bus.len() - 1 { 68 | write!(out, "{}", Char::lower_left(boxy::Weight::Normal))?; 69 | } else { 70 | write!(out, "{}", Char::empty())?; 71 | } 72 | } 73 | if has { 74 | write!(out, "{}", Char::down_tee(boxy::Weight::Normal))?; 75 | } else { 76 | write!(out, "{}", Char::horizontal(boxy::Weight::Normal))?; 77 | } 78 | write!(out, "{}", Char::left_half(boxy::Weight::Normal))?; 79 | 80 | let new_line = |out: &mut String| { 81 | writeln!(out)?; 82 | for &flag in bus.iter().take(last) { 83 | if flag { 84 | write!(out, "{}", Char::vertical(boxy::Weight::Normal))?; 85 | } else { 86 | write!(out, "{}", Char::empty())?; 87 | } 88 | } 89 | for _ in last..bus.len() { 90 | write!(out, "{}", Char::empty())?; 91 | } 92 | if has { 93 | write!(out, "{}", Char::vertical(boxy::Weight::Normal))?; 94 | } 95 | 96 | write!(out, " ") 97 | }; 98 | 99 | write!(out, "[{}]", node.ptr.idx() * 2 + (!is_hi as usize),)?; 100 | 101 | if is_hi { 102 | let key = nodes.key(node, Some(usize::MAX)); 103 | write!( 104 | out, 105 | ": {:?}/{:?}", 106 | DebugBytes(&key[..node.depth]), 107 | DebugBytes(&key[node.depth..]), 108 | )?; 109 | } 110 | 111 | if let Some(hi) = hi { 112 | new_line(out)?; 113 | let start = hi << 4; 114 | let end = start + 0xf; 115 | let _ = write!(out, "hi: 0x{hi:x} {:?}..={:?}", start as char, end as char); 116 | } else if let Some(&ch) = nodes.key(node, None).last() { 117 | new_line(out)?; 118 | let lo = ch & 0xf; 119 | write!(out, "lo: 0x{lo:x} {:?}", ch as char)?; 120 | } 121 | 122 | 
new_line(out)?; 123 | write!(out, "ptrs: ")?; 124 | for (i, chunk) in array.chunks(4).enumerate() { 125 | if i != 0 { 126 | write!(out, "|")?; 127 | } 128 | 129 | if chunk.iter().all(|n| n.is_empty()) { 130 | write!(out, "--")?; 131 | continue; 132 | } 133 | 134 | for (i, &ptr) in chunk.iter().enumerate() { 135 | if i != 0 { 136 | write!(out, "/")?; 137 | } 138 | 139 | match usize::try_from(ptr) { 140 | Ok(idx) => { 141 | write!(out, "{}", if !is_hi { idx * 2 } else { idx * 2 + 1 }) 142 | } 143 | Err(_) => write!(out, "-"), 144 | }?; 145 | } 146 | } 147 | 148 | if is_hi { 149 | if let Some(sparse) = nodes.get(node) { 150 | if let Some((len, v)) = entries.get(sparse) { 151 | let k = nodes.key(node, Some(len)); 152 | new_line(out)?; 153 | write!(out, "[{sparse}]: {k:p} {:?} -> {v:p} {v:?}", DebugBytes(k),)?; 154 | } 155 | } 156 | } 157 | 158 | new_line(out)?; 159 | writeln!(out)?; 160 | 161 | let last = array 162 | .iter() 163 | .enumerate() 164 | .filter_map(|(i, &node)| (!node.is_empty()).then_some(i)) 165 | .last() 166 | .unwrap_or(0); 167 | 168 | bus.push(true); 169 | for (i, &n) in array.iter().enumerate() { 170 | if n.is_empty() { 171 | continue; 172 | } 173 | 174 | if i == last { 175 | bus.pop(); 176 | bus.push(false); 177 | } 178 | 179 | let (node, hi) = match hi { 180 | None => (node, Some(i as u8)), 181 | Some(hi) => { 182 | let byte = hi << 4 | (i as u8); 183 | (nodes.walk(node, &[byte]).0, None) 184 | } 185 | }; 186 | 187 | dump0(out, nodes, entries, node, hi, bus)?; 188 | } 189 | bus.pop(); 190 | 191 | Ok(()) 192 | } 193 | -------------------------------------------------------------------------------- /twie/src/raw/entries.rs: -------------------------------------------------------------------------------- 1 | use std::mem; 2 | use std::mem::ManuallyDrop; 3 | use std::mem::MaybeUninit; 4 | use std::ptr::NonNull; 5 | 6 | use crate::raw::nodes::Index; 7 | use crate::raw::nodes::OutOfIndices; 8 | use crate::raw::nodes::Ptr; 9 | 10 | /// The actual 
user-provided data stored by the trie, separate from the tree 11 | /// structure. 12 | pub struct Entries { 13 | values: StealthVec>, 14 | } 15 | 16 | // SAFETY: Although there are some sketchy functions that go & -> &mut, these 17 | // MUST NOT be called except through a &Entries that was derived from an &mut 18 | // Entries. They exist only so that iterators can vend multiple distinct 19 | // elements without making MIRI lose its mind. 20 | unsafe impl Send for Entries {} 21 | unsafe impl Sync for Entries {} 22 | 23 | pub struct Entry { 24 | /// The length of the key, which may be longer than this entry's depth in the 25 | /// trie. This entry is empty if `key_len` is Ptr::EMPTY. 26 | key_len: Ptr, 27 | 28 | /// The value itself. 29 | value: MaybeUninit, 30 | } 31 | 32 | impl Drop for Entry { 33 | fn drop(&mut self) { 34 | if !self.key_len.is_empty() { 35 | unsafe { 36 | self.value.assume_init_drop(); 37 | } 38 | } 39 | } 40 | } 41 | 42 | /// A deconstructed `Vec` that ensures we only ever manipulate the data for 43 | /// the entries vector through raw pointers, except when creating new entries. 44 | /// 45 | /// This is necessary to allow disjoint borrows of its elements without tripping 46 | /// MIRI. 47 | struct StealthVec { 48 | ptr: *mut T, 49 | cap: usize, 50 | len: usize, 51 | } 52 | 53 | impl StealthVec { 54 | fn new() -> Self { 55 | Self { 56 | ptr: NonNull::dangling().as_ptr(), 57 | cap: 0, 58 | len: 0, 59 | } 60 | } 61 | 62 | fn get(&self, entry: usize) -> &T { 63 | debug_assert!( 64 | entry < self.len, 65 | "trie: entry index {entry} out of bounds {}; this is a bug", 66 | self.len 67 | ); 68 | 69 | unsafe { &*self.ptr.add(entry) } 70 | } 71 | 72 | // SAFETY: This is has no aliasing guardrails! 73 | // 74 | // Also, calling with_entries_vec() invalidates any references created by this 75 | // function. 
76 | #[allow(clippy::mut_from_ref)] 77 | unsafe fn get_mut_may_alias(&self, entry: usize) -> &mut T { 78 | debug_assert!( 79 | entry < self.len, 80 | "trie: entry index {entry} out of bounds {}; this is a bug", 81 | self.len 82 | ); 83 | 84 | unsafe { &mut *self.ptr.add(entry) } 85 | } 86 | 87 | // SAFETY: `cb` MUST NOT panic. 88 | unsafe fn with_vec(&mut self, cb: impl FnOnce(&mut Vec)) { 89 | let mut vec = ManuallyDrop::new(unsafe { 90 | Vec::from_raw_parts(self.ptr, self.len, self.cap) 91 | }); 92 | cb(&mut vec); 93 | self.ptr = vec.as_mut_ptr(); 94 | self.cap = vec.capacity(); 95 | self.len = vec.len(); 96 | } 97 | } 98 | 99 | impl Drop for StealthVec { 100 | fn drop(&mut self) { 101 | let mut to_drop = Vec::new(); 102 | unsafe { 103 | // SAFETY: mem::swap cannot panic. We use swap instead of replace, because 104 | // dropping an intermediate Vec inside the closure could cause a panic 105 | // inside of an element's dtor. 106 | self.with_vec(|v| mem::swap(v, &mut to_drop)); 107 | } 108 | } 109 | } 110 | 111 | impl Clone for Entries { 112 | fn clone(&self) -> Self { 113 | let mut data = Self::new(); 114 | 115 | let new_entries = (0..self.values.len) 116 | .map(|i| { 117 | let e = self.values.get(i); 118 | 119 | let value = if e.key_len.is_empty() { 120 | MaybeUninit::uninit() 121 | } else { 122 | MaybeUninit::new(unsafe { e.value.assume_init_ref().clone() }) 123 | }; 124 | 125 | Entry { key_len: e.key_len, value } 126 | }) 127 | .collect(); 128 | 129 | unsafe { 130 | // The destructor of Vec cannot panic here, because it is empty. 131 | data.values.with_vec(|v| *v = new_entries); 132 | } 133 | data 134 | } 135 | } 136 | 137 | impl Entries { 138 | pub fn new() -> Self { 139 | Self { values: StealthVec::new() } 140 | } 141 | 142 | pub fn new_entry(&mut self) -> Result { 143 | let new = self.values.len; 144 | unsafe { 145 | // SAFETY: Vec::push does not panic unless we try to allocate half of 146 | // the address space, which we can assume can't happen here. 
147 | self.values.with_vec(|v| { 148 | v.push(Entry { 149 | key_len: Ptr::EMPTY, 150 | value: MaybeUninit::uninit(), 151 | }) 152 | }); 153 | } 154 | Ok(new) 155 | } 156 | 157 | /// Returns whether a value is initialized. 158 | pub fn is_init(&self, entry: usize) -> bool { 159 | !self.values.get(entry).key_len.is_empty() 160 | } 161 | 162 | /// Gets the value in `entry`, if present. 163 | pub fn get(&self, entry: usize) -> Option<(usize, &V)> { 164 | let e = self.values.get(entry); 165 | unsafe { Some((e.key_len.idx(), e.value.assume_init_ref())) } 166 | } 167 | 168 | /// Gets the value in `entry`, if present. 169 | /// 170 | /// # Safety 171 | /// 172 | /// It is the caller's responsibility to not cause aliasing hazards using 173 | /// this function. 174 | pub unsafe fn get_mut_may_alias( 175 | &self, 176 | entry: usize, 177 | ) -> Option<(usize, &mut V)> { 178 | let e = self.values.get_mut_may_alias(entry); 179 | unsafe { Some((e.key_len.idx(), e.value.assume_init_mut())) } 180 | } 181 | 182 | /// Initializes `entry` if it isn't. 183 | pub unsafe fn init( 184 | &mut self, 185 | entry: usize, 186 | key_len: usize, 187 | cb: impl FnOnce() -> V, 188 | ) -> &mut V { 189 | if self.is_init(entry) { 190 | // SAFETY: Nothing else in this code path access the entries vector. 191 | return unsafe { 192 | self.values.get_mut_may_alias(entry).value.assume_init_mut() 193 | }; 194 | } 195 | 196 | // SAFETY: cb() must be called before key_len is written to, so that the 197 | // entry is left untouched if cb() panics. 198 | let new = cb(); 199 | 200 | let e = unsafe { 201 | // SAFETY: Nothing else in this code path accesses the entries vector. 
202 | self.values.get_mut_may_alias(entry) 203 | }; 204 | e.key_len = Ptr::must(key_len); 205 | e.value.write(new) 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /twie/src/raw/iter.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use buf_trait::Buf; 4 | 5 | use crate::raw::nodes; 6 | use crate::raw::nodes::Index; 7 | use crate::raw::nodes::Node; 8 | use crate::raw::RawTrie; 9 | use crate::Sub; 10 | 11 | use super::entries::Entries; 12 | 13 | /// An iterator over all values of a [`Trie`][crate::Trie] whose keys start with 14 | /// a particular prefix. 15 | /// 16 | /// See [`Trie::prefixes()`][crate::Trie::prefixes]. 17 | pub struct Prefixes<'a, 'key, K: Buf + ?Sized, V, I: Index> { 18 | root: Node, 19 | key: &'key [u8], 20 | data: &'a Entries, 21 | prefixes: nodes::Prefixes<'a, 'key, I>, 22 | _ph: PhantomData &'a K>, 23 | } 24 | 25 | impl<'a, 'key, K: Buf + ?Sized, V, I: Index> Prefixes<'a, 'key, K, V, I> { 26 | pub(crate) fn new( 27 | trie: &'a RawTrie, 28 | root: Node, 29 | key: &'key K, 30 | ) -> Self { 31 | let key = key.as_bytes(); 32 | Self { 33 | root, 34 | key, 35 | prefixes: trie.nodes.prefixes(root, key), 36 | data: &trie.data, 37 | _ph: PhantomData, 38 | } 39 | } 40 | } 41 | 42 | impl<'a, K: Buf + ?Sized, V, I: Index> Iterator for Prefixes<'a, '_, K, V, I> { 43 | type Item = (&'a K, &'a V); 44 | 45 | fn next(&mut self) -> Option { 46 | loop { 47 | let (node, Some(next), last) = self.prefixes.next()? 
else { 48 | continue; 49 | }; 50 | let Some((key_len, value)) = self.data.get(next) else { 51 | continue; 52 | }; 53 | 54 | let key = self.prefixes.nodes.key(node, Some(key_len)); 55 | let key_rest = &key.as_bytes()[self.root.depth..]; 56 | if last && !self.key.starts_with(key_rest) { 57 | return None; 58 | } 59 | 60 | unsafe { 61 | // SAFETY: This is a trie element, and not an intermediate, so this 62 | // is key is not "torn". 63 | return Some((K::from_bytes(key), value)); 64 | } 65 | } 66 | } 67 | } 68 | 69 | /// A mutable iterator over all values of a [`Trie`][crate::Trie] whose keys 70 | /// start with a particular prefix. 71 | /// 72 | /// See [`Trie::prefixes_mut()`][crate::Trie::prefixes_mut]. 73 | pub struct PrefixesMut<'a, 'key, K: Buf + ?Sized, V, I: Index> { 74 | root: Node, 75 | key: &'key [u8], 76 | data: &'a Entries, 77 | prefixes: nodes::Prefixes<'a, 'key, I>, 78 | _ph: PhantomData (&'a K, &'a mut V)>, 79 | } 80 | 81 | impl<'a, 'key, K: Buf + ?Sized, V, I: Index> PrefixesMut<'a, 'key, K, V, I> { 82 | pub(crate) fn new( 83 | trie: &'a mut RawTrie, 84 | root: Node, 85 | key: &'key K, 86 | ) -> Self { 87 | let key = key.as_bytes(); 88 | Self { 89 | root, 90 | key, 91 | prefixes: trie.nodes.prefixes(root, key), 92 | data: &mut trie.data, 93 | _ph: PhantomData, 94 | } 95 | } 96 | } 97 | 98 | impl<'a, K: Buf + ?Sized, V, I: Index> Iterator 99 | for PrefixesMut<'a, '_, K, V, I> 100 | { 101 | type Item = (&'a K, &'a mut V); 102 | 103 | fn next(&mut self) -> Option { 104 | loop { 105 | let (node, Some(next), last) = self.prefixes.next()? else { 106 | continue; 107 | }; 108 | 109 | let entry = unsafe { 110 | // SAFETY: nodes::Prefixes will never repeat the indices it produces. 
111 | self.data.get_mut_may_alias(next) 112 | }; 113 | let Some((key_len, value)) = entry else { continue }; 114 | 115 | let key = self.prefixes.nodes.key(node, Some(key_len)); 116 | let key_rest = &key.as_bytes()[self.root.depth..]; 117 | if last && !self.key.starts_with(key_rest) { 118 | return None; 119 | } 120 | 121 | unsafe { 122 | // SAFETY: This is a trie element, and not an intermediate, so this 123 | // is key is not "torn". 124 | return Some((K::from_bytes(key), value)); 125 | } 126 | } 127 | } 128 | } 129 | 130 | /// A depth-first iterator over all nonempty subtries of a 131 | /// [`Trie`][crate::Trie]. 132 | /// 133 | /// See [`Trie::subs()`][crate::Trie::subs]. 134 | pub struct Subs<'a, K: Buf + ?Sized, V, I: Index> { 135 | raw: &'a RawTrie, 136 | dfs: nodes::Dfs<'a, I>, 137 | _ph: PhantomData (&'a K, &'a V)>, 138 | } 139 | 140 | impl<'a, K: Buf + ?Sized, V, I: Index> Subs<'a, K, V, I> { 141 | pub(crate) fn new(trie: &'a RawTrie, root: Node) -> Self { 142 | Self { 143 | raw: trie, 144 | dfs: trie.nodes.dfs(root), 145 | _ph: PhantomData, 146 | } 147 | } 148 | } 149 | 150 | impl<'a, K: Buf + ?Sized, V, I: Index> Iterator for Subs<'a, K, V, I> { 151 | type Item = Sub<'a, K, V, I>; 152 | 153 | fn next(&mut self) -> Option { 154 | if let Some(next) = self.dfs.next() { 155 | return Some(Sub { raw: self.raw, node: next }); 156 | } 157 | 158 | None 159 | } 160 | } 161 | 162 | /// An in-order iterator over all values of a [`Trie`][crate::Trie]. 163 | /// 164 | /// See [`Trie::iter()`][crate::Trie::iter]. 
165 | pub struct Iter<'a, K: Buf + ?Sized, V, I: Index> { 166 | data: &'a Entries, 167 | dfs: nodes::Dfs<'a, I>, 168 | _ph: PhantomData &'a K>, 169 | } 170 | 171 | impl<'a, K: Buf + ?Sized, V, I: Index> Iter<'a, K, V, I> { 172 | pub(crate) fn new(trie: &'a RawTrie, root: Node) -> Self { 173 | Self { 174 | dfs: trie.nodes.dfs(root), 175 | data: &trie.data, 176 | _ph: PhantomData, 177 | } 178 | } 179 | } 180 | 181 | impl<'a, K: Buf + ?Sized, V, I: Index> Iterator for Iter<'a, K, V, I> { 182 | type Item = (&'a K, &'a V); 183 | 184 | fn next(&mut self) -> Option { 185 | while let Some(next) = self.dfs.next() { 186 | let Some(idx) = self.dfs.nodes.get(next) else { 187 | continue; 188 | }; 189 | let Some((key_len, value)) = self.data.get(idx) else { 190 | continue; 191 | }; 192 | if key_len != next.depth { 193 | continue; 194 | } 195 | 196 | let key = self.dfs.nodes.key(next, Some(key_len)); 197 | unsafe { 198 | // SAFETY: This is a trie element, and not an intermediate, so this 199 | // is key is not "torn". 200 | return Some((K::from_bytes(key), value)); 201 | } 202 | } 203 | 204 | None 205 | } 206 | } 207 | 208 | /// An in-order mutable iterator over all values of a [`Trie`][crate::Trie]. 209 | /// 210 | /// See [`Trie::iter_mut()`][crate::Trie::iter_mut]. 
211 | pub struct IterMut<'a, K: Buf + ?Sized, V, I: Index> { 212 | data: &'a Entries, 213 | dfs: nodes::Dfs<'a, I>, 214 | _ph: PhantomData (&'a K, &'a mut V)>, 215 | } 216 | 217 | impl<'a, K: Buf + ?Sized, V, I: Index> IterMut<'a, K, V, I> { 218 | pub(crate) fn new(trie: &'a mut RawTrie, root: Node) -> Self { 219 | Self { 220 | dfs: trie.nodes.dfs(root), 221 | data: &mut trie.data, 222 | _ph: PhantomData, 223 | } 224 | } 225 | } 226 | 227 | impl<'a, K: Buf + ?Sized, V, I: Index> Iterator for IterMut<'a, K, V, I> { 228 | type Item = (&'a K, &'a mut V); 229 | 230 | fn next(&mut self) -> Option { 231 | while let Some(next) = self.dfs.next() { 232 | let Some(idx) = self.dfs.nodes.get(next) else { 233 | continue; 234 | }; 235 | let entry = unsafe { 236 | // SAFETY: nodes::Prefixes will never repeat the indices it produces. 237 | self.data.get_mut_may_alias(idx) 238 | }; 239 | 240 | let Some((key_len, value)) = entry else { 241 | continue; 242 | }; 243 | if key_len != next.depth { 244 | continue; 245 | } 246 | 247 | let key = self.dfs.nodes.key(next, Some(key_len)); 248 | unsafe { 249 | // SAFETY: This is a trie element, and not an intermediate, so this 250 | // is key is not "torn". 251 | return Some((K::from_bytes(key), value)); 252 | } 253 | } 254 | 255 | None 256 | } 257 | } 258 | -------------------------------------------------------------------------------- /twie/src/raw/mod.rs: -------------------------------------------------------------------------------- 1 | // Core implementation of the trie. 2 | 3 | use std::marker::PhantomData; 4 | 5 | use buf_trait::Buf; 6 | 7 | use crate::raw::entries::Entries; 8 | use crate::raw::nodes::Index; 9 | use crate::raw::nodes::Node; 10 | use crate::raw::nodes::Nodes; 11 | use crate::raw::nodes::OutOfIndices; 12 | 13 | mod dump; 14 | mod entries; 15 | 16 | pub mod iter; 17 | pub mod nodes; 18 | 19 | pub use dump::dump; 20 | 21 | /// The core trie implementation. 
22 | /// 23 | /// This type is a map from `[u8] -> Option` backed by a [`Nodes`]. But, the 24 | /// way this is realized is somewhat subtle. For a given key, call its 25 | /// *canonical* location the node you'd get from `nodes.build(nodes.root(), key)`, 26 | /// called `canon(key)`. We could put `key` at `canon(key)` and call it a day. 27 | /// For a given node, we call the key that would be there `keyat(node)`. 28 | /// 29 | /// This is wasteful: if the keys are `"foo"` and `"bar"`, there will be 30 | /// seven full `[Node]`s in the trie: 31 | /// 32 | /// ```text 33 | /// -> f -> o -> o 34 | /// -> b -> a -> r 35 | /// ``` 36 | /// 37 | /// However, these keys have no common prefix, so only the ->f and ->b links are 38 | /// actually needed. 39 | /// 40 | /// So, rather than say that `key` must be at `canon(key)`, instead we say that 41 | /// for any given `node`, if there is an entry there, then: 42 | /// - `node.key.starts_with(keyat(node))` 43 | /// - At least one of `node.key == keyat(node)` OR `node` has no children. 44 | /// 45 | /// This means that DFS-ing the trie still yields keys in lexicographic order. 46 | /// 47 | /// It may be possible to reduce the last requirement to `node.key < all_its_children`. 48 | /// This mostly preserves DFS behavior, but screws with subtries. It is unclear 49 | /// if this can be made to work. 50 | pub struct RawTrie { 51 | pub nodes: Nodes, 52 | pub data: Entries, 53 | pub _ph: PhantomData &mut K>, 54 | } 55 | 56 | impl Clone for RawTrie { 57 | fn clone(&self) -> Self { 58 | Self { 59 | nodes: self.nodes.clone(), 60 | data: self.data.clone(), 61 | _ph: PhantomData, 62 | } 63 | } 64 | } 65 | 66 | impl RawTrie { 67 | /// Creates a new trie. 68 | pub fn new() -> Self { 69 | Self { 70 | nodes: Nodes::new(), 71 | data: Entries::new(), 72 | _ph: PhantomData, 73 | } 74 | } 75 | 76 | /// Low-level mutation operation. 
77 | /// 78 | /// This operation mutates the subtree pointed to by `root` (an index into 79 | /// `hi`) and a two-part key, and returns a possibly uninitialized entry 80 | /// for the key. 81 | /// 82 | /// After this function returns, an entry will exist for `[prefix, suffix]`. 83 | /// This makes this operation a fused find/insert operation. 84 | /// 85 | /// # Safety 86 | /// 87 | /// First, `root` must be a valid `hi` index. Then, `prefix` must be 88 | /// *exactly* the prefix string for the subtrie defined by `root`. The reason 89 | /// for the two-part key is that this allows mutation through a subtrie 90 | /// reference. 91 | pub unsafe fn mutate( 92 | &mut self, 93 | root: Node, 94 | key: &[u8], 95 | ) -> Result { 96 | let insert_at = self.pre_mutate(root, key)?; 97 | 98 | if let Some(entry) = self.nodes.get(insert_at) { 99 | return Ok(entry); 100 | } 101 | 102 | let new = self.data.new_entry()?; 103 | self.nodes.set(insert_at, new); 104 | Ok(new) 105 | } 106 | 107 | /// Prepares for a mutation. 108 | /// 109 | /// This operation finds the slot at which it could place `suffix` and does 110 | /// so. 111 | pub unsafe fn pre_mutate( 112 | &mut self, 113 | root: Node, 114 | key: &[u8], 115 | ) -> Result, OutOfIndices> { 116 | // Next, we want to walk down as far as we can without mutating anything. 117 | self.nodes.init_root(); 118 | let (mut node, rest) = self.nodes.walk(root, key); 119 | let depth = node.depth; 120 | 121 | // We've hit a point at which we may need to create new nodes. Here's the 122 | // decision tree. 123 | // 124 | // 1. The value at `node` is `None`, Then, we insert at this spot. 125 | // 126 | // This case also applies if `node` is not `None` but points at an 127 | // empty slot, but since we don't support removal, this case cannot 128 | // happen. 129 | // 130 | // 2. `node.key == key`. This means `key` is present. We are done. 131 | // 132 | // 3. 
Otherwise, we have to kick the thing in this slot one level down, 133 | // andepth g 134 | // | 135 | // "fog" 136 | // 137 | // Or, in the case that we're a prefix of the node we're replacing, 138 | // 139 | // Before: After: 140 | // 141 | // "" -> f "" -> f -> o -> o -> b 142 | // | | | 143 | // "foobar" "foo"| 144 | // "foobar" 145 | 146 | let idx = self.nodes.get(node); 147 | 148 | let lookup = idx.and_then(|e| self.data.get(e).map(|(k, _)| (e, k))); 149 | let Some((entry, key_len)) = lookup else { 150 | // Case 1. 151 | if let [next, rest @ ..] = rest { 152 | node = self.nodes.build(node, &[*next])?; 153 | self.nodes.extend_key(node, rest); 154 | } 155 | return Ok(node); 156 | }; 157 | 158 | let key_rest = &self.nodes.key(node, Some(key_len))[depth..]; 159 | if key_rest == rest { 160 | // Case 2. 161 | return Ok(node); 162 | } 163 | 164 | // Case 3. 165 | let common_prefix = key_rest 166 | .iter() 167 | .zip(rest) 168 | .take_while(|(a, b)| a == b) 169 | .count(); 170 | 171 | self.nodes.clear(node); 172 | node = self.nodes.build(node, &rest[..common_prefix])?; 173 | let build_from = node; 174 | 175 | // Need to recompute key_rest here to make the borrow checker happy. 176 | let key_rest = &self.nodes.key(node, Some(key_len))[depth..]; 177 | 178 | // Note that because the keys are distinct, `key_rest.len() > common_prefix`. 179 | if let Some(&next) = key_rest.get(common_prefix) { 180 | let move_to = self.nodes.build(node, &[next])?; 181 | self.nodes.set(move_to, entry); 182 | // Don't need to call extend_key() here; by construction, the key at 183 | // the moved node is already long enough. 184 | } else { 185 | self.nodes.set(node, entry); 186 | } 187 | 188 | if let [next, rest @ ..] = &rest[common_prefix..] { 189 | node = self.nodes.build(build_from, &[*next])?; 190 | self.nodes.extend_key(node, rest); 191 | }; 192 | 193 | Ok(node) 194 | } 195 | } 196 | --------------------------------------------------------------------------------