├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── LICENSE.md ├── README.md ├── allman ├── Cargo.toml ├── README.md └── src │ ├── layout.rs │ ├── lib.rs │ └── render.rs ├── buf-trait ├── Cargo.toml ├── README.md └── src │ └── lib.rs ├── byteyarn ├── Cargo.toml ├── README.md └── src │ ├── boxed.rs │ ├── convert.rs │ ├── lib.rs │ ├── raw.rs │ ├── reffed.rs │ └── utf8.rs ├── gilded ├── Cargo.toml ├── README.md ├── attr │ ├── Cargo.toml │ └── lib.rs └── src │ ├── doc │ ├── json.rs │ ├── mod.rs │ └── yaml.rs │ └── lib.rs ├── ilex ├── Cargo.toml ├── README.md ├── attr │ ├── Cargo.toml │ └── lib.rs ├── src │ ├── file │ │ ├── context.rs │ │ └── mod.rs │ ├── fp.rs │ ├── ice.rs │ ├── lib.rs │ ├── report │ │ ├── builtin.rs │ │ ├── diagnostic.rs │ │ ├── mod.rs │ │ └── render.rs │ ├── rt │ │ ├── dfa.rs │ │ ├── emit2.rs │ │ ├── lexer.rs │ │ ├── mod.rs │ │ └── unicode.rs │ ├── rule.rs │ ├── spec.rs │ └── token │ │ ├── mod.rs │ │ ├── stream.rs │ │ └── summary.rs └── tests │ ├── greedy │ ├── greedy.tokens.yaml │ ├── greedy.txt │ ├── main.rs │ ├── newlines.tokens.yaml │ └── newlines.txt │ ├── json │ ├── array.ast.txt │ ├── array.json │ ├── array.tokens.yaml │ ├── main.rs │ ├── null.ast.txt │ ├── null.json │ ├── null.tokens.yaml │ ├── obj.ast.txt │ ├── obj.json │ └── obj.tokens.yaml │ ├── llvm │ ├── main.rs │ ├── smoke.ll │ └── smoke.tokens.yaml │ ├── numbers │ ├── main.rs │ ├── numbers.fp64.txt │ ├── numbers.tokens.yaml │ └── numbers.txt │ └── ui │ ├── ambiguous │ ├── idents.stderr │ ├── idents.txt │ ├── no_xid_after_br.stderr │ ├── no_xid_after_br.txt │ ├── no_xid_after_cm.stderr │ ├── no_xid_after_cm.txt │ ├── no_xid_after_id.stderr │ ├── no_xid_after_id.txt │ ├── no_xid_after_kw.stderr │ ├── no_xid_after_kw.txt │ ├── no_xid_after_nm.stderr │ ├── no_xid_after_nm.txt │ ├── no_xid_after_st.stderr │ ├── no_xid_after_st.txt │ ├── nums.stderr │ ├── nums.txt │ ├── symbols_after_comment.tokens.yaml │ ├── symbols_after_comment.txt │ ├── 
symbols_after_quoted.tokens.yaml │ └── symbols_after_quoted.txt │ ├── digital │ ├── invalid.stderr │ ├── invalid.txt │ ├── missing.stderr │ ├── missing.txt │ ├── points.stderr │ ├── points.txt │ ├── separators.stderr │ └── separators.txt │ ├── eof │ ├── bracket.stderr │ ├── bracket.txt │ ├── bracket_multiline.stderr │ ├── bracket_multiline.txt │ ├── comment.stderr │ ├── comment.txt │ ├── comment_multiline.stderr │ ├── comment_multiline.txt │ ├── mixed_brackets.stderr │ ├── mixed_brackets.txt │ ├── mixed_brackets_multiline.stderr │ ├── mixed_brackets_multiline.txt │ ├── quoted.stderr │ ├── quoted.txt │ ├── quoted_multiline.stderr │ └── quoted_multiline.txt │ ├── main.rs │ ├── too_small │ ├── cxx_tag.stderr │ ├── cxx_tag.txt │ ├── ident.stderr │ ├── ident.txt │ ├── rust_hashes.stderr │ └── rust_hashes.txt │ └── unrecognized │ ├── unrecognized.stderr │ └── unrecognized.txt ├── proc2decl ├── Cargo.toml ├── README.md └── src │ └── lib.rs ├── rust-toolchain.toml ├── rustfmt.toml └── twie ├── Cargo.toml ├── README.md └── src ├── impls.rs ├── lib.rs ├── poison_trie.txt └── raw ├── dump.rs ├── entries.rs ├── iter.rs ├── mod.rs └── nodes.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | on: 3 | push: 4 | branches: [ main ] 5 | pull_request: 6 | branches: [ main ] 7 | 8 | env: 9 | CARGO_TERM_COLOR: always 10 | NIGHTLY: 'nightly-2025-01-01' 11 | 12 | jobs: 13 | check_lints: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: Check format 19 | run: cargo fmt -- --check --files-with-diff 20 | 21 | - name: Check clippy lints 22 | run: cargo clippy --all-targets --verbose 23 | 24 | build_and_test: 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v2 28 | 29 | - name: Build with default settings 30 | run: | 31 | cargo build -v 32 | cargo build --release -v 33 | 34 | - name: Build docs 35 | run: cargo doc --verbose 36 | 37 | - name: Run tests 
38 | run: cargo test --verbose 39 | 40 | miri: 41 | runs-on: ubuntu-latest 42 | steps: 43 | - uses: actions/checkout@v2 44 | 45 | - name: Install Miri 46 | run: rustup +$NIGHTLY component add miri 47 | 48 | - name: Run tests under Miri 49 | run: cargo +$NIGHTLY miri test --workspace --exclude ilex 50 | 51 | # Most of ilex's tests are extremely slow under Miri. 52 | # The LLVM syntax test alone takes 10 minutes or so on a GH runner. 53 | - name: Run some `ilex` tests under Miri 54 | run: | 55 | cargo +$NIGHTLY miri test -p ilex --lib 56 | cargo +$NIGHTLY miri test -p ilex --test greedy -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "allman", 4 | "byteyarn", 5 | "buf-trait", 6 | "gilded", "gilded/attr", 7 | "ilex", "ilex/attr", 8 | "proc2decl", 9 | "twie", 10 | ] 11 | resolver = "2" 12 | 13 | [workspace.package] 14 | edition = "2021" 15 | 16 | authors = ["Miguel Young de la Sota "] 17 | homepage = "https://github.com/mcy/strings" 18 | repository = "https://github.com/mcy/strings" 19 | keywords = ["string", "text", "binary"] 20 | 21 | license = "Apache-2.0" 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rust String Libraries by mcyoung 2 | 3 | For some reason or another, I keep accumulating libraries for operating on 4 | strings of data. Rather than continue to generate disparate repos, this one repo 5 | will hold all of my strings libraries, which will make it easier for them to 6 | depend on each other. 
7 | 8 | ## Table of Contents 9 | 10 | - 📜 [`buf-trait`](https://github.com/mcy/strings/tree/main/buf-trait) - A trait 11 | for abstracting over buffers of POD data. 12 | 13 | - 🧶 [`byteyarn`](https://github.com/mcy/strings/tree/main/byteyarn) - 14 | Space-efficient byte strings. 15 | 16 | - 🌲 [`twie`](https://github.com/mcy/strings/tree/main/twie) - Fast and compact 17 | prefix tries. 18 | 19 | - ⛩️ [`ilex`](https://github.com/mcy/strings/tree/main/ilex) - The last lexer I 20 | ever want to write. 21 | 22 | - 🗒️ [`allman`](https://github.com/mcy/strings/tree/main/allman) - A DOM for 23 | code formatters. 24 | 25 | - 👑 [`gilded`](https://github.com/mcy/strings/tree/main/gilded) - How I learned 26 | to stop worrying and love golden testing. 27 | 28 | - 💢 [`proc2decl`](https://github.com/mcy/strings/tree/main/proc2decl) - Proc 29 | macros suck! 30 | 31 | --- 32 | 33 | All libraries are Apache-2.0 licensed. 34 | -------------------------------------------------------------------------------- /allman/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "allman" 3 | version = "0.1.0" 4 | description = "source code formatting and line reflowing toolkit" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | byteyarn = { path = "../byteyarn" } 15 | 16 | unicode-width = "0.2.0" -------------------------------------------------------------------------------- /allman/README.md: -------------------------------------------------------------------------------- 1 | # allman 2 | 3 | `allman` - A code formatting and line reflowing toolkit. 🗒️🖋️ 4 | 5 | `allman::Doc` is a DOM-like structure that specifies how indentation, 6 | like breaking, and reflowing should be handled. It is a tree of `Tag`s 7 | that dictate layout information for the source code to format. 
8 | 9 | For example, the Allman brace style (for which this crate is named) can 10 | be implemented as follows: 11 | 12 | ```rust 13 | // flat: fn foo() { ... } 14 | // 15 | // broken: 16 | // fn foo() 17 | // { 18 | // // ... 19 | // } 20 | Doc::new() 21 | .tag("fn") 22 | .tag(Tag::Space) 23 | .tag("foo") 24 | .tag("(").tag(")") 25 | .tag_with(Tag::Group(40), |doc| { 26 | doc 27 | .tag_if(Tag::Space, If::Flat) 28 | .tag_if(Tag::Break(1), If::Broken) 29 | .tag("{") 30 | .tag_if(Tag::Space, If::Flat) 31 | .tag_if(Tag::Break(1), If::Broken) 32 | .tag_with(Tag::Indent(2), |doc| { 33 | // Brace contents here... 34 | }) 35 | .tag_if(Tag::Space, If::Flat) 36 | .tag_if(Tag::Break(1), If::Broken) 37 | .tag("}"); 38 | }); 39 | ``` 40 | 41 | When calling `Doc::render()`, the layout algorithm will determine whether 42 | `Tag::Group`s should be "broken", i.e., laid out with newlines inside. 43 | -------------------------------------------------------------------------------- /allman/src/layout.rs: -------------------------------------------------------------------------------- 1 | //! Layout algorithm implementation. 2 | //! 3 | //! The only thing the layout algorithm *actually* has to decide is whether each 4 | //! group breaks or not. The algorithm is as follows. 5 | //! 6 | //! 1. Measure the width of each element recursively. Elements which span 7 | //! multiple lines are treated as being of infinite width. 8 | //! 9 | //! 2. Mark groups as broken recursively: for each group, if at its current 10 | //! position, it would overflow the maximum column length, break it, and 11 | //! recurse into it. 
12 | 13 | use unicode_width::UnicodeWidthStr; 14 | 15 | use crate::Cursor; 16 | use crate::Doc; 17 | use crate::If; 18 | use crate::Measure; 19 | use crate::Options; 20 | use crate::Tag; 21 | use crate::TagInfo; 22 | 23 | impl Doc<'_> { 24 | pub(crate) fn do_layout(&self, opts: &Options) { 25 | for (t, c) in self.cursor() { 26 | measure(t, c); 27 | } 28 | 29 | LayoutState { opts, indent: 0, column: 0 }.do_layout(self.cursor()); 30 | } 31 | } 32 | 33 | struct LayoutState<'a> { 34 | opts: &'a Options, 35 | 36 | /// The column to start the next line at. 37 | indent: usize, 38 | 39 | /// The next column that we would be writing at. 40 | column: usize, 41 | } 42 | 43 | impl LayoutState<'_> { 44 | /// Advances state for rendering a tag within a broken group. 45 | fn do_layout(&mut self, cursor: Cursor) { 46 | for (tag, cursor) in cursor { 47 | let cond = tag.cond != Some(If::Flat); 48 | 49 | let mut m = tag.measure.get(); 50 | m.column = self.column; 51 | match &tag.tag { 52 | Tag::Text(text) => match text.rfind("\n") { 53 | Some(nl) => self.column = self.indent + text[nl..].width(), 54 | None => self.column += m.width.unwrap(), 55 | }, 56 | 57 | Tag::Space => self.column += 1, 58 | Tag::Break(0) => {} 59 | Tag::Break(_) => self.column = self.indent, 60 | 61 | Tag::Group(max) => { 62 | let mut width = 63 | m.width.filter(|w| self.column + w <= self.opts.max_columns); 64 | 65 | if width.is_some_and(|w| w > *max) { 66 | width = None; 67 | } 68 | 69 | if let Some(w) = width { 70 | // Don't need to do layout here: everything already fits. 
71 | self.column += w; 72 | } else { 73 | m.width = None; 74 | 75 | self.do_layout(cursor); 76 | } 77 | } 78 | 79 | Tag::Indent(columns) => { 80 | if cond { 81 | let prev = self.indent; 82 | self.indent = self.indent.saturating_add_signed(*columns); 83 | self.do_layout(cursor); 84 | self.indent = prev; 85 | } 86 | } 87 | } 88 | tag.measure.set(m); 89 | } 90 | } 91 | } 92 | 93 | /// Calculates the width of each element if it was laid out in one line. 94 | fn measure(tag: &TagInfo, cursor: Cursor) { 95 | let tag_width = match &tag.tag { 96 | _ if tag.cond == Some(If::Broken) => Some(0), 97 | 98 | Tag::Text(text) => (!text.contains("\n")).then(|| text.width()), 99 | Tag::Space => Some(1), 100 | Tag::Break(_) => None, 101 | 102 | _ => Some(0), 103 | }; 104 | 105 | let width = cursor 106 | .map(|(t, c)| { 107 | measure(t, c); 108 | t.measure.get().width 109 | }) 110 | .fold(tag_width, |a, b| a?.checked_add(b?)); 111 | 112 | tag.measure.set(Measure { width, column: 0 }); 113 | } 114 | -------------------------------------------------------------------------------- /allman/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `allman` - A code formatting and line reflowing toolkit. 🗒️🖋️ 2 | //! 3 | //! [`allman::Doc`][Doc] is a DOM-like structure that specifies how indentation, 4 | //! like breaking, and reflowing should be handled. It is a tree of [`Tag`]s 5 | //! that dictate layout information for the source code to format. 6 | //! 7 | //! For example, the Allman brace style (for which this crate is named) can 8 | //! be implemented as follows: 9 | //! 10 | //! ``` 11 | //! # use allman::*; 12 | //! // flat: fn foo() { ... } 13 | //! // 14 | //! // broken: 15 | //! // fn foo() 16 | //! // { 17 | //! // // ... 18 | //! // } 19 | //! Doc::new() 20 | //! .tag("fn") 21 | //! .tag(Tag::Space) 22 | //! .tag("foo") 23 | //! .tag("(").tag(")") 24 | //! .tag_with(Tag::Group(40), |doc| { 25 | //! doc 26 | //! 
.tag_if(Tag::Space, If::Flat) 27 | //! .tag_if(Tag::Break(1), If::Broken) 28 | //! .tag("{") 29 | //! .tag_if(Tag::Space, If::Flat) 30 | //! .tag_if(Tag::Break(1), If::Broken) 31 | //! .tag_with(Tag::Indent(2), |doc| { 32 | //! // Brace contents here... 33 | //! }) 34 | //! .tag_if(Tag::Space, If::Flat) 35 | //! .tag_if(Tag::Break(1), If::Broken) 36 | //! .tag("}"); 37 | //! }); 38 | //! ``` 39 | //! 40 | //! When calling [`Doc::render()`], the layout algorithm will determine whether 41 | //! [`Tag::Group`]s should be "broken", i.e., laid out with newlines inside. 42 | 43 | use core::slice; 44 | use std::cell::Cell; 45 | use std::fmt; 46 | use std::io; 47 | 48 | use byteyarn::YarnBox; 49 | 50 | mod layout; 51 | mod render; 52 | 53 | /// A source code document, which can be rendered as formatted text. 54 | /// 55 | /// A [`Doc`] is analogous to an HTML DOM, which is text along with markup for 56 | /// laying out that text. The difference being that rather than being converted 57 | /// into raster graphics by a browser engine, a [`Doc`] is rendered as a text 58 | /// file. 59 | #[derive(Clone, Default)] 60 | pub struct Doc<'text> { 61 | /// This is a flattened tree: each node specifies how many elements after it 62 | /// make up its children. The `Cursor` type implements walking this tree. 63 | tags: Vec>, 64 | } 65 | 66 | /// A condition that can be applied to a tag. 67 | /// 68 | /// If a condition is set on a tag, and the condition is false, the tag is 69 | /// treated as a no-op: its contents are not printed. 70 | #[derive(Clone, Copy, PartialEq, Eq, Debug)] 71 | pub enum If { 72 | /// True when the containing group is printed on one line. 73 | Flat, 74 | /// True when the containing group does not fit on one line. 75 | Broken, 76 | } 77 | 78 | /// Options for [`Doc::render()`]. 79 | pub struct Options { 80 | /// The maximum number of columns in a line. 81 | pub max_columns: usize, 82 | } 83 | 84 | impl<'text> Doc<'text> { 85 | /// Returns a new, empty document. 
86 | pub fn new() -> Self { 87 | Self::default() 88 | } 89 | 90 | /// Renders this document to the given writer. 91 | pub fn render( 92 | &self, 93 | out: &mut dyn io::Write, 94 | options: &Options, 95 | ) -> io::Result<()> { 96 | self.do_layout(options); 97 | render::Printer::new(out).render(self.cursor(), options, true) 98 | } 99 | 100 | /// Inserts a new self-closing tag into this doc. 101 | pub fn tag(&mut self, tag: impl Into>) -> &mut Self { 102 | self.tag_if_with(tag, None, |_| {}) 103 | } 104 | 105 | /// Inserts a new tag into this doc. The given closure can be used to insert 106 | /// tags into it. 107 | /// 108 | /// # Panics 109 | /// 110 | /// Panics if children are inserted and [`Tag::can_have_children()`] is false. 111 | pub fn tag_with( 112 | &mut self, 113 | tag: impl Into>, 114 | body: impl FnOnce(&mut Self), 115 | ) -> &mut Self { 116 | self.tag_if_with(tag, None, body) 117 | } 118 | 119 | /// Inserts a new tag into this doc, with an optional condition. 120 | pub fn tag_if( 121 | &mut self, 122 | tag: impl Into>, 123 | cond: impl Into>, 124 | ) -> &mut Self { 125 | self.tag_if_with(tag, cond, |_| {}) 126 | } 127 | 128 | /// Inserts a new tag into this doc, with an optional condition. The given 129 | /// closure can be used to insert tags into it. 130 | /// 131 | /// # Panics 132 | /// 133 | /// Panics if children are inserted and [`Tag::can_have_children()`] is false. 
134 | pub fn tag_if_with( 135 | &mut self, 136 | tag: impl Into>, 137 | cond: impl Into>, 138 | body: impl FnOnce(&mut Self), 139 | ) -> &mut Self { 140 | let tag = tag.into(); 141 | let compound = tag.can_have_children(); 142 | 143 | let consolidate = matches!( 144 | (&tag, self.tags.last().map(|t| &t.tag)), 145 | (Tag::Space, Some(Tag::Space)) 146 | ); 147 | 148 | let idx = self.tags.len(); 149 | self.tags.push(TagInfo { 150 | tag, 151 | len: 0, 152 | cond: cond.into(), 153 | measure: Cell::default(), 154 | }); 155 | body(self); 156 | 157 | let len = self.tags.len() - idx - 1; 158 | assert!( 159 | compound || len == 0, 160 | "inserted children for {:?}", 161 | &self.tags[idx].tag 162 | ); 163 | 164 | if consolidate { 165 | self.tags.pop(); 166 | } 167 | 168 | self.tags[idx].len = len; 169 | self 170 | } 171 | 172 | fn cursor(&self) -> Cursor { 173 | Cursor { iter: self.tags.iter() } 174 | } 175 | } 176 | 177 | #[derive(Clone, Debug)] 178 | struct TagInfo<'text> { 179 | tag: Tag<'text>, 180 | len: usize, 181 | cond: Option, 182 | 183 | measure: Cell, 184 | } 185 | 186 | #[derive(Copy, Clone, Default, Debug)] 187 | struct Measure { 188 | /// The number of columns this tag takes up when it is formatted on one line. 189 | /// 190 | /// None if its width should be treated as infinite. 191 | width: Option, 192 | column: usize, 193 | } 194 | 195 | /// An element of a [`Doc`]. 196 | #[derive(Clone, PartialEq, Eq, Debug)] 197 | pub enum Tag<'text> { 198 | /// Verbatim text. Line breaks inside of this text cause any groups that 199 | /// contain it to be broken. 200 | Text(YarnBox<'text, str>), 201 | 202 | /// Inserts a space, except if it would end a line. This is intended for 203 | /// ensuring lines do not have trailing whitespace. [`Tag::Text`] containing 204 | /// a space can be used to force a space at the end of a line. 205 | /// 206 | /// Consecutive space tags are consolidated into one. 
207 | Space, 208 | 209 | /// Inserts the given number of newlines, and breaks the surrounding group. 210 | /// 211 | /// Consecutive breaks are consolidated into one. A `Break(0)` can be used 212 | /// to force a break without inserting an actual newline. 213 | Break(usize), 214 | 215 | /// A sequence of tags that may either be rendered as one line, or broken into 216 | /// multiple lines if it does not fit. 217 | /// 218 | /// The group will also break itself if it is wider than the given width; 219 | /// use [`usize::MAX`] to disable this. 220 | Group(usize), 221 | 222 | /// Change indentation by the given number of columns. 223 | Indent(isize), 224 | } 225 | 226 | impl Tag<'_> { 227 | /// Returns whether or not this tag can contain child tags. 228 | pub fn can_have_children(&self) -> bool { 229 | matches!(self, Self::Group(..) | Self::Indent(..)) 230 | } 231 | } 232 | 233 | impl<'text, Y: Into>> From for Tag<'text> { 234 | fn from(yarn: Y) -> Self { 235 | Self::Text(yarn.into()) 236 | } 237 | } 238 | 239 | /// A cursor over a piece of a [`Doc`]. 240 | struct Cursor<'a> { 241 | iter: slice::Iter<'a, TagInfo<'a>>, 242 | } 243 | 244 | impl<'a> Iterator for Cursor<'a> { 245 | type Item = (&'a TagInfo<'a>, Cursor<'a>); 246 | 247 | fn next(&mut self) -> Option { 248 | let next = self.iter.next()?; 249 | if next.len == 0 { 250 | // Fast path that avoids an extra bounds check. 
251 | return Some((next, Cursor { iter: [].iter() })); 252 | } 253 | 254 | let (contents, rest) = self.iter.as_slice().split_at(next.len); 255 | self.iter = rest.iter(); 256 | Some((next, Cursor { iter: contents.iter() })) 257 | } 258 | } 259 | 260 | impl fmt::Debug for Doc<'_> { 261 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 262 | fn fmt( 263 | indent: usize, 264 | cursor: Cursor, 265 | f: &mut fmt::Formatter, 266 | ) -> fmt::Result { 267 | for (tag, cursor) in cursor { 268 | write!(f, "{:<1$}", "\n", indent + 1)?; 269 | match &tag.tag { 270 | Tag::Text(y) => write!(f, "{y:?}")?, 271 | Tag::Space => write!(f, "")?, 272 | Tag::Break(n) => write!(f, "")?, 273 | Tag::Group(w) => { 274 | if cursor.iter.as_slice().is_empty() { 275 | write!(f, "")?; 276 | continue; 277 | } 278 | 279 | write!(f, "")?; 280 | fmt(indent + 2, cursor, f)?; 281 | write!(f, "")?; 282 | } 283 | Tag::Indent(c) => { 284 | if cursor.iter.as_slice().is_empty() { 285 | write!(f, "")?; 286 | continue; 287 | } 288 | 289 | write!(f, "")?; 290 | fmt(indent + 2, cursor, f)?; 291 | write!(f, "")?; 292 | } 293 | } 294 | } 295 | write!(f, "{:<1$}", "\n", indent - 2 + 1)?; 296 | Ok(()) 297 | } 298 | 299 | fmt(0, self.cursor(), f) 300 | } 301 | } 302 | -------------------------------------------------------------------------------- /allman/src/render.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::io::Write; 3 | use std::mem; 4 | 5 | use crate::If; 6 | use crate::Options; 7 | use crate::Tag; 8 | 9 | /// An indentation-aware pretty-printer. 10 | pub struct Printer<'a> { 11 | out: &'a mut dyn io::Write, 12 | indent: usize, 13 | space: bool, 14 | newlines: usize, 15 | } 16 | 17 | impl<'a> Printer<'a> { 18 | /// Returns a new printer with the given output and options. 
19 | pub fn new(out: &'a mut dyn io::Write) -> Self { 20 | Self { 21 | out, 22 | indent: 0, 23 | space: false, 24 | newlines: 0, 25 | } 26 | } 27 | 28 | /// Updates the indentation level with the given diff. 29 | pub fn with_indent( 30 | &mut self, 31 | diff: isize, 32 | body: impl FnOnce(&mut Self) -> R, 33 | ) -> R { 34 | let prev = self.indent; 35 | self.indent = self.indent.saturating_add_signed(diff); 36 | let r = body(self); 37 | self.indent = prev; 38 | r 39 | } 40 | 41 | /// Writes indentation, if necessary. 42 | pub fn write_indent(&mut self) -> io::Result<()> { 43 | if mem::take(&mut self.newlines) == 0 { 44 | return Ok(()); 45 | } 46 | 47 | self.write_spaces(self.indent) 48 | } 49 | 50 | /// Writes len ASCII spaces to the output. 51 | pub fn write_spaces(&mut self, mut len: usize) -> io::Result<()> { 52 | const SPACES: &[u8; 32] = b" "; 53 | 54 | while len > SPACES.len() { 55 | self.out.write_all(SPACES)?; 56 | len -= SPACES.len(); 57 | } 58 | self.out.write_all(&SPACES[..len])?; 59 | Ok(()) 60 | } 61 | 62 | pub fn render( 63 | &mut self, 64 | cursor: crate::Cursor, 65 | _options: &Options, 66 | parent_is_broken: bool, 67 | ) -> io::Result<()> { 68 | for (tag, cursor) in cursor { 69 | let cond = match tag.cond { 70 | Some(If::Broken) => parent_is_broken, 71 | Some(If::Flat) => !parent_is_broken, 72 | None => true, 73 | }; 74 | 75 | match &tag.tag { 76 | Tag::Text(text) => { 77 | if cond { 78 | write!(self, "{text}")?; 79 | } 80 | } 81 | 82 | Tag::Space => self.space |= cond, 83 | Tag::Break(n) => { 84 | if cond { 85 | for _ in self.newlines..*n { 86 | writeln!(self)?; 87 | } 88 | } 89 | } 90 | 91 | Tag::Group(..) 
=> { 92 | let m = tag.measure.get(); 93 | self.render(cursor, _options, m.width.is_none())?; 94 | } 95 | 96 | Tag::Indent(columns) => { 97 | if cond { 98 | self.with_indent(*columns, |p| { 99 | p.render(cursor, _options, parent_is_broken) 100 | })?; 101 | } 102 | } 103 | } 104 | } 105 | 106 | Ok(()) 107 | } 108 | } 109 | 110 | impl io::Write for Printer<'_> { 111 | fn write(&mut self, buf: &[u8]) -> io::Result { 112 | if buf.is_empty() { 113 | return Ok(0); 114 | } 115 | 116 | if mem::take(&mut self.space) && !buf.starts_with(b"\n") { 117 | self.write_all(b" ")?; 118 | } 119 | 120 | for line in buf.split_inclusive(|&b| b == b'\n') { 121 | if line == b"\n" { 122 | self.newlines += 1; 123 | self.out.write_all(line)?; 124 | continue; 125 | } 126 | 127 | self.write_indent()?; 128 | self.out.write_all(line)?; 129 | if line.ends_with(b"\n") { 130 | self.newlines = 1; 131 | } 132 | } 133 | Ok(buf.len()) 134 | } 135 | 136 | fn flush(&mut self) -> io::Result<()> { 137 | self.out.flush() 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /buf-trait/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "buf-trait" 3 | version = "0.4.1" 4 | description = "abstract over [u8], str, and friends" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | zerocopy = "0.7" 15 | -------------------------------------------------------------------------------- /buf-trait/README.md: -------------------------------------------------------------------------------- 1 | # buf-trait 2 | 3 | The `Buf` trait. 4 | 5 | This crate provides a trait for abstracting over buffer-like types, such 6 | as `str` and `[u8]`. This is a much stronger property than, say, 7 | implementing [`AsRef<[u8]>`]. 
These are variable-length types that you might 8 | want to store as a raw byte buffer and then transmute to and from `&[u8]`. 9 | 10 | This crate provides all the functionality necessary for doing so safely, 11 | correctly, and in `const`. 12 | -------------------------------------------------------------------------------- /buf-trait/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! The `Buf` trait. 2 | //! 3 | //! This crate provides a trait for abstracting over buffer-like types, such 4 | //! as `str` and `[u8]`. This is a much stronger property than, say, 5 | //! implementing [`AsRef<[u8]>`]. These are variable-length types that you might 6 | //! want to store as a raw byte buffer and then transmute to and from `&[u8]`. 7 | //! 8 | //! This crate provides all the functionality necessary for doing so safely, 9 | //! correctly, and in `const`. 10 | 11 | #![no_std] 12 | 13 | use core::alloc::Layout; 14 | use core::mem; 15 | use core::slice; 16 | use core::slice::SliceIndex; 17 | 18 | /// A trait for abstracting over `str`, `[u8]`, and other byte-string-like 19 | /// types. 20 | /// 21 | /// See the [crate docs](self) for more information. 22 | /// 23 | /// # Safety 24 | /// 25 | /// This trait should only be implemented on types that are, essentially, a 26 | /// `repr(transpartent)` wrapper over a `[T]` for some Copy type `T`. 27 | /// 28 | /// In particular, `B: Buf` the requires that the following must hold: 29 | /// 30 | /// 1. Transmute `&B` to `&[T]`, where `T` is [`zerocopy::AsBytes`]. Transmute 31 | /// here is quite literal: `mem::transmute<&B, &[T]>` MUST be a valid way 32 | /// to convert between them. 33 | /// 34 | /// 2. Transmute `&[T]` to `&B` if the contents of that `&[T]` originated from 35 | /// operation (1). 36 | /// 37 | /// 3. Byte-copy `&B` to a `T`-aligned buffer, and then transmute 38 | /// the resulting `&[T]` to `&B` again. 39 | /// 40 | /// 4. 
`x == y` implies that `x.as_bytes() == y.as_bytes()`. 41 | /// 42 | /// 5. `B::from_bytes(&[])` and `B::from_bytes_mut(&mut [])` always produce 43 | /// valid values. 44 | /// 45 | /// Notably, none of `CStr`, `OsStr`, or `Path` can implement `Buf` because 46 | /// their layout as slices is not part of their interface. 47 | /// 48 | /// `T` may be zero-sized, but functions will panic in this case. 49 | pub unsafe trait Buf { 50 | /// The element type of the underlying type. This is used for computing e.g. 51 | /// alignment and stride. 52 | type Element: zerocopy::AsBytes + Copy; 53 | 54 | /// The length of this value, in elements. 55 | fn elem_len(&self) -> usize { 56 | mem::size_of_val(self) / mem::size_of::() 57 | } 58 | 59 | /// The length of this value, in bytes. 60 | fn byte_len(&self) -> usize { 61 | mem::size_of_val(self) 62 | } 63 | 64 | /// Creates a new empty [`Buf`]. 65 | fn empty<'a, B: ?Sized + Buf>() -> &'a B { 66 | empty() 67 | } 68 | 69 | /// Converts a reference to a [`Buf`] into its underlying bytes. 70 | fn as_bytes(&self) -> &[u8] { 71 | as_bytes(self) 72 | } 73 | 74 | /// Converts a byte slice to a reference to a [`Buf`]. 75 | /// 76 | /// # Safety 77 | /// 78 | /// `bytes` must have been either constructed via transmuting from `&Self`, 79 | /// or a bytewise copy of a `Self`. 80 | unsafe fn from_bytes(bytes: &[u8]) -> &Self { 81 | as_buf(bytes) 82 | } 83 | 84 | /// Converts a reference to a [`Buf`] into its underlying bytes. 85 | fn as_bytes_mut(&mut self) -> &mut [u8] { 86 | as_bytes_mut(self) 87 | } 88 | 89 | /// Converts a byte slice to a reference to a [`Buf`]. 90 | /// 91 | /// # Safety 92 | /// 93 | /// `bytes` must have been either constructed via transmuting from `&Self`, 94 | /// or a bytewise copy of a `Self`. 95 | unsafe fn from_bytes_mut(bytes: &mut [u8]) -> &mut Self { 96 | as_buf_mut(bytes) 97 | } 98 | 99 | /// Performs a slicing operation on `self` with respect to byte indices. 
100 | /// 101 | /// # Safety 102 | /// 103 | /// This function does not perform any checking beyonds bounds checking. For 104 | /// example, if called on `str`, this function may slice through a multi-byte 105 | /// Unicode scalar, producing a `&str` that violate's `str`'s validity 106 | /// constraints (i.e., Undefined Behavior). 107 | unsafe fn slice_along_bytes(&self, index: Idx) -> Option<&Self> 108 | where 109 | Idx: SliceIndex<[u8], Output = [u8]>, 110 | { 111 | self.as_bytes().get(index).map(|b| Self::from_bytes(b)) 112 | } 113 | } 114 | 115 | unsafe impl Buf for [T] { 116 | type Element = T; 117 | } 118 | 119 | unsafe impl Buf for str { 120 | type Element = u8; 121 | } 122 | 123 | /// Computes the layout of `buf`. 124 | /// 125 | /// This function is `const`, unlike [`Layout::for_value()`]. 126 | pub const fn layout_of(buf: &B) -> Layout { 127 | unsafe { 128 | Layout::from_size_align_unchecked( 129 | as_bytes(buf).len(), 130 | mem::align_of::(), 131 | ) 132 | } 133 | } 134 | 135 | /// Creates a new empty [`Buf`]. 136 | /// 137 | /// Unlike [`Buf::empty()`], this function is `const`. 138 | pub const fn empty<'a, B: ?Sized + Buf>() -> &'a B { 139 | unsafe { as_buf(&[]) } 140 | } 141 | 142 | /// Converts a reference to a [`Buf`] into its underlying bytes. 143 | /// 144 | /// Unlike [`Buf::as_bytes()`], this function is `const`. 145 | pub const fn as_bytes(buf: &B) -> &[u8] { 146 | assert!( 147 | mem::size_of::() > 0, 148 | "buf-trait: cannot use ZST as in type-erased context" 149 | ); 150 | 151 | let ptr = &buf as *const &_ as *const &[B::Element]; 152 | 153 | unsafe { 154 | let buf = *ptr; 155 | // SAFETY: The safety rules of `Buf` make this valid. 156 | let ptr = buf as *const _ as *const u8; 157 | let len = buf.len() * mem::size_of::(); 158 | slice::from_raw_parts(ptr, len) 159 | } 160 | } 161 | 162 | /// Converts a mutable reference to a [`Buf`] into its underlying bytes. 
163 | pub fn as_bytes_mut(mut buf: &mut B) -> &mut [u8] { 164 | assert!( 165 | mem::size_of::() > 0, 166 | "buf-trait: cannot use ZST as in type-erased context" 167 | ); 168 | 169 | let ptr = &mut buf as *mut &mut _ as *mut &mut [B::Element]; 170 | 171 | unsafe { 172 | let buf = &mut *ptr; 173 | // SAFETY: The safety rules of `Buf` make this valid. 174 | let ptr = buf as *mut _ as *mut u8; 175 | slice::from_raw_parts_mut(ptr, mem::size_of_val(&**buf)) 176 | } 177 | } 178 | 179 | /// Converts a byte slice to a reference to a [`Buf`]. 180 | /// 181 | /// Unlike [`Buf::from_bytes()`], this function is `const`. 182 | /// 183 | /// # Safety 184 | /// 185 | /// See [`Buf::from_bytes()`]. 186 | pub const unsafe fn as_buf(bytes: &[u8]) -> &B { 187 | assert!( 188 | mem::size_of::() > 0, 189 | "buf-trait: cannot use ZST as in type-erased context" 190 | ); 191 | 192 | let buf = slice::from_raw_parts( 193 | bytes.as_ptr().cast::(), 194 | bytes.len() / mem::size_of::(), 195 | ); 196 | 197 | let ptr = &buf as *const &[_] as *const &B; 198 | *ptr 199 | } 200 | 201 | /// Converts a mutable byte slice to a reference to a [`Buf`]. 202 | /// 203 | /// # Safety 204 | /// 205 | /// See [`Buf::from_bytes()`]. 
206 | pub unsafe fn as_buf_mut(bytes: &mut [u8]) -> &mut B { 207 | assert!( 208 | mem::size_of::() > 0, 209 | "buf-trait: cannot use ZST as in type-erased context" 210 | ); 211 | 212 | let mut buf = slice::from_raw_parts_mut( 213 | bytes.as_mut_ptr().cast::(), 214 | bytes.len() / mem::size_of::(), 215 | ); 216 | 217 | let ptr = &mut buf as *mut &mut [_] as *mut &mut B; 218 | *ptr 219 | } 220 | -------------------------------------------------------------------------------- /byteyarn/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "byteyarn" 3 | version = "0.5.1" 4 | description = "hyper-compact strings" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | buf-trait = { version = "0.4", path = "../buf-trait" } 15 | -------------------------------------------------------------------------------- /byteyarn/README.md: -------------------------------------------------------------------------------- 1 | # byteyarn 2 | 3 | `byteyarn` - Space-efficient byte strings 🧶🐈‍⬛ 4 | 5 | A `Yarn` is a highly optimized string type that provides a number of 6 | useful properties over `String`: 7 | 8 | * Always two pointers wide, so it is always passed into and out of functions 9 | in registers. 10 | * Small string optimization (SSO) up to 15 bytes on 64-bit architectures. 11 | * Can be either an owned buffer or a borrowed buffer (like `Cow`). 12 | * Can be upcast to `'static` lifetime if it was constructed from a 13 | known-static string. 14 | * `Option` has the same size and ABI as `Yarn`. 
15 | 16 | The main caveat is that `Yarn`s cannot be easily appended to, since they 17 | do not track an internal capacity, and the slice returned by 18 | `Yarn::as_slice()` does not have the same pointer stability properties as 19 | `String` (these are rarely needed, though). 20 | 21 | --- 22 | 23 | Yarns are useful for situations in which a copy-on-write string is necessary 24 | and most of the strings are relatively small. Although `Yarn` itself is 25 | not `Copy`, there is a separate `YarnRef` type that is. These types 26 | have equivalent representations, and can be cheaply cast between each other. 27 | 28 | The easiest way to create a yarn is with the `yarn!()` 29 | macro, which is similar to `format!()`. 30 | 31 | ```rust 32 | // Create a new yarn via `fmt`ing. 33 | let yarn = yarn!("Answer: {}", 42); 34 | 35 | // Convert that yarn into a reference. 36 | let ry: YarnRef = yarn.as_ref(); 37 | 38 | // Try up-casting the yarn into an "immortal yarn" without copying. 39 | let copy: YarnRef<'static, str> = ry.immortalize().unwrap(); 40 | 41 | assert_eq!(yarn, copy); 42 | ``` 43 | 44 | Yarns are intended for storing text, either as UTF-8 or as 45 | probably-UTF-8 bytes; `Yarn` and `Yarn` serve these purposes, 46 | and can be inter-converted with each other. The `Yarn::utf8_chunks()` 47 | function can be used to iterate over definitely-valid-UTF-8 chunks within 48 | a string. 49 | 50 | Both kinds of yarns can be `Debug`ed and `Display`ed, and will print out as 51 | strings would. In particular, invalid UTF-8 is converted into either `\xNN` 52 | escapes or replacement characters (for `Debug` and `Display` respectively). 
53 | 54 | ```rust 55 | let invalid = ByteYarn::from_byte(0xff); 56 | assert_eq!(format!("{invalid:?}"), r#""\xFF""#); 57 | assert_eq!(format!("{invalid}"), "�"); 58 | ``` 59 | -------------------------------------------------------------------------------- /byteyarn/src/convert.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Borrow; 2 | use std::fmt; 3 | use std::str::Utf8Error; 4 | 5 | use crate::YarnBox; 6 | use crate::YarnRef; 7 | 8 | #[derive(Clone, Debug)] 9 | pub struct NonCopy(()); 10 | 11 | impl fmt::Display for NonCopy { 12 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 13 | f.write_str("cannot convert yarn to non-owning yarn") 14 | } 15 | } 16 | 17 | impl<'a, Buf> TryFrom> for YarnRef<'a, Buf> 18 | where 19 | Buf: crate::Buf + ?Sized, 20 | { 21 | type Error = NonCopy; 22 | 23 | fn try_from(y: YarnBox<'a, Buf>) -> Result { 24 | y.to_ref().ok_or(NonCopy(())) 25 | } 26 | } 27 | 28 | impl<'a> TryFrom> for YarnBox<'a, str> { 29 | type Error = Utf8Error; 30 | 31 | fn try_from(y: YarnBox<'a, [u8]>) -> Result { 32 | y.to_utf8() 33 | } 34 | } 35 | 36 | impl<'a> TryFrom> for YarnRef<'a, str> { 37 | type Error = Utf8Error; 38 | 39 | fn try_from(y: YarnRef<'a, [u8]>) -> Result { 40 | y.to_utf8() 41 | } 42 | } 43 | 44 | impl<'a> From> for YarnBox<'a, [u8]> { 45 | fn from(y: YarnBox<'a, str>) -> Self { 46 | y.into_bytes() 47 | } 48 | } 49 | 50 | impl<'a> From> for YarnRef<'a, [u8]> { 51 | fn from(y: YarnRef<'a, str>) -> Self { 52 | y.into_bytes() 53 | } 54 | } 55 | 56 | impl From for YarnBox<'_, [u8]> { 57 | fn from(c: u8) -> Self { 58 | Self::from_byte(c) 59 | } 60 | } 61 | 62 | impl From for YarnRef<'_, [u8]> { 63 | fn from(c: u8) -> Self { 64 | Self::from_byte(c) 65 | } 66 | } 67 | 68 | impl From for YarnBox<'_, Buf> 69 | where 70 | Buf: crate::Buf + ?Sized, 71 | { 72 | fn from(c: char) -> Self { 73 | Self::from_char(c) 74 | } 75 | } 76 | 77 | impl From for YarnRef<'_, Buf> 78 | where 79 | Buf: 
crate::Buf + ?Sized, 80 | { 81 | fn from(c: char) -> Self { 82 | Self::from_char(c) 83 | } 84 | } 85 | 86 | impl<'a, Buf> From<&'a Buf> for YarnBox<'a, Buf> 87 | where 88 | Buf: crate::Buf + ?Sized, 89 | { 90 | fn from(s: &'a Buf) -> Self { 91 | Self::new(s) 92 | } 93 | } 94 | 95 | impl From<[T; N]> for YarnBox<'_, [T]> 96 | where 97 | [T]: crate::Buf, 98 | { 99 | fn from(s: [T; N]) -> Self { 100 | YarnBox::from(s.as_slice()).immortalize() 101 | } 102 | } 103 | 104 | impl<'a, Buf> From<&'a YarnBox<'_, Buf>> for YarnBox<'a, Buf> 105 | where 106 | Buf: crate::Buf + ?Sized, 107 | { 108 | fn from(s: &'a YarnBox<'a, Buf>) -> Self { 109 | s.aliased() 110 | } 111 | } 112 | 113 | impl<'a, Buf> From<&'a YarnBox<'_, Buf>> for YarnRef<'a, Buf> 114 | where 115 | Buf: crate::Buf + ?Sized, 116 | { 117 | fn from(s: &'a YarnBox<'a, Buf>) -> Self { 118 | s.as_ref() 119 | } 120 | } 121 | 122 | impl<'a, Buf> From<&'a Buf> for YarnRef<'a, Buf> 123 | where 124 | Buf: crate::Buf + ?Sized, 125 | { 126 | fn from(s: &'a Buf) -> Self { 127 | Self::new(s) 128 | } 129 | } 130 | 131 | impl From> for YarnBox<'_, Buf> 132 | where 133 | Buf: crate::Buf + ?Sized, 134 | { 135 | fn from(s: Box) -> Self { 136 | Self::from_box(s) 137 | } 138 | } 139 | 140 | impl From> for YarnBox<'_, [T]> 141 | where 142 | [T]: crate::Buf, 143 | { 144 | fn from(s: Vec) -> Self { 145 | Self::from_vec(s) 146 | } 147 | } 148 | 149 | impl From> for YarnBox<'_, [u8]> { 150 | fn from(s: Box) -> Self { 151 | Self::from_boxed_str(s) 152 | } 153 | } 154 | 155 | impl From for YarnBox<'_, str> { 156 | fn from(s: String) -> Self { 157 | Self::from_string(s) 158 | } 159 | } 160 | 161 | impl From for YarnBox<'_, [u8]> { 162 | fn from(s: String) -> Self { 163 | Self::from_string(s) 164 | } 165 | } 166 | 167 | impl From> for Box<[u8]> 168 | where 169 | Buf: crate::Buf + ?Sized, 170 | { 171 | fn from(y: YarnBox) -> Self { 172 | y.into_boxed_bytes() 173 | } 174 | } 175 | 176 | impl From> for Box<[u8]> 177 | where 178 | Buf: crate::Buf 
+ ?Sized, 179 | { 180 | fn from(y: YarnRef) -> Self { 181 | y.to_boxed_bytes() 182 | } 183 | } 184 | 185 | impl From> for Vec 186 | where 187 | Buf: crate::Buf + ?Sized, 188 | { 189 | fn from(y: YarnBox) -> Self { 190 | y.into_byte_vec() 191 | } 192 | } 193 | 194 | impl From> for Vec 195 | where 196 | Buf: crate::Buf + ?Sized, 197 | { 198 | fn from(y: YarnRef) -> Self { 199 | y.to_byte_vec() 200 | } 201 | } 202 | 203 | impl From> for Box { 204 | fn from(y: YarnBox) -> Self { 205 | y.into_boxed_str() 206 | } 207 | } 208 | 209 | impl From> for Box { 210 | fn from(y: YarnRef) -> Self { 211 | y.to_boxed_str() 212 | } 213 | } 214 | 215 | impl From> for String { 216 | fn from(y: YarnBox) -> Self { 217 | y.into_string() 218 | } 219 | } 220 | 221 | impl From> for String { 222 | fn from(y: YarnRef) -> Self { 223 | y.to_string() 224 | } 225 | } 226 | 227 | // AsRef / Borrow 228 | 229 | impl AsRef for YarnBox<'_, Buf> 230 | where 231 | Buf: crate::Buf + ?Sized, 232 | { 233 | fn as_ref(&self) -> &Buf { 234 | self.as_slice() 235 | } 236 | } 237 | 238 | impl AsRef for YarnRef<'_, Buf> 239 | where 240 | Buf: crate::Buf + ?Sized, 241 | { 242 | fn as_ref(&self) -> &Buf { 243 | self.as_slice() 244 | } 245 | } 246 | 247 | impl Borrow for YarnBox<'_, Buf> 248 | where 249 | Buf: crate::Buf + ?Sized, 250 | { 251 | fn borrow(&self) -> &Buf { 252 | self.as_slice() 253 | } 254 | } 255 | 256 | impl Borrow for YarnRef<'_, Buf> 257 | where 258 | Buf: crate::Buf + ?Sized, 259 | { 260 | fn borrow(&self) -> &Buf { 261 | self.as_slice() 262 | } 263 | } 264 | -------------------------------------------------------------------------------- /byteyarn/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `byteyarn` - Space-efficient byte strings 🧶🐈‍⬛ 2 | //! 3 | //! A [`Yarn`] is a highly optimized string type that provides a number of 4 | //! useful properties over [`String`]: 5 | //! 6 | //! 
* Always two pointers wide, so it is always passed into and out of functions 7 | //! in registers. 8 | //! * Small string optimization (SSO) up to 15 bytes on 64-bit architectures. 9 | //! * Can be either an owned buffer or a borrowed buffer (like [`Cow`]). 10 | //! * Can be upcast to `'static` lifetime if it was constructed from a 11 | //! known-static string. 12 | //! * `Option` has the same size and ABI as `Yarn`. 13 | //! 14 | //! The main caveat is that [`Yarn`]s cannot be easily appended to, since they 15 | //! do not track an internal capacity, and the slice returned by 16 | //! [`Yarn::as_slice()`] does not have the same pointer stability properties as 17 | //! [`String`] (these are rarely needed, though). 18 | //! 19 | //! --- 20 | //! 21 | //! Yarns are useful for situations in which a copy-on-write string is necessary 22 | //! and most of the strings are relatively small. Although [`Yarn`] itself is 23 | //! not [`Copy`], there is a separate [`YarnRef`] type that is. These types 24 | //! have equivalent representations, and can be cheaply cast between each other. 25 | //! 26 | //! The easiest way to create a yarn is with the [`yarn!()`] 27 | //! macro, which is similar to [`format!()`]. 28 | //! 29 | //! ``` 30 | //! # use byteyarn::*; 31 | //! // Create a new yarn via `fmt`ing. 32 | //! let yarn = yarn!("Answer: {}", 42); 33 | //! 34 | //! // Convert that yarn into a reference. 35 | //! let ry: YarnRef = yarn.as_ref(); 36 | //! 37 | //! // Try up-casting the yarn into an "immortal yarn" without copying. 38 | //! let copy: YarnRef<'static, str> = ry.immortalize().unwrap(); 39 | //! 40 | //! assert_eq!(yarn, copy); 41 | //! ``` 42 | //! 43 | //! Yarns are intended for storing text, either as UTF-8 or as 44 | //! probably-UTF-8 bytes; [`Yarn`] and [`Yarn<[u8]>`] serve these purposes, 45 | //! and can be inter-converted with each other. The [`Yarn::utf8_chunks()`] 46 | //! function can be used to iterate over definitely-valid-UTF-8 chunks within 47 | //! 
a string. 48 | //! 49 | //! Both kinds of yarns can be `Debug`ed and `Display`ed, and will print out as 50 | //! strings would. In particular, invalid UTF-8 is converted into either `\xNN` 51 | //! escapes or replacement characters (for `Debug` and `Display` respectively). 52 | //! 53 | //! ``` 54 | //! # use byteyarn::*; 55 | //! let invalid = ByteYarn::from_byte(0xff); 56 | //! assert_eq!(format!("{invalid:?}"), r#""\xFF""#); 57 | //! assert_eq!(format!("{invalid}"), "�"); 58 | //! ``` 59 | //! 60 | //! That said, they will support anything that implements the [`Buf`] trait. 61 | //! For example, you can have 16-bit yarns: 62 | //! 63 | //! ``` 64 | //! # use byteyarn::*; 65 | //! 66 | //! let sixteen = YarnBox::<[u16]>::from([1, 2, 3, 4, 5, 6, 8, 9, 10, 11]); 67 | //! assert_eq!(sixteen[2], 3u16); 68 | //! ``` 69 | 70 | #![deny(missing_docs)] 71 | 72 | #[cfg(doc)] 73 | use std::borrow::Cow; 74 | 75 | mod boxed; 76 | mod convert; 77 | mod raw; 78 | mod reffed; 79 | mod utf8; 80 | 81 | pub use boxed::YarnBox; 82 | pub use reffed::YarnRef; 83 | pub use utf8::Utf8Chunks; 84 | 85 | pub use buf_trait::Buf; 86 | 87 | // Macro stuff. 88 | #[doc(hidden)] 89 | pub mod m { 90 | pub extern crate std; 91 | } 92 | 93 | /// An optimized Unicode string. 94 | /// 95 | /// See [`YarnBox`] for full type documentation. 96 | pub type Yarn = YarnBox<'static, str>; 97 | 98 | /// An optimized raw byte string. 99 | /// 100 | /// See [`YarnBox`] for full type documentation. 101 | pub type ByteYarn = YarnBox<'static, [u8]>; 102 | 103 | /// Similar to [`format!()`], but returns a [`Yarn`], instead. 104 | /// 105 | /// This macro calls out to [`Yarn::from_fmt()`] internally. 106 | #[macro_export] 107 | macro_rules! 
yarn { 108 | ($($args:tt)*) => { 109 | $crate::Yarn::from_fmt($crate::m::std::format_args!($($args)*)) 110 | }; 111 | } 112 | -------------------------------------------------------------------------------- /byteyarn/src/utf8.rs: -------------------------------------------------------------------------------- 1 | //! UTF-8 utilities not provided by the standard library. 2 | 3 | use std::str; 4 | 5 | #[cfg(doc)] 6 | use crate::*; 7 | 8 | /// An iterator over UTF-8 chunks in a byte buffer. 9 | /// 10 | /// Any time non-UTF-8 bytes are encountered, they are returned as `Err`s 11 | /// from the iterator. 12 | /// 13 | /// See [`Yarn::utf8_chunks()`]. 14 | #[derive(Copy, Clone)] 15 | pub struct Utf8Chunks<'a> { 16 | buf: &'a [u8], 17 | invalid_prefix: Option, 18 | } 19 | 20 | impl<'a> Utf8Chunks<'a> { 21 | /// Returns the rest of the underlying byte buffer that has not been yielded. 22 | pub fn rest(self) -> &'a [u8] { 23 | self.buf 24 | } 25 | 26 | pub(crate) fn new(buf: &'a [u8]) -> Self { 27 | Self { buf, invalid_prefix: None } 28 | } 29 | 30 | unsafe fn take(&mut self, len: usize) -> &'a [u8] { 31 | debug_assert!(len <= self.buf.len()); 32 | 33 | let pre = self.buf.get_unchecked(..len); 34 | self.buf = self.buf.get_unchecked(len..); 35 | pre 36 | } 37 | } 38 | 39 | impl<'a> Iterator for Utf8Chunks<'a> { 40 | type Item = Result<&'a str, &'a [u8]>; 41 | 42 | fn next(&mut self) -> Option { 43 | if let Some(prefix) = self.invalid_prefix.take() { 44 | let bytes = unsafe { 45 | // SAFETY: self.invalid_prefix is only ever written to in this function, 46 | // where it gets set to a value that is known to be in-range. 47 | self.take(prefix) 48 | }; 49 | 50 | return Some(Err(bytes)); 51 | } 52 | 53 | if self.buf.is_empty() { 54 | return None; 55 | } 56 | 57 | let utf8 = match str::from_utf8(self.buf) { 58 | Ok(utf8) => { 59 | self.buf = &[]; 60 | utf8 61 | } 62 | Err(e) => { 63 | let bytes = unsafe { 64 | // SAFETY: valid_up_to() always returns a value in range of self.buf. 
65 | self.take(e.valid_up_to()) 66 | }; 67 | 68 | let utf8 = match cfg!(debug_assertions) { 69 | true => str::from_utf8(bytes).unwrap(), 70 | 71 | // SAFETY: the value of valid_up_to() delimits valid UTF-8, by 72 | // definition. 73 | false => unsafe { str::from_utf8_unchecked(bytes) }, 74 | }; 75 | 76 | self.invalid_prefix = match e.error_len() { 77 | Some(len) => Some(len), 78 | None => Some(self.buf.len()), 79 | }; 80 | 81 | if utf8.is_empty() { 82 | return self.next(); 83 | } 84 | 85 | utf8 86 | } 87 | }; 88 | 89 | Some(Ok(utf8)) 90 | } 91 | } 92 | 93 | /// `const`-enabled UTF-8 encoding. 94 | /// 95 | /// Returns the encoded bytes in a static array, and the number of those bytes 96 | /// that are pertinent. 97 | pub const fn encode_utf8(c: char) -> ([u8; 4], usize) { 98 | const CONT: u8 = 0b1000_0000; 99 | const CONT_MASK: u8 = !CONT >> 1; 100 | 101 | const B1: u8 = 0b0000_0000; 102 | const B1_MASK: u8 = !B1 >> 1; 103 | 104 | const B2: u8 = 0b1100_0000; 105 | const B2_MASK: u8 = !B2 >> 1; 106 | 107 | const B3: u8 = 0b1110_0000; 108 | const B3_MASK: u8 = !B3 >> 1; 109 | 110 | const B4: u8 = 0b1111_0000; 111 | const B4_MASK: u8 = !B4 >> 1; 112 | 113 | const fn sextet(c: char, idx: u32) -> u8 { 114 | ((c as u32) >> (idx * 6)) as u8 115 | } 116 | 117 | match c.len_utf8() { 118 | 1 => ([sextet(c, 0) & B1_MASK | B1, 0, 0, 0], 1), 119 | 2 => { 120 | ([sextet(c, 1) & B2_MASK | B2, sextet(c, 0) & CONT_MASK | CONT, 0, 0], 2) 121 | } 122 | 3 => ( 123 | [ 124 | sextet(c, 2) & B3_MASK | B3, 125 | sextet(c, 1) & CONT_MASK | CONT, 126 | sextet(c, 0) & CONT_MASK | CONT, 127 | 0, 128 | ], 129 | 3, 130 | ), 131 | 4 => ( 132 | [ 133 | sextet(c, 3) & B4_MASK | B4, 134 | sextet(c, 2) & CONT_MASK | CONT, 135 | sextet(c, 1) & CONT_MASK | CONT, 136 | sextet(c, 0) & CONT_MASK | CONT, 137 | ], 138 | 4, 139 | ), 140 | _ => unreachable!(), 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /gilded/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "gilded" 3 | version = "0.1.0" 4 | description = "Dead simple golden tests" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | gilded-attr = { path = "attr" } 15 | 16 | allman = { path = "../allman" } 17 | byteyarn = { path = "../byteyarn" } 18 | 19 | camino = "1.1.9" 20 | diffy = "0.4.0" 21 | nu-glob = "0.101.0" 22 | unicode-width = "0.2.0" 23 | -------------------------------------------------------------------------------- /gilded/README.md: -------------------------------------------------------------------------------- 1 | # gilded 2 | 3 | `gilded` - Easy-peesy golden testing. 👑 4 | 5 | ## Why Golden Testing? 6 | 7 | A "golden test" is a test that transforms data in some way, and validates it 8 | by diffing it against an expected result: the "golden". 9 | 10 | This is especially useful for testing scenarios that consume an input file 11 | (say, a source code file, for testing a compiler) and generate structured, 12 | diffable textual output (such as JSON or CSV data, or even a `Debug`). 13 | 14 | Golden tests are best for cases where the output must be deterministic, and 15 | where capturing fine-grained detail is valuable. 16 | 17 | Because they simply compare the result to an expected value byte-for-byte, 18 | changes can quickly regenerate the test output by using the output of the 19 | test itself. Diffs can be examined in code review directly. 20 | 21 | This crate also provides the `doc::Doc` type, enabling quick-and-dirty 22 | construction of highly readable structured tree data for golden outputs. 23 | 24 | ## Defining a Test 25 | 26 | A `gilded` test is defined like so: 27 | 28 | ```rust 29 | #[gilded::test("testdata/**/*.txt")] 30 | fn my_test(test: &gilded::Test) { 31 | // ... 
32 | } 33 | ``` 34 | 35 | `my_test` will be run as a separate unit test for every file (relative to 36 | the crate root) which matches the glob passed to the attribute. The input 37 | file's path and contents can be accessed through the `Test` accessors. 38 | 39 | To specify golden outputs, use `Test::outputs()`. This specifies the 40 | file extension for the golden, and its computed contents. The extension is 41 | used to construct the path of the result. If the input is `foo/bar.txt`, and 42 | the extension for this output is `csv`, the output will be read/written to 43 | `foo/bar.csv`. 44 | 45 | Panicking within the test body will fail the test as normal, tests should 46 | not contain output assertions; those are handled by the framework. 47 | 48 | ## Generating Goldens 49 | 50 | Once the test is created, simply set the `GILDED_REGENERATE` environment 51 | variable: `GILDED_REGENERATE=1 cargo test`. 52 | 53 | To regenerate a specific test, simply pass its name as a filter to the test. 54 | See `cargo test -- --help` for available flags.` 55 | 56 | Regenerating goldens will cause a `GILDED_CHANGED` file to be crated at the 57 | crate root, which will cause all `gilded` tests in the crate to fail until 58 | it is deleted. Deleting it forces the user to acknowledge that goldens have 59 | been regenerated, to avoid blindly committing them. 60 | 61 | ## Known Issues 62 | 63 | Golden tests can run under MIRI but have extremely large overhead. For the 64 | time being, they are `#[cfg]`'d out in MIRI mode. 
65 | -------------------------------------------------------------------------------- /gilded/attr/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "gilded-attr" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | proc2decl = { path = "../../proc2decl" } 8 | 9 | [lib] 10 | path = "lib.rs" 11 | proc-macro = true -------------------------------------------------------------------------------- /gilded/attr/lib.rs: -------------------------------------------------------------------------------- 1 | //! Implementation detail of `gilded`. 2 | 3 | proc2decl::fs_bridge! { 4 | /// Turns a function into a golden test suite. 5 | /// 6 | /// See the [crate documentation][crate] for more information on how to use 7 | /// this attribute. 8 | /// 9 | /// [crate]: https://docs.rs/gilded 10 | macro #[test] => gilded::__test__; 11 | } 12 | -------------------------------------------------------------------------------- /gilded/src/doc/json.rs: -------------------------------------------------------------------------------- 1 | //! Output implementation for JSON. 
2 | 3 | use std::fmt; 4 | 5 | use allman::If; 6 | use allman::Tag; 7 | use byteyarn::YarnRef; 8 | 9 | use crate::doc::Doc; 10 | use crate::doc::Elem; 11 | use crate::doc::Options; 12 | 13 | pub fn build<'t>(options: &Options, doc: &Doc<'t>, out: &mut allman::Doc<'t>) { 14 | let is_array = doc.entries.iter().all(|(k, _)| k.is_none()); 15 | if is_array { 16 | out.tag_with(Tag::Group(options.max_array_width), |out| { 17 | out 18 | .tag("[") 19 | .tag_with(Tag::Indent(options.tab_width as isize), |out| { 20 | for (i, (_, entry)) in doc.entries.iter().enumerate() { 21 | if i > 0 { 22 | out.tag(","); 23 | out.tag_if(Tag::Space, If::Flat); 24 | } 25 | out.tag_if("\n", If::Broken); 26 | value(options, entry, out); 27 | } 28 | }) 29 | .tag_if("\n", If::Broken) 30 | .tag("]"); 31 | }); 32 | } else { 33 | out.tag_with(Tag::Group(options.max_object_width), |out| { 34 | out 35 | .tag("{") 36 | .tag_with(Tag::Indent(options.tab_width as isize), |out| { 37 | for (i, (key, entry)) in doc.entries.iter().enumerate() { 38 | if i > 0 { 39 | out.tag(","); 40 | out.tag_if(Tag::Space, If::Flat); 41 | } 42 | out 43 | .tag_if("\n", If::Broken) 44 | .tag( 45 | Escape(key.as_deref().unwrap_or_default().as_bytes()) 46 | .to_string(), 47 | ) 48 | .tag(":") 49 | .tag(Tag::Space); 50 | value(options, entry, out); 51 | } 52 | }) 53 | .tag_if("\n", If::Broken) 54 | .tag("}"); 55 | }); 56 | } 57 | } 58 | 59 | fn value<'t>(options: &Options, v: &Elem<'t>, out: &mut allman::Doc<'t>) { 60 | match v { 61 | Elem::Bool(v) => { 62 | out.tag(v.to_string()); 63 | } 64 | Elem::Int(v) => { 65 | out.tag(v.to_string()); 66 | } 67 | Elem::UInt(v) => { 68 | out.tag(v.to_string()); 69 | } 70 | Elem::Fp(v) => { 71 | out.tag(v.to_string()); 72 | } 73 | Elem::String(v) => { 74 | out.tag(Escape(v).to_string()); 75 | } 76 | Elem::Doc(v) => build(options, v, out), 77 | } 78 | } 79 | 80 | /// A displayable that prints the given data as a JSON string. 
81 | pub struct Escape<'a>(&'a [u8]); 82 | 83 | impl fmt::Display for Escape<'_> { 84 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 85 | write!(f, "\"")?; 86 | for chunk in YarnRef::new(self.0).utf8_chunks() { 87 | let chunk = match chunk { 88 | Ok(s) => s, 89 | Err(e) => { 90 | for b in e { 91 | write!(f, "<{b:02x}>")?; 92 | } 93 | continue; 94 | } 95 | }; 96 | 97 | for c in chunk.chars() { 98 | match c { 99 | '\n' => write!(f, "\\n")?, 100 | '\r' => write!(f, "\\r")?, 101 | '\t' => write!(f, "\\t")?, 102 | '\\' => write!(f, "\\\\")?, 103 | '\"' => write!(f, "\\\"")?, 104 | c if !c.is_control() => write!(f, "{c}")?, 105 | c => { 106 | for u in c.encode_utf16(&mut [0, 0]) { 107 | write!(f, "\\u{u:04x}")?; 108 | } 109 | } 110 | } 111 | } 112 | } 113 | 114 | write!(f, "\"") 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /gilded/src/doc/mod.rs: -------------------------------------------------------------------------------- 1 | //! Readable test output generating from tree-structured data. 2 | 3 | use std::io; 4 | use std::io::Write; 5 | 6 | use byteyarn::YarnBox; 7 | 8 | mod json; 9 | mod yaml; 10 | 11 | /// A tree-shaped document that can be pretty-printed, for generating goldens. 12 | /// 13 | /// Golden tests that output tree-shaped data can use `Doc` to generate 14 | /// diff-friendly, readable output. 15 | #[derive(Clone)] 16 | pub struct Doc<'a> { 17 | entries: Vec<(Option>, Elem<'a>)>, 18 | } 19 | 20 | /// The format output to use when rendering a document. 21 | #[derive(Clone, Copy, PartialEq, Eq, Debug)] 22 | pub enum Format { 23 | /// Output as YAML. 24 | Yaml, 25 | /// Output as JSON. 26 | Json, 27 | } 28 | 29 | impl Default for Format { 30 | fn default() -> Self { 31 | Self::Yaml 32 | } 33 | } 34 | 35 | /// Options for rendering a [`Doc`] as a string. 36 | pub struct Options { 37 | // The format to output in; defaults to YAML. 
38 | pub format: Format, 39 | // The number of spaces to use for indentation. 40 | pub tab_width: usize, 41 | 42 | // The maximum number of columns to have before wrapping occurs. 43 | pub max_columns: usize, 44 | // The maximum number of columns for a one-line array. 45 | pub max_array_width: usize, 46 | // The maximum number of columns for a one-line object. 47 | pub max_object_width: usize, 48 | } 49 | 50 | impl Default for Options { 51 | fn default() -> Self { 52 | Self { 53 | format: Format::default(), 54 | tab_width: 2, 55 | max_columns: 80, 56 | max_array_width: 50, 57 | max_object_width: 40, 58 | } 59 | } 60 | } 61 | 62 | /// A type which can be an element of a [`Doc`]. 63 | /// 64 | /// All of the primitive number types and types which convert to `YarnBox<[u8]>` 65 | /// can be used as `Doc` values. `Option` for `T: DocValue` can also be 66 | /// used, and will only be inserted if it is `Some`. 67 | pub trait Value<'a> { 68 | fn append_to(self, doc: &mut Doc<'a>); 69 | } 70 | 71 | impl<'a> Doc<'a> { 72 | /// Returns a new, empty `Doc`. 73 | pub fn new() -> Self { 74 | Self { entries: Vec::new() } 75 | } 76 | 77 | /// Returns a new `Doc` with a single entry. 78 | pub fn single( 79 | name: impl Into>, 80 | value: impl Value<'a>, 81 | ) -> Self { 82 | Self::new().entry(name, value) 83 | } 84 | 85 | /// Appends a sequence of values to this document. 86 | pub fn push(mut self, elements: impl IntoIterator>) -> Self { 87 | for e in elements { 88 | e.append_to(&mut self); 89 | } 90 | self 91 | } 92 | 93 | /// Appends an entry with the given name to this document. 94 | pub fn entry( 95 | mut self, 96 | name: impl Into>, 97 | value: impl Value<'a>, 98 | ) -> Self { 99 | let prev = self.entries.len(); 100 | value.append_to(&mut self); 101 | if prev < self.entries.len() { 102 | self.entries.last_mut().unwrap().0 = Some(name.into()); 103 | } 104 | self 105 | } 106 | 107 | /// Appends an entry which is an array with the given elements. 
108 | pub fn array( 109 | self, 110 | name: impl Into>, 111 | elements: impl IntoIterator>, 112 | ) -> Self { 113 | self.entry(name, Self::new().push(elements)) 114 | } 115 | 116 | // Converts this document into a string, using the given options. 117 | pub fn to_string(&self, options: &Options) -> String { 118 | let mut out = Vec::new(); 119 | let _ = self.render(&mut out, options); 120 | String::from_utf8(out).unwrap() 121 | } 122 | 123 | /// Converts this document into a string, writing it to the given output with 124 | /// the given options. 125 | pub fn render( 126 | &self, 127 | out: &mut dyn Write, 128 | options: &Options, 129 | ) -> io::Result<()> { 130 | let mut doc = allman::Doc::new(); 131 | 132 | match options.format { 133 | Format::Yaml => yaml::build( 134 | yaml::Args { options, root: true, in_list: false }, 135 | self, 136 | &mut doc, 137 | ), 138 | Format::Json => json::build(options, self, &mut doc), 139 | } 140 | 141 | doc.render(out, &allman::Options { max_columns: options.max_columns }) 142 | } 143 | } 144 | 145 | impl Default for Doc<'_> { 146 | fn default() -> Self { 147 | Self::new() 148 | } 149 | } 150 | 151 | #[derive(Clone)] 152 | enum Elem<'a> { 153 | Bool(bool), 154 | Int(i128), 155 | UInt(u128), 156 | Fp(f64), 157 | String(YarnBox<'a>), 158 | Doc(Doc<'a>), 159 | } 160 | 161 | impl<'a, T: Value<'a>> Value<'a> for Option { 162 | fn append_to(self, doc: &mut Doc<'a>) { 163 | if let Some(v) = self { 164 | v.append_to(doc) 165 | } 166 | } 167 | } 168 | impl<'a> Value<'a> for Doc<'a> { 169 | fn append_to(self, doc: &mut Doc<'a>) { 170 | doc.entries.push((None, Elem::Doc(self))) 171 | } 172 | } 173 | 174 | macro_rules! impl_from { 175 | ($({$($T:ty),*} => $V:ident,)*) => {$($( 176 | impl<'a> Value<'a> for $T { 177 | fn append_to(self, doc: &mut Doc<'a>) { 178 | doc.entries.push((None, Elem::$V(self as _))) 179 | } 180 | } 181 | )*)*} 182 | } 183 | 184 | impl_from! 
{ 185 | {bool} => Bool, 186 | {i8, i16, i32, i64, i128, isize} => Int, 187 | {u8, u16, u32, u64, u128, usize} => UInt, 188 | {f32, f64} => Fp, 189 | } 190 | 191 | macro_rules! impl_from_yarn { 192 | ($(for<$lt:lifetime> $($T:ty),* => $U:ty,)*) => {$($( 193 | impl<$lt> Value<$lt> for $T { 194 | fn append_to(self, doc: &mut Doc<$lt>) { 195 | doc.entries.push((None, Elem::String(<$U>::from(self).into_bytes()))) 196 | } 197 | } 198 | )*)*} 199 | } 200 | 201 | impl_from_yarn! { 202 | for<'a> &'a [u8], Vec, YarnBox<'a, [u8]> => YarnBox<'a, [u8]>, 203 | for<'a> char, &'a str, String, YarnBox<'a, str> => YarnBox<'a, str>, 204 | } 205 | -------------------------------------------------------------------------------- /gilded/src/doc/yaml.rs: -------------------------------------------------------------------------------- 1 | //! Output implementation for YAML. 2 | 3 | use std::fmt; 4 | 5 | use allman::If; 6 | use allman::Tag; 7 | use byteyarn::YarnRef; 8 | 9 | use crate::doc::Doc; 10 | use crate::doc::Elem; 11 | use crate::doc::Options; 12 | 13 | pub struct Args<'a> { 14 | pub root: bool, 15 | pub in_list: bool, 16 | pub options: &'a Options, 17 | } 18 | 19 | pub fn build<'t>(args: Args, doc: &'t Doc<'t>, out: &mut allman::Doc<'t>) { 20 | let is_array = doc.entries.iter().all(|(k, _)| k.is_none()); 21 | if is_array { 22 | out.tag_with(Tag::Group(args.options.max_array_width), |out| { 23 | out.tag_if("[", If::Flat); 24 | if !args.root { 25 | out.tag_if(Tag::Break(1), If::Broken); 26 | } 27 | for (i, (_, entry)) in doc.entries.iter().enumerate() { 28 | if i > 0 { 29 | out.tag_if(",", If::Flat); 30 | out.tag_if(Tag::Space, If::Flat); 31 | } 32 | 33 | out.tag_if("-", If::Broken); 34 | out.tag_if(Tag::Space, If::Broken); 35 | //out.tag_with(Tag::Indent(args.options.tab_width as isize), |out| { 36 | value(Args { root: false, in_list: true, ..args }, entry, out); 37 | //}); 38 | 39 | out.tag_if(Tag::Break(1), If::Broken); 40 | } 41 | out.tag_if("]", If::Flat); 42 | }); 43 | } else 
{ 44 | out.tag_with(Tag::Group(args.options.max_object_width), |out| { 45 | let in_map = !args.root && !args.in_list; 46 | if in_map { 47 | out.tag_if(Tag::Break(1), If::Broken); 48 | } 49 | out 50 | .tag_if("{", If::Flat) 51 | .tag_with(Tag::Indent(args.options.tab_width as isize), |out| { 52 | for (i, (key, entry)) in doc.entries.iter().enumerate() { 53 | if i > 0 { 54 | out.tag_if(",", If::Flat); 55 | out.tag_if(Tag::Space, If::Flat); 56 | } 57 | 58 | let key_bytes = key.as_deref().unwrap_or_default().as_bytes(); 59 | let ident = is_ident(key_bytes); 60 | 61 | if let Some(ident) = ident { 62 | out.tag(ident.to_box()); 63 | 64 | let mut entry = entry; 65 | while let Elem::Doc(d) = entry { 66 | let [(Some(k), v)] = d.entries.as_slice() else { break }; 67 | let Some(ident) = is_ident(k.as_bytes()) else { break }; 68 | 69 | out.tag(".").tag(ident.to_box()); 70 | entry = v; 71 | } 72 | } else { 73 | out.tag(Escape(key_bytes).to_string()); 74 | } 75 | out.tag(":").tag(Tag::Space); 76 | 77 | value(Args { root: false, in_list: false, ..args }, entry, out); 78 | out.tag_if(Tag::Break(1), If::Broken); 79 | } 80 | }) 81 | .tag_if("}", If::Flat); 82 | }); 83 | } 84 | } 85 | 86 | fn value<'t>(args: Args, v: &'t Elem<'t>, out: &mut allman::Doc<'t>) { 87 | match v { 88 | Elem::Bool(v) => { 89 | out.tag(v.to_string()); 90 | } 91 | Elem::Int(v) => { 92 | out.tag(v.to_string()); 93 | } 94 | Elem::UInt(v) => { 95 | out.tag(v.to_string()); 96 | } 97 | Elem::Fp(v) => { 98 | out.tag(v.to_string()); 99 | } 100 | Elem::String(v) => { 101 | if is_raw_string(v.as_ref()) { 102 | out.tag("|").tag(Tag::Break(1)).tag_with( 103 | Tag::Indent(args.options.tab_width as isize), 104 | |out| { 105 | out.tag(v.as_ref().to_utf8().unwrap().to_box()); 106 | }, 107 | ); 108 | return; 109 | } 110 | out.tag(Escape(v).to_string()); 111 | } 112 | Elem::Doc(v) => build(args, v, out), 113 | } 114 | } 115 | 116 | /// A displayable that prints the given data as a JSON string. 
117 | pub struct Escape<'a>(&'a [u8]); 118 | 119 | impl fmt::Display for Escape<'_> { 120 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 121 | write!(f, "\"")?; 122 | for chunk in YarnRef::new(self.0).utf8_chunks() { 123 | let chunk = match chunk { 124 | Ok(s) => s, 125 | Err(e) => { 126 | for b in e { 127 | write!(f, "\\x{b:02x}")?; 128 | } 129 | continue; 130 | } 131 | }; 132 | 133 | for c in chunk.chars() { 134 | match c { 135 | '\0' => write!(f, "\\0")?, 136 | '\n' => write!(f, "\\n")?, 137 | '\r' => write!(f, "\\r")?, 138 | '\t' => write!(f, "\\t")?, 139 | '\\' => write!(f, "\\\\")?, 140 | '\"' => write!(f, "\\\"")?, 141 | c if !c.is_control() => write!(f, "{c}")?, 142 | c => { 143 | for u in c.encode_utf16(&mut [0, 0]) { 144 | write!(f, "\\u{u:04x}")?; 145 | } 146 | } 147 | } 148 | } 149 | } 150 | 151 | write!(f, "\"") 152 | } 153 | } 154 | 155 | fn is_raw_string(data: YarnRef<[u8]>) -> bool { 156 | data.to_utf8().is_ok_and(|s| { 157 | s.contains("\n") && s.chars().all(|c| c == '\n' || !c.is_control()) 158 | }) 159 | } 160 | 161 | fn is_ident(data: &[u8]) -> Option> { 162 | fn is_start(c: char) -> bool { 163 | c.is_alphabetic() || c == '_' || c == '-' 164 | } 165 | fn is_continue(c: char) -> bool { 166 | is_start(c) || c.is_numeric() 167 | } 168 | 169 | let s = YarnRef::from(data).to_utf8().ok()?; 170 | 171 | let mut chars = s.chars(); 172 | let is_ident = chars.next().is_some_and(is_start) && chars.all(is_continue); 173 | is_ident.then_some(s) 174 | } 175 | -------------------------------------------------------------------------------- /ilex/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ilex" 3 | version = "0.6.0" 4 | description = "quick and easy lexers for C-like languages" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 
13 | [dependencies] 14 | byteyarn = { version = "0.5", path = "../byteyarn" } 15 | gilded = { path = "../gilded" } 16 | twie = { version = "0.5", path = "../twie" } 17 | 18 | ilex-attr = { version = "0.5.0", path = "attr" } 19 | 20 | annotate-snippets = "0.10.0" 21 | camino = "1.1.6" 22 | num-traits = "0.2.17" 23 | regex-syntax = "0.8.2" 24 | regex-automata = "0.4.3" # Bless Andrew for his patience. 25 | rustc_apfloat = "0.2.0" # By eddyb's recommendation. 26 | unicode-xid = "0.2.4" 27 | bitvec = "1.0.1" 28 | -------------------------------------------------------------------------------- /ilex/README.md: -------------------------------------------------------------------------------- 1 | # ilex 2 | 3 | `ilex` - painless lexing for C-like languages. ⛩️🎋 4 | 5 | This crate provides a general lexer for a "C-like language", also sometimes 6 | called a "curly brace language". It is highly configurable and has comprehensive 7 | [`Span`] support. This library is based off of a specific parser stack I have 8 | copied from project to project and re-written verbatim many times over in my 9 | career. 10 | 11 | Internally it uses lazy DFAs from [`regex_automata`] for much of the 12 | heavy-lifting, so it should be reasonably performant, although speed is not a 13 | priority. 14 | 15 | The goals of this library are as follows. 16 | 17 | - **Predictably greedy.** Always parse the longest token at any particular 18 | position, with user-defined disambiguation between same-length tokens. 19 | 20 | - **Easy to set up.** Writing lexers is a bunch of pain, and they all look the 21 | same more-or-less, and you want to be "in and out". 22 | 23 | - **Flexible.** It can lex a reasonably large number of grammars. It should be 24 | able to do any language with a cursory resemblance to C, such as Rust, 25 | JavaScript (and JSON), LLVM IR, Go, Protobuf, Perl, and so on. 26 | 27 | - Some exotic lexemes are not supported. 
This includes Python and YAML 28 | significant whitespace, user-defined operators that mess with the lexer like 29 | in Haskell, and ALGOL-style `end` when there isn't a clear pair of tokens to 30 | lex as a pair of open/close delimiters (Ruby has this problem). 31 | 32 | - **Unicode support.** This means that e.g. `エルフーン` is an identifier by 33 | default. ASCII-only filters exist for backwards compatibility with old stuff. 34 | `ilex` will only support UTF-8-encoded input files, and always uses the 35 | Unicode definition of whitespace for delimiting tokens, not just ASCII 36 | whitespace (`" \t\n\t"`). 37 | 38 | - **Diagnostics and spans.** The lexer should be able to generate pretty good 39 | diagnostics, and this API is exposed for tools built on top of the lexer to 40 | emit diagnostics. Spans are interned automatically. 41 | 42 | - Custom error recovery is hard, so I don't plan to support that. 43 | 44 | - **Token trees.** Token trees are a far better abstraction than token streams, 45 | because many LR(k) curly-brace languages become regular or close to regular if 46 | you decide that every pair of braces or parentheses with unknown contents is 47 | inside 48 | 49 | This library also provides basic software float support. You should _never_ 50 | convert user-provided text into hardware floats if you care about byte-for-byte 51 | portability. This library helps with that. 52 | 53 | ### Stability Ground Rules 54 | 55 | I have tried to define exactly how rules map onto the internal finite automata, 56 | but breaking changes happen! I will try not to break things across patch 57 | releases, but I can't promise perfect stability across even minor releases. 58 | 59 | Write good tests for your frontend and don't expose your `ilex` guts if you can. 60 | This will make it easier for you to just pin a version and avoid thinking about 61 | this problem. 62 | 63 | Diagnostics are completely unstable. Don't try to parse them, don't write golden 64 | tests against them. 
If you must, use [`testing::check_report()`] so that you can 65 | regenerate them. 66 | -------------------------------------------------------------------------------- /ilex/attr/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ilex-attr" 3 | version = "0.5.0" 4 | description = "attributes for the ilex crate" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [lib] 14 | path = "lib.rs" 15 | proc-macro = true 16 | 17 | [dependencies] 18 | proc2decl = { path = "../../proc2decl" } -------------------------------------------------------------------------------- /ilex/attr/lib.rs: -------------------------------------------------------------------------------- 1 | //! Implementation detail of `ilex`. 2 | 3 | use proc_macro::TokenStream; 4 | 5 | // This helper exists only to make the #[spec] field attributes inert. 6 | #[doc(hidden)] 7 | #[proc_macro_derive(derive_hack, attributes(named, rule))] 8 | pub fn derive(_: TokenStream) -> TokenStream { 9 | TokenStream::new() 10 | } 11 | 12 | proc2decl::bridge! { 13 | /// Generates a lexer spec struct. 14 | /// 15 | /// This macro generates the type of struct described in the 16 | /// [crate documentation][crate]. The syntax is as follows. 17 | /// 18 | /// ```ignore 19 | /// use ilex::rule::Keyword; 20 | /// use ilex::Lexeme; 21 | /// 22 | /// /// My cool spec. 23 | /// #[ilex::spec] 24 | /// struct MySpec { 25 | /// #[named("...")] 26 | /// #[rule(/* ... */)] 27 | /// dollar: Lexeme = "$", 28 | /// } 29 | /// ``` 30 | /// 31 | /// The type of each field must be a [`Lexeme`] with a [`Rule`] type as its 32 | /// parameter. There are two special attributes that can follow. 33 | /// 34 | /// - `#[named]` makes the rule into a *named* rule. 
This name can be used by 35 | /// diagnostics, and corresponds to calling `Spec::named_rule()`. 36 | /// 37 | /// - `#[rule]` is the value to use to construct the rule, which must be 38 | /// `Into`, where `R` is the type inside `Lexeme` (so, above, the rule 39 | /// value must be `Into`). By default, this value is the name of the 40 | /// rule, to make the common case of declaring a keyword as simple as writing 41 | /// `nullptr: Lexeme`, assuming Rust itself doesn't already use that 42 | /// keyword. 43 | /// 44 | /// Note that *order matters* for the fields: when breaking a tie between two 45 | /// potential tokens of the same length, the first one in the struct will win. 46 | /// In practice, this means you should put keywords before identifiers. 47 | /// 48 | /// Additionally, the following functions will be defined for the `MySpec` type. 49 | /// 50 | /// ``` 51 | /// # struct Spec; 52 | /// # struct MySpec; 53 | /// # fn norun(_: i32) { 54 | /// impl MySpec { 55 | /// /// Gets the global instance of this spec. 56 | /// pub fn get() -> &'static Self { 57 | /// // ... 58 | /// # todo!() 59 | /// } 60 | /// 61 | /// /// Gets the actual compiled spec. 62 | /// pub fn spec(&self) -> &Spec { 63 | /// // ... 64 | /// # todo!() 65 | /// } 66 | /// } 67 | /// # } 68 | /// ``` 69 | /// 70 | // God cross-trait links suck. 
71 | /// [`Lexeme`]: https://docs.rs/ilex/latest/ilex/struct.Lexeme.html 72 | /// [`Rule`]: https://docs.rs/ilex/latest/ilex/rule/trait.Rule.html 73 | /// [crate]: https://docs.rs/ilex 74 | macro #[spec] => ilex::__spec__; 75 | } 76 | -------------------------------------------------------------------------------- /ilex/src/file/context.rs: -------------------------------------------------------------------------------- 1 | use std::fs; 2 | use std::sync::Arc; 3 | use std::sync::RwLock; 4 | 5 | use camino::Utf8Path; 6 | 7 | use crate::f; 8 | use crate::file::File; 9 | use crate::file::CTX_FOR_SPAN_DEBUG; 10 | use crate::report; 11 | use crate::report::Fatal; 12 | use crate::report::Report; 13 | 14 | #[cfg(doc)] 15 | use crate::Span; 16 | 17 | /// A source context, which owns source code files. 18 | /// 19 | /// A `Context` contains the full text of all the loaded source files, which 20 | /// [`Span`]s ultimately refer to. 21 | #[derive(Default)] 22 | pub struct Context { 23 | state: Arc>, 24 | } 25 | 26 | #[derive(Default)] 27 | pub struct State { 28 | // Each file is laid out as the length of the text, followed by the text data, 29 | // followed by the path. 30 | // 31 | // TODO(mcyoung): Be smarter about this and use something something concurrent 32 | // vector? We don't need to have all this stuff behind a lock I think. 33 | files: Vec<(usize, String)>, 34 | } 35 | 36 | unsafe impl Send for Context {} 37 | unsafe impl Sync for Context {} 38 | 39 | impl Context { 40 | /// Creates a new source context. 41 | pub fn new() -> Self { 42 | Self::default() 43 | } 44 | 45 | pub(crate) fn copy(&self) -> Context { 46 | Self { state: self.state.clone() } 47 | } 48 | 49 | /// Sets this thread to use this [`Context`] in `fmt::Debug`. 50 | /// 51 | /// By default, `dbg!(some_span)` produces a string like `""`, since 52 | /// spans do not know what context they came from. 
This function sets a thread 53 | /// local that `` looks at when printing; this is useful 54 | /// for when dumping e.g. an AST when debugging. 55 | /// 56 | /// Returns an RAII type that undoes the effects of this function when leaving 57 | /// scope, so that if the caller also called this function, it doesn't get 58 | /// clobbered. 59 | #[must_use = "Context::use_for_debugging_spans() returns an RAII object"] 60 | pub fn use_for_debugging_spans(&self) -> impl Drop { 61 | struct Replacer(Option); 62 | impl Drop for Replacer { 63 | fn drop(&mut self) { 64 | CTX_FOR_SPAN_DEBUG.with(|v| *v.borrow_mut() = self.0.take()) 65 | } 66 | } 67 | 68 | Replacer(CTX_FOR_SPAN_DEBUG.with(|v| v.replace(Some(self.copy())))) 69 | } 70 | 71 | /// Creates a new [`Report`] based on this context. 72 | pub fn new_report(&self) -> Report { 73 | Report::new(self, Default::default()) 74 | } 75 | 76 | /// Creates a new [`Report`] based on this context, with the specified 77 | /// options. 78 | pub fn new_report_with(&self, options: report::Options) -> Report { 79 | Report::new(self, options) 80 | } 81 | 82 | /// Adds a new file to this source context. 83 | pub fn new_file<'a>( 84 | &self, 85 | path: impl Into<&'a Utf8Path>, 86 | text: impl Into, 87 | ) -> File { 88 | let mut text = text.into(); 89 | text.push(' '); // This space only exists to be somewhere for an EOF span 90 | // to point to in diagnostics; user code will never see 91 | // it. 92 | let len = text.len(); 93 | text.push_str(path.into().as_str()); 94 | 95 | let idx = { 96 | let mut state = self.state.write().unwrap(); 97 | state.files.push((len, text)); 98 | state.files.len() - 1 99 | }; 100 | 101 | self.file(idx).unwrap() 102 | } 103 | 104 | /// Adds a new file to this source context, validating that it is valid 105 | /// UTF-8. 
106 | pub fn new_file_from_bytes<'a>( 107 | &self, 108 | path: impl Into<&'a Utf8Path>, 109 | text: impl Into>, 110 | report: &Report, 111 | ) -> Result { 112 | let path = path.into(); 113 | let text = String::from_utf8(text.into()).map_err(|e| { 114 | let n = e.utf8_error().valid_up_to(); 115 | let b = e.as_bytes()[n]; 116 | 117 | report 118 | .error(f!("input file `{path}` was not valid UTF-8")) 119 | .note(f!("encountered non-UTF-8 byte {b:#02x} at offset {n}")); 120 | report.fatal().unwrap() 121 | })?; 122 | 123 | Ok(self.new_file(path, text)) 124 | } 125 | 126 | /// Adds a new file to this source context by opening `name` and reading it 127 | /// from the file system. 128 | pub fn open_file<'a>( 129 | &self, 130 | path: impl Into<&'a Utf8Path>, 131 | report: &Report, 132 | ) -> Result { 133 | let path = path.into(); 134 | 135 | let bytes = match fs::read(path) { 136 | Ok(bytes) => bytes, 137 | Err(e) => { 138 | report.error(f!("could not open input file `{path}`: {e}")); 139 | return report.fatal(); 140 | } 141 | }; 142 | 143 | self.new_file_from_bytes(path, bytes, report) 144 | } 145 | 146 | /// Gets the `idx`th file in this source context. 147 | pub fn file(&self, idx: usize) -> Option { 148 | let state = self.state.read().unwrap(); 149 | let (len, text) = state.files.get(idx)?; 150 | let text = unsafe { 151 | // SAFETY: The pointer to the file's text is immutable and pointer-stable, 152 | // so we can safely extend its lifetime here. 153 | &*(text.as_str() as *const str) 154 | }; 155 | 156 | Some(File { len: *len, text, ctx: self, idx }) 157 | } 158 | 159 | /// Gets the number of files currently tracked by this source context. 160 | pub fn file_count(&self) -> usize { 161 | self.state.read().unwrap().files.len() 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /ilex/src/ice.rs: -------------------------------------------------------------------------------- 1 | //! 
Helpers for working with internal compiler errors (ICEs). 2 | //! 3 | //! This module provides types and other things to make sure you can provide 4 | //! useful crash reports for your users. 5 | 6 | use std::backtrace::Backtrace; 7 | use std::backtrace::BacktraceStatus; 8 | use std::io; 9 | use std::panic; 10 | use std::panic::AssertUnwindSafe; 11 | use std::panic::PanicHookInfo; 12 | use std::panic::UnwindSafe; 13 | use std::sync::Mutex; 14 | use std::thread; 15 | 16 | use crate::f; 17 | use crate::file::Context; 18 | use crate::report::Report; 19 | 20 | /// Executes a "compiler main function". 21 | /// 22 | /// This function takes care of setting up a panic hook for us that will catch 23 | /// any [`Ice`]s for us. 24 | /// 25 | /// Generally, the way using this function would look is something like this: 26 | /// 27 | // Delete "should_panic" to see what the ICE errors look like. 28 | /// ```should_panic 29 | /// use ilex::ice; 30 | /// use ilex::report; 31 | /// 32 | /// fn compile(ctx: &mut ilex::Context) -> Result<(), report::Fatal> { 33 | /// panic!("its not done yet, im too busy writing a lexer library !!! 😡") 34 | /// } 35 | /// 36 | /// fn main() { 37 | /// let mut ctx = ilex::Context::new(); 38 | /// let report = ctx.new_report(); 39 | /// # let report = ctx.new_report_with(report::Options { 40 | /// # color: true, 41 | /// # show_report_locations: false, 42 | /// # }); 43 | /// 44 | /// let opts = ice::Options { 45 | /// what_panicked: Some("my test".to_string()), 46 | /// report_bugs_at: Some("https://github.com/mcy/strings/issues".into()), 47 | /// extra_notes: vec![format!("ilex {}", env!("CARGO_PKG_VERSION"))], 48 | /// ..ice::Options::default() 49 | /// }; 50 | /// 51 | /// let result = ice::handle(&mut ctx, &report, opts, |ctx| { 52 | /// // Business logic that may panic. 
53 | /// compile(ctx) 54 | /// }); 55 | /// 56 | /// if let Err(fatal) = result { 57 | /// fatal.terminate(); 58 | /// } 59 | /// } 60 | /// ``` 61 | #[allow(clippy::needless_doctest_main)] 62 | pub fn handle( 63 | ctx: &mut Context, 64 | report: &Report, 65 | options: Options, 66 | callback: Cb, 67 | ) -> R 68 | where 69 | Cb: FnOnce(&mut Context) -> R, 70 | Cb: UnwindSafe, 71 | { 72 | static ICE: Mutex> = Mutex::new(None); 73 | 74 | let options2 = options.clone(); 75 | panic::set_hook(Box::new(move |panic| { 76 | // We currently generate ICEs from any thread. It may be useful to mark 77 | // threads that will catch their panics? Unclear. 78 | 79 | // Generate an ICE and save it for later, if this panic actually makes it 80 | // out to the main function. 81 | *ICE.lock().unwrap() = Some(Ice::generate(panic, options2.clone())); 82 | })); 83 | 84 | panic::catch_unwind(AssertUnwindSafe(|| callback(ctx))).unwrap_or_else(|e| { 85 | let ice = ICE 86 | .lock() 87 | .unwrap() 88 | .take() 89 | .unwrap_or_else(|| Ice::with_no_context(options)); 90 | ice.report(report); 91 | // We have to do this here, and not in, say, the panic hook, because we want 92 | // the report to be silently dropped. 93 | let _ignored = report.write_out(io::stderr()); 94 | panic::resume_unwind(e) 95 | }) 96 | } 97 | 98 | /// An internal compiler error (ICE), captured from a panic handler. 99 | /// 100 | /// This is a separate type that can be ferried around between locations, 101 | /// because the panic hook executes *before* unwinding, but you may not want to 102 | /// print that as a diagnostic unless that panic bubbles all the way up to your 103 | /// main function. 104 | #[derive(Default)] 105 | pub struct Ice { 106 | what: Option, 107 | where_: Option<(String, Option)>, 108 | why: Option, 109 | options: Options, 110 | } 111 | 112 | /// Options for generating an ICE. 113 | #[derive(Default, Clone)] 114 | pub struct Options { 115 | /// Whether to show a backtrace. 
By default, uses the same rules as normal 116 | /// Rust (i.e. `RUST_BACKTRACE`). You may want to override it with something 117 | /// more in-style for your project. 118 | pub show_backtrace: Option, 119 | 120 | /// Configures what "unexpectedly panicked" in the output. Defaults to 121 | /// something generic like "the compiler". 122 | pub what_panicked: Option, 123 | 124 | /// Configures a link to show users after the "unexpectedly panicked" message. 125 | /// This should probably look like `https://github.com/me/my-project/issues`. 126 | pub report_bugs_at: Option, 127 | 128 | /// A static list of notes to append to an error before the backtrace. 129 | /// For example, rustc's ICE handler shows a GitHub link for filing issues, 130 | /// the version, git commit, and date the compiler was built at, and some 131 | /// subset of the flags of the compiler. 132 | pub extra_notes: Vec, 133 | } 134 | 135 | impl Ice { 136 | /// Generates an ICE with no context. Useful for when you caught a panic but 137 | /// didn't stow an ICE as expected. 138 | pub fn with_no_context(options: Options) -> Self { 139 | Self { 140 | what: None, 141 | where_: None, 142 | why: None, 143 | options, 144 | } 145 | } 146 | 147 | /// Generates an ICE from a panic message. 148 | /// 149 | /// The results are "best effort". The Rust backtrace API is incomplete, so we 150 | /// make do with some... cleverness around parsing the backtrace itself. 
151 | pub fn generate(panic: &PanicHookInfo, options: Options) -> Self { 152 | let msg = panic.payload(); 153 | let msg = Option::or( 154 | msg.downcast_ref::<&str>().copied().map(str::to_string), 155 | msg.downcast_ref::().cloned(), 156 | ); 157 | 158 | let thread = thread::current(); 159 | let thread_name = match thread.name() { 160 | Some(name) => name.into(), 161 | _ => format!("{:?}", thread.id()), 162 | }; 163 | let location = panic.location().map(ToString::to_string); 164 | 165 | let backtrace = if options.show_backtrace.is_none() { 166 | Some(Backtrace::capture()) 167 | .filter(|bt| bt.status() == BacktraceStatus::Captured) 168 | } else if options.show_backtrace == Some(true) { 169 | Some(Backtrace::force_capture()) 170 | } else { 171 | None 172 | }; 173 | 174 | Self { 175 | what: msg, 176 | where_: Some((thread_name, location)), 177 | why: backtrace, 178 | options, 179 | } 180 | } 181 | 182 | /// Dumps this ICE into a report. 183 | pub fn report(self, report: &Report) { 184 | report.error(f!( 185 | "internal compiler error: {}", 186 | self.what.as_deref().unwrap_or("unknown panic") 187 | )); 188 | 189 | report.note(f!( 190 | "{} unexpectedly panicked. 
this is a bug", 191 | self 192 | .options 193 | .what_panicked 194 | .as_deref() 195 | .unwrap_or("the compiler"), 196 | )); 197 | 198 | if let Some(at) = self.options.report_bugs_at { 199 | report.note(f!("please file a bug at: {at}")); 200 | } 201 | 202 | for note in self.options.extra_notes { 203 | report.note(f!("{note}")); 204 | } 205 | 206 | if let Some(bt) = self.why { 207 | match self.where_ { 208 | Some((thread, Some(loc))) => { 209 | report.note(f!("thread \"{thread}\" panicked at {loc}\n{bt}")) 210 | } 211 | Some((thread, _)) => { 212 | report.note(f!("thread \"{thread}\" panicked\n{bt}")) 213 | } 214 | None => report.note(f!("backtrace:\n{bt}")), 215 | }; 216 | } 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /ilex/src/report/diagnostic.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::mem; 3 | use std::panic; 4 | 5 | use crate::file; 6 | use crate::file::Spanned; 7 | use crate::report::Report; 8 | 9 | /// A diagnostic that is being built up. 10 | /// 11 | /// [`Diagnostic`]s are not committed to the report that owns them until they 12 | /// are dropped. In general, this is not a problem because diagnostics are 13 | /// almost always temporaries, e.g. 14 | /// 15 | /// ``` 16 | /// # fn x(report: &ilex::Report, span: ilex::Span) { 17 | /// report.error("my error message") 18 | /// .saying(span, "this is bad code"); 19 | /// # } 20 | /// ``` 21 | /// 22 | /// However, holding a diagnostic in a variable will delay it until the end of 23 | /// the scope, or until [`Diagnostic::commit()`] is called. Once a diagnostic 24 | /// is added to a report, it cannot be modified. 25 | /// 26 | /// See e.g. [`Report::error()`]. 
27 | pub struct Diagnostic { 28 | pub(super) report: Report, 29 | pub(super) info: Info, 30 | pub(super) speculative: bool, 31 | } 32 | 33 | pub use annotate_snippets::AnnotationType as Kind; 34 | 35 | pub struct Info { 36 | pub kind: Kind, 37 | pub message: String, 38 | pub snippets: Vec>, 39 | pub notes: Vec<(String, Kind)>, 40 | pub reported_at: Option<&'static panic::Location<'static>>, 41 | } 42 | 43 | impl Diagnostic { 44 | pub(super) fn new(report: Report, kind: Kind, message: String) -> Self { 45 | Diagnostic { 46 | report, 47 | speculative: false, 48 | info: Info { 49 | message, 50 | kind, 51 | snippets: Vec::new(), 52 | notes: Vec::new(), 53 | reported_at: None, 54 | }, 55 | } 56 | } 57 | 58 | /// Marks this diagnostic as "speculative", meaning that it will not be 59 | /// applied until [`Diagnostic::commit()`] is called. 60 | pub fn speculate(mut self) -> Self { 61 | self.speculative = true; 62 | self 63 | } 64 | 65 | /// Commits this diagnostic to its report, even if it was marked as 66 | /// speculative. 67 | pub fn commit(mut self) { 68 | self.speculative = false; 69 | drop(self); 70 | } 71 | 72 | /// Adds a new relevant snippet at the given location. 73 | pub fn at<'s>(self, span: impl Spanned<'s>) -> Self { 74 | self.saying(span, "") 75 | } 76 | 77 | /// Adds a new diagnostic location, with the given message attached to it. 78 | pub fn saying<'s>( 79 | self, 80 | span: impl Spanned<'s>, 81 | message: impl fmt::Display, 82 | ) -> Self { 83 | self.snippet(span, message, None) 84 | } 85 | 86 | /// Like `saying`, but the underline is as for a "note" rather than the 87 | /// overall diagnostic. 
88 | pub fn remark<'s>( 89 | self, 90 | span: impl Spanned<'s>, 91 | message: impl fmt::Display, 92 | ) -> Self { 93 | self.snippet(span, message, Some(Kind::Help)) 94 | } 95 | 96 | fn snippet<'s>( 97 | mut self, 98 | span: impl Spanned<'s>, 99 | message: impl fmt::Display, 100 | kind: Option, 101 | ) -> Self { 102 | if self.info.snippets.is_empty() { 103 | self.info.snippets = vec![vec![]]; 104 | } 105 | 106 | self.info.snippets.last_mut().unwrap().push(( 107 | span.span().span3(), 108 | message.to_string(), 109 | kind.unwrap_or(self.info.kind), 110 | )); 111 | self 112 | } 113 | 114 | /// Starts a new snippet, even if the next range is in the same file. 115 | pub fn new_snippet(mut self) -> Self { 116 | self.info.snippets.push(Vec::new()); 117 | self 118 | } 119 | 120 | /// Appends a note to the bottom of the diagnostic. 121 | pub fn note(mut self, message: impl fmt::Display) -> Self { 122 | // HACK: annotate-snippets really likes to convert __ into bold, like 123 | // Markdown, which is a problem for display correctness. We work around this 124 | // by inserting a zero-width space between every two underscores. 125 | let mut note = message.to_string(); 126 | note = note.replace("__", "_\u{200b}_"); 127 | 128 | self.info.notes.push((note, Kind::Note)); 129 | self 130 | } 131 | 132 | /// Appends a help tip to the bottom of the diagnostic. 133 | pub fn help(mut self, message: impl fmt::Display) -> Self { 134 | // HACK: annotate-snippets really likes to convert __ into bold, like 135 | // Markdown, which is a problem for display correctness. We work around this 136 | // by inserting a zero-width space between every two underscores. 137 | let mut note = message.to_string(); 138 | note = note.replace("__", "_\u{200b}_"); 139 | 140 | self.info.notes.push((note, Kind::Help)); 141 | self 142 | } 143 | 144 | /// Updates the "reported at" information for this diagnostic. 
145 | /// 146 | /// This information is only intended to be used for tool developers to 147 | /// debug where diagnostics are being emitted. 148 | pub fn reported_at(mut self, at: &'static panic::Location<'static>) -> Self { 149 | if self.report.state.opts.show_report_locations { 150 | self.info.reported_at = Some(at) 151 | } 152 | self 153 | } 154 | } 155 | 156 | impl Drop for Diagnostic { 157 | fn drop(&mut self) { 158 | if !self.speculative { 159 | self.report.state.insert_diagnostic(mem::replace( 160 | &mut self.info, 161 | Info { 162 | message: "".to_string(), 163 | kind: Kind::Error, 164 | snippets: Vec::new(), 165 | notes: Vec::new(), 166 | reported_at: None, 167 | }, 168 | )); 169 | } 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /ilex/src/report/mod.rs: -------------------------------------------------------------------------------- 1 | //! Diagnostics and error reports. 2 | //! 3 | //! This module contains types for generating an *error report*: a collection of 4 | //! diagnostics that describe why an operation failed in detail. Diagnostics 5 | //! are basically fancy compiler errors: they use [`Span`]s to present faulty 6 | //! input in context. 7 | //! 8 | //! The [`Report`] type is a reference-counted list of diagnostics, which is 9 | //! typically passed by reference into functions, but can be copied to simplify 10 | //! lifetimes, since it's reference-counted. 11 | 12 | use std::fmt; 13 | use std::io; 14 | use std::panic; 15 | use std::panic::Location; 16 | use std::process; 17 | use std::sync::Arc; 18 | 19 | use crate::file::Context; 20 | use crate::spec::Spec; 21 | 22 | mod builtin; 23 | mod diagnostic; 24 | mod render; 25 | 26 | pub use builtin::Builtins; 27 | pub use builtin::Expected; 28 | pub use diagnostic::Diagnostic; 29 | use diagnostic::Kind; 30 | 31 | #[cfg(doc)] 32 | use crate::Span; 33 | 34 | /// A collection of errors can may built up over the course of an operation. 
35 | /// 36 | /// To construct a report, see [`Context::new_report()`]. The context that 37 | /// constructs a report is the only one whose [`Span`]s should be passed into 38 | /// it; doing otherwise will result in unspecified output (or probably a panic). 39 | pub struct Report { 40 | ctx: Context, 41 | state: Arc, 42 | } 43 | 44 | /// Options for a [`Report`]. 45 | pub struct Options { 46 | /// Whether to color the output when rendered. 47 | pub color: bool, 48 | /// Whether to add a note to each diagnostic showing where in the source 49 | /// code it was reported. `ilex` makes a best-case effort to ensure this 50 | /// location is in *your* code. 51 | pub show_report_locations: bool, 52 | } 53 | 54 | impl Default for Options { 55 | fn default() -> Self { 56 | Self { 57 | color: true, 58 | show_report_locations: cfg!(debug_assertions), 59 | } 60 | } 61 | } 62 | 63 | impl Report { 64 | pub(crate) fn copy(&self) -> Report { 65 | Self { 66 | ctx: self.ctx.copy(), 67 | state: self.state.clone(), 68 | } 69 | } 70 | 71 | /// Returns a wrapper for accessing commonly-used, built-in message types. 72 | /// 73 | /// See [`Builtins`]. 74 | pub fn builtins<'a>(&'a self, spec: &'a Spec) -> Builtins<'a> { 75 | Builtins { report: self, spec } 76 | } 77 | 78 | /// Adds a new error to this report. 79 | /// 80 | /// The returned [`Diagnostic`] object can be used to add spans, notes, and 81 | /// remarks, to generate a richer diagnostic. 82 | #[track_caller] 83 | pub fn error(&self, message: impl fmt::Display) -> Diagnostic { 84 | self.new_diagnostic(Kind::Error, message.to_string()) 85 | } 86 | 87 | /// Adds a new warning to this report. 88 | /// 89 | /// The returned [`Diagnostic`] object can be used to add spans, notes, and 90 | /// remarks, to generate a richer diagnostic. 
91 | #[track_caller] 92 | pub fn warn(&self, message: impl fmt::Display) -> Diagnostic { 93 | self.new_diagnostic(Kind::Warning, message.to_string()) 94 | } 95 | 96 | /// Adds a new top-level note to this report. 97 | /// 98 | /// The returned [`Diagnostic`] object can be used to add spans, notes, and 99 | /// remarks, to generate a richer diagnostic. 100 | #[track_caller] 101 | pub fn note(&self, message: impl fmt::Display) -> Diagnostic { 102 | self.new_diagnostic(Kind::Note, message.to_string()) 103 | } 104 | 105 | #[track_caller] 106 | fn new_diagnostic(&self, kind: Kind, message: String) -> Diagnostic { 107 | Diagnostic::new(self.copy(), kind, message).reported_at(Location::caller()) 108 | } 109 | 110 | /// Returns a [`Fatal`] regardless of whether this report contains any errors. 111 | pub fn fatal(&self) -> Result { 112 | Err(Fatal(self.copy())) 113 | } 114 | 115 | /// If this report contains any errors, returns [`Err(Fatal)`][Fatal]; 116 | /// otherwise, it returns `Ok(ok)`. 117 | /// 118 | /// This is a useful function for completing some operation that could have 119 | /// generated error diagnostics. 120 | /// 121 | /// See [`Fatal`]. 122 | pub fn fatal_or(&self, ok: T) -> Result { 123 | if !self.state.has_error() { 124 | return Ok(ok); 125 | } 126 | 127 | self.fatal() 128 | } 129 | 130 | /// Collates all of the "unsorted diagnostics" into the "sorted diagnostics", 131 | /// sorting them by thread id. 132 | /// 133 | /// This ensures that all diagnostics coming from a particular thread are 134 | /// together. 135 | pub fn collate(&self) { 136 | self.state.collate() 137 | } 138 | 139 | /// Writes out the contents of this diagnostic to `sink`. 
140 | pub fn write_out(&self, sink: impl io::Write) -> io::Result<()> { 141 | render::finish(self, sink) 142 | } 143 | 144 | pub(crate) fn new(ctx: &Context, opts: Options) -> Self { 145 | Self { 146 | ctx: ctx.copy(), 147 | state: Arc::new(render::State::new(opts)), 148 | } 149 | } 150 | } 151 | 152 | /// An error type for making returning a [`Result`] that will trigger 153 | /// diagnostics printing when unwrapped. 154 | /// 155 | /// This is useful for functions that are [`Result`]-ey, like reading a file, 156 | /// but which want to generate diagnostics, too. 157 | pub struct Fatal(Report); 158 | 159 | impl Fatal { 160 | /// Prints all diagnostics to stderr and terminates the program. 161 | pub fn terminate(self) -> ! { 162 | eprintln!("{self}"); 163 | process::exit(1); 164 | } 165 | 166 | /// Panics with the [`Report`]'s diagnostics as the panic message. 167 | pub fn panic(self) -> ! { 168 | panic::panic_any(self.to_string()) 169 | } 170 | } 171 | 172 | impl fmt::Debug for Fatal { 173 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 174 | render::render_fmt(&self.0, &self.0.state.opts, f) 175 | } 176 | } 177 | 178 | impl fmt::Display for Fatal { 179 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 180 | fmt::Debug::fmt(self, f) 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /ilex/src/report/render.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::io; 3 | use std::mem; 4 | use std::sync::atomic::AtomicBool; 5 | use std::sync::atomic::AtomicU64; 6 | use std::sync::atomic::Ordering; 7 | use std::sync::Mutex; 8 | 9 | use annotate_snippets::renderer::AnsiColor; 10 | use annotate_snippets::renderer::Style; 11 | use annotate_snippets::Annotation; 12 | use annotate_snippets::AnnotationType; 13 | use annotate_snippets::Renderer; 14 | use annotate_snippets::Slice; 15 | use annotate_snippets::Snippet; 16 | use 
annotate_snippets::SourceAnnotation; 17 | 18 | use crate::report::diagnostic; 19 | use crate::report::diagnostic::Info; 20 | use crate::report::diagnostic::Kind; 21 | use crate::report::Options; 22 | use crate::report::Report; 23 | 24 | pub struct State { 25 | pub opts: Options, 26 | has_error: AtomicBool, 27 | sorted_diagnostics: Mutex>, 28 | recent_diagnostics: Mutex>, 29 | } 30 | 31 | impl State { 32 | pub fn new(opts: Options) -> Self { 33 | Self { 34 | opts, 35 | has_error: AtomicBool::new(false), 36 | sorted_diagnostics: Default::default(), 37 | recent_diagnostics: Default::default(), 38 | } 39 | } 40 | 41 | pub fn has_error(&self) -> bool { 42 | self.has_error.load(Ordering::SeqCst) 43 | } 44 | 45 | /// Collates all of the "unsorted diagnostics" into the "sorted diagnostics", 46 | /// sorting them by thread id. This ensures that all diagnostics coming from 47 | /// a particular thread are together. 48 | pub fn collate(&self) { 49 | let mut recent = self.recent_diagnostics.lock().unwrap(); 50 | let mut sorted = self.sorted_diagnostics.lock().unwrap(); 51 | 52 | recent.sort_by_key(|&(id, _)| id); 53 | sorted.extend(recent.drain(..).map(|(_, i)| i)); 54 | } 55 | 56 | pub fn insert_diagnostic(&self, info: Info) { 57 | if info.kind == Kind::Error { 58 | self.has_error.store(true, Ordering::SeqCst); 59 | } 60 | 61 | static COUNTER: AtomicU64 = AtomicU64::new(0); 62 | thread_local! { 63 | static ID: u64 = COUNTER.fetch_add(1, Ordering::Relaxed); 64 | }; 65 | 66 | let mut recent = self.recent_diagnostics.lock().unwrap(); 67 | recent.push((ID.with(|&x| x), info)) 68 | } 69 | } 70 | 71 | /// Consumes this `Report` and dumps its diagnostics to `sink`. 
72 | pub fn finish(report: &Report, sink: impl io::Write) -> io::Result<()> { 73 | struct Writer { 74 | sink: W, 75 | error: Option, 76 | } 77 | 78 | impl fmt::Write for Writer { 79 | fn write_str(&mut self, s: &str) -> fmt::Result { 80 | self.sink.write_all(s.as_bytes()).map_err(|e| { 81 | self.error = Some(e); 82 | fmt::Error 83 | }) 84 | } 85 | } 86 | 87 | let mut out = Writer { sink, error: None }; 88 | render_fmt(report, &report.state.opts, &mut out).map_err(|_| { 89 | if let Some(e) = out.error.take() { 90 | return e; 91 | } 92 | 93 | io::Error::new(io::ErrorKind::Other, "formatter error") 94 | }) 95 | } 96 | 97 | /// Dumps this collection of errors as user-displayable text into `sink`. 98 | pub fn render_fmt( 99 | report: &Report, 100 | opts: &Options, 101 | sink: &mut dyn fmt::Write, 102 | ) -> fmt::Result { 103 | report.state.collate(); 104 | let mut errors = 0; 105 | 106 | let mut renderer = Renderer::plain(); 107 | #[rustfmt::skip] 108 | #[allow(clippy::let_unit_value)] 109 | let _ = if opts.color { 110 | renderer = Renderer::styled() 111 | .error(Style::new().fg_color(Some(AnsiColor::BrightRed.into())).bold()) 112 | .warning(Style::new().fg_color(Some(AnsiColor::BrightYellow.into())).bold()) 113 | .note(Style::new().fg_color(Some(AnsiColor::BrightGreen.into())).bold()) 114 | .info(Style::new().fg_color(Some(AnsiColor::BrightBlue.into())).bold()) 115 | .help(Style::new().fg_color(Some(AnsiColor::BrightBlue.into())).bold()); 116 | }; 117 | 118 | for e in report.state.sorted_diagnostics.lock().unwrap().iter() { 119 | if e.kind == Kind::Error { 120 | errors += 1; 121 | }; 122 | 123 | let mut snippet = Snippet { 124 | title: Some(Annotation { 125 | id: None, 126 | label: Some(&e.message), 127 | annotation_type: e.kind, 128 | }), 129 | footer: Vec::new(), 130 | slices: Vec::new(), 131 | }; 132 | 133 | for snips in &e.snippets { 134 | let mut cur_file = None; 135 | let mut cur_slice = None::; 136 | let mut has_eof = false; 137 | for (span, text, kind) in snips 
{ 138 | let span = span.get(&report.ctx); 139 | let file = span.file(); 140 | if cur_file != Some(file) { 141 | cur_file = Some(file); 142 | if let Some(mut slice) = cur_slice.take() { 143 | if !mem::take(&mut has_eof) { 144 | slice.source = &slice.source[..slice.source.len() - 1]; 145 | } 146 | snippet.slices.push(slice); 147 | } 148 | 149 | cur_slice = Some(Slice { 150 | source: file.text_with_extra_space(), 151 | line_start: 1, 152 | origin: Some(file.path().as_str()), 153 | annotations: Vec::new(), 154 | fold: true, 155 | }); 156 | } 157 | 158 | let slice = cur_slice.as_mut().unwrap(); 159 | let mut start = span.start(); 160 | let mut end = span.end(); 161 | 162 | // Ensure that all ranges have length at least one, and try to get them 163 | // to point just after non-whitespace. 164 | // If this is the EOF, it will point at the extra space. 165 | if start == end { 166 | let chunk = &slice.source[..end]; 167 | let ws_suf = 168 | chunk.len() - chunk.trim_end_matches(char::is_whitespace).len(); 169 | start -= ws_suf; 170 | end -= ws_suf; 171 | end += 1; 172 | has_eof |= end == slice.source.len(); 173 | } else { 174 | // Crop a range so that it does not contain leading or trailing 175 | // whitespace. 176 | let chunk = &slice.source[start..end]; 177 | let ws_pre = 178 | chunk.len() - chunk.trim_start_matches(char::is_whitespace).len(); 179 | let ws_suf = 180 | chunk.len() - chunk.trim_end_matches(char::is_whitespace).len(); 181 | start += ws_pre; 182 | end -= ws_suf; 183 | } 184 | 185 | slice.annotations.push(SourceAnnotation { 186 | range: (start, end), 187 | label: text, 188 | annotation_type: *kind, 189 | }); 190 | } 191 | 192 | if let Some(mut slice) = cur_slice.take() { 193 | if !mem::take(&mut has_eof) { 194 | slice.source = &slice.source[..slice.source.len() - 1]; 195 | } 196 | snippet.slices.push(slice); 197 | } 198 | } 199 | 200 | // Crop the starts of each slice to only incorporate the annotations. 
201 | for slice in &mut snippet.slices { 202 | let earliest_start = slice 203 | .annotations 204 | .iter() 205 | .map(|a| a.range.0) 206 | .min() 207 | .unwrap_or(0); 208 | let (count, start_idx) = slice.source[..earliest_start] 209 | .bytes() 210 | .enumerate() 211 | .filter_map(|(i, c)| (c == b'\n').then_some(i + 1)) 212 | .enumerate() 213 | .map(|(i, j)| (i + 1, j)) 214 | .last() 215 | .unwrap_or_default(); 216 | 217 | slice.line_start = count + 1; 218 | slice.source = &slice.source[start_idx..]; 219 | for a in &mut slice.annotations { 220 | a.range.0 -= start_idx; 221 | a.range.1 -= start_idx; 222 | } 223 | } 224 | 225 | for (note, kind) in &e.notes { 226 | snippet.footer.push(Annotation { 227 | id: None, 228 | label: Some(note), 229 | annotation_type: *kind, 230 | }); 231 | } 232 | 233 | let footer; 234 | if opts.show_report_locations { 235 | footer = format!("reported at: {}", e.reported_at.unwrap()); 236 | snippet.footer.push(Annotation { 237 | id: None, 238 | label: Some(&footer), 239 | annotation_type: AnnotationType::Note, 240 | }); 241 | } 242 | 243 | write!(sink, "{}\n\n", renderer.render(snippet))?; 244 | } 245 | 246 | if errors != 0 { 247 | let message = match errors { 248 | 1 => "aborting due to previous error".into(), 249 | n => format!("aborting due to {n} errors"), 250 | }; 251 | 252 | let aborting = Snippet { 253 | title: Some(Annotation { 254 | id: None, 255 | label: Some(&message), 256 | annotation_type: AnnotationType::Error, 257 | }), 258 | footer: Vec::new(), 259 | slices: Vec::new(), 260 | }; 261 | 262 | writeln!(sink, "{}", renderer.render(aborting))?; 263 | } 264 | 265 | Ok(()) 266 | } 267 | -------------------------------------------------------------------------------- /ilex/src/rt/mod.rs: -------------------------------------------------------------------------------- 1 | //! The lexer runtime. 
2 | 3 | use std::cell::Cell; 4 | 5 | use crate::file::File; 6 | use crate::file::Span; 7 | use crate::file::Span2; 8 | use crate::report::Fatal; 9 | use crate::report::Report; 10 | use crate::rule; 11 | use crate::rule::Sign; 12 | use crate::spec::Lexeme; 13 | use crate::spec::Spec; 14 | use crate::token; 15 | 16 | mod emit2; 17 | pub mod lexer; 18 | mod unicode; 19 | 20 | mod dfa; 21 | pub use dfa::compile; 22 | pub use dfa::Dfa; 23 | 24 | pub fn lex<'ctx>( 25 | file: File<'ctx>, 26 | report: &Report, 27 | spec: &'ctx Spec, 28 | ) -> Result, Fatal> { 29 | let mut lexer = lexer::Lexer::new(file, report, spec); 30 | 31 | let unexpected = Cell::new(None); 32 | let diagnose_unexpected = |end: usize| { 33 | let Some(start) = unexpected.take() else { return }; 34 | report 35 | .builtins(spec) 36 | .unexpected_token(file.span(start..end)); 37 | }; 38 | 39 | loop { 40 | let start = lexer.cursor(); 41 | if lexer.skip_whitespace() { 42 | diagnose_unexpected(start); 43 | } 44 | 45 | let start = lexer.cursor(); 46 | let Some(next) = lexer.text(lexer.cursor()..).chars().next() else { break }; 47 | 48 | lexer.pop_closer(); 49 | if lexer.cursor() > start { 50 | diagnose_unexpected(start); 51 | continue; 52 | } 53 | 54 | emit2::emit(&mut lexer); 55 | if lexer.cursor() > start { 56 | diagnose_unexpected(start); 57 | continue; 58 | } 59 | 60 | lexer.add_token(UNEXPECTED, next.len_utf8(), None); 61 | if unexpected.get().is_none() { 62 | unexpected.set(Some(start)) 63 | } 64 | } 65 | 66 | report.fatal_or(lexer.finish()) 67 | } 68 | 69 | /// The internal representation of a token inside of a token stream. 
70 | #[derive(Clone)] 71 | pub struct Token { 72 | pub lexeme: Lexeme, 73 | pub end: u32, 74 | } 75 | #[derive(Clone, Default)] 76 | pub struct Metadata { 77 | pub kind: Option, 78 | pub comments: Vec, 79 | } 80 | 81 | #[derive(Clone)] 82 | pub enum Kind { 83 | Quoted(Quoted), 84 | Digital(Digital), 85 | Offset { cursor: i32, meta: i32 }, 86 | } 87 | 88 | #[derive(Clone)] 89 | pub struct Quoted { 90 | // Offsets for the components of the string. First mark is the end of the 91 | // open quote; following are alternating marks for textual and escape content. 92 | // Adjacent escapes are separated by empty text content. 93 | // 94 | // Each text component consists of one mark, its end. Each escape consists of 95 | // four marks, which refer to the end of the escape sequence prefix, the start of extra data, its end, and the 96 | // end of the whole escape. This means that when we encounter \xNN, the 97 | // positions of the marks are \x||NN||. When we encounter \u{NN}, the positions 98 | // are \u|{|NN|}|. For \n, the positions are \n||||. 
99 | pub marks: Vec, 100 | } 101 | 102 | #[derive(Clone, Default)] 103 | pub struct Digital { 104 | pub digits: DigitBlocks, 105 | pub exponents: Vec, 106 | } 107 | 108 | #[derive(Clone, Default)] 109 | pub struct DigitBlocks { 110 | pub prefix: Span2, 111 | pub sign: Option<(Sign, Span2)>, 112 | pub blocks: Vec, 113 | pub which_exp: usize, 114 | } 115 | 116 | impl DigitBlocks { 117 | pub fn prefix<'ctx>(&self, file: File<'ctx>) -> Option> { 118 | if self.prefix == Span2::default() { 119 | return None; 120 | } 121 | Some(self.prefix.get(file)) 122 | } 123 | 124 | pub fn sign<'ctx>(&self, file: File<'ctx>) -> Option> { 125 | self.sign.map(|(_, s)| s.get(file)) 126 | } 127 | 128 | pub fn blocks<'a, 'ctx: 'a>( 129 | &'a self, 130 | file: File<'ctx>, 131 | ) -> impl Iterator> + 'a { 132 | self.blocks.iter().map(move |s| s.get(file)) 133 | } 134 | } 135 | 136 | pub const WHITESPACE: Lexeme = Lexeme::new(-1); 137 | pub const UNEXPECTED: Lexeme = Lexeme::new(-2); 138 | pub const PREFIX: Lexeme = Lexeme::new(-3); 139 | pub const SUFFIX: Lexeme = Lexeme::new(-4); 140 | -------------------------------------------------------------------------------- /ilex/src/token/summary.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of `Stream::summary()`. 2 | 3 | use gilded::doc::Doc; 4 | 5 | use crate::file::Span; 6 | use crate::file::Spanned; 7 | use crate::token::Any; 8 | use crate::token::Cursor; 9 | use crate::token::Stream; 10 | 11 | use crate::token::Sign; 12 | use crate::token::Token; 13 | 14 | use super::Content; 15 | 16 | impl Stream<'_> { 17 | /// Returns a string that summarizes the contents of this token stream. 
18 | pub fn summary(&self) -> String { 19 | self.cursor().summary().to_string(&Default::default()) 20 | } 21 | } 22 | 23 | impl<'a> Cursor<'a> { 24 | fn summary(&self) -> Doc<'a> { 25 | Doc::new().push({ *self }.map(|token| { 26 | let doc = Doc::new() 27 | .entry("lexeme", token.lexeme().index()) 28 | .entry("span", span2doc(token.span())); 29 | 30 | match token { 31 | Any::Eof(..) => Doc::single("eof", doc), 32 | Any::Keyword(..) => Doc::single("keyword", doc), 33 | Any::Bracket(tok) => Doc::single( 34 | "bracket", 35 | doc 36 | .array("delims", tok.delimiters().into_iter().map(span2doc)) 37 | .entry("contents", tok.contents().summary()), 38 | ), 39 | 40 | Any::Ident(tok) => Doc::single( 41 | "ident", 42 | doc 43 | .entry("prefix", tok.prefix().map(span2doc)) 44 | .entry("suffix", tok.suffix().map(span2doc)) 45 | .entry("name", span2doc(tok.name())), 46 | ), 47 | 48 | Any::Digital(tok) => Doc::single( 49 | "ident", 50 | doc 51 | .entry("prefix", tok.prefix().map(span2doc)) 52 | .entry("suffix", tok.suffix().map(span2doc)) 53 | .entry("radix", tok.radix()) 54 | .entry("sign", tok.sign().map(sign2str)) 55 | .array("blocks", tok.digit_blocks().map(span2doc)) 56 | .array( 57 | "exponents", 58 | tok.exponents().map(|exp| { 59 | Doc::new() 60 | .entry("span", span2doc(exp.span())) 61 | .entry("prefix", exp.prefix().map(span2doc)) 62 | .entry("radix", exp.radix()) 63 | .entry("sign", exp.sign().map(sign2str)) 64 | .array("blocks", exp.digit_blocks().map(span2doc)) 65 | }), 66 | ), 67 | ), 68 | 69 | Any::Quoted(tok) => Doc::single( 70 | "quoted", 71 | doc 72 | .entry("prefix", tok.prefix().map(span2doc)) 73 | .entry("suffix", tok.suffix().map(span2doc)) 74 | .array("delims", tok.delimiters().into_iter().map(span2doc)) 75 | .array( 76 | "contents", 77 | tok.raw_content().map(|c| match c { 78 | Content::Lit(lit) => Doc::single("text", span2doc(lit)), 79 | Content::Esc(esc, data) => Doc::new() 80 | .entry("esc", span2doc(esc)) 81 | .entry("data", data.map(span2doc)), 82 | 
}), 83 | ), 84 | ), 85 | } 86 | })) 87 | } 88 | } 89 | 90 | fn span2doc(span: Span) -> Doc { 91 | Doc::new() 92 | .array("span", [span.start(), span.end()]) 93 | .entry("text", span.text()) 94 | } 95 | 96 | fn sign2str(s: Sign) -> &'static str { 97 | match s { 98 | Sign::Pos => "+", 99 | Sign::Neg => "-", 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /ilex/tests/greedy/greedy.tokens.yaml: -------------------------------------------------------------------------------- 1 | - keyword: 2 | lexeme: 3 3 | span: {span: [0, 6], text: "poison"} 4 | - keyword: 5 | lexeme: 5 6 | span: 7 | span: [6, 7] 8 | text: | 9 | 10 | - ident: 11 | lexeme: 4 12 | span: {span: [7, 16], text: "poisonous"} 13 | name: {span: [7, 16], text: "poisonous"} 14 | - keyword: 15 | lexeme: 5 16 | span: 17 | span: [16, 17] 18 | text: | 19 | 20 | - quoted: 21 | lexeme: 0 22 | span: 23 | span: [17, 32] 24 | text: "poisonous[xyz]>" 25 | delims: 26 | - {span: [17, 27], text: "poisonous["} 27 | - {span: [30, 32], text: "]>"} 28 | contents: [{text: {span: [27, 30], text: "xyz"}}] 29 | - keyword: 30 | lexeme: 5 31 | span: 32 | span: [32, 33] 33 | text: | 34 | 35 | - quoted: 36 | lexeme: 0 37 | span: 38 | span: [33, 56] 39 | text: "poisonous#%#%[xyz]#%#%>" 40 | delims: 41 | - {span: [33, 47], text: "poisonous#%#%["} 42 | - {span: [50, 56], text: "]#%#%>"} 43 | contents: [{text: {span: [47, 50], text: "xyz"}}] 44 | - keyword: 45 | lexeme: 5 46 | span: 47 | span: [56, 57] 48 | text: | 49 | 50 | - ident: 51 | lexeme: 4 52 | span: {span: [57, 66], text: "poisonous"} 53 | name: {span: [57, 66], text: "poisonous"} 54 | - bracket: 55 | lexeme: 2 56 | span: {span: [67, 72], text: "[xyz]"} 57 | delims: 58 | - {span: [67, 68], text: "["} 59 | - {span: [71, 72], text: "]"} 60 | contents: 61 | - ident: 62 | lexeme: 4 63 | span: {span: [68, 71], text: "xyz"} 64 | name: {span: [68, 71], text: "xyz"} 65 | - keyword: 66 | lexeme: 5 67 | span: 68 | span: [72, 73] 69 | 
text: | 70 | 71 | - quoted: 72 | lexeme: 1 73 | span: 74 | span: [73, 93] 75 | text: "R\"cc(some c++)\" )cc\"" 76 | delims: 77 | - {span: [73, 78], text: "R\"cc("} 78 | - {span: [89, 93], text: ")cc\""} 79 | contents: [{text: {span: [78, 89], text: "some c++)\" "}}] 80 | - eof: 81 | lexeme: 2147483647 82 | span: {span: [93, 93], text: ""} 83 | -------------------------------------------------------------------------------- /ilex/tests/greedy/greedy.txt: -------------------------------------------------------------------------------- 1 | poison 2 | poisonous 3 | poisonous[xyz]> 4 | poisonous#%#%[xyz]#%#%> 5 | poisonous [xyz] 6 | R"cc(some c++)" )cc" -------------------------------------------------------------------------------- /ilex/tests/greedy/main.rs: -------------------------------------------------------------------------------- 1 | use ilex::rule::*; 2 | use ilex::Context; 3 | use ilex::Lexeme; 4 | 5 | #[gilded::test("tests/greedy/*.txt")] 6 | fn greedy(test: &gilded::Test) { 7 | // This test verifies that lexing is greedy in *most* cases. 
8 | 9 | #[ilex::spec] 10 | struct Greedy { 11 | #[rule(Quoted::with(Bracket::rust_style( 12 | "#%", 13 | ("poisonous", "["), 14 | ("]", ">"), 15 | )))] 16 | rust_like: Lexeme, 17 | 18 | #[rule(Quoted::with(Bracket::cxx_style( 19 | Ident::new(), 20 | ("R\"", "("), 21 | (")", "\""), 22 | )))] 23 | cpp_like: Lexeme, 24 | 25 | #[rule("[", "]")] 26 | array: Lexeme, 27 | 28 | poison: Lexeme, 29 | 30 | #[rule(Ident::new())] 31 | ident: Lexeme, 32 | 33 | #[rule(LineEnd::cancellable("\\"))] 34 | nl: Lexeme, 35 | 36 | #[rule(Comment::line("//"))] 37 | comment: Lexeme, 38 | } 39 | 40 | let ctx = Context::new(); 41 | let report = ctx.new_report(); 42 | let file = ctx 43 | .new_file_from_bytes(test.path(), test.text(), &report) 44 | .unwrap(); 45 | 46 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 47 | match file.lex(Greedy::get().spec(), &report) { 48 | Ok(stream) => tokens(stream.summary()), 49 | Err(fatal) => stderr(fatal.to_string()), 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /ilex/tests/greedy/newlines.tokens.yaml: -------------------------------------------------------------------------------- 1 | - keyword: 2 | lexeme: 3 3 | span: {span: [0, 6], text: "poison"} 4 | - keyword: 5 | lexeme: 5 6 | span: 7 | span: [6, 7] 8 | text: | 9 | 10 | - keyword: 11 | lexeme: 3 12 | span: {span: [7, 13], text: "poison"} 13 | - keyword: 14 | lexeme: 3 15 | span: {span: [16, 22], text: "poison"} 16 | - keyword: 17 | lexeme: 3 18 | span: {span: [36, 42], text: "poison"} 19 | - keyword: 20 | lexeme: 5 21 | span: 22 | span: [53, 54] 23 | text: | 24 | 25 | - keyword: 26 | lexeme: 3 27 | span: {span: [54, 60], text: "poison"} 28 | - eof: 29 | lexeme: 2147483647 30 | span: {span: [60, 60], text: ""} 31 | -------------------------------------------------------------------------------- /ilex/tests/greedy/newlines.txt: -------------------------------------------------------------------------------- 1 | poison 2 | poison \ 
3 | poison \ // comment 4 | poison // comment 5 | poison -------------------------------------------------------------------------------- /ilex/tests/json/array.ast.txt: -------------------------------------------------------------------------------- 1 | Arr( 2 | [ 3 | Null, 4 | Bool( 5 | true, 6 | ), 7 | Bool( 8 | false, 9 | ), 10 | ], 11 | ) -------------------------------------------------------------------------------- /ilex/tests/json/array.json: -------------------------------------------------------------------------------- 1 | [null, true, false] -------------------------------------------------------------------------------- /ilex/tests/json/array.tokens.yaml: -------------------------------------------------------------------------------- 1 | - bracket: 2 | lexeme: 5 3 | span: 4 | span: [0, 19] 5 | text: "[null, true, false]" 6 | delims: 7 | - {span: [0, 1], text: "["} 8 | - {span: [18, 19], text: "]"} 9 | contents: 10 | - keyword: 11 | lexeme: 4 12 | span: {span: [1, 5], text: "null"} 13 | - keyword: 14 | lexeme: 0 15 | span: {span: [5, 6], text: ","} 16 | - keyword: 17 | lexeme: 2 18 | span: {span: [7, 11], text: "true"} 19 | - keyword: 20 | lexeme: 0 21 | span: {span: [11, 12], text: ","} 22 | - keyword: 23 | lexeme: 3 24 | span: {span: [13, 18], text: "false"} 25 | - eof: 26 | lexeme: 2147483647 27 | span: {span: [19, 19], text: ""} 28 | -------------------------------------------------------------------------------- /ilex/tests/json/main.rs: -------------------------------------------------------------------------------- 1 | use ilex::fp::Fp64; 2 | use ilex::report::Expected; 3 | use ilex::report::Report; 4 | use ilex::rule::*; 5 | use ilex::token; 6 | use ilex::token::Cursor; 7 | use ilex::Context; 8 | use ilex::Lexeme; 9 | 10 | #[ilex::spec] 11 | struct JsonSpec { 12 | #[rule(",")] 13 | comma: Lexeme, 14 | 15 | #[rule(":")] 16 | colon: Lexeme, 17 | 18 | #[rule("true")] 19 | true_: Lexeme, 20 | 21 | #[rule("false")] 22 | false_: Lexeme, 23 | 24 | 
#[rule("null")] 25 | null: Lexeme, 26 | 27 | #[named] 28 | #[rule("[", "]")] 29 | array: Lexeme, 30 | 31 | #[named] 32 | #[rule("{", "}")] 33 | object: Lexeme, 34 | 35 | #[named] 36 | #[rule(Quoted::new('"') 37 | .invalid_escape(r"\") 38 | .escapes([ 39 | "\\\"", r"\\", r"\/", 40 | r"\b", r"\f", r"\n", r"\t", r"\r", 41 | ]) 42 | .fixed_length_escape(r"\u", 4))] 43 | string: Lexeme, 44 | 45 | #[named] 46 | #[rule(Digital::new(10) 47 | .minus() 48 | .point_limit(0..2) 49 | .exponents(["e", "E"], Digits::new(10).plus().minus()))] 50 | number: Lexeme, 51 | } 52 | 53 | #[gilded::test("tests/json/*.json")] 54 | fn check_tokens(test: &gilded::Test) { 55 | let ctx = Context::new(); 56 | let report = ctx.new_report(); 57 | let file = ctx 58 | .new_file_from_bytes(test.path(), test.text(), &report) 59 | .unwrap(); 60 | 61 | let [tokens, ast, stderr] = 62 | test.outputs(["tokens.yaml", "ast.txt", "stderr"]); 63 | 64 | let stream = match file.lex(JsonSpec::get().spec(), &report) { 65 | Ok(stream) => stream, 66 | Err(fatal) => { 67 | stderr(fatal.to_string()); 68 | return; 69 | } 70 | }; 71 | 72 | tokens(stream.summary()); 73 | 74 | let json = parse(&report, JsonSpec::get(), &mut stream.cursor()); 75 | ast(format!("{json:#?}")); 76 | 77 | if let Err(fatal) = report.fatal_or(()) { 78 | stderr(fatal.to_string()); 79 | } 80 | } 81 | 82 | #[derive(Clone, Debug, PartialEq)] 83 | enum Json { 84 | Null, 85 | Num(f64), 86 | Bool(bool), 87 | Str(String), 88 | Arr(Vec), 89 | Obj(Vec<(String, Json)>), 90 | } 91 | 92 | fn parse(report: &Report, json: &JsonSpec, cursor: &mut Cursor) -> Json { 93 | let quote2str = |str: token::Quoted| -> String { 94 | str.to_utf8(|key, data, buf| { 95 | let char = match key.text() { 96 | "\\\"" => '\"', 97 | r"\\" => '\\', 98 | r"\/" => '/', 99 | r"\b" => '\x08', 100 | r"\f" => '\x0c', 101 | r"\n" => '\n', 102 | r"\t" => '\t', 103 | r"\r" => '\r', 104 | // This is sloppy about surrogates but this is just an example. 
105 | r"\u" => { 106 | let data = data.unwrap(); 107 | let code = 108 | u16::from_str_radix(data.text(), 16).unwrap_or_else(|_| { 109 | report.builtins(json.spec()).expected( 110 | [Expected::Name("hex-encoded u16".into())], 111 | data.text(), 112 | data, 113 | ); 114 | 0 115 | }); 116 | for c in char::decode_utf16([code]) { 117 | buf.push(c.unwrap_or('😢')) 118 | } 119 | return; 120 | } 121 | esc => panic!("{}", esc), 122 | }; 123 | buf.push(char); 124 | }) 125 | }; 126 | 127 | let value = token::switch() 128 | .case(json.null, |_, _| Json::Null) 129 | .case(json.false_, |_, _| Json::Bool(false)) 130 | .case(json.true_, |_, _| Json::Bool(true)) 131 | .case(json.string, |str: token::Quoted, _| Json::Str(quote2str(str))) 132 | .case(json.number, |num: token::Digital, _| { 133 | Json::Num(num.to_float::(.., report).unwrap().to_hard()) 134 | }) 135 | .case(json.array, |array: token::Bracket, _| { 136 | let mut trailing = None; 137 | let vec = array 138 | .contents() 139 | .delimited(json.comma, |c| Some(parse(report, json, c))) 140 | .map(|(e, c)| { 141 | trailing = c; 142 | e 143 | }) 144 | .collect(); 145 | 146 | if let Some(comma) = trailing { 147 | report 148 | .error("trailing commas are not allowed in JSON") 149 | .saying(comma, "remove this comma"); 150 | } 151 | 152 | Json::Arr(vec) 153 | }) 154 | .case(json.object, |object: token::Bracket, _| { 155 | let mut trailing = None; 156 | let vec = object 157 | .contents() 158 | .delimited(json.comma, |c| { 159 | let key = c 160 | .take(json.string, report) 161 | .map(|q| quote2str(q)) 162 | .unwrap_or("😢".into()); 163 | c.take(json.colon, report); 164 | let value = parse(report, json, c); 165 | Some((key, value)) 166 | }) 167 | .map(|(e, c)| { 168 | trailing = c; 169 | e 170 | }) 171 | .collect(); 172 | 173 | if let Some(comma) = trailing { 174 | report 175 | .error("trailing commas are not allowed in JSON") 176 | .saying(comma, "remove this comma"); 177 | } 178 | 179 | Json::Obj(vec) 180 | }) 181 | .take(cursor, 
report); 182 | value.unwrap_or(Json::Null) 183 | } 184 | -------------------------------------------------------------------------------- /ilex/tests/json/null.ast.txt: -------------------------------------------------------------------------------- 1 | Null -------------------------------------------------------------------------------- /ilex/tests/json/null.json: -------------------------------------------------------------------------------- 1 | null -------------------------------------------------------------------------------- /ilex/tests/json/null.tokens.yaml: -------------------------------------------------------------------------------- 1 | - keyword: 2 | lexeme: 4 3 | span: {span: [0, 4], text: "null"} 4 | - eof: 5 | lexeme: 2147483647 6 | span: {span: [4, 4], text: ""} 7 | -------------------------------------------------------------------------------- /ilex/tests/json/obj.ast.txt: -------------------------------------------------------------------------------- 1 | Obj( 2 | [ 3 | ( 4 | "keywords", 5 | Arr( 6 | [ 7 | Null, 8 | Bool( 9 | true, 10 | ), 11 | Bool( 12 | false, 13 | ), 14 | ], 15 | ), 16 | ), 17 | ( 18 | "string", 19 | Str( 20 | "abcdefg", 21 | ), 22 | ), 23 | ( 24 | "number", 25 | Num( 26 | 42.0, 27 | ), 28 | ), 29 | ( 30 | "int", 31 | Num( 32 | 42.0, 33 | ), 34 | ), 35 | ( 36 | "frac", 37 | Num( 38 | 0.42, 39 | ), 40 | ), 41 | ( 42 | "neg", 43 | Num( 44 | -42.0, 45 | ), 46 | ), 47 | ( 48 | "exp", 49 | Num( 50 | 4.2e43, 51 | ), 52 | ), 53 | ( 54 | "nest", 55 | Obj( 56 | [ 57 | ( 58 | "escapes\n", 59 | Str( 60 | "\"\\/\u{8}\u{c}\n\t\r\0ሴ\u{ffff}", 61 | ), 62 | ), 63 | ], 64 | ), 65 | ), 66 | ], 67 | ) -------------------------------------------------------------------------------- /ilex/tests/json/obj.json: -------------------------------------------------------------------------------- 1 | { 2 | "keywords": [null, true, false], 3 | "string": "abcdefg", 4 | "number": 42, 5 | "int": 42.0, 6 | "frac": 0.42, 7 | "neg": -42, 8 | "exp": 42e+42, 9 
| "nest": { 10 | "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" 11 | } 12 | } -------------------------------------------------------------------------------- /ilex/tests/json/obj.tokens.yaml: -------------------------------------------------------------------------------- 1 | - bracket: 2 | lexeme: 6 3 | span: 4 | span: [0, 209] 5 | text: | 6 | { 7 | "keywords": [null, true, false], 8 | "string": "abcdefg", 9 | "number": 42, 10 | "int": 42.0, 11 | "frac": 0.42, 12 | "neg": -42, 13 | "exp": 42e+42, 14 | "nest": { 15 | "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" 16 | } 17 | } 18 | delims: 19 | - {span: [0, 1], text: "{"} 20 | - {span: [208, 209], text: "}"} 21 | contents: 22 | - quoted: 23 | lexeme: 7 24 | span: {span: [4, 14], text: "\"keywords\""} 25 | delims: 26 | - {span: [4, 5], text: "\""} 27 | - {span: [13, 14], text: "\""} 28 | contents: [{text: {span: [5, 13], text: "keywords"}}] 29 | - keyword: 30 | lexeme: 1 31 | span: {span: [14, 15], text: ":"} 32 | - bracket: 33 | lexeme: 5 34 | span: 35 | span: [16, 35] 36 | text: "[null, true, false]" 37 | delims: 38 | - {span: [16, 17], text: "["} 39 | - {span: [34, 35], text: "]"} 40 | contents: 41 | - keyword: 42 | lexeme: 4 43 | span: {span: [17, 21], text: "null"} 44 | - keyword: 45 | lexeme: 0 46 | span: {span: [21, 22], text: ","} 47 | - keyword: 48 | lexeme: 2 49 | span: {span: [23, 27], text: "true"} 50 | - keyword: 51 | lexeme: 0 52 | span: {span: [27, 28], text: ","} 53 | - keyword: 54 | lexeme: 3 55 | span: {span: [29, 34], text: "false"} 56 | - keyword: 57 | lexeme: 0 58 | span: {span: [35, 36], text: ","} 59 | - quoted: 60 | lexeme: 7 61 | span: {span: [39, 47], text: "\"string\""} 62 | delims: 63 | - {span: [39, 40], text: "\""} 64 | - {span: [46, 47], text: "\""} 65 | contents: [{text: {span: [40, 46], text: "string"}}] 66 | - keyword: 67 | lexeme: 1 68 | span: {span: [47, 48], text: ":"} 69 | - quoted: 70 | lexeme: 7 71 | span: {span: [49, 58], text: "\"abcdefg\""} 72 | delims: 73 | - {span: 
[49, 50], text: "\""} 74 | - {span: [57, 58], text: "\""} 75 | contents: [{text: {span: [50, 57], text: "abcdefg"}}] 76 | - keyword: 77 | lexeme: 0 78 | span: {span: [58, 59], text: ","} 79 | - quoted: 80 | lexeme: 7 81 | span: {span: [62, 70], text: "\"number\""} 82 | delims: 83 | - {span: [62, 63], text: "\""} 84 | - {span: [69, 70], text: "\""} 85 | contents: [{text: {span: [63, 69], text: "number"}}] 86 | - keyword: 87 | lexeme: 1 88 | span: {span: [70, 71], text: ":"} 89 | - ident: 90 | lexeme: 8 91 | span: {span: [72, 74], text: "42"} 92 | radix: 10 93 | blocks: [{span: [72, 74], text: "42"}] 94 | exponents: [] 95 | - keyword: 96 | lexeme: 0 97 | span: {span: [74, 75], text: ","} 98 | - quoted: 99 | lexeme: 7 100 | span: {span: [78, 83], text: "\"int\""} 101 | delims: 102 | - {span: [78, 79], text: "\""} 103 | - {span: [82, 83], text: "\""} 104 | contents: [{text: {span: [79, 82], text: "int"}}] 105 | - keyword: 106 | lexeme: 1 107 | span: {span: [83, 84], text: ":"} 108 | - ident: 109 | lexeme: 8 110 | span: {span: [85, 89], text: "42.0"} 111 | radix: 10 112 | blocks: 113 | - {span: [85, 87], text: "42"} 114 | - {span: [88, 89], text: "0"} 115 | exponents: [] 116 | - keyword: 117 | lexeme: 0 118 | span: {span: [89, 90], text: ","} 119 | - quoted: 120 | lexeme: 7 121 | span: {span: [93, 99], text: "\"frac\""} 122 | delims: 123 | - {span: [93, 94], text: "\""} 124 | - {span: [98, 99], text: "\""} 125 | contents: [{text: {span: [94, 98], text: "frac"}}] 126 | - keyword: 127 | lexeme: 1 128 | span: {span: [99, 100], text: ":"} 129 | - ident: 130 | lexeme: 8 131 | span: {span: [101, 105], text: "0.42"} 132 | radix: 10 133 | blocks: 134 | - {span: [101, 102], text: "0"} 135 | - {span: [103, 105], text: "42"} 136 | exponents: [] 137 | - keyword: 138 | lexeme: 0 139 | span: {span: [105, 106], text: ","} 140 | - quoted: 141 | lexeme: 7 142 | span: {span: [109, 114], text: "\"neg\""} 143 | delims: 144 | - {span: [109, 110], text: "\""} 145 | - {span: [113, 114], text: 
"\""} 146 | contents: [{text: {span: [110, 113], text: "neg"}}] 147 | - keyword: 148 | lexeme: 1 149 | span: {span: [114, 115], text: ":"} 150 | - ident: 151 | lexeme: 8 152 | span: {span: [116, 119], text: "-42"} 153 | radix: 10 154 | sign: "-" 155 | blocks: [{span: [117, 119], text: "42"}] 156 | exponents: [] 157 | - keyword: 158 | lexeme: 0 159 | span: {span: [119, 120], text: ","} 160 | - quoted: 161 | lexeme: 7 162 | span: {span: [123, 128], text: "\"exp\""} 163 | delims: 164 | - {span: [123, 124], text: "\""} 165 | - {span: [127, 128], text: "\""} 166 | contents: [{text: {span: [124, 127], text: "exp"}}] 167 | - keyword: 168 | lexeme: 1 169 | span: {span: [128, 129], text: ":"} 170 | - ident: 171 | lexeme: 8 172 | span: {span: [130, 136], text: "42e+42"} 173 | radix: 10 174 | blocks: [{span: [130, 132], text: "42"}] 175 | exponents: 176 | - span: {span: [130, 136], text: "42e+42"} 177 | prefix: {span: [132, 133], text: "e"} 178 | radix: 10 179 | sign: "+" 180 | blocks: [{span: [134, 136], text: "42"}] 181 | - keyword: 182 | lexeme: 0 183 | span: {span: [136, 137], text: ","} 184 | - quoted: 185 | lexeme: 7 186 | span: {span: [140, 146], text: "\"nest\""} 187 | delims: 188 | - {span: [140, 141], text: "\""} 189 | - {span: [145, 146], text: "\""} 190 | contents: [{text: {span: [141, 145], text: "nest"}}] 191 | - keyword: 192 | lexeme: 1 193 | span: {span: [146, 147], text: ":"} 194 | - bracket: 195 | lexeme: 6 196 | span: 197 | span: [148, 207] 198 | text: | 199 | { 200 | "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" 201 | } 202 | delims: 203 | - {span: [148, 149], text: "{"} 204 | - {span: [206, 207], text: "}"} 205 | contents: 206 | - quoted: 207 | lexeme: 7 208 | span: 209 | span: [154, 165] 210 | text: "\"escapes\\n\"" 211 | delims: 212 | - {span: [154, 155], text: "\""} 213 | - {span: [164, 165], text: "\""} 214 | contents: 215 | - text: {span: [155, 162], text: "escapes"} 216 | - {esc: {span: [162, 164], text: "\\n"}} 217 | - keyword: 218 | lexeme: 1 
219 | span: {span: [165, 166], text: ":"} 220 | - quoted: 221 | lexeme: 7 222 | span: 223 | span: [167, 203] 224 | text: "\"\\\"\\\\\\/\\b\\f\\n\\t\\r\\u0000\\u1234\\uffff\"" 225 | delims: 226 | - {span: [167, 168], text: "\""} 227 | - {span: [202, 203], text: "\""} 228 | contents: 229 | - {esc: {span: [168, 170], text: "\\\""}} 230 | - {esc: {span: [170, 172], text: "\\\\"}} 231 | - {esc: {span: [172, 174], text: "\\/"}} 232 | - {esc: {span: [174, 176], text: "\\b"}} 233 | - {esc: {span: [176, 178], text: "\\f"}} 234 | - {esc: {span: [178, 180], text: "\\n"}} 235 | - {esc: {span: [180, 182], text: "\\t"}} 236 | - {esc: {span: [182, 184], text: "\\r"}} 237 | - esc: {span: [184, 186], text: "\\u"} 238 | data: {span: [186, 190], text: "0000"} 239 | - esc: {span: [190, 192], text: "\\u"} 240 | data: {span: [192, 196], text: "1234"} 241 | - esc: {span: [196, 198], text: "\\u"} 242 | data: {span: [198, 202], text: "ffff"} 243 | - eof: 244 | lexeme: 2147483647 245 | span: {span: [209, 209], text: ""} 246 | -------------------------------------------------------------------------------- /ilex/tests/llvm/main.rs: -------------------------------------------------------------------------------- 1 | use ilex::rule::*; 2 | use ilex::Context; 3 | use ilex::Lexeme; 4 | 5 | #[ilex::spec] 6 | struct Llvm { 7 | #[rule(";")] 8 | comment: Lexeme, 9 | 10 | #[rule('(', ')')] 11 | parens: Lexeme, 12 | #[rule('[', ']')] 13 | brackets: Lexeme, 14 | #[rule('<', '>')] 15 | vector: Lexeme, 16 | #[rule('{', '}')] 17 | braces: Lexeme, 18 | #[rule("<{", "}>")] 19 | packed: Lexeme, 20 | #[rule("!{", "}")] 21 | meta: Lexeme, 22 | 23 | #[rule(',')] 24 | comma: Lexeme, 25 | #[rule('=')] 26 | equal: Lexeme, 27 | #[rule('*')] 28 | star: Lexeme, 29 | #[rule('x')] 30 | times: Lexeme, 31 | 32 | br: Lexeme, 33 | call: Lexeme, 34 | icmp: Lexeme, 35 | #[rule("eq")] 36 | icmp_eq: Lexeme, 37 | ret: Lexeme, 38 | unreachable: Lexeme, 39 | 40 | constant: Lexeme, 41 | declare: Lexeme, 42 | define: Lexeme, 43 | 
global: Lexeme, 44 | 45 | label: Lexeme, 46 | null: Lexeme, 47 | ptr: Lexeme, 48 | #[rule(Digital::new(10).prefix("i"))] 49 | int: Lexeme, 50 | void: Lexeme, 51 | 52 | private: Lexeme, 53 | unnamed_addr: Lexeme, 54 | nocapture: Lexeme, 55 | nounwind: Lexeme, 56 | 57 | #[named] 58 | #[rule(Quoted::new('"') 59 | .fixed_length_escape(r"\", 2) 60 | .prefixes(["", "c"]))] 61 | string: Lexeme, 62 | 63 | #[named("identifier")] 64 | #[rule(Ident::new() 65 | .ascii_only() 66 | .extra_starts(".0123456789".chars()) 67 | .suffix(":"))] 68 | label_ident: Lexeme, 69 | 70 | #[named("identifier")] 71 | #[rule(Ident::new() 72 | .ascii_only() 73 | .extra_starts(".0123456789".chars()) 74 | .prefixes(["!", "@", "%"]))] 75 | bare: Lexeme, 76 | 77 | #[named("quoted identifier")] 78 | #[rule(Quoted::new('"') 79 | .fixed_length_escape(r"\", 2) 80 | .prefixes(["!", "@", "%"]))] 81 | quoted: Lexeme, 82 | 83 | #[named = "number"] 84 | #[rule(Digital::new(10) 85 | .minus() 86 | .point_limit(0..2) 87 | .exponents(["e", "E"], Digits::new(10).plus().minus()))] 88 | dec: Lexeme, 89 | 90 | #[named = "number"] 91 | #[rule(Digital::new(16).minus().prefix("0x"))] 92 | hex: Lexeme, 93 | } 94 | 95 | #[gilded::test("tests/llvm/*.ll")] 96 | fn llvm(test: &gilded::Test) { 97 | let ctx = Context::new(); 98 | let report = ctx.new_report(); 99 | let file = ctx 100 | .new_file_from_bytes(test.path(), test.text(), &report) 101 | .unwrap(); 102 | 103 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 104 | match file.lex(Llvm::get().spec(), &report) { 105 | Ok(stream) => tokens(stream.summary()), 106 | Err(fatal) => stderr(fatal.to_string()), 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /ilex/tests/llvm/smoke.ll: -------------------------------------------------------------------------------- 1 | ; Declare the string constant as a global constant. 
2 | @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00" 3 | 4 | ; External declaration of the puts function 5 | declare i32 @"non trivial name"(ptr nocapture) nounwind 6 | 7 | ; Definition of main function 8 | define i32 @main(i32 %0, ptr %1) { 9 | ; Call puts function to write out the string to stdout. 10 | call i32 @"non trivial name"(ptr @.str) 11 | ret i32 0 12 | } 13 | 14 | ; Named metadata 15 | !0 = !{i32 42, null, !"string"} 16 | !foo = !{!0} 17 | @glb = global i8 0 18 | 19 | define void @f(ptr %a) { 20 | %c = icmp eq ptr %a, @glb 21 | br i1 %c, label %BB_EXIT, label %BB_CONTINUE ; escapes %a 22 | BB_EXIT: 23 | call void @exit() 24 | unreachable 25 | BB_CONTINUE: 26 | ret void 27 | } -------------------------------------------------------------------------------- /ilex/tests/numbers/main.rs: -------------------------------------------------------------------------------- 1 | use ilex::fp::Fp64; 2 | use ilex::report::Report; 3 | use ilex::rule::*; 4 | use ilex::token; 5 | use ilex::Context; 6 | use ilex::Lexeme; 7 | 8 | #[ilex::spec] 9 | struct Numbers { 10 | #[rule(",")] 11 | comma: Lexeme, 12 | 13 | #[named("binary number")] 14 | #[rule(Digital::new(2) 15 | .separator('_') 16 | .plus().minus() 17 | .point_limit(0..2) 18 | .exponent("2", Digits::new(2).plus().minus()) 19 | .prefixes(["0b", "0B", "%"]))] 20 | bin: Lexeme, 21 | 22 | #[named = "hexadecimal number"] 23 | #[rule(Digital::new(16) 24 | .separator('_') 25 | .plus().minus() 26 | .point_limit(0..2) 27 | .exponents(["p", "P"], Digits::new(10).plus().minus()) 28 | .prefixes(["0x", "0X", "$"]))] 29 | hex: Lexeme, 30 | 31 | #[named = "quaternary number"] 32 | #[rule(Digital::new(4) 33 | .separator('_') 34 | .plus().minus() 35 | .point_limit(0..2) 36 | .exponents(["p", "P"], Digits::new(10).plus().minus()) 37 | .prefixes(["0q", "0Q"]))] 38 | qua: Lexeme, 39 | 40 | #[named = "octal number"] 41 | #[rule(Digital::new(8) 42 | .separator('_') 43 | .plus().minus() 44 | .point_limit(0..2) 45 | 
.exponents(["p", "P"], Digits::new(10).plus().minus()) 46 | .prefixes(["0o", "0O", "0"]))] 47 | oct: Lexeme, 48 | 49 | #[named = "decimal number"] 50 | #[rule(Digital::new(10) 51 | .separator('_') 52 | .plus().minus() 53 | .point_limit(0..2) 54 | .exponents(["e", "E"], Digits::new(10).plus().minus()) 55 | .exponent("^", Digits::new(16).plus().minus()))] 56 | dec: Lexeme, 57 | } 58 | 59 | #[gilded::test("tests/numbers/*.txt")] 60 | fn numbers(test: &gilded::Test) { 61 | let ctx = Context::new(); 62 | let report = ctx.new_report(); 63 | let file = ctx 64 | .new_file_from_bytes(test.path(), test.text(), &report) 65 | .unwrap(); 66 | 67 | let [tokens, fp64, stderr] = 68 | test.outputs(["tokens.yaml", "fp64.txt", "stderr"]); 69 | 70 | match file.lex(Numbers::get().spec(), &report) { 71 | Ok(stream) => { 72 | tokens(stream.summary()); 73 | match parse(Numbers::get(), stream.cursor(), &report) { 74 | Ok(v) => fp64(format!("{v:#?}")), 75 | Err(fatal) => stderr(fatal.to_string()), 76 | } 77 | } 78 | 79 | Err(fatal) => stderr(fatal.to_string()), 80 | } 81 | } 82 | 83 | fn parse( 84 | lex: &Numbers, 85 | mut cursor: ilex::token::Cursor, 86 | report: &Report, 87 | ) -> Result, ilex::report::Fatal> { 88 | let numbers = cursor 89 | .delimited(lex.comma, |cursor| loop { 90 | let value = token::switch() 91 | .case(Lexeme::eof(), |_, _| Err(false)) 92 | .cases([lex.dec, lex.bin, lex.oct, lex.hex, lex.qua], |num, _| { 93 | Ok(num.to_float::(.., report).unwrap()) 94 | }) 95 | .take(cursor, report); 96 | match value { 97 | None => { 98 | cursor.back_up(1); 99 | return Some(Fp64::nan()); 100 | } 101 | Some(Err(false)) => return None, 102 | Some(Err(true)) => continue, 103 | Some(Ok(v)) => return Some(v), 104 | } 105 | }) 106 | .map(|(v, _)| v) 107 | .collect::>(); 108 | cursor.expect_finished(report); 109 | report.fatal_or(numbers) 110 | } 111 | -------------------------------------------------------------------------------- /ilex/tests/numbers/numbers.fp64.txt: 
-------------------------------------------------------------------------------- 1 | [ 2 | 0.0, 3 | -0.0, 4 | -0.0, 5 | 1.23456e+80, 6 | 9.0e+9, 7 | -9.0e+9, 8 | 9.0e+9, 9 | 8.9999999999999995e-9, 10 | -511.0, 11 | 511.0, 12 | 4.0, 13 | 0.0048828125, 14 | 0.0048828125, 15 | 2.03125, 16 | 4194232.0, 17 | 3735928559.0, 18 | -27.10546875, 19 | 3.0e+10, 20 | ] -------------------------------------------------------------------------------- /ilex/tests/numbers/numbers.tokens.yaml: -------------------------------------------------------------------------------- 1 | - ident: 2 | lexeme: 5 3 | span: {span: [0, 1], text: "0"} 4 | radix: 10 5 | blocks: [{span: [0, 1], text: "0"}] 6 | exponents: [] 7 | - keyword: 8 | lexeme: 0 9 | span: {span: [1, 2], text: ","} 10 | - ident: 11 | lexeme: 4 12 | span: {span: [3, 6], text: "-00"} 13 | prefix: {span: [3, 4], text: "-"} 14 | radix: 8 15 | sign: "-" 16 | blocks: [{span: [5, 6], text: "0"}] 17 | exponents: [] 18 | - keyword: 19 | lexeme: 0 20 | span: {span: [6, 7], text: ","} 21 | - ident: 22 | lexeme: 5 23 | span: {span: [8, 12], text: "-0.0"} 24 | radix: 10 25 | sign: "-" 26 | blocks: 27 | - {span: [9, 10], text: "0"} 28 | - {span: [11, 12], text: "0"} 29 | exponents: [] 30 | - keyword: 31 | lexeme: 0 32 | span: {span: [12, 13], text: ","} 33 | - ident: 34 | lexeme: 5 35 | span: {span: [14, 24], text: "123.456e78"} 36 | radix: 10 37 | blocks: 38 | - {span: [14, 17], text: "123"} 39 | - {span: [18, 21], text: "456"} 40 | exponents: 41 | - span: {span: [14, 24], text: "123.456e78"} 42 | prefix: {span: [21, 22], text: "e"} 43 | radix: 10 44 | blocks: [{span: [22, 24], text: "78"}] 45 | - keyword: 46 | lexeme: 0 47 | span: {span: [24, 25], text: ","} 48 | - ident: 49 | lexeme: 5 50 | span: {span: [26, 29], text: "9e9"} 51 | radix: 10 52 | blocks: [{span: [26, 27], text: "9"}] 53 | exponents: 54 | - span: {span: [26, 29], text: "9e9"} 55 | prefix: {span: [27, 28], text: "e"} 56 | radix: 10 57 | blocks: [{span: [28, 29], text: "9"}] 
58 | - keyword: 59 | lexeme: 0 60 | span: {span: [29, 30], text: ","} 61 | - ident: 62 | lexeme: 5 63 | span: {span: [31, 35], text: "-9e9"} 64 | radix: 10 65 | sign: "-" 66 | blocks: [{span: [32, 33], text: "9"}] 67 | exponents: 68 | - span: {span: [31, 35], text: "-9e9"} 69 | prefix: {span: [33, 34], text: "e"} 70 | radix: 10 71 | blocks: [{span: [34, 35], text: "9"}] 72 | - keyword: 73 | lexeme: 0 74 | span: {span: [35, 36], text: ","} 75 | - ident: 76 | lexeme: 5 77 | span: {span: [37, 42], text: "+9e+9"} 78 | radix: 10 79 | sign: "+" 80 | blocks: [{span: [38, 39], text: "9"}] 81 | exponents: 82 | - span: {span: [37, 42], text: "+9e+9"} 83 | prefix: {span: [39, 40], text: "e"} 84 | radix: 10 85 | sign: "+" 86 | blocks: [{span: [41, 42], text: "9"}] 87 | - keyword: 88 | lexeme: 0 89 | span: {span: [42, 43], text: ","} 90 | - ident: 91 | lexeme: 5 92 | span: {span: [44, 48], text: "9e-9"} 93 | radix: 10 94 | blocks: [{span: [44, 45], text: "9"}] 95 | exponents: 96 | - span: {span: [44, 48], text: "9e-9"} 97 | prefix: {span: [45, 46], text: "e"} 98 | radix: 10 99 | sign: "-" 100 | blocks: [{span: [47, 48], text: "9"}] 101 | - keyword: 102 | lexeme: 0 103 | span: {span: [48, 49], text: ","} 104 | - ident: 105 | lexeme: 4 106 | span: {span: [50, 55], text: "-0777"} 107 | prefix: {span: [50, 51], text: "-"} 108 | radix: 8 109 | sign: "-" 110 | blocks: [{span: [52, 55], text: "777"}] 111 | exponents: [] 112 | - keyword: 113 | lexeme: 0 114 | span: {span: [55, 56], text: ","} 115 | - ident: 116 | lexeme: 4 117 | span: {span: [57, 62], text: "0o777"} 118 | prefix: {span: [57, 59], text: "0o"} 119 | radix: 8 120 | blocks: [{span: [59, 62], text: "777"}] 121 | exponents: [] 122 | - keyword: 123 | lexeme: 0 124 | span: {span: [62, 63], text: ","} 125 | - ident: 126 | lexeme: 1 127 | span: {span: [64, 69], text: "%1210"} 128 | prefix: {span: [64, 65], text: "%"} 129 | radix: 2 130 | blocks: [{span: [65, 66], text: "1"}] 131 | exponents: 132 | - span: {span: [64, 69], text: 
"%1210"} 133 | prefix: {span: [66, 67], text: "2"} 134 | radix: 2 135 | blocks: [{span: [67, 69], text: "10"}] 136 | - keyword: 137 | lexeme: 0 138 | span: {span: [69, 70], text: ","} 139 | - ident: 140 | lexeme: 1 141 | span: {span: [71, 85], text: "0b0.0000000101"} 142 | prefix: {span: [71, 73], text: "0b"} 143 | radix: 2 144 | blocks: 145 | - {span: [73, 74], text: "0"} 146 | - {span: [75, 85], text: "0000000101"} 147 | exponents: [] 148 | - keyword: 149 | lexeme: 0 150 | span: {span: [85, 86], text: ","} 151 | - ident: 152 | lexeme: 4 153 | span: {span: [87, 95], text: "0o0.0024"} 154 | prefix: {span: [87, 89], text: "0o"} 155 | radix: 8 156 | blocks: 157 | - {span: [89, 90], text: "0"} 158 | - {span: [91, 95], text: "0024"} 159 | exponents: [] 160 | - keyword: 161 | lexeme: 0 162 | span: {span: [95, 96], text: ","} 163 | - ident: 164 | lexeme: 4 165 | span: {span: [97, 106], text: "0O1.01p01"} 166 | prefix: {span: [97, 99], text: "0O"} 167 | radix: 8 168 | blocks: 169 | - {span: [99, 100], text: "1"} 170 | - {span: [101, 103], text: "01"} 171 | exponents: 172 | - span: {span: [97, 106], text: "0O1.01p01"} 173 | prefix: {span: [103, 104], text: "p"} 174 | radix: 10 175 | blocks: [{span: [104, 106], text: "01"}] 176 | - keyword: 177 | lexeme: 0 178 | span: {span: [106, 107], text: ","} 179 | - ident: 180 | lexeme: 2 181 | span: {span: [108, 120], text: "0xfff.eep+10"} 182 | prefix: {span: [108, 110], text: "0x"} 183 | radix: 16 184 | blocks: 185 | - {span: [110, 113], text: "fff"} 186 | - {span: [114, 116], text: "ee"} 187 | exponents: 188 | - span: {span: [108, 120], text: "0xfff.eep+10"} 189 | prefix: {span: [116, 117], text: "p"} 190 | radix: 10 191 | sign: "+" 192 | blocks: [{span: [118, 120], text: "10"}] 193 | - keyword: 194 | lexeme: 0 195 | span: {span: [120, 121], text: ","} 196 | - ident: 197 | lexeme: 2 198 | span: {span: [122, 131], text: "$DEADBEEF"} 199 | prefix: {span: [122, 123], text: "$"} 200 | radix: 16 201 | blocks: [{span: [123, 131], text: 
"DEADBEEF"}] 202 | exponents: [] 203 | - keyword: 204 | lexeme: 0 205 | span: {span: [131, 132], text: ","} 206 | - ident: 207 | lexeme: 3 208 | span: {span: [133, 145], text: "-0q0123.0123"} 209 | prefix: {span: [133, 135], text: "-0"} 210 | radix: 4 211 | sign: "-" 212 | blocks: 213 | - {span: [136, 140], text: "0123"} 214 | - {span: [141, 145], text: "0123"} 215 | exponents: [] 216 | - keyword: 217 | lexeme: 0 218 | span: {span: [145, 146], text: ","} 219 | - ident: 220 | lexeme: 5 221 | span: {span: [147, 150], text: "3^a"} 222 | radix: 10 223 | blocks: [{span: [147, 148], text: "3"}] 224 | exponents: 225 | - span: {span: [147, 150], text: "3^a"} 226 | prefix: {span: [148, 149], text: "^"} 227 | radix: 16 228 | blocks: [{span: [149, 150], text: "a"}] 229 | - keyword: 230 | lexeme: 0 231 | span: {span: [150, 151], text: ","} 232 | - eof: 233 | lexeme: 2147483647 234 | span: {span: [151, 151], text: ""} 235 | -------------------------------------------------------------------------------- /ilex/tests/numbers/numbers.txt: -------------------------------------------------------------------------------- 1 | 0, 2 | -00, 3 | -0.0, 4 | 123.456e78, 5 | 9e9, 6 | -9e9, 7 | +9e+9, 8 | 9e-9, 9 | -0777, 10 | 0o777, 11 | %1210, 12 | 0b0.0000000101, 13 | 0o0.0024, 14 | 0O1.01p01, 15 | 0xfff.eep+10, 16 | $DEADBEEF, 17 | -0q0123.0123, 18 | 3^a, -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/idents.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected `b` in `/`-suffixed number 2 | --> ambiguous/idents.txt:1:6 3 | | 4 | 1 | /foo/bar/ 5 | | ^ 6 | | --- help: because this value is decimal (base 10), digits should be within '0'..='9' 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:593:34 9 | 10 | error: unexpected `a` in `/`-suffixed number 11 | --> ambiguous/idents.txt:1:7 12 | | 13 | 1 | /foo/bar/ 14 | | ^ 15 | | --- help: because this value is decimal (base 10), 
digits should be within '0'..='9' 16 | | 17 | = note: reported at: ilex/src/rt/emit2.rs:593:34 18 | 19 | error: unexpected `r` in `/`-suffixed number 20 | --> ambiguous/idents.txt:1:8 21 | | 22 | 1 | /foo/bar/ 23 | | ^ 24 | | --- help: because this value is decimal (base 10), digits should be within '0'..='9' 25 | | 26 | = note: reported at: ilex/src/rt/emit2.rs:593:34 27 | 28 | error: aborting due to 3 errors 29 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/idents.txt: -------------------------------------------------------------------------------- 1 | /foo/bar/ 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_br.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected closing `able` 2 | --> ambiguous/no_xid_after_br.txt:1:28 3 | | 4 | 1 | $[] $null[]null $null[]nullable 5 | | ^^^^ expected to be opened by `--able` 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:254:22 8 | 9 | error: aborting due to previous error 10 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_br.txt: -------------------------------------------------------------------------------- 1 | $[] $null[]null $null[]nullable 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_cm.stderr: -------------------------------------------------------------------------------- 1 | error: extraneous characters after `--null ... 
null` 2 | --> ambiguous/no_xid_after_cm.txt:1:51 3 | | 4 | 1 | --null some stuff null --null some more stuff nullnull 5 | | ^^^^ 6 | | -- help: maybe you meant to include a space here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:810:10 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_cm.txt: -------------------------------------------------------------------------------- 1 | --null some stuff null --null some more stuff nullnull -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_id.stderr: -------------------------------------------------------------------------------- 1 | error: extraneous characters after `/`-prefixed, `%q`-suffixed identifier 2 | --> ambiguous/no_xid_after_id.txt:1:22 3 | | 4 | 1 | /foo%q /null%q /foo%qua 5 | | ^^ 6 | | -- help: maybe you meant to include a space here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:810:10 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_id.txt: -------------------------------------------------------------------------------- 1 | /foo%q /null%q /foo%qua 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_kw.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected closing `nullable` 2 | --> ambiguous/no_xid_after_kw.txt:1:6 3 | | 4 | 1 | null nullable 5 | | ^^^^^^^^ expected to be opened by `--nullable` 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:254:22 8 | 9 | error: aborting due to previous error 10 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_kw.txt: 
-------------------------------------------------------------------------------- 1 | null nullable 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_nm.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected `q` in `%`-prefixed number 2 | --> ambiguous/no_xid_after_nm.txt:1:10 3 | | 4 | 1 | %123 %123qua 5 | | ^ 6 | | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:593:34 9 | 10 | error: unexpected `u` in `%`-prefixed number 11 | --> ambiguous/no_xid_after_nm.txt:1:11 12 | | 13 | 1 | %123 %123qua 14 | | ^ 15 | | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' 16 | | 17 | = note: reported at: ilex/src/rt/emit2.rs:593:34 18 | 19 | error: unexpected `a` in `%`-prefixed number 20 | --> ambiguous/no_xid_after_nm.txt:1:12 21 | | 22 | 1 | %123 %123qua 23 | | ^ 24 | | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' 25 | | 26 | = note: reported at: ilex/src/rt/emit2.rs:593:34 27 | 28 | error: aborting due to 3 errors 29 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_nm.txt: -------------------------------------------------------------------------------- 1 | %123 %123qua 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_st.stderr: -------------------------------------------------------------------------------- 1 | error: extraneous characters after `%'...'q` 2 | --> ambiguous/no_xid_after_st.txt:1:16 3 | | 4 | 1 | q'xyz'q %'xyz'qua 5 | | ^^ 6 | | -- help: maybe you meant to include a space here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:810:10 9 | 10 | error: aborting due to previous error 11 | 
-------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/no_xid_after_st.txt: -------------------------------------------------------------------------------- 1 | q'xyz'q %'xyz'qua -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/nums.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected closing `xyz` 2 | --> ambiguous/nums.txt:1:16 3 | | 4 | 1 | 1234%1234 1234/xyz 5 | | ^^^ expected to be opened by `--xyz` 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:254:22 8 | 9 | error: aborting due to previous error 10 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/nums.txt: -------------------------------------------------------------------------------- 1 | 1234%1234 1234/xyz 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/symbols_after_comment.tokens.yaml: -------------------------------------------------------------------------------- 1 | - keyword: 2 | lexeme: 0 3 | span: {span: [14, 18], text: "null"} 4 | - keyword: 5 | lexeme: 1 6 | span: {span: [37, 42], text: "-null"} 7 | - keyword: 8 | lexeme: 0 9 | span: {span: [43, 47], text: "null"} 10 | - eof: 11 | lexeme: 2147483647 12 | span: {span: [48, 48], text: ""} 13 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/symbols_after_comment.txt: -------------------------------------------------------------------------------- 1 | -/ comment /- null -/ more comment /--null null 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/symbols_after_quoted.tokens.yaml: -------------------------------------------------------------------------------- 1 | - quoted: 2 | lexeme: 9 3 | span: {span: [0, 12], text: "qnull(a)null"} 4 | delims: 
5 | - {span: [0, 6], text: "qnull("} 6 | - {span: [7, 12], text: ")null"} 7 | contents: [{text: {span: [6, 7], text: "a"}}] 8 | - eof: 9 | lexeme: 2147483647 10 | span: {span: [13, 13], text: ""} 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/ambiguous/symbols_after_quoted.txt: -------------------------------------------------------------------------------- 1 | qnull(a)null 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/invalid.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected `8` in `0o`-prefixed number 2 | --> digital/invalid.txt:2:3 3 | | 4 | 2 | 0o8 5 | | ^ 6 | | --- help: because this value is octal (base 8), digits should be within '0'..='7' 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:593:34 9 | 10 | error: unexpected `8` in `0o`-prefixed number 11 | --> digital/invalid.txt:3:4 12 | | 13 | 3 | 0o08 14 | | ^ 15 | | ---- help: because this value is octal (base 8), digits should be within '0'..='7' 16 | | 17 | = note: reported at: ilex/src/rt/emit2.rs:593:34 18 | 19 | error: unexpected `a` in number 20 | --> digital/invalid.txt:4:5 21 | | 22 | 4 | 0/0/aa11g 23 | | ^ 24 | | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' 25 | | 26 | = note: reported at: ilex/src/rt/emit2.rs:593:34 27 | 28 | error: unexpected `a` in number 29 | --> digital/invalid.txt:4:6 30 | | 31 | 4 | 0/0/aa11g 32 | | ^ 33 | | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' 34 | | 35 | = note: reported at: ilex/src/rt/emit2.rs:593:34 36 | 37 | error: unexpected `g` in number 38 | --> digital/invalid.txt:4:9 39 | | 40 | 4 | 0/0/aa11g 41 | | ^ 42 | | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' 43 | | 44 | = note: reported at: ilex/src/rt/emit2.rs:593:34 45 | 46 | error: aborting due to 
5 errors 47 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/invalid.txt: -------------------------------------------------------------------------------- 1 | 0o777 2 | 0o8 3 | 0o08 4 | 0/0/aa11g 5 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/missing.stderr: -------------------------------------------------------------------------------- 1 | error: expected digits after `0x`, but found ` ` 2 | --> digital/missing.txt:2:3 3 | | 4 | 2 | 0x 0xf 5 | | ^ expected digits after `0x` 6 | | ^^ because of this prefix 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:571:18 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/missing.txt: -------------------------------------------------------------------------------- 1 | 0xdeadbeef 2 | 0x 0xf 3 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/points.stderr: -------------------------------------------------------------------------------- 1 | error: expected at least 2 `/`s 2 | --> digital/points.txt:2:7 3 | | 4 | 2 | 1/2/3/4e4/5 5 | | ^ 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:554:16 8 | 9 | error: unrecognized character 10 | --> digital/points.txt:2:6 11 | | 12 | 2 | 1/2/3/4e4/5 13 | | ^ 14 | | 15 | = note: reported at: ilex/src/rt/mod.rs:36:8 16 | 17 | error: expected at least 2 `/`s 18 | --> digital/points.txt:3:1 19 | | 20 | 3 | 1/2e4/5 21 | | ^^^ 22 | | 23 | = note: reported at: ilex/src/rt/emit2.rs:554:16 24 | 25 | error: expected at least 2 `/`s 26 | --> digital/points.txt:4:11 27 | | 28 | 4 | 1/2/3e4/5/6 29 | | ^ 30 | | 31 | = note: reported at: ilex/src/rt/emit2.rs:554:16 32 | 33 | error: unrecognized character 34 | --> digital/points.txt:4:10 35 | | 36 | 4 | 1/2/3e4/5/6 37 | | ^ 38 | | 39 | = note: reported at: 
ilex/src/rt/mod.rs:36:8 40 | 41 | error: expected at least 1 `/` 42 | --> digital/points.txt:5:6 43 | | 44 | 5 | 1/2/3e4 45 | | ^^ 46 | | 47 | = note: reported at: ilex/src/rt/emit2.rs:554:16 48 | 49 | error: aborting due to 6 errors 50 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/points.txt: -------------------------------------------------------------------------------- 1 | 1/2/3e4/5 2 | 1/2/3/4e4/5 3 | 1/2e4/5 4 | 1/2/3e4/5/6 5 | 1/2/3e4 -------------------------------------------------------------------------------- /ilex/tests/ui/digital/separators.stderr: -------------------------------------------------------------------------------- 1 | error: unexpected digit separator in `no_prefix@`-prefixed number 2 | --> digital/separators.txt:2:11 3 | | 4 | 2 | no_prefix@_123_._456_e_789_._012_ 5 | | ^ 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:418:36 8 | 9 | error: unexpected digit separator in `no_suffix@`-prefixed number 10 | --> digital/separators.txt:3:33 11 | | 12 | 3 | no_suffix@_123_._456_e_789_._012_ 13 | | ^ 14 | | 15 | = note: reported at: ilex/src/rt/emit2.rs:505:28 16 | 17 | error: unexpected digit separator in `no_point@`-prefixed number 18 | --> digital/separators.txt:4:15 19 | | 20 | 4 | no_point@_123_._456_e_789_._012_ 21 | | ^ 22 | | 23 | = note: reported at: ilex/src/rt/emit2.rs:435:32 24 | 25 | error: unexpected digit separator in `no_point@`-prefixed number 26 | --> digital/separators.txt:4:16 27 | | 28 | 4 | no_point@_123_._456_e_789_._012_ 29 | | ^ 30 | | 31 | = note: reported at: ilex/src/rt/emit2.rs:418:36 32 | 33 | error: unexpected digit separator in `no_point@`-prefixed number 34 | --> digital/separators.txt:4:27 35 | | 36 | 4 | no_point@_123_._456_e_789_._012_ 37 | | ^ 38 | | 39 | = note: reported at: ilex/src/rt/emit2.rs:435:32 40 | 41 | error: unexpected digit separator in `no_point@`-prefixed number 42 | --> digital/separators.txt:4:28 43 | | 44 | 4 | 
no_point@_123_._456_e_789_._012_ 45 | | ^ 46 | | 47 | = note: reported at: ilex/src/rt/emit2.rs:418:36 48 | 49 | error: unexpected digit separator in `no_exp@`-prefixed number 50 | --> digital/separators.txt:5:19 51 | | 52 | 5 | no_exp@_123_._456_e_789_._012_ 53 | | ^ 54 | | 55 | = note: reported at: ilex/src/rt/emit2.rs:455:34 56 | 57 | error: unexpected digit separator in `no_exp@`-prefixed number 58 | --> digital/separators.txt:5:20 59 | | 60 | 5 | no_exp@_123_._456_e_789_._012_ 61 | | ^ 62 | | 63 | = note: reported at: ilex/src/rt/emit2.rs:418:36 64 | 65 | error: aborting due to 8 errors 66 | -------------------------------------------------------------------------------- /ilex/tests/ui/digital/separators.txt: -------------------------------------------------------------------------------- 1 | all_ok@_123_._456_e_789_._012_ 2 | no_prefix@_123_._456_e_789_._012_ 3 | no_suffix@_123_._456_e_789_._012_ 4 | no_point@_123_._456_e_789_._012_ 5 | no_exp@_123_._456_e_789_._012_ -------------------------------------------------------------------------------- /ilex/tests/ui/eof/bracket.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `]`, but found 2 | --> eof/bracket.txt:1:11 3 | | 4 | 1 | [[[]]] [[] 5 | | ^ expected `]` here 6 | | - help: previously opened here 7 | | 8 | = note: reported at: ilex/src/rt/lexer.rs:323:10 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/bracket.txt: -------------------------------------------------------------------------------- 1 | [[[]]] [[] 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/bracket_multiline.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `]`, but found 2 | --> eof/bracket_multiline.txt:3:3 3 | | 4 | 3 | ][ 5 | | ^ expected `]` here 
6 | | - help: previously opened here 7 | | 8 | = note: reported at: ilex/src/rt/lexer.rs:323:10 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/bracket_multiline.txt: -------------------------------------------------------------------------------- 1 | [ 2 | [] 3 | ][ -------------------------------------------------------------------------------- /ilex/tests/ui/eof/comment.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `*/`, but found 2 | --> eof/comment.txt:1:38 3 | | 4 | 1 | /* ok /* nested */ */ /* /* not ok */ 5 | | ^ expected `*/` here 6 | | -- help: previously opened here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:331:14 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/comment.txt: -------------------------------------------------------------------------------- 1 | /* ok /* nested */ */ /* /* not ok */ -------------------------------------------------------------------------------- /ilex/tests/ui/eof/comment_multiline.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `*/`, but found 2 | --> eof/comment_multiline.txt:4:15 3 | | 4 | 3 | /* 5 | | -- help: previously opened here 6 | 4 | /* not ok */ 7 | | ^ expected `*/` here 8 | | 9 | = note: reported at: ilex/src/rt/emit2.rs:331:14 10 | 11 | error: aborting due to previous error 12 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/comment_multiline.txt: -------------------------------------------------------------------------------- 1 | /* ok 2 | /* nested */ */ 3 | /* 4 | /* not ok */ 5 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/mixed_brackets.stderr: 
-------------------------------------------------------------------------------- 1 | error: unexpected closing `)` 2 | --> eof/mixed_brackets.txt:1:8 3 | | 4 | 1 | [] () [) (] [(]) 5 | | ^ expected to be opened by `(` 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:254:22 8 | 9 | error: expected closing `)`, but found `]` 10 | --> eof/mixed_brackets.txt:1:11 11 | | 12 | 1 | [] () [) (] [(]) 13 | | ^ expected `)` here 14 | | - help: previously opened here 15 | | 16 | = note: reported at: ilex/src/rt/lexer.rs:207:23 17 | 18 | error: expected closing `)`, but found `]` 19 | --> eof/mixed_brackets.txt:1:15 20 | | 21 | 1 | [] () [) (] [(]) 22 | | ^ expected `)` here 23 | | - help: previously opened here 24 | | 25 | = note: reported at: ilex/src/rt/lexer.rs:207:23 26 | 27 | error: expected closing `)`, but found 28 | --> eof/mixed_brackets.txt:1:17 29 | | 30 | 1 | [] () [) (] [(]) 31 | | ^ expected `)` here 32 | | - help: previously opened here 33 | | 34 | = note: reported at: ilex/src/rt/lexer.rs:323:10 35 | 36 | error: aborting due to 4 errors 37 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/mixed_brackets.txt: -------------------------------------------------------------------------------- 1 | [] () [) (] [(]) -------------------------------------------------------------------------------- /ilex/tests/ui/eof/mixed_brackets_multiline.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `)`, but found `]` 2 | --> eof/mixed_brackets_multiline.txt:6:3 3 | | 4 | 5 | ( 5 | | - help: previously opened here 6 | 6 | ] 7 | | ^ expected `)` here 8 | | 9 | = note: reported at: ilex/src/rt/lexer.rs:207:23 10 | 11 | error: unexpected closing `)` 12 | --> eof/mixed_brackets_multiline.txt:9:3 13 | | 14 | 9 | ) 15 | | ^ expected to be opened by `(` 16 | | 17 | = note: reported at: ilex/src/rt/emit2.rs:254:22 18 | 19 | error: expected closing `)`, but found `]` 20 
| --> eof/mixed_brackets_multiline.txt:11:1 21 | | 22 | 10 | ( 23 | | - help: previously opened here 24 | 11 | ] 25 | | ^ expected `)` here 26 | | 27 | = note: reported at: ilex/src/rt/lexer.rs:207:23 28 | 29 | error: expected closing `)`, but found 30 | --> eof/mixed_brackets_multiline.txt:11:2 31 | | 32 | 10 | ( 33 | | - help: previously opened here 34 | 11 | ] 35 | | ^ expected `)` here 36 | | 37 | = note: reported at: ilex/src/rt/lexer.rs:323:10 38 | 39 | error: aborting due to 4 errors 40 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/mixed_brackets_multiline.txt: -------------------------------------------------------------------------------- 1 | [ 2 | () 3 | ] 4 | [ 5 | ( 6 | ] 7 | ) 8 | [ 9 | ) 10 | ( 11 | ] -------------------------------------------------------------------------------- /ilex/tests/ui/eof/quoted.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `'`, but found 2 | --> eof/quoted.txt:1:14 3 | | 4 | 1 | 'foo' '' 'bar 5 | | ^ expected `'` here 6 | | - help: previously opened here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:722:14 9 | 10 | error: aborting due to previous error 11 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/quoted.txt: -------------------------------------------------------------------------------- 1 | 'foo' '' 'bar 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/eof/quoted_multiline.stderr: -------------------------------------------------------------------------------- 1 | error: expected closing `'`, but found 2 | --> eof/quoted_multiline.txt:3:5 3 | | 4 | 3 | 'bar 5 | | ^ expected `'` here 6 | | - help: previously opened here 7 | | 8 | = note: reported at: ilex/src/rt/emit2.rs:722:14 9 | 10 | error: aborting due to previous error 11 | 
-------------------------------------------------------------------------------- /ilex/tests/ui/eof/quoted_multiline.txt: -------------------------------------------------------------------------------- 1 | 'foo' 2 | '' 3 | 'bar 4 | -------------------------------------------------------------------------------- /ilex/tests/ui/main.rs: -------------------------------------------------------------------------------- 1 | use ilex::report::Options; 2 | use ilex::rule::*; 3 | use ilex::Context; 4 | use ilex::Lexeme; 5 | 6 | #[gilded::test("tests/ui/ambiguous/*.txt")] 7 | fn ambiguous(test: &gilded::Test) { 8 | #[ilex::spec] 9 | struct Spec { 10 | #[rule("null")] 11 | kw: Lexeme, 12 | #[rule("-null")] 13 | kw2: Lexeme, 14 | #[rule(")null")] 15 | kw3: Lexeme, 16 | 17 | #[rule(Comment::nesting(Bracket::rust_style( 18 | "/", 19 | ("-", ""), 20 | ("", "-"), 21 | )))] 22 | cm: Lexeme, 23 | #[rule(Comment::nesting(Bracket::cxx_style( 24 | Ident::new().min_len(1), 25 | ("--", ""), 26 | ("", ""), 27 | )))] 28 | cm2: Lexeme, 29 | #[rule(Bracket::cxx_style( 30 | Ident::new(), 31 | ("$", "["), 32 | ("]", ""), 33 | ))] 34 | br: Lexeme, 35 | #[rule(Ident::new() 36 | .prefix("/") 37 | .suffixes(["", "%q", "/"]))] 38 | id: Lexeme, 39 | #[rule(Digital::new(10) 40 | .prefixes(["", "%"]) 41 | .suffixes(["", "%", "q", "/"]))] 42 | nm: Lexeme, 43 | #[rule(Quoted::new("'") 44 | .prefixes(["%", "q"]) 45 | .suffixes(["", "%", "q"]))] 46 | st: Lexeme, 47 | #[rule(Quoted::with(Bracket::cxx_style( 48 | Ident::new(), 49 | ("q", "("), 50 | (")", ""), 51 | )))] 52 | st2: Lexeme, 53 | } 54 | 55 | let ctx = Context::new(); 56 | let report = 57 | ctx.new_report_with(Options { color: false, ..Default::default() }); 58 | let file = ctx 59 | .new_file_from_bytes(test.path(), test.text(), &report) 60 | .unwrap(); 61 | 62 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 63 | match file.lex(Spec::get().spec(), &report) { 64 | Ok(stream) => tokens(stream.summary()), 65 | Err(fatal) => 
stderr(fatal.to_string()), 66 | } 67 | } 68 | 69 | #[gilded::test("tests/ui/digital/*.txt")] 70 | fn digital(test: &gilded::Test) { 71 | #[ilex::spec] 72 | struct Spec { 73 | #[rule(Digital::new(16).prefix("0x"))] 74 | m1: Lexeme, 75 | #[rule(Digital::new(8).prefix("0o"))] 76 | m2: Lexeme, 77 | 78 | #[rule( Digital::new(10) 79 | .point_limit(2..3) 80 | .point('/') 81 | .exponent("e", Digits::new(10).point_limit(1..2)) 82 | .separator_with("_", 83 | SeparatorCornerCases { 84 | prefix: true, 85 | suffix: true, 86 | around_point: true, 87 | around_exp: true, 88 | }))] 89 | m0: Lexeme, 90 | #[rule(Digital::new(10) 91 | .prefix("all_ok@") 92 | .point_limit(0..3) 93 | .exponent("e", Digits::new(10).point_limit(0..3)) 94 | .separator_with("_", 95 | SeparatorCornerCases { 96 | prefix: true, 97 | suffix: true, 98 | around_point: true, 99 | around_exp: true, 100 | }))] 101 | n0: Lexeme, 102 | #[rule( Digital::new(10) 103 | .prefix("no_prefix@") 104 | .point_limit(0..3) 105 | .exponent("e", Digits::new(10).point_limit(0..3)) 106 | .separator_with("_", 107 | SeparatorCornerCases { 108 | prefix: false, 109 | suffix: true, 110 | around_point: true, 111 | around_exp: true, 112 | }))] 113 | n1: Lexeme, 114 | #[rule(Digital::new(10) 115 | .prefix("no_suffix@") 116 | .point_limit(0..3) 117 | .exponent("e", Digits::new(10).point_limit(0..3)) 118 | .separator_with("_", 119 | SeparatorCornerCases { 120 | prefix: true, 121 | suffix: false, 122 | around_point: true, 123 | around_exp: true, 124 | }))] 125 | n2: Lexeme, 126 | #[rule( Digital::new(10) 127 | .prefix("no_point@") 128 | .point_limit(0..3) 129 | .exponent("e", Digits::new(10).point_limit(0..3)) 130 | .separator_with("_", 131 | SeparatorCornerCases { 132 | prefix: true, 133 | suffix: true, 134 | around_point: false, 135 | around_exp: true, 136 | }))] 137 | n3: Lexeme, 138 | #[rule(Digital::new(10) 139 | .prefix("no_exp@") 140 | .point_limit(0..3) 141 | .exponent("e", Digits::new(10).point_limit(0..3)) 142 | .separator_with("_", 
143 | SeparatorCornerCases { 144 | prefix: true, 145 | suffix: true, 146 | around_point: true, 147 | around_exp: false, 148 | }))] 149 | n4: Lexeme, 150 | } 151 | 152 | let ctx = Context::new(); 153 | let report = 154 | ctx.new_report_with(Options { color: false, ..Default::default() }); 155 | let file = ctx 156 | .new_file_from_bytes(test.path(), test.text(), &report) 157 | .unwrap(); 158 | 159 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 160 | match file.lex(Spec::get().spec(), &report) { 161 | Ok(stream) => tokens(stream.summary()), 162 | Err(fatal) => stderr(fatal.to_string()), 163 | } 164 | } 165 | 166 | #[gilded::test("tests/ui/eof/*.txt")] 167 | fn eof(test: &gilded::Test) { 168 | #[ilex::spec] 169 | struct Spec { 170 | #[rule("/*", "*/")] 171 | c1: Lexeme, 172 | 173 | #[rule("[", "]")] 174 | b1: Lexeme, 175 | 176 | #[rule("(", ")")] 177 | b2: Lexeme, 178 | 179 | #[rule(Quoted::new("'"))] 180 | q1: Lexeme, 181 | } 182 | 183 | let ctx = Context::new(); 184 | let report = 185 | ctx.new_report_with(Options { color: false, ..Default::default() }); 186 | let file = ctx 187 | .new_file_from_bytes(test.path(), test.text(), &report) 188 | .unwrap(); 189 | 190 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 191 | match file.lex(Spec::get().spec(), &report) { 192 | Ok(stream) => tokens(stream.summary()), 193 | Err(fatal) => stderr(fatal.to_string()), 194 | } 195 | } 196 | 197 | #[gilded::test("tests/ui/too_small/*.txt")] 198 | fn too_small(test: &gilded::Test) { 199 | #[ilex::spec] 200 | struct Spec { 201 | #[rule(Ident::new().prefix("%"))] 202 | i1: Lexeme, 203 | #[rule(Ident::new().prefix("$").min_len(3))] 204 | i2: Lexeme, 205 | 206 | #[rule(Bracket::rust_style("#", ("r#", "'"), ("'#", "")))] 207 | r1: Lexeme, 208 | #[rule(Bracket::rust_style("#", ("q###", "'"), ("'###", "")))] 209 | r2: Lexeme, 210 | 211 | #[rule(Bracket::cxx_style(Ident::new().min_len(1), ("R'", "("), (")", "'")))] 212 | c1: Lexeme, 213 | 
#[rule(Bracket::cxx_style(Ident::new().min_len(3), ("Q'", "("), (")", "'")))] 214 | c2: Lexeme, 215 | } 216 | 217 | let ctx = Context::new(); 218 | let report = 219 | ctx.new_report_with(Options { color: false, ..Default::default() }); 220 | let file = ctx 221 | .new_file_from_bytes(test.path(), test.text(), &report) 222 | .unwrap(); 223 | 224 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 225 | match file.lex(Spec::get().spec(), &report) { 226 | Ok(stream) => tokens(stream.summary()), 227 | Err(fatal) => stderr(fatal.to_string()), 228 | } 229 | } 230 | 231 | #[gilded::test("tests/ui/unrecognized/*.txt")] 232 | fn unrecognized(test: &gilded::Test) { 233 | #[ilex::spec] 234 | struct Spec { 235 | null: Lexeme, 236 | 237 | #[rule("[", "]")] 238 | cm: Lexeme, 239 | } 240 | 241 | let ctx = Context::new(); 242 | let report = 243 | ctx.new_report_with(Options { color: false, ..Default::default() }); 244 | let file = ctx 245 | .new_file_from_bytes(test.path(), test.text(), &report) 246 | .unwrap(); 247 | 248 | let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); 249 | match file.lex(Spec::get().spec(), &report) { 250 | Ok(stream) => tokens(stream.summary()), 251 | Err(fatal) => stderr(fatal.to_string()), 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /ilex/tests/ui/too_small/cxx_tag.stderr: -------------------------------------------------------------------------------- 1 | error: expected at least 1 character in identifier, but found none 2 | --> too_small/cxx_tag.txt:1:14 3 | | 4 | 1 | R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' 5 | | ^ expected at least 1 here 6 | | 7 | = help: this appears to be an empty identifier 8 | = note: reported at: ilex/src/rt/emit2.rs:223:14 9 | 10 | error: expected at least 3 characters in identifier, but found only 2 11 | --> too_small/cxx_tag.txt:1:38 12 | | 13 | 1 | R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' 14 | | ^^ expected at least 3 here 15 
| | 16 | = note: reported at: ilex/src/rt/emit2.rs:223:14 17 | 18 | error: aborting due to 2 errors 19 | -------------------------------------------------------------------------------- /ilex/tests/ui/too_small/cxx_tag.txt: -------------------------------------------------------------------------------- 1 | R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/too_small/ident.stderr: -------------------------------------------------------------------------------- 1 | error: expected at least 3 characters in identifier, but found only 2 2 | --> too_small/ident.txt:1:13 3 | | 4 | 1 | %foo $bar % $oo 5 | | ^^^ expected at least 3 here 6 | | 7 | = note: reported at: ilex/src/rt/emit2.rs:346:28 8 | 9 | error: aborting due to previous error 10 | -------------------------------------------------------------------------------- /ilex/tests/ui/too_small/ident.txt: -------------------------------------------------------------------------------- 1 | %foo $bar % $oo 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/too_small/rust_hashes.stderr: -------------------------------------------------------------------------------- 1 | error: unrecognized characters 2 | --> too_small/rust_hashes.txt:1:10 3 | | 4 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## 5 | | ^^^^^^ 6 | | 7 | = note: reported at: ilex/src/rt/mod.rs:36:8 8 | 9 | error: unexpected closing `'##` 10 | --> too_small/rust_hashes.txt:1:37 11 | | 12 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## 13 | | ^^^ expected to be opened by `r##'` 14 | | 15 | = note: reported at: ilex/src/rt/emit2.rs:254:22 16 | 17 | error: unrecognized characters 18 | --> too_small/rust_hashes.txt:1:30 19 | | 20 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## 21 | | ^^^^^^^ 22 | | 23 | = note: reported at: ilex/src/rt/mod.rs:36:8 24 | 25 | error: aborting due to 3 errors 26 | 
-------------------------------------------------------------------------------- /ilex/tests/ui/too_small/rust_hashes.txt: -------------------------------------------------------------------------------- 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## 2 | -------------------------------------------------------------------------------- /ilex/tests/ui/unrecognized/unrecognized.stderr: -------------------------------------------------------------------------------- 1 | error: unrecognized characters 2 | --> unrecognized.txt:1:1 3 | | 4 | 1 | multiple, null, [unrecognized], chunks!~ 5 | | ^^^^^^^^^ 6 | | 7 | = note: reported at: ilex/src/rt/mod.rs:36:8 8 | 9 | error: unrecognized character 10 | --> unrecognized.txt:1:15 11 | | 12 | 1 | multiple, null, [unrecognized], chunks!~ 13 | | ^ 14 | | 15 | = note: reported at: ilex/src/rt/mod.rs:36:8 16 | 17 | error: unrecognized characters 18 | --> unrecognized.txt:1:18 19 | | 20 | 1 | multiple, null, [unrecognized], chunks!~ 21 | | ^^^^^^^^^^^^ 22 | | 23 | = note: reported at: ilex/src/rt/mod.rs:36:8 24 | 25 | error: unrecognized character 26 | --> unrecognized.txt:1:31 27 | | 28 | 1 | multiple, null, [unrecognized], chunks!~ 29 | | ^ 30 | | 31 | = note: reported at: ilex/src/rt/mod.rs:36:8 32 | 33 | error: unrecognized characters 34 | --> unrecognized.txt:1:33 35 | | 36 | 1 | multiple, null, [unrecognized], chunks!~ 37 | | ^^^^^^^^ 38 | | 39 | = note: reported at: ilex/src/rt/mod.rs:36:8 40 | 41 | error: aborting due to 5 errors 42 | -------------------------------------------------------------------------------- /ilex/tests/ui/unrecognized/unrecognized.txt: -------------------------------------------------------------------------------- 1 | multiple, null, [unrecognized], chunks!~ 2 | -------------------------------------------------------------------------------- /proc2decl/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "proc2decl" 3 | version = "0.1.0" 4 | 
description = "Write less frickin' proc macro code" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | nu-glob = "0.101.0" 15 | unicode-xid = "0.2.6" 16 | walkdir = "2.5.0" 17 | -------------------------------------------------------------------------------- /proc2decl/README.md: -------------------------------------------------------------------------------- 1 | # proc2decl 2 | 3 | `proc2decl` exists for one reason only: because proc macros are a toxic 4 | ecosystem. 5 | 6 | Sometimes, you want to use an attribute to define a macro. Unfortunately, 7 | Rust does not support declarative macros (also called macros-by-example) 8 | for attributes, for reasons that essentially boil down to cookie-licking. 9 | 10 | This crate exists for one purpose only, and that is to facilitate writing 11 | declarative macros that an attribute converts into. 12 | 13 | ## How To Use 14 | 15 | 1. Define the macro-by-example you wish to use as the main implementation of 16 | your attribute or derive. 17 | 18 | 2. Create a proc-macro crate. This is where the documentation for your 19 | attribute will need to live. Your actual crate should depend on this 20 | crate. 21 | 22 | 3. Use `bridge!()` to define your bridge proc macros. These 23 | macro calls should be documented, since their doc comments are the ones 24 | that will appear in rustdoc for your macros. 25 | 26 | 4. `pub use` the macros in your actual crate. 27 | 28 | Proc macros suck!
29 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.83.0" 3 | profile = "default" 4 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 80 2 | tab_spaces = 2 3 | 4 | struct_lit_width = 40 5 | struct_variant_width = 40 6 | chain_width = 60 7 | use_small_heuristics = "Max" -------------------------------------------------------------------------------- /twie/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "twie" 3 | version = "0.5.0" 4 | description = "fast and compact prefix tries" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | homepage.workspace = true 9 | repository.workspace = true 10 | keywords.workspace = true 11 | license.workspace = true 12 | 13 | [dependencies] 14 | buf-trait = { version = "0.4", path = "../buf-trait" } 15 | byteyarn = { version = "0.5", path = "../byteyarn" } 16 | 17 | boxy = "0.1.0" 18 | -------------------------------------------------------------------------------- /twie/README.md: -------------------------------------------------------------------------------- 1 | # twie 2 | 3 | `twie` \[twaɪ\] - Fast, compressed prefix tries. 4 | 5 | This crate provides a `Trie` type that implements an associative container 6 | with slice-like keys. It has the following properties. 7 | 8 | - Most one-shot operations are worst-case O(n), where n is the length of 9 | the key in bytes. This may require at most 2n tree hops, but the internal 10 | representation tries to minimize this where possible. 11 | 12 | - Finding all prefixes of a string that are in the trie is also O(n). These 13 | prefixes are provided in order. 
14 | 15 | - Building a trie out of, e.g., an iterator is quadratic. 16 | 17 | - Subtries of the whole trie (i.e. all entries with some particular prefix) 18 | can be operated on like regular tries (insertion is only supported from 19 | the root, unfortunately). 20 | 21 | - Memory for storing keys is shared. 22 | 23 | - The trie's internal indexing type is configurable, which allows trading 24 | off maximum key size for shrinking the size of tree nodes, and thus, 25 | memory usage. 26 | 27 | ```rust 28 | let words = Trie::::from([ 29 | ("poise", 0), 30 | ("poison", 1), 31 | ("poisonous", 2), 32 | ("poison #9", 3), 33 | ]); 34 | 35 | assert_eq!( 36 | words.prefixes("poisonous snake").map(|(k, _)| k).collect::>(), 37 | ["poison", "poisonous"], 38 | ) 39 | ``` 40 | -------------------------------------------------------------------------------- /twie/src/impls.rs: -------------------------------------------------------------------------------- 1 | /**** TRIE ****/ 2 | 3 | use std::fmt; 4 | 5 | use buf_trait::Buf; 6 | 7 | use crate::Index; 8 | use crate::Iter; 9 | use crate::IterMut; 10 | use crate::Sub; 11 | use crate::SubMut; 12 | use crate::Trie; 13 | 14 | impl Clone for Trie 15 | where 16 | K: Buf + ?Sized, 17 | V: Clone, 18 | I: Index, 19 | { 20 | fn clone(&self) -> Self { 21 | Self { raw: self.raw.clone() } 22 | } 23 | } 24 | 25 | impl Default for Trie 26 | where 27 | K: Buf + ?Sized, 28 | I: Index, 29 | { 30 | fn default() -> Self { 31 | Self::new() 32 | } 33 | } 34 | 35 | impl fmt::Debug for Trie 36 | where 37 | K: Buf + ?Sized + fmt::Debug, 38 | V: fmt::Debug, 39 | I: Index, 40 | { 41 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 42 | fmt::Debug::fmt(&self.as_ref(), f) 43 | } 44 | } 45 | 46 | impl<'a, K, V, I> IntoIterator for &'a Trie 47 | where 48 | K: Buf + ?Sized, 49 | I: Index, 50 | { 51 | type Item = (&'a K, &'a V); 52 | type IntoIter = Iter<'a, K, V, I>; 53 | 54 | fn into_iter(self) -> Self::IntoIter { 55 | self.iter() 56 | } 57 | } 58 | 59 | 
impl<'a, K, V, I> IntoIterator for &'a mut Trie 60 | where 61 | K: Buf + ?Sized, 62 | I: Index, 63 | { 64 | type Item = (&'a K, &'a mut V); 65 | type IntoIter = IterMut<'a, K, V, I>; 66 | 67 | fn into_iter(self) -> Self::IntoIter { 68 | self.iter_mut() 69 | } 70 | } 71 | 72 | impl<'a, K, V, I> FromIterator<(&'a K, V)> for Trie 73 | where 74 | K: Buf + ?Sized, 75 | I: Index, 76 | { 77 | fn from_iter(iter: Iter) -> Self 78 | where 79 | Iter: IntoIterator, 80 | { 81 | let mut trie = Trie::new(); 82 | for (k, v) in iter { 83 | trie.insert(k, v); 84 | } 85 | trie 86 | } 87 | } 88 | 89 | impl From<[(&K, V); N]> for Trie 90 | where 91 | K: Buf + ?Sized, 92 | I: Index, 93 | { 94 | fn from(value: [(&K, V); N]) -> Self { 95 | value.into_iter().collect() 96 | } 97 | } 98 | 99 | impl From<[&K; N]> for Trie 100 | where 101 | K: Buf + ?Sized, 102 | V: Default, 103 | I: Index, 104 | { 105 | fn from(value: [&K; N]) -> Self { 106 | value.into_iter().map(|k| (k, V::default())).collect() 107 | } 108 | } 109 | 110 | /**** SUB ****/ 111 | 112 | impl<'a, K, V, I> From<&'a Trie> for Sub<'a, K, V, I> 113 | where 114 | K: Buf + ?Sized, 115 | I: Index, 116 | { 117 | fn from(value: &'a Trie) -> Self { 118 | value.as_ref() 119 | } 120 | } 121 | 122 | impl<'a, K, V, I> From> for Sub<'a, K, V, I> 123 | where 124 | K: Buf + ?Sized, 125 | I: Index, 126 | { 127 | fn from(value: SubMut<'a, K, V, I>) -> Self { 128 | Sub { raw: value.raw, node: value.node } 129 | } 130 | } 131 | 132 | impl<'a, K, V, I> From<&'a SubMut<'_, K, V, I>> for Sub<'a, K, V, I> 133 | where 134 | K: Buf + ?Sized, 135 | I: Index, 136 | { 137 | fn from(value: &'a SubMut<'_, K, V, I>) -> Self { 138 | value.as_ref() 139 | } 140 | } 141 | 142 | impl Clone for Sub<'_, K, V, I> 143 | where 144 | K: Buf + ?Sized, 145 | I: Index, 146 | { 147 | fn clone(&self) -> Self { 148 | *self 149 | } 150 | } 151 | 152 | impl Copy for Sub<'_, K, V, I> 153 | where 154 | K: Buf + ?Sized, 155 | I: Index, 156 | { 157 | } 158 | 159 | impl fmt::Debug for 
Sub<'_, K, V, I> 160 | where 161 | K: Buf + ?Sized + fmt::Debug, 162 | V: fmt::Debug, 163 | I: Index, 164 | { 165 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 166 | f.debug_map().entries(self.iter()).finish() 167 | } 168 | } 169 | 170 | impl<'a, K, V, I> IntoIterator for &'a Sub<'_, K, V, I> 171 | where 172 | K: Buf + ?Sized, 173 | I: Index, 174 | { 175 | type Item = (&'a K, &'a V); 176 | type IntoIter = Iter<'a, K, V, I>; 177 | 178 | fn into_iter(self) -> Self::IntoIter { 179 | self.iter() 180 | } 181 | } 182 | 183 | impl<'a, K, V, I> IntoIterator for Sub<'a, K, V, I> 184 | where 185 | K: Buf + ?Sized, 186 | I: Index, 187 | { 188 | type Item = (&'a K, &'a V); 189 | type IntoIter = Iter<'a, K, V, I>; 190 | 191 | fn into_iter(self) -> Self::IntoIter { 192 | self.iter() 193 | } 194 | } 195 | 196 | /**** SUB MUT ****/ 197 | 198 | impl<'a, K, V, I> From<&'a mut Trie> for SubMut<'a, K, V, I> 199 | where 200 | K: Buf + ?Sized, 201 | I: Index, 202 | { 203 | fn from(value: &'a mut Trie) -> Self { 204 | value.as_mut() 205 | } 206 | } 207 | 208 | impl<'a, K, V, I> From<&'a mut SubMut<'_, K, V, I>> for SubMut<'a, K, V, I> 209 | where 210 | K: Buf + ?Sized, 211 | I: Index, 212 | { 213 | fn from(value: &'a mut SubMut<'_, K, V, I>) -> Self { 214 | value.as_mut() 215 | } 216 | } 217 | 218 | impl fmt::Debug for SubMut<'_, K, V, I> 219 | where 220 | K: Buf + ?Sized + fmt::Debug, 221 | V: fmt::Debug, 222 | I: Index, 223 | { 224 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 225 | fmt::Debug::fmt(&self.as_ref(), f) 226 | } 227 | } 228 | 229 | impl<'a, K, V, I> IntoIterator for &'a SubMut<'_, K, V, I> 230 | where 231 | K: Buf + ?Sized, 232 | I: Index, 233 | { 234 | type Item = (&'a K, &'a V); 235 | type IntoIter = Iter<'a, K, V, I>; 236 | 237 | fn into_iter(self) -> Self::IntoIter { 238 | self.as_ref().iter() 239 | } 240 | } 241 | 242 | impl<'a, K, V, I> IntoIterator for &'a mut SubMut<'_, K, V, I> 243 | where 244 | K: Buf + ?Sized, 245 | I: Index, 246 | { 247 | 
type Item = (&'a K, &'a mut V); 248 | type IntoIter = IterMut<'a, K, V, I>; 249 | 250 | fn into_iter(self) -> Self::IntoIter { 251 | self.as_mut().iter_mut() 252 | } 253 | } 254 | 255 | impl<'a, K, V, I> IntoIterator for SubMut<'a, K, V, I> 256 | where 257 | K: Buf + ?Sized, 258 | I: Index, 259 | { 260 | type Item = (&'a K, &'a mut V); 261 | type IntoIter = IterMut<'a, K, V, I>; 262 | 263 | fn into_iter(self) -> Self::IntoIter { 264 | self.iter_mut() 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /twie/src/poison_trie.txt: -------------------------------------------------------------------------------- 1 | ┬╴[0]: "" 2 | │ ptrs: --|-/-/-/1|--|-- 3 | │ 4 | └┬╴[1]: "?" 5 | │ hi: 0x7 'p'..='\u{7f}' 6 | │ ptrs: 2/-/-/-|--|--|-- 7 | │ 8 | └┬╴[2]: "p" 9 | │ lo: 0x0 'p' 10 | │ ptrs: --|-/-/3/-|--|-- 11 | │ 12 | └┬╴[3]: "p?" 13 | │ hi: 0x6 '`'..='o' 14 | │ ptrs: --|--|--|-/-/-/4 15 | │ 16 | └┬╴[4]: "po" 17 | │ lo: 0xf 'o' 18 | │ ptrs: --|-/-/5/-|--|-- 19 | │ 20 | └┬╴[5]: "po?" 21 | │ hi: 0x6 '`'..='o' 22 | │ ptrs: --|--|-/6/-/-|-- 23 | │ 24 | └┬╴[6]: "poi" 25 | │ lo: 0x9 'i' 26 | │ ptrs: --|-/-/-/7|--|-- 27 | │ 28 | └┬╴[7]: "poi?" 29 | │ hi: 0x7 'p'..='\u{7f}' 30 | │ ptrs: -/-/-/8|--|--|-- 31 | │ 32 | └┬╴[8]: "pois" 33 | │ lo: 0x3 's' 34 | │ ptrs: --|-/-/9/-|--|-- 35 | │ 36 | └┬╴[9]: "pois?" 37 | │ hi: 0x6 '`'..='o' 38 | │ ptrs: --|-/10/-/-|--|-/-/-/12 39 | │ 40 | ├─╴[10]: "poise" 41 | │ lo: 0x5 'e' 42 | │ ptrs: --|--|--|-- 43 | │ [0]: 0x55b3382cfc10 "poise" -> 0x55b3382cfc88 0 44 | │ 45 | └┬╴[12]: "poiso" 46 | │ lo: 0xf 'o' 47 | │ ptrs: --|-/-/11/-|--|-- 48 | │ 49 | └┬╴[11]: "poiso?" 50 | │ hi: 0x6 '`'..='o' 51 | │ ptrs: --|--|--|-/-/14/- 52 | │ 53 | └┬╴[14]: "poison" 54 | │ lo: 0xe 'n' 55 | │ ptrs: -/-/15/-|-/-/13/-|--|-- 56 | │ [1]: 0x55b3382d0570 "poison" -> 0x55b3382cfc94 1 57 | │ 58 | ├┬╴[15]: "poison?" 
59 | ││ hi: 0x2 ' '..='/' 60 | ││ ptrs: 18/-/-/-|--|--|-- 61 | ││ 62 | │└─╴[18]: "poison " 63 | │ lo: 0x0 ' ' 64 | │ ptrs: --|--|--|-- 65 | │ [3]: 0x55b3382cfae0 "poison #9" -> 0x55b3382cfcac 3 66 | │ 67 | └┬╴[13]: "poison?" 68 | │ hi: 0x6 '`'..='o' 69 | │ ptrs: --|--|--|-/-/-/16 70 | │ 71 | └─╴[16]: "poisono" 72 | lo: 0xf 'o' 73 | ptrs: --|--|--|-- 74 | [2]: 0x55b3382d0570 "poisonous" -> 0x55b3382cfca0 2 75 | -------------------------------------------------------------------------------- /twie/src/raw/dump.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::fmt::Write; 3 | 4 | use buf_trait::Buf; 5 | 6 | use crate::raw::entries::Entries; 7 | use crate::raw::nodes::Node; 8 | use crate::raw::nodes::Nodes; 9 | use crate::raw::RawTrie; 10 | use crate::DebugBytes; 11 | use crate::Index; 12 | 13 | /// Dumps the contents of this trie to a string in an unspecified format. 14 | pub fn dump( 15 | trie: &RawTrie, 16 | root: Node, 17 | ) -> String { 18 | if root.is_empty() { 19 | return "".to_string(); 20 | } 21 | 22 | let mut out = String::new(); 23 | let _ignored = 24 | dump0(&mut out, &trie.nodes, &trie.data, root, None, &mut Vec::new()); 25 | 26 | out.truncate(out.trim_end().len()); 27 | out 28 | } 29 | 30 | fn dump0( 31 | out: &mut String, 32 | nodes: &Nodes, 33 | entries: &Entries, 34 | node: Node, 35 | hi: Option, 36 | bus: &mut Vec, 37 | ) -> Result<(), fmt::Error> { 38 | use boxy::Char; 39 | 40 | let is_hi = hi.is_none(); 41 | let array = match hi { 42 | Some(hi) => nodes.lo(node.ptr, hi), 43 | None => nodes.hi(node.ptr), 44 | }; 45 | 46 | let has = array.iter().any(|&x| !x.is_empty()); 47 | 48 | // All this crap is just so we get a pretty tree. 
49 | let last = bus 50 | .iter() 51 | .enumerate() 52 | .filter_map(|(i, x)| x.then_some(i + 1)) 53 | .last() 54 | .unwrap_or(0); 55 | for (i, &flag) in bus.iter().enumerate().take(last) { 56 | if flag { 57 | if i == bus.len() - 1 { 58 | write!(out, "{}", Char::right_tee(boxy::Weight::Normal))?; 59 | } else { 60 | write!(out, "{}", Char::vertical(boxy::Weight::Normal))?; 61 | } 62 | } else { 63 | write!(out, "{}", Char::empty())?; 64 | } 65 | } 66 | for i in last..bus.len() { 67 | if i == bus.len() - 1 { 68 | write!(out, "{}", Char::lower_left(boxy::Weight::Normal))?; 69 | } else { 70 | write!(out, "{}", Char::empty())?; 71 | } 72 | } 73 | if has { 74 | write!(out, "{}", Char::down_tee(boxy::Weight::Normal))?; 75 | } else { 76 | write!(out, "{}", Char::horizontal(boxy::Weight::Normal))?; 77 | } 78 | write!(out, "{}", Char::left_half(boxy::Weight::Normal))?; 79 | 80 | let new_line = |out: &mut String| { 81 | writeln!(out)?; 82 | for &flag in bus.iter().take(last) { 83 | if flag { 84 | write!(out, "{}", Char::vertical(boxy::Weight::Normal))?; 85 | } else { 86 | write!(out, "{}", Char::empty())?; 87 | } 88 | } 89 | for _ in last..bus.len() { 90 | write!(out, "{}", Char::empty())?; 91 | } 92 | if has { 93 | write!(out, "{}", Char::vertical(boxy::Weight::Normal))?; 94 | } 95 | 96 | write!(out, " ") 97 | }; 98 | 99 | write!(out, "[{}]", node.ptr.idx() * 2 + (!is_hi as usize),)?; 100 | 101 | if is_hi { 102 | let key = nodes.key(node, Some(usize::MAX)); 103 | write!( 104 | out, 105 | ": {:?}/{:?}", 106 | DebugBytes(&key[..node.depth]), 107 | DebugBytes(&key[node.depth..]), 108 | )?; 109 | } 110 | 111 | if let Some(hi) = hi { 112 | new_line(out)?; 113 | let start = hi << 4; 114 | let end = start + 0xf; 115 | let _ = write!(out, "hi: 0x{hi:x} {:?}..={:?}", start as char, end as char); 116 | } else if let Some(&ch) = nodes.key(node, None).last() { 117 | new_line(out)?; 118 | let lo = ch & 0xf; 119 | write!(out, "lo: 0x{lo:x} {:?}", ch as char)?; 120 | } 121 | 122 | 
new_line(out)?; 123 | write!(out, "ptrs: ")?; 124 | for (i, chunk) in array.chunks(4).enumerate() { 125 | if i != 0 { 126 | write!(out, "|")?; 127 | } 128 | 129 | if chunk.iter().all(|n| n.is_empty()) { 130 | write!(out, "--")?; 131 | continue; 132 | } 133 | 134 | for (i, &ptr) in chunk.iter().enumerate() { 135 | if i != 0 { 136 | write!(out, "/")?; 137 | } 138 | 139 | match usize::try_from(ptr) { 140 | Ok(idx) => { 141 | write!(out, "{}", if !is_hi { idx * 2 } else { idx * 2 + 1 }) 142 | } 143 | Err(_) => write!(out, "-"), 144 | }?; 145 | } 146 | } 147 | 148 | if is_hi { 149 | if let Some(sparse) = nodes.get(node) { 150 | if let Some((len, v)) = entries.get(sparse) { 151 | let k = nodes.key(node, Some(len)); 152 | new_line(out)?; 153 | write!(out, "[{sparse}]: {k:p} {:?} -> {v:p} {v:?}", DebugBytes(k),)?; 154 | } 155 | } 156 | } 157 | 158 | new_line(out)?; 159 | writeln!(out)?; 160 | 161 | let last = array 162 | .iter() 163 | .enumerate() 164 | .filter_map(|(i, &node)| (!node.is_empty()).then_some(i)) 165 | .last() 166 | .unwrap_or(0); 167 | 168 | bus.push(true); 169 | for (i, &n) in array.iter().enumerate() { 170 | if n.is_empty() { 171 | continue; 172 | } 173 | 174 | if i == last { 175 | bus.pop(); 176 | bus.push(false); 177 | } 178 | 179 | let (node, hi) = match hi { 180 | None => (node, Some(i as u8)), 181 | Some(hi) => { 182 | let byte = hi << 4 | (i as u8); 183 | (nodes.walk(node, &[byte]).0, None) 184 | } 185 | }; 186 | 187 | dump0(out, nodes, entries, node, hi, bus)?; 188 | } 189 | bus.pop(); 190 | 191 | Ok(()) 192 | } 193 | -------------------------------------------------------------------------------- /twie/src/raw/entries.rs: -------------------------------------------------------------------------------- 1 | use std::mem; 2 | use std::mem::ManuallyDrop; 3 | use std::mem::MaybeUninit; 4 | use std::ptr::NonNull; 5 | 6 | use crate::raw::nodes::Index; 7 | use crate::raw::nodes::OutOfIndices; 8 | use crate::raw::nodes::Ptr; 9 | 10 | /// The actual 
user-provided data stored by the trie, separate from the tree 11 | /// structure. 12 | pub struct Entries { 13 | values: StealthVec>, 14 | } 15 | 16 | // SAFETY: Although there are some sketchy functions that go & -> &mut, these 17 | // MUST NOT be called except through a &Entries that was derived from an &mut 18 | // Entries. They exist only so that iterators can vend multiple distinct 19 | // elements without making MIRI lose its mind. 20 | unsafe impl Send for Entries {} 21 | unsafe impl Sync for Entries {} 22 | 23 | pub struct Entry { 24 | /// The length of the key, which may be longer than this entry's depth in the 25 | /// trie. This entry is empty if `key_len` is Ptr::EMPTY. 26 | key_len: Ptr, 27 | 28 | /// The value itself. 29 | value: MaybeUninit, 30 | } 31 | 32 | impl Drop for Entry { 33 | fn drop(&mut self) { 34 | if !self.key_len.is_empty() { 35 | unsafe { 36 | self.value.assume_init_drop(); 37 | } 38 | } 39 | } 40 | } 41 | 42 | /// A deconstructed `Vec` that ensures we only ever manipulate the data for 43 | /// the entries vector through raw pointers, except when creating new entries. 44 | /// 45 | /// This is necessary to allow disjoint borrows of its elements without tripping 46 | /// MIRI. 47 | struct StealthVec { 48 | ptr: *mut T, 49 | cap: usize, 50 | len: usize, 51 | } 52 | 53 | impl StealthVec { 54 | fn new() -> Self { 55 | Self { 56 | ptr: NonNull::dangling().as_ptr(), 57 | cap: 0, 58 | len: 0, 59 | } 60 | } 61 | 62 | fn get(&self, entry: usize) -> &T { 63 | debug_assert!( 64 | entry < self.len, 65 | "trie: entry index {entry} out of bounds {}; this is a bug", 66 | self.len 67 | ); 68 | 69 | unsafe { &*self.ptr.add(entry) } 70 | } 71 | 72 | // SAFETY: This is has no aliasing guardrails! 73 | // 74 | // Also, calling with_entries_vec() invalidates any references created by this 75 | // function. 
76 | #[allow(clippy::mut_from_ref)] 77 | unsafe fn get_mut_may_alias(&self, entry: usize) -> &mut T { 78 | debug_assert!( 79 | entry < self.len, 80 | "trie: entry index {entry} out of bounds {}; this is a bug", 81 | self.len 82 | ); 83 | 84 | unsafe { &mut *self.ptr.add(entry) } 85 | } 86 | 87 | // SAFETY: `cb` MUST NOT panic. 88 | unsafe fn with_vec(&mut self, cb: impl FnOnce(&mut Vec)) { 89 | let mut vec = ManuallyDrop::new(unsafe { 90 | Vec::from_raw_parts(self.ptr, self.len, self.cap) 91 | }); 92 | cb(&mut vec); 93 | self.ptr = vec.as_mut_ptr(); 94 | self.cap = vec.capacity(); 95 | self.len = vec.len(); 96 | } 97 | } 98 | 99 | impl Drop for StealthVec { 100 | fn drop(&mut self) { 101 | let mut to_drop = Vec::new(); 102 | unsafe { 103 | // SAFETY: mem::swap cannot panic. We use swap instead of replace, because 104 | // dropping an intermediate Vec inside the closure could cause a panic 105 | // inside of an element's dtor. 106 | self.with_vec(|v| mem::swap(v, &mut to_drop)); 107 | } 108 | } 109 | } 110 | 111 | impl Clone for Entries { 112 | fn clone(&self) -> Self { 113 | let mut data = Self::new(); 114 | 115 | let new_entries = (0..self.values.len) 116 | .map(|i| { 117 | let e = self.values.get(i); 118 | 119 | let value = if e.key_len.is_empty() { 120 | MaybeUninit::uninit() 121 | } else { 122 | MaybeUninit::new(unsafe { e.value.assume_init_ref().clone() }) 123 | }; 124 | 125 | Entry { key_len: e.key_len, value } 126 | }) 127 | .collect(); 128 | 129 | unsafe { 130 | // The destructor of Vec cannot panic here, because it is empty. 131 | data.values.with_vec(|v| *v = new_entries); 132 | } 133 | data 134 | } 135 | } 136 | 137 | impl Entries { 138 | pub fn new() -> Self { 139 | Self { values: StealthVec::new() } 140 | } 141 | 142 | pub fn new_entry(&mut self) -> Result { 143 | let new = self.values.len; 144 | unsafe { 145 | // SAFETY: Vec::push does not panic unless we try to allocate half of 146 | // the address space, which we can assume can't happen here. 
147 | self.values.with_vec(|v| { 148 | v.push(Entry { 149 | key_len: Ptr::EMPTY, 150 | value: MaybeUninit::uninit(), 151 | }) 152 | }); 153 | } 154 | Ok(new) 155 | } 156 | 157 | /// Returns whether a value is initialized. 158 | pub fn is_init(&self, entry: usize) -> bool { 159 | !self.values.get(entry).key_len.is_empty() 160 | } 161 | 162 | /// Gets the value in `entry`, if present. 163 | pub fn get(&self, entry: usize) -> Option<(usize, &V)> { 164 | let e = self.values.get(entry); 165 | unsafe { Some((e.key_len.idx(), e.value.assume_init_ref())) } 166 | } 167 | 168 | /// Gets the value in `entry`, if present. 169 | /// 170 | /// # Safety 171 | /// 172 | /// It is the caller's responsibility to not cause aliasing hazards using 173 | /// this function. 174 | pub unsafe fn get_mut_may_alias( 175 | &self, 176 | entry: usize, 177 | ) -> Option<(usize, &mut V)> { 178 | let e = self.values.get_mut_may_alias(entry); 179 | unsafe { Some((e.key_len.idx(), e.value.assume_init_mut())) } 180 | } 181 | 182 | /// Initializes `entry` if it isn't. 183 | pub unsafe fn init( 184 | &mut self, 185 | entry: usize, 186 | key_len: usize, 187 | cb: impl FnOnce() -> V, 188 | ) -> &mut V { 189 | if self.is_init(entry) { 190 | // SAFETY: Nothing else in this code path access the entries vector. 191 | return unsafe { 192 | self.values.get_mut_may_alias(entry).value.assume_init_mut() 193 | }; 194 | } 195 | 196 | // SAFETY: cb() must be called before key_len is written to, so that the 197 | // entry is left untouched if cb() panics. 198 | let new = cb(); 199 | 200 | let e = unsafe { 201 | // SAFETY: Nothing else in this code path accesses the entries vector. 
202 | self.values.get_mut_may_alias(entry) 203 | }; 204 | e.key_len = Ptr::must(key_len); 205 | e.value.write(new) 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /twie/src/raw/iter.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use buf_trait::Buf; 4 | 5 | use crate::raw::nodes; 6 | use crate::raw::nodes::Index; 7 | use crate::raw::nodes::Node; 8 | use crate::raw::RawTrie; 9 | use crate::Sub; 10 | 11 | use super::entries::Entries; 12 | 13 | /// An iterator over all values of a [`Trie`][crate::Trie] whose keys start with 14 | /// a particular prefix. 15 | /// 16 | /// See [`Trie::prefixes()`][crate::Trie::prefixes]. 17 | pub struct Prefixes<'a, 'key, K: Buf + ?Sized, V, I: Index> { 18 | root: Node, 19 | key: &'key [u8], 20 | data: &'a Entries, 21 | prefixes: nodes::Prefixes<'a, 'key, I>, 22 | _ph: PhantomData &'a K>, 23 | } 24 | 25 | impl<'a, 'key, K: Buf + ?Sized, V, I: Index> Prefixes<'a, 'key, K, V, I> { 26 | pub(crate) fn new( 27 | trie: &'a RawTrie, 28 | root: Node, 29 | key: &'key K, 30 | ) -> Self { 31 | let key = key.as_bytes(); 32 | Self { 33 | root, 34 | key, 35 | prefixes: trie.nodes.prefixes(root, key), 36 | data: &trie.data, 37 | _ph: PhantomData, 38 | } 39 | } 40 | } 41 | 42 | impl<'a, K: Buf + ?Sized, V, I: Index> Iterator for Prefixes<'a, '_, K, V, I> { 43 | type Item = (&'a K, &'a V); 44 | 45 | fn next(&mut self) -> Option { 46 | loop { 47 | let (node, Some(next), last) = self.prefixes.next()? 
else { 48 | continue; 49 | }; 50 | let Some((key_len, value)) = self.data.get(next) else { 51 | continue; 52 | }; 53 | 54 | let key = self.prefixes.nodes.key(node, Some(key_len)); 55 | let key_rest = &key.as_bytes()[self.root.depth..]; 56 | if last && !self.key.starts_with(key_rest) { 57 | return None; 58 | } 59 | 60 | unsafe { 61 | // SAFETY: This is a trie element, and not an intermediate, so this 62 | // is key is not "torn". 63 | return Some((K::from_bytes(key), value)); 64 | } 65 | } 66 | } 67 | } 68 | 69 | /// A mutable iterator over all values of a [`Trie`][crate::Trie] whose keys 70 | /// start with a particular prefix. 71 | /// 72 | /// See [`Trie::prefixes_mut()`][crate::Trie::prefixes_mut]. 73 | pub struct PrefixesMut<'a, 'key, K: Buf + ?Sized, V, I: Index> { 74 | root: Node, 75 | key: &'key [u8], 76 | data: &'a Entries, 77 | prefixes: nodes::Prefixes<'a, 'key, I>, 78 | _ph: PhantomData (&'a K, &'a mut V)>, 79 | } 80 | 81 | impl<'a, 'key, K: Buf + ?Sized, V, I: Index> PrefixesMut<'a, 'key, K, V, I> { 82 | pub(crate) fn new( 83 | trie: &'a mut RawTrie, 84 | root: Node, 85 | key: &'key K, 86 | ) -> Self { 87 | let key = key.as_bytes(); 88 | Self { 89 | root, 90 | key, 91 | prefixes: trie.nodes.prefixes(root, key), 92 | data: &mut trie.data, 93 | _ph: PhantomData, 94 | } 95 | } 96 | } 97 | 98 | impl<'a, K: Buf + ?Sized, V, I: Index> Iterator 99 | for PrefixesMut<'a, '_, K, V, I> 100 | { 101 | type Item = (&'a K, &'a mut V); 102 | 103 | fn next(&mut self) -> Option { 104 | loop { 105 | let (node, Some(next), last) = self.prefixes.next()? else { 106 | continue; 107 | }; 108 | 109 | let entry = unsafe { 110 | // SAFETY: nodes::Prefixes will never repeat the indices it produces. 
111 | self.data.get_mut_may_alias(next) 112 | }; 113 | let Some((key_len, value)) = entry else { continue }; 114 | 115 | let key = self.prefixes.nodes.key(node, Some(key_len)); 116 | let key_rest = &key.as_bytes()[self.root.depth..]; 117 | if last && !self.key.starts_with(key_rest) { 118 | return None; 119 | } 120 | 121 | unsafe { 122 | // SAFETY: This is a trie element, and not an intermediate, so this 123 | // is key is not "torn". 124 | return Some((K::from_bytes(key), value)); 125 | } 126 | } 127 | } 128 | } 129 | 130 | /// A depth-first iterator over all nonempty subtries of a 131 | /// [`Trie`][crate::Trie]. 132 | /// 133 | /// See [`Trie::subs()`][crate::Trie::subs]. 134 | pub struct Subs<'a, K: Buf + ?Sized, V, I: Index> { 135 | raw: &'a RawTrie, 136 | dfs: nodes::Dfs<'a, I>, 137 | _ph: PhantomData (&'a K, &'a V)>, 138 | } 139 | 140 | impl<'a, K: Buf + ?Sized, V, I: Index> Subs<'a, K, V, I> { 141 | pub(crate) fn new(trie: &'a RawTrie, root: Node) -> Self { 142 | Self { 143 | raw: trie, 144 | dfs: trie.nodes.dfs(root), 145 | _ph: PhantomData, 146 | } 147 | } 148 | } 149 | 150 | impl<'a, K: Buf + ?Sized, V, I: Index> Iterator for Subs<'a, K, V, I> { 151 | type Item = Sub<'a, K, V, I>; 152 | 153 | fn next(&mut self) -> Option { 154 | if let Some(next) = self.dfs.next() { 155 | return Some(Sub { raw: self.raw, node: next }); 156 | } 157 | 158 | None 159 | } 160 | } 161 | 162 | /// An in-order iterator over all values of a [`Trie`][crate::Trie]. 163 | /// 164 | /// See [`Trie::iter()`][crate::Trie::iter]. 
165 | pub struct Iter<'a, K: Buf + ?Sized, V, I: Index> { 166 | data: &'a Entries, 167 | dfs: nodes::Dfs<'a, I>, 168 | _ph: PhantomData &'a K>, 169 | } 170 | 171 | impl<'a, K: Buf + ?Sized, V, I: Index> Iter<'a, K, V, I> { 172 | pub(crate) fn new(trie: &'a RawTrie, root: Node) -> Self { 173 | Self { 174 | dfs: trie.nodes.dfs(root), 175 | data: &trie.data, 176 | _ph: PhantomData, 177 | } 178 | } 179 | } 180 | 181 | impl<'a, K: Buf + ?Sized, V, I: Index> Iterator for Iter<'a, K, V, I> { 182 | type Item = (&'a K, &'a V); 183 | 184 | fn next(&mut self) -> Option { 185 | while let Some(next) = self.dfs.next() { 186 | let Some(idx) = self.dfs.nodes.get(next) else { 187 | continue; 188 | }; 189 | let Some((key_len, value)) = self.data.get(idx) else { 190 | continue; 191 | }; 192 | if key_len != next.depth { 193 | continue; 194 | } 195 | 196 | let key = self.dfs.nodes.key(next, Some(key_len)); 197 | unsafe { 198 | // SAFETY: This is a trie element, and not an intermediate, so this 199 | // is key is not "torn". 200 | return Some((K::from_bytes(key), value)); 201 | } 202 | } 203 | 204 | None 205 | } 206 | } 207 | 208 | /// An in-order mutable iterator over all values of a [`Trie`][crate::Trie]. 209 | /// 210 | /// See [`Trie::iter_mut()`][crate::Trie::iter_mut]. 
211 | pub struct IterMut<'a, K: Buf + ?Sized, V, I: Index> { 212 | data: &'a Entries, 213 | dfs: nodes::Dfs<'a, I>, 214 | _ph: PhantomData (&'a K, &'a mut V)>, 215 | } 216 | 217 | impl<'a, K: Buf + ?Sized, V, I: Index> IterMut<'a, K, V, I> { 218 | pub(crate) fn new(trie: &'a mut RawTrie, root: Node) -> Self { 219 | Self { 220 | dfs: trie.nodes.dfs(root), 221 | data: &mut trie.data, 222 | _ph: PhantomData, 223 | } 224 | } 225 | } 226 | 227 | impl<'a, K: Buf + ?Sized, V, I: Index> Iterator for IterMut<'a, K, V, I> { 228 | type Item = (&'a K, &'a mut V); 229 | 230 | fn next(&mut self) -> Option { 231 | while let Some(next) = self.dfs.next() { 232 | let Some(idx) = self.dfs.nodes.get(next) else { 233 | continue; 234 | }; 235 | let entry = unsafe { 236 | // SAFETY: nodes::Prefixes will never repeat the indices it produces. 237 | self.data.get_mut_may_alias(idx) 238 | }; 239 | 240 | let Some((key_len, value)) = entry else { 241 | continue; 242 | }; 243 | if key_len != next.depth { 244 | continue; 245 | } 246 | 247 | let key = self.dfs.nodes.key(next, Some(key_len)); 248 | unsafe { 249 | // SAFETY: This is a trie element, and not an intermediate, so this 250 | // is key is not "torn". 251 | return Some((K::from_bytes(key), value)); 252 | } 253 | } 254 | 255 | None 256 | } 257 | } 258 | -------------------------------------------------------------------------------- /twie/src/raw/mod.rs: -------------------------------------------------------------------------------- 1 | // Core implementation of the trie. 2 | 3 | use std::marker::PhantomData; 4 | 5 | use buf_trait::Buf; 6 | 7 | use crate::raw::entries::Entries; 8 | use crate::raw::nodes::Index; 9 | use crate::raw::nodes::Node; 10 | use crate::raw::nodes::Nodes; 11 | use crate::raw::nodes::OutOfIndices; 12 | 13 | mod dump; 14 | mod entries; 15 | 16 | pub mod iter; 17 | pub mod nodes; 18 | 19 | pub use dump::dump; 20 | 21 | /// The core trie implementation. 
22 | /// 23 | /// This type is a map from `[u8] -> Option` backed by a [`Nodes`]. But, the 24 | /// way this is realized is somewhat subtle. For a given key, call its 25 | /// *canonical* location the node you'd get from `nodes.build(nodes.root(), key)`, 26 | /// called `canon(key)`. We could put `key` at `canon(key)` and call it a day. 27 | /// For a given node, we call the key that would be there `keyat(node)`. 28 | /// 29 | /// This is wasteful: if the keys are `"foo"` and `"bar"`, there will be 30 | /// seven full `[Node]`s in the trie: 31 | /// 32 | /// ```text 33 | /// -> f -> o -> o 34 | /// -> b -> a -> r 35 | /// ``` 36 | /// 37 | /// However, these keys have no common prefix, so only the ->f and ->b links are 38 | /// actually needed. 39 | /// 40 | /// So, rather than say that `key` must be at `canon(key)`, instead we say that 41 | /// for any given `node`, if there is an entry there, then: 42 | /// - `node.key.starts_with(keyat(node))` 43 | /// - At least one of `node.key == keyat(node)` OR `node` has no children. 44 | /// 45 | /// This means that DFS-ing the trie still yields keys in lexicographic order. 46 | /// 47 | /// It may be possible to reduce the last requirement to `node.key < all_its_children`. 48 | /// This mostly preserves DFS behavior, but screws with subtries. It is unclear 49 | /// if this can be made to work. 50 | pub struct RawTrie { 51 | pub nodes: Nodes, 52 | pub data: Entries, 53 | pub _ph: PhantomData &mut K>, 54 | } 55 | 56 | impl Clone for RawTrie { 57 | fn clone(&self) -> Self { 58 | Self { 59 | nodes: self.nodes.clone(), 60 | data: self.data.clone(), 61 | _ph: PhantomData, 62 | } 63 | } 64 | } 65 | 66 | impl RawTrie { 67 | /// Creates a new trie. 68 | pub fn new() -> Self { 69 | Self { 70 | nodes: Nodes::new(), 71 | data: Entries::new(), 72 | _ph: PhantomData, 73 | } 74 | } 75 | 76 | /// Low-level mutation operation. 
77 | /// 78 | /// This operation mutates the subtree pointed to by `root` (an index into 79 | /// `hi`) and a two-part key, and returns a possibly uninitialized entry 80 | /// for the key. 81 | /// 82 | /// After this function returns, an entry will exist for `[prefix, suffix]`. 83 | /// This makes this operation a fused find/insert operation. 84 | /// 85 | /// # Safety 86 | /// 87 | /// First, `root` must be a valid `hi` index. Then, `prefix` must be 88 | /// *exactly* the prefix string for the subtrie defined by `root`. The reason 89 | /// for the two-part key is that this allows mutation through a subtrie 90 | /// reference. 91 | pub unsafe fn mutate( 92 | &mut self, 93 | root: Node, 94 | key: &[u8], 95 | ) -> Result { 96 | let insert_at = self.pre_mutate(root, key)?; 97 | 98 | if let Some(entry) = self.nodes.get(insert_at) { 99 | return Ok(entry); 100 | } 101 | 102 | let new = self.data.new_entry()?; 103 | self.nodes.set(insert_at, new); 104 | Ok(new) 105 | } 106 | 107 | /// Prepares for a mutation. 108 | /// 109 | /// This operation finds the slot at which it could place `suffix` and does 110 | /// so. 111 | pub unsafe fn pre_mutate( 112 | &mut self, 113 | root: Node, 114 | key: &[u8], 115 | ) -> Result, OutOfIndices> { 116 | // Next, we want to walk down as far as we can without mutating anything. 117 | self.nodes.init_root(); 118 | let (mut node, rest) = self.nodes.walk(root, key); 119 | let depth = node.depth; 120 | 121 | // We've hit a point at which we may need to create new nodes. Here's the 122 | // decision tree. 123 | // 124 | // 1. The value at `node` is `None`, Then, we insert at this spot. 125 | // 126 | // This case also applies if `node` is not `None` but points at an 127 | // empty slot, but since we don't support removal, this case cannot 128 | // happen. 129 | // 130 | // 2. `node.key == key`. This means `key` is present. We are done. 131 | // 132 | // 3. 
Otherwise, we have to kick the thing in this slot one level down, 133 | // andepth g 134 | // | 135 | // "fog" 136 | // 137 | // Or, in the case that we're a prefix of the node we're replacing, 138 | // 139 | // Before: After: 140 | // 141 | // "" -> f "" -> f -> o -> o -> b 142 | // | | | 143 | // "foobar" "foo"| 144 | // "foobar" 145 | 146 | let idx = self.nodes.get(node); 147 | 148 | let lookup = idx.and_then(|e| self.data.get(e).map(|(k, _)| (e, k))); 149 | let Some((entry, key_len)) = lookup else { 150 | // Case 1. 151 | if let [next, rest @ ..] = rest { 152 | node = self.nodes.build(node, &[*next])?; 153 | self.nodes.extend_key(node, rest); 154 | } 155 | return Ok(node); 156 | }; 157 | 158 | let key_rest = &self.nodes.key(node, Some(key_len))[depth..]; 159 | if key_rest == rest { 160 | // Case 2. 161 | return Ok(node); 162 | } 163 | 164 | // Case 3. 165 | let common_prefix = key_rest 166 | .iter() 167 | .zip(rest) 168 | .take_while(|(a, b)| a == b) 169 | .count(); 170 | 171 | self.nodes.clear(node); 172 | node = self.nodes.build(node, &rest[..common_prefix])?; 173 | let build_from = node; 174 | 175 | // Need to recompute key_rest here to make the borrow checker happy. 176 | let key_rest = &self.nodes.key(node, Some(key_len))[depth..]; 177 | 178 | // Note that because the keys are distinct, `key_rest.len() > common_prefix`. 179 | if let Some(&next) = key_rest.get(common_prefix) { 180 | let move_to = self.nodes.build(node, &[next])?; 181 | self.nodes.set(move_to, entry); 182 | // Don't need to call extend_key() here; by construction, the key at 183 | // the moved node is already long enough. 184 | } else { 185 | self.nodes.set(node, entry); 186 | } 187 | 188 | if let [next, rest @ ..] = &rest[common_prefix..] { 189 | node = self.nodes.build(build_from, &[*next])?; 190 | self.nodes.extend_key(node, rest); 191 | }; 192 | 193 | Ok(node) 194 | } 195 | } 196 | --------------------------------------------------------------------------------